Changeset - 26235543d64d
[Not reviewed]
default
0 2 0
Mads Kiilerich - 8 years ago 2017-09-08 04:12:51
mads@kiilerich.com
pygments: reimplement get_lem under the name get_extension_descriptions

The old implementation was cryptic and over-engineered, and reduce() is no
longer a builtin in Python 3.

This function works on static input, and it was verified that this
implementation returns exactly the same result as the old one. It is also
simpler and, in my opinion, slightly more readable.

Inspired by a change by Lars Kruse.
2 files changed with 19 insertions and 27 deletions:
0 comments (0 inline, 0 general)
kallithea/config/conf.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.config.conf
 
~~~~~~~~~~~~~~~~~~~~~
 

	
 
Various config settings for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Mar 7, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
from kallithea.lib import pygmentsutils
 

	
 

	
 
# The language map is also used by the whoosh indexer, which indexes the
# content of files having one of the extensions listed in it.
LANGUAGES_EXTENSIONS_MAP = pygmentsutils.get_extension_descriptions()

# Whoosh index targets

# Extensions we want to index content of using whoosh.
# list() yields a concrete list of the map's keys on both Python 2 and 3.
INDEX_EXTENSIONS = list(LANGUAGES_EXTENSIONS_MAP)

# Filenames we want to index content of using whoosh
INDEX_FILENAMES = pygmentsutils.get_index_filenames()
 

	
 
# Readme file names to search for in the file tree and display in the summary.
# Each entry is (path, weight); the weight defines the search order, and a
# lower weight is searched first.
ALL_READMES = [
    # repository root
    ('readme', 0),
    ('README', 0),
    ('Readme', 0),
    # doc/ directory
    ('doc/readme', 1),
    ('doc/README', 1),
    ('doc/Readme', 1),
    # docs directory, in its various capitalizations
    ('Docs/readme', 2),
    ('Docs/README', 2),
    ('Docs/Readme', 2),
    ('DOCS/readme', 2),
    ('DOCS/README', 2),
    ('DOCS/Readme', 2),
    ('docs/readme', 2),
    ('docs/README', 2),
    ('docs/Readme', 2),
]
 

	
 
# File extensions paired with weights defining the search order; a lower
# weight is searched first.
RST_EXTS = [
    ('', 0),
    ('.rst', 1), ('.rest', 1),
    ('.RST', 2), ('.REST', 2),
    ('.txt', 3), ('.TXT', 3),
]

# Markdown extensions, each listed in lower and upper case with one weight.
MARKDOWN_EXTS = [
    ('.md', 1), ('.MD', 1),
    ('.mkdn', 2), ('.MKDN', 2),
    ('.mdown', 3), ('.MDOWN', 3),
    ('.markdown', 4), ('.MARKDOWN', 4),
]

PLAIN_EXTS = [
    ('.text', 2), ('.TEXT', 2),
]

# Combined table: Markdown entries first, then reST, then plain text.
ALL_EXTS = MARKDOWN_EXTS + RST_EXTS + PLAIN_EXTS
kallithea/lib/pygmentsutils.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.pygmentsutils
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Functions for extracting internal Pygments data.
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Jan 5, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
from collections import defaultdict
 
from itertools import ifilter
 
from string import lower
 

	
 
from pygments import lexers
 

	
 

	
 
def get_extension_descriptions():
    """
    Based on what's inside pygments lexers, return a mapping from lowercase
    extensions to lists of very brief descriptions.

    Every entry of ``lexers.LEXERS`` is visited in sorted key order. The
    description is the entry's key with ``'Lexer'`` removed. Each glob found
    in the second-to-last item of the entry's value is stripped of leading
    ``*`` and ``.`` characters and lowercased; the result is used as the
    extension key.

    A glob ending in a bracket expression is expanded into one extension per
    bracketed character, e.g. ``*.php[345]`` gives ``php3``, ``php4`` and
    ``php5``.

    :returns: plain ``dict`` mapping extension strings to lists of
        description strings
    """
    ext_descs = defaultdict(list)

    for lx, t in sorted(lexers.LEXERS.items()):
        desc = lx.replace('Lexer', '')
        # t[-2] is iterated as the lexer's filename globs
        # (NOTE(review): relies on the layout of pygments' LEXERS tuples)
        for glob in t[-2]:
            s = glob.lstrip('*').lstrip('.').lower()
            start = s.find('[')
            if start > -1 and s.endswith(']'):
                # expand trailing [] range
                prefix = s[:start]
                for char in s[start + 1:-1]:
                    ext_descs[prefix + char].append(desc)
            else:
                # use stripped glob as extension
                ext_descs[s].append(desc)

    # hand out a plain dict so lookups of unknown extensions raise KeyError
    # instead of silently creating empty entries
    return dict(ext_descs)


# Backward-compatible alias: this function used to be called get_lem.
get_lem = get_extension_descriptions
 

	
 

	
 
def get_index_filenames():
    """
    Get list of known indexable filenames from pygments lexer internals

    Every entry of ``lexers.LEXERS`` is visited in sorted key order, and the
    patterns in the second-to-last item of its value are inspected. Patterns
    containing neither ``*`` nor ``[`` are kept, since they look like literal
    file names rather than globs.

    :returns: list of filename strings, in visiting order (duplicates are
        not removed)
    """
    filenames = []

    for _name, t in sorted(lexers.LEXERS.items()):
        # A generator expression replaces itertools.ifilter, which only
        # exists on Python 2.
        filenames.extend(f for f in t[-2] if '*' not in f and '[' not in f)

    return filenames
 

	
 

	
 
def get_custom_lexer(extension):
    """
    Return the custom lexer configured for ``extension`` in the rcextensions
    module, or None when no custom lexer is defined for it.

    The lexer name is looked up in the ``EXTRA_LEXERS`` mapping of
    ``kallithea.EXTENSIONS``; a missing mapping is treated as empty. A name
    that is found is resolved with ``lexers.get_lexer_by_name``.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid an import cycle - confirm.
    import kallithea

    extra_lexers = getattr(kallithea.EXTENSIONS, 'EXTRA_LEXERS', {})
    name = extra_lexers.get(extension)
    if name is not None:
        return lexers.get_lexer_by_name(name)
    return None
0 comments (0 inline, 0 general)