Changeset - 26235543d64d
[Not reviewed]
default
0 2 0
Mads Kiilerich - 8 years ago 2017-09-08 04:12:51
mads@kiilerich.com
pygments: reimplement get_lem under the name get_extension_descriptions

The old implementation was cryptic and over-engineered, and reduce() is not a
builtin in Python 3.

This function works on static input, and it was verified that this
implementation returns exactly the same result as the old one. It is also
simpler and slightly more readable, in my opinion.

Inspired by a change by Lars Kruse.
2 files changed with 19 insertions and 27 deletions:
0 comments (0 inline, 0 general)
kallithea/config/conf.py
Show inline comments
 
@@ -21,25 +21,25 @@ This file was forked by the Kallithea pr
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Mar 7, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
from kallithea.lib import pygmentsutils
 

	
 

	
 
# language map is also used by whoosh indexer, which for those specified
 
# extensions will index their content
 
LANGUAGES_EXTENSIONS_MAP = pygmentsutils.get_lem()
 
LANGUAGES_EXTENSIONS_MAP = pygmentsutils.get_extension_descriptions()
 

	
 
# Whoosh index targets
 

	
 
# Extensions we want to index content of using whoosh
 
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 

	
 
# Filenames we want to index content of using whoosh
 
INDEX_FILENAMES = pygmentsutils.get_index_filenames()
 

	
 
# list of readme files to search in file tree and display in summary
 
# attached weights define the search order; lower is first
 
ALL_READMES = [
kallithea/lib/pygmentsutils.py
Show inline comments
 
@@ -18,58 +18,50 @@ kallithea.lib.pygmentsutils
 
Functions for extracting internal Pygments data.
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Jan 5, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
from collections import defaultdict
 
from itertools import ifilter
 
from string import lower
 

	
 
from pygments import lexers
 

	
 

	
 
def get_lem():
 
    """
 
    Get language extension map based on what's inside pygments lexers
 
def get_extension_descriptions():
 
    """
 
    d = defaultdict(lambda: [])
 

	
 
    def __clean(s):
 
        s = s.lstrip('*')
 
        s = s.lstrip('.')
 

	
 
        if s.find('[') != -1:
 
            exts = []
 
            start, stop = s.find('['), s.find(']')
 

	
 
            for suffix in s[start + 1:stop]:
 
                exts.append(s[:s.find('[')] + suffix)
 
            return map(lower, exts)
 
        else:
 
            return map(lower, [s])
 
    Based on what's inside pygments lexers, return a mapping from lowercase
 
    extensions to lists of very brief descriptions.
 
    """
 
    ext_descs = defaultdict(list)
 

	
 
    for lx, t in sorted(lexers.LEXERS.items()):
 
        m = map(__clean, t[-2])
 
        if m:
 
            m = reduce(lambda x, y: x + y, m)
 
            for ext in m:
 
                desc = lx.replace('Lexer', '')
 
                d[ext].append(desc)
 
        desc = lx.replace('Lexer', '')
 
        for glob in t[-2]:
 
            s = glob.lstrip('*').lstrip('.').lower()
 
            start = s.find('[')
 
            if start > -1 and s.endswith(']'):
 
                # expand trailing [] range
 
                prefix = s[:start]
 
                for char in s[start + 1:-1]:
 
                    ext_descs[prefix + char].append(desc)
 
            else:
 
                # use stripped glob as extension
 
                ext_descs[s].append(desc)
 

	
 
    return dict(d)
 
    return dict(ext_descs)
 

	
 

	
 
def get_index_filenames():
 
    """
 
    Get list of known indexable filenames from pygment lexer internals
 
    """
 

	
 
    filenames = []
 

	
 
    def likely_filename(s):
 
        return s.find('*') == -1 and s.find('[') == -1
 

	
0 comments (0 inline, 0 general)