Changeset - f0e904651f21
[Not reviewed]
beta
0 4 0
Marcin Kuzminski - 15 years ago 2011-05-02 14:42:51
marcin@python-works.com
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
4 files changed with 59 insertions and 56 deletions:
0 comments (0 inline, 0 general)
docs/changelog.rst
Show inline comments
 
@@ -36,7 +36,7 @@ news
 
- #109 Repository can be created from external Mercurial link (aka. remote 
 
 repository), and manually updated (via pull) from admin panel
 
- beta git support - push/pull server + basic view for git repos
 
- added followers page
 
- added followers page and forks page
 

	
 
fixes
 
-----
 
@@ -54,6 +54,8 @@ fixes
 
- #150 fixes for errors on repositories mapped in db but corrupted in 
 
  filesystem
 
- fixed problem with non-ASCII characters in realm #181
 
- fixed problem with sqlite file based database connection pool
 
- whoosh indexer and code stats share the same dynamic extensions map
 

	
 
1.1.8 (**2011-04-12**)
 
======================
rhodecode/lib/__init__.py
Show inline comments
 
@@ -24,6 +24,48 @@
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 

	
 

	
 
def __get_lem():
 
    from pygments import lexers
 
    from string import lower
 
    from collections import defaultdict
 

	
 
    d = defaultdict(lambda: [])
 

	
 
    def __clean(s):
 
        s = s.lstrip('*')
 
        s = s.lstrip('.')
 

	
 
        if s.find('[') != -1:
 
            exts = []
 
            start, stop = s.find('['), s.find(']')
 

	
 
            for suffix in s[start + 1:stop]:
 
                exts.append(s[:s.find('[')] + suffix)
 
            return map(lower, exts)
 
        else:
 
            return map(lower, [s])
 

	
 
    for lx, t in sorted(lexers.LEXERS.items()):
 
        m = map(__clean, t[-2])
 
        if m:
 
            m = reduce(lambda x, y: x + y, m)
 
            for ext in m:
 
                desc = lx.replace('Lexer', '')
 
                d[ext].append(desc)
 

	
 
    return dict(d)
 

	
 
# language map is also used by whoosh indexer, which for those specified
 
# extensions will index it's content
 
LANGUAGES_EXTENSIONS_MAP = __get_lem()
 

	
 
#Additional mappings that are not present in the pygments lexers
 
# NOTE: that this will overide any mappings in LANGUAGES_EXTENSIONS_MAP
 
ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
 

	
 
LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 

	
 
def str2bool(_str):
 
    """
 
    returns True/False value from given string, it tries to translate the
rhodecode/lib/celerylib/tasks.py
Show inline comments
 
@@ -31,12 +31,12 @@ import logging
 

	
 
from time import mktime
 
from operator import itemgetter
 
from pygments import lexers
 
from string import lower
 

	
 
from pylons import config
 
from pylons.i18n.translation import _
 

	
 
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 
from rhodecode.lib.celerylib import run_task, locked_task, str2bool, \
 
    __get_lockkey, LockHeld, DaemonLock
 
from rhodecode.lib.helpers import person
 
@@ -63,41 +63,6 @@ __all__ = ['whoosh_index', 'get_commits_
 

	
 
CELERY_ON = str2bool(config['app_conf'].get('use_celery'))
 

	
 
LANGUAGES_EXTENSIONS_MAP = {}
 

	
 

	
 
def __clean(s):
 

	
 
    s = s.lstrip('*')
 
    s = s.lstrip('.')
 

	
 
    if s.find('[') != -1:
 
        exts = []
 
        start, stop = s.find('['), s.find(']')
 

	
 
        for suffix in s[start + 1:stop]:
 
            exts.append(s[:s.find('[')] + suffix)
 
        return map(lower, exts)
 
    else:
 
        return map(lower, [s])
 

	
 
for lx, t in sorted(lexers.LEXERS.items()):
 
    m = map(__clean, t[-2])
 
    if m:
 
        m = reduce(lambda x, y: x + y, m)
 
        for ext in m:
 
            desc = lx.replace('Lexer', '')
 
            if ext in LANGUAGES_EXTENSIONS_MAP:
 
                if desc not in LANGUAGES_EXTENSIONS_MAP[ext]:
 
                    LANGUAGES_EXTENSIONS_MAP[ext].append(desc)
 
            else:
 
                LANGUAGES_EXTENSIONS_MAP[ext] = [desc]
 

	
 
#Additional mappings that are not present in the pygments lexers
 
# NOTE: that this will overide any mappings in LANGUAGES_EXTENSIONS_MAP
 
ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
 

	
 
LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 

	
 

	
 
def get_session():
rhodecode/lib/indexers/__init__.py
Show inline comments
 
@@ -31,17 +31,7 @@ from os.path import dirname as dn, join 
 
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
 

	
 
from string import strip
 

	
 
from rhodecode.model import init_model
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.config.environment import load_environment
 
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 

	
 
from shutil import rmtree
 
from webhelpers.html.builder import escape
 
from vcs.utils.lazy import LazyProperty
 

	
 
from sqlalchemy import engine_from_config
 

	
 
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 
from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 
@@ -49,15 +39,18 @@ from whoosh.index import create_in, open
 
from whoosh.formats import Characters
 
from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
 

	
 
from webhelpers.html.builder import escape
 
from sqlalchemy import engine_from_config
 
from vcs.utils.lazy import LazyProperty
 

	
 
from rhodecode.model import init_model
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.config.environment import load_environment
 
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 

	
 
#EXTENSIONS WE WANT TO INDEX CONTENT OFF
 
INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
 
                    'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
 
                    'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
 
                    'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
 
                    'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
 
                    'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
 
                    'yaws']
 
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 

	
 
#CUSTOM ANALYZER wordsplit + lowercase filter
 
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 
@@ -203,8 +196,9 @@ class ResultWrapper(object):
 
        Smart function that implements chunking the content
 
        but not overlap chunks so it doesn't highlight the same
 
        close occurrences twice.
 
        @param matcher:
 
        @param size:
 
        
 
        :param matcher:
 
        :param size:
 
        """
 
        memory = [(0, 0)]
 
        for span in self.matcher.spans():
0 comments (0 inline, 0 general)