Changeset - f0e904651f21
[Not reviewed]
beta
0 4 0
Marcin Kuzminski - 15 years ago 2011-05-02 14:42:51
marcin@python-works.com
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
4 files changed with 59 insertions and 56 deletions:
0 comments (0 inline, 0 general)
docs/changelog.rst
Show inline comments
 
@@ -33,13 +33,13 @@ news
 
- fixed many issues with international characters and unicode. It uses utf8
 
  decode with replace to provide less errors even with non utf8 encoded strings
 
- #125 added API KEY access to feeds
 
- #109 Repository can be created from external Mercurial link (aka. remote 
 
  repository), and manually updated (via pull) from admin panel
 
- beta git support - push/pull server + basic view for git repos
 
- added followers page
 
- added followers page and forks page
 

	
 
fixes
 
-----
 

	
 
- fixed file browser bug, when switching into given form revision the url was 
 
  not changing
 
@@ -51,12 +51,14 @@ fixes
 
- removed issue with space inside renamed repository after deletion
 
- fixed strange issue on formencode imports
 
- fixed #126 Deleting repository on Windows, rename used incompatible chars. 
 
- #150 fixes for errors on repositories mapped in db but corrupted in 
 
  filesystem
 
- fixed problem with ascendant characters in realm #181
 
- fixed problem with sqlite file based database connection pool
 
- whoosh indexer and code stats share the same dynamic extensions map
 

	
 
1.1.8 (**2011-04-12**)
 
======================
 

	
 
news
 
----
rhodecode/lib/__init__.py
Show inline comments
 
@@ -21,12 +21,54 @@
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 

	
 

	
 
def __get_lem():
    """
    Build the map of file extensions to the pygments lexers handling them.

    Iterates over ``pygments.lexers.LEXERS`` in sorted order and, for every
    filename pattern found in the second-to-last item of each entry, derives
    the lower-cased extension(s) and records the lexer description (the
    LEXERS key with ``Lexer`` removed) under each of them.

    :returns: plain dict mapping extension -> list of lexer descriptions;
        a description is listed at most once per extension
    """
    from pygments import lexers
    from collections import defaultdict

    lem = defaultdict(list)

    def _expand(pattern):
        """
        Turn one filename pattern into a list of lower-cased extensions.

        Leading ``*`` and ``.`` characters are stripped. A single character
        class is expanded, e.g. ``*.php[345]`` -> ``php3, php4, php5``; text
        following the closing bracket is kept on every expanded extension.

        :param pattern: filename pattern string, e.g. ``*.py``
        """
        ext = pattern.lstrip('*').lstrip('.')
        start, stop = ext.find('['), ext.find(']')

        if start == -1 or stop < start:
            # no character class (or an unbalanced one): take it literally
            return [ext.lower()]

        head, tail = ext[:start], ext[stop + 1:]
        return [(head + char + tail).lower() for char in ext[start + 1:stop]]

    for lexer_name, spec in sorted(lexers.LEXERS.items()):
        desc = lexer_name.replace('Lexer', '')
        # NOTE(review): spec[-2] is presumed to be the tuple of filename
        # patterns of the lexer -- confirm against the pygments LEXERS layout
        for pattern in spec[-2]:
            for ext in _expand(pattern):
                # patterns differing only by case collapse into the same
                # extension after lower-casing; list each lexer only once
                if desc not in lem[ext]:
                    lem[ext].append(desc)

    return dict(lem)
 

	
 
# language map is also used by whoosh indexer, which for those specified
 
# extensions will index their content
 
LANGUAGES_EXTENSIONS_MAP = __get_lem()
 

	
 
#Additional mappings that are not present in the pygments lexers
 
# NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
 
ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
 

	
 
LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 

	
 
def str2bool(_str):
 
    """
 
    returns True/False value from given string, it tries to translate the
 
    string into boolean
 

	
 
    :param _str: string value to translate into boolean
rhodecode/lib/celerylib/tasks.py
Show inline comments
 
@@ -28,18 +28,18 @@ from celery.decorators import task
 
import os
 
import traceback
 
import logging
 

	
 
from time import mktime
 
from operator import itemgetter
 
from pygments import lexers
 
from string import lower
 

	
 
from pylons import config
 
from pylons.i18n.translation import _
 

	
 
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 
from rhodecode.lib.celerylib import run_task, locked_task, str2bool, \
 
    __get_lockkey, LockHeld, DaemonLock
 
from rhodecode.lib.helpers import person
 
from rhodecode.lib.smtp_mailer import SmtpMailer
 
from rhodecode.lib.utils import OrderedDict, add_cache
 
from rhodecode.model import init_model
 
@@ -60,47 +60,12 @@ except ImportError:
 

	
 
__all__ = ['whoosh_index', 'get_commits_stats',
 
           'reset_user_password', 'send_email']
 

	
 
CELERY_ON = str2bool(config['app_conf'].get('use_celery'))
 

	
 
LANGUAGES_EXTENSIONS_MAP = {}
 

	
 

	
 
def __clean(s):
 

	
 
    s = s.lstrip('*')
 
    s = s.lstrip('.')
 

	
 
    if s.find('[') != -1:
 
        exts = []
 
        start, stop = s.find('['), s.find(']')
 

	
 
        for suffix in s[start + 1:stop]:
 
            exts.append(s[:s.find('[')] + suffix)
 
        return map(lower, exts)
 
    else:
 
        return map(lower, [s])
 

	
 
for lx, t in sorted(lexers.LEXERS.items()):
 
    m = map(__clean, t[-2])
 
    if m:
 
        m = reduce(lambda x, y: x + y, m)
 
        for ext in m:
 
            desc = lx.replace('Lexer', '')
 
            if ext in LANGUAGES_EXTENSIONS_MAP:
 
                if desc not in LANGUAGES_EXTENSIONS_MAP[ext]:
 
                    LANGUAGES_EXTENSIONS_MAP[ext].append(desc)
 
            else:
 
                LANGUAGES_EXTENSIONS_MAP[ext] = [desc]
 

	
 
#Additional mappings that are not present in the pygments lexers
 
# NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
 
ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
 

	
 
LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 

	
 

	
 
def get_session():
 
    if CELERY_ON:
 
        engine = engine_from_config(config, 'sqlalchemy.db1.')
 
        init_model(engine)
rhodecode/lib/indexers/__init__.py
Show inline comments
 
@@ -28,39 +28,32 @@ import traceback
 
from os.path import dirname as dn, join as jn
 

	
 
#to get the rhodecode import
 
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
 

	
 
from string import strip
 

	
 
from rhodecode.model import init_model
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.config.environment import load_environment
 
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 

	
 
from shutil import rmtree
 
from webhelpers.html.builder import escape
 
from vcs.utils.lazy import LazyProperty
 

	
 
from sqlalchemy import engine_from_config
 

	
 
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 
from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 
from whoosh.index import create_in, open_dir
 
from whoosh.formats import Characters
 
from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
 

	
 
from webhelpers.html.builder import escape
 
from sqlalchemy import engine_from_config
 
from vcs.utils.lazy import LazyProperty
 

	
 
from rhodecode.model import init_model
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.config.environment import load_environment
 
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 

	
 
#EXTENSIONS WE WANT TO INDEX CONTENT OF
 
INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
 
                    'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
 
                    'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
 
                    'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
 
                    'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
 
                    'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
 
                    'yaws']
 
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 

	
 
#CUSTOM ANALYZER wordsplit + lowercase filter
 
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 

	
 

	
 
#INDEX SCHEMA DEFINITION
 
@@ -200,14 +193,15 @@ class ResultWrapper(object):
 

	
 
    def get_chunks(self):
 
        """
 
        Smart function that implements chunking the content
 
        but not overlap chunks so it doesn't highlight the same
 
        close occurrences twice.
 
        @param matcher:
 
        @param size:
 
        
 
        :param matcher:
 
        :param size:
 
        """
 
        memory = [(0, 0)]
 
        for span in self.matcher.spans():
 
            start = span.startchar or 0
 
            end = span.endchar or 0
 
            start_offseted = max(0, start - self.fragment_size)
0 comments (0 inline, 0 general)