kallithea Changeset - f0e904651f21

Changeset - f0e904651f21

Parent rev.

Child rev.

[Not reviewed]

beta

0 4 0

Marcin Kuzminski - 15 years ago 2011-05-02 14:42:51
marcin@python-works.com

moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map

4 files changed with 59 insertions and 56 deletions:

docs/changelog.rst

rhodecode/lib/__init__.py

rhodecode/lib/celerylib/tasks.py

rhodecode/lib/indexers/__init__.py

0 comments (0 inline, 0 general)

docs/changelog.rst

➞

Show inline comments

@@ @@ -15,66 +15,68 @@ news @@
 - implemented #89 Can setup google analytics code from settings menu
 - implemented #91 added nicer looking archive urls with more download options
   like tags, branches
 - implemented #44 into file browsing, and added follow branch option
 - implemented #84 downloads can be enabled/disabled for each repository
 - anonymous repository can be cloned without having to pass default:default
   into clone url
 - fixed #90 whoosh indexer can index chosen repositories passed on the
   command line
 - extended journal with day aggregates and paging
 - implemented #107 source code lines highlight ranges
 - implemented #93 customizable changelog on combined revision ranges -
   equivalent of githubs compare view
 - implemented #108 extended and more powerful LDAP configuration
 - implemented #56 users groups
 - major code rewrites optimized codes for speed and memory usage
 - raw and diff downloads are now in git format
 - setup command checks for write access to given path
 - fixed many issues with international characters and unicode. It uses utf8
   decode with replace to provide less errors even with non utf8 encoded strings
 - #125 added API KEY access to feeds
 - #109 Repository can be created from external Mercurial link (aka remote
   repository), and manually updated (via pull) from admin panel
 - beta git support - push/pull server + basic view for git repos
 - added followers page
+- added followers page and forks page
 fixes
 -----
 - fixed file browser bug, when switching into given form revision the url was
   not changing
 - fixed propagation to error controller on simplehg and simplegit middlewares
 - fixed error when trying to make a download on empty repository
 - fixed problem with '[' chars in commit messages in journal
 - fixed #99 Unicode errors, on file node paths with non utf-8 characters
 - journal fork fixes
 - removed issue with space inside renamed repository after deletion
 - fixed strange issue on formencode imports
 - fixed #126 Deleting repository on Windows, rename used incompatible chars.
 - #150 fixes for errors on repositories mapped in db but corrupted in
   filesystem
 - fixed problem with ascendant characters in realm #181
 - fixed problem with sqlite file based database connection pool
 - whoosh indexer and code stats share the same dynamic extensions map
 1.1.8 (**2011-04-12**)
 ======================
 news
 ----
 - improved windows support
 fixes
 -----
 - fixed #140 freeze of python dateutil library, since new version is python2.x
   incompatible
 - setup-app will check for write permission in given path
 - cleaned up license info issue #149
 - fixes for issues #137,#116 and problems with unicode and accented characters.
 - fixes crashes on gravatar, when passed in email as unicode
 - fixed tooltip flickering problems
 - fixed came_from redirection on windows
 - fixed logging modules, and sql formatters
 - windows fixes for os.kill issue #133
 - fixes path splitting for windows issues #148
 - fixed issue #143 wrong import on migration to 1.1.X

rhodecode/lib/__init__.py

➞

Show inline comments

@@ @@ -3,48 +3,90 @@ @@
     rhodecode.lib.__init__
     ~~~~~~~~~~~~~~~~~~~~~~~
     Some simple helper functions
     :created_on: Jan 5, 2011
     :author: marcink
     :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 def __get_lem():
     """
     Builds the extension -> language names map out of the pygments lexers

     :returns: dict with lowercased extension (or bare filename pattern) as
         key and list of lexer names, with the ``Lexer`` part removed, as value
     """
     from pygments import lexers
     from collections import defaultdict

     def __clean(pattern):
         # drop the leading glob part, ie. `*.py` becomes `py`
         pattern = pattern.lstrip('*').lstrip('.')
         opening = pattern.find('[')
         if opening == -1:
             return [pattern.lower()]
         # character class, ie. `php[345]` expands into php3, php4 and php5,
         # anything after the closing bracket is dropped
         closing = pattern.find(']')
         prefix = pattern[:opening]
         return [(prefix + char).lower()
                 for char in pattern[opening + 1:closing]]

     lem = defaultdict(list)
     for lexer_name, lexer_info in sorted(lexers.LEXERS.items()):
         desc = lexer_name.replace('Lexer', '')
         # second to last field of a LEXERS entry holds the filename patterns
         for pattern in lexer_info[-2]:
             for ext in __clean(pattern):
                 lem[ext].append(desc)
     return dict(lem)
 # language map is also used by whoosh indexer, which for those specified
 # extensions will index its content
 LANGUAGES_EXTENSIONS_MAP = __get_lem()
 # Additional mappings that are not present in the pygments lexers
 # NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
 # NOTE(review): the value here is a plain string while __get_lem() builds
 # lists of names - confirm that consumers of the map accept both
 ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
 LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 def str2bool(_str):
     """
     Returns True/False value from given string, it tries to translate the
     string into boolean

     :param _str: string value to translate into boolean, ``None`` gives
         False, values equal to True/False are coerced to a real boolean
     :rtype: boolean
     :returns: boolean from given string
     """
     if _str is None:
         return False
     if _str in (True, False):
         # 0/1 (and 0.0/1.0) compare equal to False/True and also end up here,
         # coerce so the result is always a real boolean as documented
         return bool(_str)
     _str = str(_str).strip().lower()
     return _str in ('t', 'true', 'y', 'yes', 'on', '1')
 def generate_api_key(username, salt=None):
     """
     Generates unique API key for given username,if salt is not given
     it'll be generated from some random string
     :param username: username as string
     :param salt: salt to hash generate KEY

rhodecode/lib/celerylib/tasks.py

➞

Show inline comments

@@ @@ -10,115 +10,80 @@ @@
     :author: marcink
     :copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from celery.decorators import task
 import os
 import traceback
 import logging
 from time import mktime
 from operator import itemgetter
 from pygments import lexers
 from string import lower
 from pylons import config
 from pylons.i18n.translation import _
 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 from rhodecode.lib.celerylib import run_task, locked_task, str2bool, \
     __get_lockkey, LockHeld, DaemonLock
 from rhodecode.lib.helpers import person
 from rhodecode.lib.smtp_mailer import SmtpMailer
 from rhodecode.lib.utils import OrderedDict, add_cache
 from rhodecode.model import init_model
 from rhodecode.model import meta
 from rhodecode.model.db import RhodeCodeUi, Statistics, Repository
 from vcs.backends import get_repo
 from sqlalchemy import engine_from_config
 # NOTE(review): presumably sets up caching from the pylons config, add_cache
 # comes from rhodecode.lib.utils - confirm there
 add_cache(config)
 try:
     import json
 except ImportError:
     # python 2.5 compatibility
     import simplejson as json
 # names exported by a star import of this module
 __all__ = ['whoosh_index', 'get_commits_stats',
            'reset_user_password', 'send_email']
 # boolean flag translated from the `use_celery` setting of app_conf
 CELERY_ON = str2bool(config['app_conf'].get('use_celery'))
 # NOTE(review): this rebinds the LANGUAGES_EXTENSIONS_MAP name imported from
 # rhodecode.lib at the top of this module - confirm which one is intended
 LANGUAGES_EXTENSIONS_MAP = {}
 def __clean(s):
     # turns a filename pattern into a list of lowercased extensions,
     # ie. `*.py` gives ['py']
     s = s.lstrip('*')
     s = s.lstrip('.')
     if s.find('[') != -1:
         # character class, every char between the brackets is appended to the
         # part before the opening bracket
         exts = []
         start, stop = s.find('['), s.find(']')
         for suffix in s[start + 1:stop]:
             exts.append(s[:s.find('[')] + suffix)
         return map(lower, exts)
     else:
         return map(lower, [s])
 # fill the map with extension -> list of lexer names taken from pygments
 for lx, t in sorted(lexers.LEXERS.items()):
     m = map(__clean, t[-2])
     if m:
         # flatten the list of lists returned by __clean
         m = reduce(lambda x, y: x + y, m)
         for ext in m:
             desc = lx.replace('Lexer', '')
             if ext in LANGUAGES_EXTENSIONS_MAP:
                 # keep every description only once per extension
                 if desc not in LANGUAGES_EXTENSIONS_MAP[ext]:
                     LANGUAGES_EXTENSIONS_MAP[ext].append(desc)
             else:
                 LANGUAGES_EXTENSIONS_MAP[ext] = [desc]
 # Additional mappings that are not present in the pygments lexers
 # NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
 ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
 LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 def get_session():
     """
     Returns result of ``meta.Session()``, when CELERY_ON is set the model is
     first initialized with an engine built from the `sqlalchemy.db1.` config
     """
     if CELERY_ON:
         init_model(engine_from_config(config, 'sqlalchemy.db1.'))
     return meta.Session()
 def get_repos_path():
     """
     Returns ``ui_value`` of the single RhodeCodeUi row stored under `/` key
     """
     root_entry = get_session().query(RhodeCodeUi)\
         .filter(RhodeCodeUi.ui_key == '/').one()
     return root_entry.ui_value
 @task(ignore_result=True)
 @locked_task
 def whoosh_index(repo_location, full_index):
     #log = whoosh_index.get_logger()
     from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
     index_location = config['index_dir']
     WhooshIndexingDaemon(index_location=index_location,
                          repo_location=repo_location, sa=get_session())\

rhodecode/lib/indexers/__init__.py

➞

Show inline comments

@@ @@ -10,75 +10,68 @@ @@
     :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import os
 import sys
 import traceback
 from os.path import dirname as dn, join as jn
 #to get the rhodecode import
 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
 from string import strip
 from rhodecode.model import init_model
 from rhodecode.model.scm import ScmModel
 from rhodecode.config.environment import load_environment
 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 from shutil import rmtree
 from webhelpers.html.builder import escape
 from vcs.utils.lazy import LazyProperty
 from sqlalchemy import engine_from_config
 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 from whoosh.index import create_in, open_dir
 from whoosh.formats import Characters
 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
 from webhelpers.html.builder import escape
 from sqlalchemy import engine_from_config
 from vcs.utils.lazy import LazyProperty
 from rhodecode.model import init_model
 from rhodecode.model.scm import ScmModel
 from rhodecode.config.environment import load_environment
 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 # extensions we want to index content of
 # NOTE(review): the literal list below is rebound by the assignment that
 # follows it, so only the LANGUAGES_EXTENSIONS_MAP keys take effect
 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
                     'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
                     'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
                     'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
                     'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
                     'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
                     'yaws']
 INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 # custom analyzer: wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 # index schema definition
 SCHEMA = Schema(owner=TEXT(),
                 repository=TEXT(stored=True),
                 path=TEXT(stored=True),
                 content=FieldType(format=Characters(ANALYZER),
                              scorable=True, stored=True),
                 modtime=STORED(), extension=TEXT(stored=True))
 IDX_NAME = 'HG_INDEX'
 # formatter and fragmenter passed to whoosh `highlight` for search results
 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 FRAGMENTER = SimpleFragmenter(200)
 class MakeIndex(BasePasterCommand):
     max_args = 1
     min_args = 1
@@ @@ -182,49 +175,50 @@ class ResultWrapper(object): @@
         return slice
     def get_full_content(self, docid):
         """
         Returns stored fields of a document extended with ``content_short``,
         ``content_short_hl`` and ``f_path`` keys

         :param docid: sequence which first item is passed to the searcher
             ``stored_fields`` and second item are the chunks passed to
             get_short_content
         """
         res = self.searcher.stored_fields(docid[0])
         full_path = res['path']
         # everything after the first occurrence of the repository name
         cut_at = full_path.find(res['repository']) + len(res['repository'])
         f_path = full_path[cut_at:].lstrip('/')
         content_short = self.get_short_content(res, docid[1])
         res.update({
             'content_short': content_short,
             'content_short_hl': self.highlight(content_short),
             'f_path': f_path,
         })
         return res
     def get_short_content(self, res, chunks):
         """
         Returns joined slices of ``res['content']``

         :param res: mapping with the ``content`` key
         :param chunks: iterable of (start, end) offsets into the content
         """
         pieces = []
         for chunk in chunks:
             pieces.append(res['content'][chunk[0]:chunk[1]])
         return ''.join(pieces)
     def get_chunks(self):
         """
         Smart function that implements chunking the content
         but not overlap chunks so it doesn't highlight the same
         close occurrences twice.

         Generator yielding (start, end) offsets, one pair for every span of
         ``self.matcher``, each widened by ``self.fragment_size`` on both
         sides.
         """
         last_end = 0
         for span in self.matcher.spans():
             span_start = span.startchar or 0
             span_end = span.endchar or 0
             chunk_start = max(0, span_start - self.fragment_size)
             chunk_end = span_end + self.fragment_size
             # never start before the end of the previously yielded chunk
             chunk_start = max(chunk_start, last_end)
             last_end = chunk_end
             yield (chunk_start, chunk_end)
     def highlight(self, content, top=5):
         """
         Returns escaped content highlighted with ``self.highlight_items``,
         or empty string when search type is other than `content`

         :param content: text to highlight
         :param top: passed as ``top`` to the whoosh highlight function
         """
         if self.search_type == 'content':
             return highlight(escape(content), self.highlight_items,
                              analyzer=ANALYZER, fragmenter=FRAGMENTER,
                              formatter=FORMATTER, top=top)
         return ''

0 comments (0 inline, 0 general)