diff --git a/rhodecode/lib/indexers/__init__.py b/rhodecode/lib/indexers/__init__.py --- a/rhodecode/lib/indexers/__init__.py +++ b/rhodecode/lib/indexers/__init__.py @@ -1,26 +1,28 @@ +import os +import sys +import traceback from os.path import dirname as dn, join as jn + +#to get the rhodecode import +sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) + +from rhodecode.model import init_model +from rhodecode.model.scm import ScmModel from rhodecode.config.environment import load_environment -from rhodecode.model.hg_model import HgModel +from rhodecode.lib.utils import BasePasterCommand, Command, add_cache + from shutil import rmtree from webhelpers.html.builder import escape from vcs.utils.lazy import LazyProperty +from sqlalchemy import engine_from_config + from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter from whoosh.fields import TEXT, ID, STORED, Schema, FieldType from whoosh.index import create_in, open_dir from whoosh.formats import Characters -from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter - -import os -import sys -import traceback +from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter -#to get the rhodecode import -sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) - - -#LOCATION WE KEEP THE INDEX -IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index') #EXTENSIONS WE WANT TO INDEX CONTENT OFF INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c', @@ -45,9 +47,58 @@ SCHEMA = Schema(owner=TEXT(), IDX_NAME = 'HG_INDEX' -FORMATTER = HtmlFormatter('span', between='\n...\n') +FORMATTER = HtmlFormatter('span', between='\n...\n') FRAGMENTER = SimpleFragmenter(200) - + + +class MakeIndex(BasePasterCommand): + + max_args = 1 + min_args = 1 + + usage = "CONFIG_FILE" + summary = "Creates index for full text search given configuration file" + group_name = "RhodeCode" + takes_config_file = -1 + parser = Command.standard_parser(verbose=True) + + def command(self): + + from pylons import config + add_cache(config) + engine = engine_from_config(config, 'sqlalchemy.db1.') + init_model(engine) + + index_location = config['index_dir'] + repo_location = self.options.repo_location + + #====================================================================== + # WHOOSH DAEMON + #====================================================================== + from rhodecode.lib.pidlock import LockHeld, DaemonLock + from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon + try: + l = DaemonLock() + WhooshIndexingDaemon(index_location=index_location, + repo_location=repo_location)\ + .run(full_index=self.options.full_index) + l.release() + except LockHeld: + sys.exit(1) + + def update_parser(self): + self.parser.add_option('--repo-location', + action='store', + dest='repo_location', + help="Specifies repositories location to index REQUIRED", + ) + self.parser.add_option('-f', + action='store_true', + dest='full_index', + help="Specifies that index should be made full i.e" + " destroy old and build from scratch", + default=False) + class ResultWrapper(object): def __init__(self, search_type, searcher, matcher, highlight_items): self.search_type = search_type @@ -55,7 +106,7 @@ class ResultWrapper(object): self.matcher = matcher self.highlight_items = highlight_items self.fragment_size = 200 / 2 - + @LazyProperty def doc_ids(self): docs_id = [] @@ -64,8 +115,8 @@ class ResultWrapper(object): chunks = [offsets for offsets in self.get_chunks()] docs_id.append([docnum, chunks]) self.matcher.next() - return docs_id - + return docs_id + def __str__(self): return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids)) @@ -91,32 +142,32 @@ class ResultWrapper(object): slice = [] for docid in self.doc_ids[i:j]: slice.append(self.get_full_content(docid)) - return slice - + return slice + def get_full_content(self, docid): res = self.searcher.stored_fields(docid[0]) f_path = res['path'][res['path'].find(res['repository']) \ + len(res['repository']):].lstrip('/') - + content_short = self.get_short_content(res, docid[1]) res.update({'content_short':content_short, 'content_short_hl':self.highlight(content_short), 'f_path':f_path}) - - return res - + + return res + def get_short_content(self, res, chunks): - + return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks]) - + def get_chunks(self): """ Smart function that implements chunking the content but not overlap chunks so it doesn't highlight the same close occurrences twice. - :param matcher: - :param size: + @param matcher: + @param size: """ memory = [(0, 0)] for span in self.matcher.spans(): @@ -124,12 +175,12 @@ class ResultWrapper(object): end = span.endchar or 0 start_offseted = max(0, start - self.fragment_size) end_offseted = end + self.fragment_size - + if start_offseted < memory[-1][1]: start_offseted = memory[-1][1] - memory.append((start_offseted, end_offseted,)) - yield (start_offseted, end_offseted,) - + memory.append((start_offseted, end_offseted,)) + yield (start_offseted, end_offseted,) + def highlight(self, content, top=5): if self.search_type != 'content': return '' @@ -139,4 +190,4 @@ class ResultWrapper(object): fragmenter=FRAGMENTER, formatter=FORMATTER, top=top) - return hl + return hl