kallithea Changeset - df96adcbb1f7

Changeset - df96adcbb1f7

Parent rev.

Child rev.

[Not reviewed]

beta

0 1 0

Marcin Kuzminski - 14 years ago 2012-03-04 20:57:42
marcin@python-works.com

code garden

1 file changed with 13 insertions and 24 deletions:

rhodecode/lib/indexers/daemon.py

0 comments (0 inline, 0 general)

rhodecode/lib/indexers/daemon.py

➞

Show inline comments

@@ @@ -27,153 +27,142 @@ import os @@
 import sys
 import logging
 import traceback
 from shutil import rmtree
 from time import mktime
 from os.path import dirname as dn
 from os.path import join as jn
 #to get the rhodecode import
 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 sys.path.append(project_path)
 from rhodecode.model.scm import ScmModel
 from rhodecode.lib import safe_unicode
 from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
     NodeDoesNotExistError
 from whoosh.index import create_in, open_dir
 log = logging.getLogger('whooshIndexer')
 # create logger
 log.setLevel(logging.DEBUG)
 log.propagate = False
 # create console handler and set level to debug
 ch = logging.StreamHandler()
 ch.setLevel(logging.DEBUG)
 # create formatter
 formatter = logging.Formatter("%(asctime)s - %(name)s -"
                               " %(levelname)s - %(message)s")
 # add formatter to ch
 ch.setFormatter(formatter)
 # add ch to logger
 log.addHandler(ch)
 log = logging.getLogger('whoosh_indexer')
 class WhooshIndexingDaemon(object):
     """
     Daemon for atomic jobs
     """
     def __init__(self, indexname=IDX_NAME, index_location=None,
                  repo_location=None, sa=None, repo_list=None):
         self.indexname = indexname
         self.index_location = index_location
         if not index_location:
             raise Exception('You have to provide index location')
         self.repo_location = repo_location
         if not repo_location:
             raise Exception('You have to provide repositories location')
         self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)
         if repo_list:
             filtered_repo_paths = {}
             for repo_name, repo in self.repo_paths.items():
                 if repo_name in repo_list:
                     filtered_repo_paths[repo_name] = repo
             self.repo_paths = filtered_repo_paths
         self.initial = False
         if not os.path.isdir(self.index_location):
             os.makedirs(self.index_location)
             log.info('Cannot run incremental index since it does not'
                      ' yet exist running full build')
             self.initial = True
     def get_paths(self, repo):
         """recursive walk in root dir and return a set of all path in that dir
         """
         recursive walk in root dir and return a set of all path in that dir
         based on repository walk function
         """
         index_paths_ = set()
         try:
             tip = repo.get_changeset('tip')
             for topnode, dirs, files in tip.walk('/'):
                 for f in files:
                     index_paths_.add(jn(repo.path, f.path))
         except RepositoryError, e:
             log.debug(traceback.format_exc())
             pass
         return index_paths_
     def get_node(self, repo, path):
         n_path = path[len(repo.path) + 1:]
         node = repo.get_changeset().get_node(n_path)
         return node
     def get_node_mtime(self, node):
         return mktime(node.last_changeset.date.timetuple())
     def add_doc(self, writer, path, repo, repo_name):
         """Adding doc to writer this function itself fetches data from
         the instance of vcs backend"""
         """
         Adding doc to writer this function itself fetches data from
         the instance of vcs backend
         """
         node = self.get_node(repo, path)
         #we just index the content of chosen files, and skip binary files
         if node.extension in INDEX_EXTENSIONS and not node.is_binary:
             u_content = node.content
             if not isinstance(u_content, unicode):
                 log.warning('  >> %s Could not get this content as unicode '
-                          'replacing with empty content', path)
+                            'replacing with empty content' % path)
                 u_content = u''
             else:
                 log.debug('    >> %s [WITH CONTENT]' % path)
         else:
             log.debug('    >> %s' % path)
             #just index file name without it's content
             u_content = u''
         writer.add_document(owner=unicode(repo.contact),
         writer.add_document(
             owner=unicode(repo.contact),
                         repository=safe_unicode(repo_name),
                         path=safe_unicode(path),
                         content=u_content,
                         modtime=self.get_node_mtime(node),
                         extension=node.extension)
             extension=node.extension
+        )
     def build_index(self):
         if os.path.exists(self.index_location):
             log.debug('removing previous index')
             rmtree(self.index_location)
         if not os.path.exists(self.index_location):
             os.mkdir(self.index_location)
         idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
         writer = idx.writer()
         for repo_name, repo in self.repo_paths.items():
             log.debug('building index @ %s' % repo.path)
             for idx_path in self.get_paths(repo):
                 self.add_doc(writer, idx_path, repo, repo_name)
         log.debug('>> COMMITING CHANGES <<')
         writer.commit(merge=True)
         log.debug('>>> FINISHED BUILDING INDEX <<<')
     def update_index(self):
         log.debug('STARTING INCREMENTAL INDEXING UPDATE')

0 comments (0 inline, 0 general)