kallithea Changeset - 200e6631787e

Changeset - 200e6631787e

Parent rev.

Child rev.

[Not reviewed]

beta

0 1 0

Marcin Kuzminski - 14 years ago 2011-08-27 16:55:58
marcin@python-works.com

Removed buggy iteration over file nodes in the Whoosh indexer; this greatly improves indexing speed.

1 file changed with 0 insertions and 3 deletions:

rhodecode/lib/indexers/daemon.py

0 comments (0 inline, 0 general)

rhodecode/lib/indexers/daemon.py

➞

Show inline comments

@@ @@ -91,51 +91,48 @@ class WhooshIndexingDaemon(object): @@
             for repo_name, repo in self.repo_paths.items():
                 if repo_name in repo_list:
                     filtered_repo_paths[repo_name] = repo
             self.repo_paths = filtered_repo_paths
         self.initial = False
         if not os.path.isdir(self.index_location):
             os.makedirs(self.index_location)
             log.info('Cannot run incremental index since it does not'
                      ' yet exist running full build')
             self.initial = True
     def get_paths(self, repo):
         """Return the set of index paths for every file in ``repo`` at tip.

         Walks the ``tip`` changeset starting at ``'/'`` and, for each file
         node reported by the walk, joins the node's ``path`` onto
         ``repo.path``.

         :param repo: repository object providing ``path`` and
             ``get_changeset()``; the returned changeset must provide
             ``walk()``
         :returns: set of joined paths. If a ``RepositoryError`` is raised
             while reading the repository, the traceback is logged at debug
             level and whatever was collected so far (possibly nothing) is
             returned.
         """
         index_paths_ = set()
         try:
             tip = repo.get_changeset('tip')
             # Only ``files`` is needed from each walk step. Looping over
             # ``dirs`` and re-adding the very same ``files`` once per
             # sub-directory only repeated work, because the result is a set.
             for _topnode, _dirs, files in tip.walk('/'):
                 for f in files:
                     index_paths_.add(jn(repo.path, f.path))
         except RepositoryError:
             # Best effort: an unreadable repository contributes no further
             # paths instead of aborting the caller.
             log.debug(traceback.format_exc())
         return index_paths_
     def get_node(self, repo, path):
         """Look up the node for ``path`` in ``repo``'s default changeset.

         ``path`` is expected to begin with ``repo.path`` followed by one
         separator character; that prefix is cut off before the lookup.

         :param repo: repository object providing ``path`` and
             ``get_changeset()``
         :param path: path string prefixed with ``repo.path``
         :returns: whatever the changeset's ``get_node()`` returns
         """
         # +1 also drops the separator that follows the repository path
         prefix_length = len(repo.path) + 1
         changeset = repo.get_changeset()
         return changeset.get_node(path[prefix_length:])
     def get_node_mtime(self, node):
         """Return the date of ``node``'s last changeset as a timestamp.

         :param node: object whose ``last_changeset.date`` provides
             ``timetuple()``
         :returns: result of ``mktime()`` applied to that time tuple
         """
         changed_on = node.last_changeset.date
         return mktime(changed_on.timetuple())
     def add_doc(self, writer, path, repo, repo_name):
         """Adding doc to writer this function itself fetches data from
         the instance of vcs backend"""
         node = self.get_node(repo, path)
         #we just index the content of chosen files, and skip binary files
         if node.extension in INDEX_EXTENSIONS and not node.is_binary:
             u_content = node.content
             if not isinstance(u_content, unicode):

0 comments (0 inline, 0 general)