Changeset - 200e6631787e
[Not reviewed]
beta
0 1 0
Marcin Kuzminski - 14 years ago 2011-08-27 16:55:58
marcin@python-works.com
removed buggy iteration over file nodes in whoosh indexer, this greatly improves speed of indexing.
1 file changed with 0 insertions and 3 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/indexers/daemon.py
Show inline comments
 
@@ -91,51 +91,48 @@ class WhooshIndexingDaemon(object):
 
            for repo_name, repo in self.repo_paths.items():
 
                if repo_name in repo_list:
 
                    filtered_repo_paths[repo_name] = repo
 

	
 
            self.repo_paths = filtered_repo_paths
 

	
 

	
 
        self.initial = False
 
        if not os.path.isdir(self.index_location):
 
            os.makedirs(self.index_location)
 
            log.info('Cannot run incremental index since it does not'
 
                     ' yet exist running full build')
 
            self.initial = True
 

	
 
    def get_paths(self, repo):
        """Collect the paths of all files found in the tip of ``repo``.

        Walks the ``tip`` changeset starting at ``'/'`` using the changeset's
        own ``walk`` method and joins every file node's ``path`` onto
        ``repo.path``.

        :param repo: repository object providing ``get_changeset`` and
            ``path``
        :returns: set of joined file paths; if a ``RepositoryError`` is
            raised while walking, the traceback is logged at debug level and
            whatever was collected so far (possibly nothing) is returned
        """
        index_paths_ = set()
        try:
            tip = repo.get_changeset('tip')
            # Only ``files`` is consumed at each step. The former extra loop
            # over ``dirs`` re-added this very same ``files`` list once per
            # sub-directory, which never changed the resulting set but cost
            # O(dirs * files) work on every step.
            for _topnode, _dirs, files in tip.walk('/'):
                for f in files:
                    index_paths_.add(jn(repo.path, f.path))
        except RepositoryError:
            # best effort: an unreadable repository yields a partial/empty set
            log.debug(traceback.format_exc())
        return index_paths_
 

	
 
    def get_node(self, repo, path):
        """Look up the node for ``path`` in the default changeset of ``repo``.

        ``path`` is expected to start with ``repo.path`` followed by a single
        separator character; that prefix is cut off before the lookup.

        :param repo: repository object providing ``path`` and
            ``get_changeset``
        :param path: file path prefixed with ``repo.path``
        :returns: whatever ``get_node`` of the changeset returns
        """
        # skip the repository prefix plus the one separator character after it
        prefix_length = len(repo.path) + 1
        relative_path = path[prefix_length:]
        changeset = repo.get_changeset()
        return changeset.get_node(relative_path)
 

	
 
    def get_node_mtime(self, node):
        """Return the date of the last changeset of ``node`` as a timestamp.

        :param node: object exposing ``last_changeset.date``
        :returns: result of ``mktime`` applied to that date's time tuple
        """
        changed_on = node.last_changeset.date
        time_tuple = changed_on.timetuple()
        return mktime(time_tuple)
 

	
 
    def add_doc(self, writer, path, repo, repo_name):
 
        """Adding doc to writer this function itself fetches data from
 
        the instance of vcs backend"""
 
        node = self.get_node(repo, path)
 

	
 
        #we just index the content of chosen files, and skip binary files
 
        if node.extension in INDEX_EXTENSIONS and not node.is_binary:
 

	
 
            u_content = node.content
 
            if not isinstance(u_content, unicode):
0 comments (0 inline, 0 general)