Changeset - b369bec5d468
[Not reviewed]
beta
0 1 0
Marcin Kuzminski - 14 years ago 2011-11-22 13:10:33
marcin@python-works.com
fixes issue with whoosh reindexing files that were removed or renamed
1 file changed with 3 insertions and 2 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/indexers/daemon.py
Show inline comments
 
@@ -22,49 +22,50 @@
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
import os
 
import sys
 
import logging
 
import traceback
 

	
 
from shutil import rmtree
 
from time import mktime
 

	
 
from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
#to get the rhodecode import
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 

	
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.lib import safe_unicode
 
from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
 

	
 
from vcs.exceptions import ChangesetError, RepositoryError
 
from vcs.exceptions import ChangesetError, RepositoryError, \
 
    NodeDoesNotExistError
 

	
 
from whoosh.index import create_in, open_dir
 

	
 

	
 

	
 
log = logging.getLogger('whooshIndexer')
 
# create logger
 
log.setLevel(logging.DEBUG)
 
log.propagate = False
 
# create console handler and set level to debug
 
ch = logging.StreamHandler()
 
ch.setLevel(logging.DEBUG)
 

	
 
# create formatter
 
formatter = logging.Formatter("%(asctime)s - %(name)s -"
 
                              " %(levelname)s - %(message)s")
 

	
 
# add formatter to ch
 
ch.setFormatter(formatter)
 

	
 
# add ch to logger
 
log.addHandler(ch)
 

	
 
class WhooshIndexingDaemon(object):
 
@@ -177,49 +178,49 @@ class WhooshIndexingDaemon(object):
 
        log.debug('>>> FINISHED BUILDING INDEX <<<')
 

	
 

	
 
    def update_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')
 

	
 
        idx = open_dir(self.index_location, indexname=self.indexname)
 
        # The set of all paths in the index
 
        indexed_paths = set()
 
        # The set of all paths we need to re-index
 
        to_index = set()
 

	
 
        reader = idx.reader()
 
        writer = idx.writer()
 

	
 
        # Loop over the stored fields in the index
 
        for fields in reader.all_stored_fields():
 
            indexed_path = fields['path']
 
            indexed_paths.add(indexed_path)
 

	
 
            repo = self.repo_paths[fields['repository']]
 

	
 
            try:
 
                node = self.get_node(repo, indexed_path)
 
            except ChangesetError:
 
            except (ChangesetError, NodeDoesNotExistError):
 
                # This file was deleted since it was indexed
 
                log.debug('removing from index %s' % indexed_path)
 
                writer.delete_by_term('path', indexed_path)
 

	
 
            else:
 
                # Check if this file was changed since it was indexed
 
                indexed_time = fields['modtime']
 
                mtime = self.get_node_mtime(node)
 
                if mtime > indexed_time:
 
                    # The file has changed, delete it and add it to the list of
 
                    # files to reindex
 
                    log.debug('adding to reindex list %s' % indexed_path)
 
                    writer.delete_by_term('path', indexed_path)
 
                    to_index.add(indexed_path)
 

	
 
        # Loop over the files in the filesystem
 
        # Assume we have a function that gathers the filenames of the
 
        # documents to be indexed
 
        for repo_name, repo in self.repo_paths.items():
 
            for path in self.get_paths(repo):
 
                if path in to_index or path not in indexed_paths:
 
                    # This is either a file that's changed, or a new file
 
                    # that wasn't indexed before. So index it!
 
                    self.add_doc(writer, path, repo, repo_name)
0 comments (0 inline, 0 general)