Changeset - 38a833e742ea
[Not reviewed]
default
0 1 0
Mads Kiilerich - 6 years ago 2020-03-12 17:07:28
mads@kiilerich.com
cli: fix indexing of repos where last indexed revision has been stripped
1 file changed with 5 insertions and 1 deletion:
0 comments (0 inline, 0 general)
kallithea/lib/indexers/daemon.py
Show inline comments
 
@@ -19,49 +19,49 @@ A daemon will read from task table and r
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Jan 26, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 
import os
 
import sys
 
import traceback
 
from os.path import dirname
 
from shutil import rmtree
 
from time import mktime
 

	
 
from whoosh.index import create_in, exists_in, open_dir
 
from whoosh.qparser import QueryParser
 

	
 
from kallithea.config.conf import INDEX_EXTENSIONS, INDEX_FILENAMES
 
from kallithea.lib.indexers import CHGSET_IDX_NAME, CHGSETS_SCHEMA, IDX_NAME, SCHEMA
 
from kallithea.lib.utils2 import safe_str
 
from kallithea.lib.vcs.exceptions import ChangesetError, NodeDoesNotExistError, RepositoryError
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, ChangesetError, NodeDoesNotExistError, RepositoryError
 
from kallithea.model.db import Repository
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
# Make the project's top-level folder importable. This module lives at
# kallithea/lib/indexers/daemon.py, so the folder holding the package is
# four directory levels above the symlink-resolved location of this file.
project_path = os.path.realpath(__file__)
for _level in range(4):
    project_path = dirname(project_path)
sys.path.append(project_path)


# Module-level logger, registered under the name 'whoosh_indexer'
log = logging.getLogger('whoosh_indexer')
 

	
 

	
 
class WhooshIndexingDaemon(object):
 
    """
 
    Daemon for atomic indexing jobs
 
    """
 

	
 
    def __init__(self, indexname=IDX_NAME, index_location=None,
 
                 repo_location=None, repo_list=None,
 
                 repo_update_list=None):
 
        self.indexname = indexname
 

	
 
@@ -278,49 +278,53 @@ class WhooshIndexingDaemon(object):
 
            writer_is_dirty = False
 
            try:
 
                indexed_total = 0
 
                repo_name = None
 
                for repo_name, repo in sorted(self.repo_paths.items()):
 
                    log.debug('Updating changeset index for repo %s', repo_name)
 
                    # skip indexing if there aren't any revs in the repo
 
                    num_of_revs = len(repo)
 
                    if num_of_revs < 1:
 
                        continue
 

	
 
                    qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
 
                    q = qp.parse("last:t AND %s" % repo_name)
 

	
 
                    results = searcher.search(q)
 

	
 
                    # default to scanning the entire repo
 
                    last_rev = 0
 
                    start_id = None
 

	
 
                    if len(results) > 0:
 
                        # assuming that there is only one result, if not this
 
                        # may require a full re-index.
 
                        start_id = results[0]['raw_id']
 
                        try:
 
                        last_rev = repo.get_changeset(revision=start_id).revision
 
                        except ChangesetDoesNotExistError:
 
                            log.error('previous last revision %s not found - indexing from scratch', start_id)
 
                            start_id = None
 

	
 
                    # there are new changesets to index or a new repo to index
 
                    if last_rev == 0 or num_of_revs > last_rev + 1:
 
                        # delete the docs in the index for the previous
 
                        # last changeset(s)
 
                        for hit in results:
 
                            q = qp.parse("last:t AND %s AND raw_id:%s" %
 
                                            (repo_name, hit['raw_id']))
 
                            writer.delete_by_query(q)
 

	
 
                        # index from the previous last changeset + all new ones
 
                        indexed_total += self.index_changesets(writer,
 
                                                repo_name, repo, start_id)
 
                        writer_is_dirty = True
 
                log.debug('indexed %s changesets for repo %s',
 
                          indexed_total, repo_name
 
                )
 
            finally:
 
                if writer_is_dirty:
 
                    log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
 
                    writer.commit(merge=True)
 
                    log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<')
 
                else:
 
                    log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<')
0 comments (0 inline, 0 general)