Changeset - 8df1e9edd68f
[Not reviewed]
default
0 3 0
FUJIWARA Katsunori - 9 years ago 2017-01-30 11:09:45
foozy@lares.dti.ne.jp
indexers: use correct full repository name, which contains group name, at indexing

Before this revision, searching under the specific repository could
cause unexpected result, because repository names used for indexing didn't
contain the group name.

This issue was introduced by 8b7c0ef62427, which uses
repo.name_unicode as repository name instead of
safe_unicode(repo_name) to reduce unicode conversion cost while
repetition at indexing.

To use correct repository name at indexing, this revision replaces
repo.name_unicode by safe_unicode(repo_name). Reducing cost of repeated
unicode conversion cost while will (and should) be addressed in the
future.

This revision also adds a comment to BaseRepository.name property, to
avoid similar misunderstandings in the future.
3 files changed with 24 insertions and 2 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/indexers/daemon.py
Show inline comments
 
@@ -194,25 +194,25 @@ class WhooshIndexingDaemon(object):
 
                indexed_w_content += 1
 

	
 
        else:
 
            log.debug('    >> %s', path)
 
            # just index file name without it's content
 
            u_content = u''
 
            indexed += 1
 

	
 
        p = safe_unicode(path)
 
        writer.add_document(
 
            fileid=p,
 
            owner=unicode(repo.contact),
 
            repository_rawname=repo.name_unicode,
 
            repository_rawname=safe_unicode(repo_name),
 
            repository=safe_unicode(repo_name),
 
            path=p,
 
            content=u_content,
 
            modtime=self.get_node_mtime(node),
 
            extension=node.extension
 
        )
 
        return indexed, indexed_w_content
 

	
 
    def index_changesets(self, writer, repo_name, repo, start_rev=None):
 
        """
 
        Add all changeset in the vcs repo starting at start_rev
 
        to the index writer
 
@@ -233,25 +233,25 @@ class WhooshIndexingDaemon(object):
 
        log.debug('indexing changesets in %s starting at rev: %s',
 
                  repo_name, start_rev)
 

	
 
        indexed = 0
 
        cs_iter = repo.get_changesets(start=start_rev)
 
        total = len(cs_iter)
 
        for cs in cs_iter:
 
            log.debug('    >> %s/%s', cs, total)
 
            writer.add_document(
 
                raw_id=unicode(cs.raw_id),
 
                owner=unicode(repo.contact),
 
                date=cs._timestamp,
 
                repository_rawname=repo.name_unicode,
 
                repository_rawname=safe_unicode(repo_name),
 
                repository=safe_unicode(repo_name),
 
                author=cs.author,
 
                message=cs.message,
 
                last=cs.last,
 
                added=u' '.join([safe_unicode(node.path) for node in cs.added]).lower(),
 
                removed=u' '.join([safe_unicode(node.path) for node in cs.removed]).lower(),
 
                changed=u' '.join([safe_unicode(node.path) for node in cs.changed]).lower(),
 
                parents=u' '.join([cs.raw_id for cs in cs.parents]),
 
            )
 
            indexed += 1
 

	
 
        log.debug('indexed %d changesets for repo %s', indexed, repo_name)
kallithea/lib/vcs/backends/base.py
Show inline comments
 
@@ -88,24 +88,27 @@ class BaseRepository(object):
 

	
 
    def __ne__(self, other):
 
        return not self.__eq__(other)
 

	
 
    @LazyProperty
 
    def alias(self):
 
        for k, v in settings.BACKENDS.items():
 
            if v.split('.')[-1] == str(self.__class__.__name__):
 
                return k
 

	
 
    @LazyProperty
 
    def name(self):
 
        """
 
        Return repository name (without group name)
 
        """
 
        raise NotImplementedError
 

	
 
    @property
 
    def name_unicode(self):
 
        return safe_unicode(self.name)
 

	
 
    @LazyProperty
 
    def owner(self):
 
        raise NotImplementedError
 

	
 
    @LazyProperty
 
    def description(self):
kallithea/tests/functional/test_search_indexing.py
Show inline comments
 
@@ -116,24 +116,43 @@ class TestSearchControllerIndexing(TestC
 
        ('content', 'this_should_be_unique_content', 1),
 
        ('commit', 'this_should_be_unique_commit_log', 1),
 
        ('path', 'this_should_be_unique_filename.txt', 1),
 
    ])
 
    def test_repository_tokenization(self, reponame, searchtype, query, hit):
 
        self.log_user()
 

	
 
        q = 'repository:%s %s' % (reponame, query)
 
        response = self.app.get(url(controller='search', action='index'),
 
                                {'q': q, 'type': searchtype})
 
        response.mustcontain('>%d results' % hit)
 

	
 
    @parametrize('reponame', [
 
        (u'indexing_test'),
 
        (u'indexing_test-fork'),
 
        (u'group/indexing_test'),
 
        (u'this-is-it'),
 
    ])
 
    @parametrize('searchtype,query,hit', [
 
        ('content', 'this_should_be_unique_content', 1),
 
        ('commit', 'this_should_be_unique_commit_log', 1),
 
        ('path', 'this_should_be_unique_filename.txt', 1),
 
    ])
 
    def test_searching_under_repository(self, reponame, searchtype, query, hit):
 
        self.log_user()
 

	
 
        response = self.app.get(url(controller='search', action='index',
 
                                    repo_name=reponame),
 
                                {'q': query, 'type': searchtype})
 
        response.mustcontain('>%d results' % hit)
 

	
 
    @parametrize('searchtype,query,hit', [
 
        ('content', 'this_should_be_unique_content', 1),
 
        ('commit', 'this_should_be_unique_commit_log', 1),
 
        ('path', 'this_should_be_unique_filename.txt', 1),
 
    ])
 
    def test_repository_case_sensitivity(self, searchtype, query, hit):
 
        self.log_user()
 

	
 
        lname = u'indexing_test-foo'
 
        uname = u'indexing_test-FOO'
 

	
 
        # (1) "repository:REPONAME" condition should match against
0 comments (0 inline, 0 general)