Files
@ 8df1e9edd68f
Branch filter:
Location: kallithea/kallithea/tests/functional/test_search_indexing.py
8df1e9edd68f
8.8 KiB
text/x-python
indexers: use correct full repository name, which contains group name, at indexing
Before this revision, searching under a specific repository could
return unexpected results, because the repository names used for indexing
didn't contain the group name.
This issue was introduced by 8b7c0ef62427, which uses
repo.name_unicode as the repository name instead of
safe_unicode(repo_name), to reduce the cost of repeated unicode
conversion at indexing.
To use the correct repository name at indexing, this revision replaces
repo.name_unicode with safe_unicode(repo_name). Reducing the cost of
repeated unicode conversion will (and should) be addressed in the
future.
This revision also adds a comment to BaseRepository.name property, to
avoid similar misunderstandings in the future.
Before this revision, searching under a specific repository could
return unexpected results, because the repository names used for indexing
didn't contain the group name.
This issue was introduced by 8b7c0ef62427, which uses
repo.name_unicode as the repository name instead of
safe_unicode(repo_name), to reduce the cost of repeated unicode
conversion at indexing.
To use the correct repository name at indexing, this revision replaces
repo.name_unicode with safe_unicode(repo_name). Reducing the cost of
repeated unicode conversion will (and should) be addressed in the
future.
This revision also adds a comment to BaseRepository.name property, to
avoid similar misunderstandings in the future.
import mock
from kallithea import CONFIG
from kallithea.config.conf import INDEX_FILENAMES
from kallithea.model.meta import Session
from kallithea.model.repo import RepoModel
from kallithea.model.repo_group import RepoGroupModel
from kallithea.tests.base import *
from kallithea.tests.fixture import create_test_index, Fixture
# Shared test helper: used below to create repository groups, repositories,
# forks and commits for the indexing tests.
fixture = Fixture()
def init_indexing_test(repo):
    """Commit one new file with unique path, content and message to ``repo``.

    The file name, file content and commit message each carry a distinct
    ``this_should_be_unique_*`` token, so a search for any of them can be
    checked against an exact hit count.

    :param repo: repository object; only its ``repo_name`` attribute is read.
    """
    # The value returned by commit_change() is not needed here, so it is
    # intentionally not bound to a local name.
    fixture.commit_change(repo.repo_name,
                          filename='this_should_be_unique_filename.txt',
                          content='this_should_be_unique_content\n',
                          message='this_should_be_unique_commit_log',
                          vcs_type='hg',
                          newfile=True)
def init_stopword_test(repo):
    """Add two commits to ``repo`` built from short, common words.

    The first commit adds ``this/is/it``; the second adds ``join.us`` on top
    of it, authored by ``this is it <this-is-it@foo.bar.com>``. Both files
    contain ``def test``. (Presumably these names are chosen because the
    individual words may be dropped as stopwords by the indexer -- confirm
    against the analyzer configuration.)

    :param repo: repository object; only its ``repo_name`` attribute is read.
    """
    first = fixture.commit_change(repo.repo_name,
                                  filename='this/is/it',
                                  content='def test\n',
                                  message='bother to ask where - in folder',
                                  vcs_type='hg',
                                  newfile=True)

    # Second commit is stacked on the first one via ``parent``; its own
    # return value is not used afterwards.
    fixture.commit_change(repo.repo_name,
                          filename='join.us',
                          content='def test\n',
                          message='bother to ask where - top level',
                          author='this is it <this-is-it@foo.bar.com>',
                          vcs_type='hg',
                          parent=first,
                          newfile=True)
# Repositories created (in this order) by
# TestSearchControllerIndexing.setup_class and deleted in reverse order by
# teardown_class. Each entry is a 3-tuple:
#   - reponame:  full repository name, including the group prefix if any
#   - init func or fork base: a callable is invoked with the newly created
#     repository; anything else is treated as the name of the repository to
#     fork from
#   - groupname: repository group to create the repository in, or None
repos = [
    # reponame, init func or fork base, groupname
    (u'indexing_test', init_indexing_test, None),
    (u'indexing_test-fork', u'indexing_test', None),
    (u'group/indexing_test', u'indexing_test', u'group'),
    (u'this-is-it', u'indexing_test', None),
    (u'indexing_test-foo', u'indexing_test', None),
    (u'indexing_test-FOO', u'indexing_test', None),
    (u'stopword_test', init_stopword_test, None),
]
# map: name => id
# repoids is keyed by reponame, groupids by groupname; both are filled in
# setup_class and read back in teardown_class.
repoids = {}
groupids = {}
def rebuild_index(full_index):
    """Rebuild the test search index with indexer debug logging disabled.

    ``log.debug`` of ``kallithea.lib.indexers.daemon`` is replaced by a
    no-op for the duration of the rebuild, regardless of the logging level.
    The more revisions the managed repositories have, the more memory is
    needed to capture that debug output, and running out of memory (ENOMEM
    when forking "git") would prevent the index from being rebuilt and make
    subsequent tests fail unintentionally.
    (FYI, ENOMEM was observed at forking "git" with python 2.7.3,
    Linux 3.2.78-1 x86_64, 3GB memory, and no ulimit configuration for
    memory.)

    :param full_index: passed through unchanged as the ``full_index``
        keyword of ``create_test_index``.
    """
    def _discard(*args, **kwargs):
        # Accept and ignore whatever the daemon tries to log.
        return None

    silenced_debug = mock.patch('kallithea.lib.indexers.daemon.log.debug',
                                _discard)
    with silenced_debug:
        create_test_index(TESTS_TMP_PATH, CONFIG, full_index=full_index)
class TestSearchControllerIndexing(TestController):
    """Functional tests of the search controller on specially indexed repos.

    ``setup_class`` creates the repositories described by the module-level
    ``repos`` list and indexes them; ``teardown_class`` deletes them again
    and rebuilds the index from scratch.
    """

    # (searchtype, query, expected number of results) for the unique file
    # committed by init_indexing_test(); shared by several tests below.
    _unique_cases = [
        ('content', 'this_should_be_unique_content', 1),
        ('commit', 'this_should_be_unique_commit_log', 1),
        ('path', 'this_should_be_unique_filename.txt', 1),
    ]

    @classmethod
    def setup_class(cls):
        """Create groups and repositories from ``repos``, then index them."""
        for name, origin, group_name in repos:
            # Create each repository group only once.
            if group_name and group_name not in groupids:
                new_group = fixture.create_repo_group(group_name)
                groupids[group_name] = new_group.group_id

            if not callable(origin):
                # ``origin`` names the repository to fork from.
                created = fixture.create_fork(origin, name,
                                              repo_group=group_name)
            else:
                # ``origin`` is an init function populating a fresh repo.
                created = fixture.create_repo(name, repo_group=group_name)
                origin(created)
            repoids[name] = created.repo_id

        # treat "it" as indexable filename
        indexable_names = list(INDEX_FILENAMES) + ['it']
        patched_names = mock.patch(
            'kallithea.lib.indexers.daemon.INDEX_FILENAMES', indexable_names)
        with patched_names:
            # only for newly added repos
            rebuild_index(full_index=False)

    @classmethod
    def teardown_class(cls):
        """Delete everything created by ``setup_class`` and reindex fully."""
        # delete in reversed order, to delete fork destination at first
        for name, _origin, _group_name in reversed(repos):
            RepoModel().delete(repoids[name])

        # Groups are removed only after all repositories are gone; pop()
        # ensures a group shared by several repos is deleted once.
        for _name, _origin, group_name in reversed(repos):
            if group_name not in groupids:
                continue
            group_model = RepoGroupModel()
            group_model.delete(groupids.pop(group_name), force_delete=True)

        Session().commit()
        Session.remove()

        # rebuild fully for subsequent tests
        rebuild_index(full_index=True)

    @parametrize('reponame', [
        u'indexing_test',
        u'indexing_test-fork',
        u'group/indexing_test',
        u'this-is-it',
        u'*-fork',
        u'group/*',
    ])
    @parametrize('searchtype,query,hit', _unique_cases)
    def test_repository_tokenization(self, reponame, searchtype, query, hit):
        """A ``repository:`` condition in the query selects the given repo."""
        self.log_user()
        full_query = 'repository:%s %s' % (reponame, query)
        search_url = url(controller='search', action='index')
        page = self.app.get(search_url,
                            {'q': full_query, 'type': searchtype})
        expected = '>%d results' % hit
        page.mustcontain(expected)

    @parametrize('reponame', [
        u'indexing_test',
        u'indexing_test-fork',
        u'group/indexing_test',
        u'this-is-it',
    ])
    @parametrize('searchtype,query,hit', _unique_cases)
    def test_searching_under_repository(self, reponame, searchtype, query, hit):
        """Searching via a repository-specific URL yields that repo's hits."""
        self.log_user()
        search_url = url(controller='search', action='index',
                         repo_name=reponame)
        page = self.app.get(search_url, {'q': query, 'type': searchtype})
        expected = '>%d results' % hit
        page.mustcontain(expected)

    @parametrize('searchtype,query,hit', _unique_cases)
    def test_repository_case_sensitivity(self, searchtype, query, hit):
        """Check case handling of repository names in both search modes."""
        self.log_user()

        lower_name = u'indexing_test-foo'
        upper_name = u'indexing_test-FOO'

        # (1) "repository:REPONAME" condition should match against
        # repositories case-insensitively, so both repositories are hit
        global_query = 'repository:%s %s' % (lower_name, query)
        global_page = self.app.get(url(controller='search', action='index'),
                                   {'q': global_query, 'type': searchtype})
        global_page.mustcontain('>%d results' % (hit * 2))

        # (2) on the other hand, searching under the specific
        # repository should return results only for that repository,
        # even if specified name matches against another repository
        # case-insensitively.
        scoped_url = url(controller='search', action='index',
                         repo_name=upper_name)
        scoped_page = self.app.get(scoped_url,
                                   {'q': query, 'type': searchtype})
        scoped_page.mustcontain('>%d results' % hit)

        # confirm that there is no matching against lower name repository
        assert upper_name in scoped_page
        assert lower_name not in scoped_page

    @parametrize('searchtype,query,hit', [
        ('content', 'path:this/is/it def test', 1),
        ('commit', 'added:this/is/it bother to ask where', 1),
        # this condition matches against files below, because
        # "path:" condition is also applied on "repository path".
        # - "this/is/it" in "stopword_test" repo
        # - "this_should_be_unique_filename.txt" in "this-is-it" repo
        ('path', 'this/is/it', 2),
        ('content', 'extension:us', 1),
        ('path', 'extension:us', 1),
    ])
    def test_filename_stopword(self, searchtype, query, hit):
        """Paths and extensions made of short common words are searchable."""
        search_url = url(controller='search', action='index')
        page = self.app.get(search_url, {'q': query, 'type': searchtype})
        expected = '>%d results' % hit
        page.mustcontain(expected)

    @parametrize('searchtype,query,hit', [
        # "owner:" conditions on files: no result expected
        ('content', 'owner:"this is it"', 0),
        ('content', 'owner:this-is-it', 0),
        ('path', 'owner:"this is it"', 0),
        ('path', 'owner:this-is-it', 0),
        # "owner:" conditions on revisions: no result expected
        ('commit', 'owner:"this is it"', 0),
        ('commit', 'owner:"this-is-it"', 0),
        # matching against only 1 revision
        ('commit', 'author:"this is it"', 1),
        ('commit', 'author:"this-is-it"', 1),
    ])
    def test_mailaddr_stopword(self, searchtype, query, hit):
        """Owner/author conditions built from short common words."""
        search_url = url(controller='search', action='index')
        page = self.app.get(search_url, {'q': query, 'type': searchtype})
        expected = '>%d results' % hit
        page.mustcontain(expected)
|