Changeset - e6224a7c3d4e
[Not reviewed]
default
0 2 1
FUJIWARA Katsunori - 9 years ago 2017-01-22 18:17:38
foozy@lares.dti.ne.jp
tests: introduce more test coverage of whoosh filtering

This has been extracted from other changesets by Mads Kiilerich to establish a
test baseline so we clearly can see what the following fixes are fixing.

Some of these tests will thus demonstrate bad behaviour - that will be fixed
later.
3 files changed with 262 insertions and 16 deletions:
0 comments (0 inline, 0 general)
kallithea/tests/fixtures/journal_dump.csv
Show inline comments
 
@@ -2033,3 +2033,5 @@ user_log_id,user_id,username,repository_
 
2097,5,natosha.bard,414,natosha-repo1-fork,"",user_created_fork:natosha-repo1-fork,2012-12-05 16:47:24.113866
 
2098,5,natosha.bard,414,natosha-repo1-fork,62.116.219.97,"push:a25c825fd81d069596d614efcf92505aed46227a,ac513595518923aca8b39f0a1c33c4c6e0f9d83a,d395e22e8e16373a1fffbe66b322581d69a7db17",2012-12-05 16:47:44.701535
 
2099,2,admin,38,code-review-test,"",user_commented_pull_request:73,2012-12-05 17:23:18.059481
 
2100,390,this-is-it,415,this,"",user_created_repo,2017-01-22 00:00:00.000000
 
2101,390,this-is-it,416,this/is-it,"",user_created_repo,2017-01-22 00:00:01.000000
kallithea/tests/functional/test_admin.py
Show inline comments
 
@@ -56,94 +56,140 @@ class TestAdminController(TestController
 
    def test_filter_all_entries(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',))
 
        response.mustcontain('2034 Entries')
 
        response.mustcontain(' 2036 Entries')
 

	
 
    def test_filter_journal_filter_exact_match_on_repository(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:xxx'))
 
        response.mustcontain('3 Entries')
 
        response.mustcontain(' 3 Entries')
 

	
 
    def test_filter_journal_filter_exact_match_on_repository_CamelCase(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:XxX'))
 
        response.mustcontain('3 Entries')
 
        response.mustcontain(' 3 Entries')
 

	
 
    def test_filter_journal_filter_wildcard_on_repository(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:*test*'))
 
        response.mustcontain('862 Entries')
 
        response.mustcontain(' 862 Entries')
 

	
 
    def test_filter_journal_filter_prefix_on_repository(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:test*'))
 
        response.mustcontain('257 Entries')
 
        response.mustcontain(' 257 Entries')
 

	
 
    def test_filter_journal_filter_prefix_on_repository_CamelCase(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:Test*'))
 
        response.mustcontain('257 Entries')
 
        response.mustcontain(' 257 Entries')
 

	
 
    def test_filter_journal_filter_prefix_on_repository_and_user(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:test* AND username:demo'))
 
        response.mustcontain('130 Entries')
 
        response.mustcontain(' 130 Entries')
 

	
 
    def test_filter_journal_filter_prefix_on_repository_or_other_repo(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='repository:test* OR repository:xxx'))
 
        response.mustcontain('260 Entries')  # 257 + 3
 
        response.mustcontain(' 260 Entries')  # 257 + 3
 

	
 
    def test_filter_journal_filter_exact_match_on_username(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='username:demo'))
 
        response.mustcontain('1087 Entries')
 
        response.mustcontain(' 1087 Entries')
 

	
 
    def test_filter_journal_filter_exact_match_on_username_camelCase(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='username:DemO'))
 
        response.mustcontain('1087 Entries')
 
        response.mustcontain(' 1087 Entries')
 

	
 
    def test_filter_journal_filter_wildcard_on_username(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='username:*test*'))
 
        response.mustcontain('100 Entries')
 
        response.mustcontain(' 100 Entries')
 

	
 
    def test_filter_journal_filter_prefix_on_username(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='username:demo*'))
 
        response.mustcontain('1101 Entries')
 
        response.mustcontain(' 1101 Entries')
 

	
 
    def test_filter_journal_filter_prefix_on_user_or_other_user(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='username:demo OR username:volcan'))
 
        response.mustcontain('1095 Entries')  # 1087 + 8
 
        response.mustcontain(' 1095 Entries')  # 1087 + 8
 

	
 
    def test_filter_journal_filter_wildcard_on_action(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='action:*pull_request*'))
 
        response.mustcontain('187 Entries')
 
        response.mustcontain(' 187 Entries')
 

	
 
    def test_filter_journal_filter_on_date(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='date:20121010'))
 
        response.mustcontain('47 Entries')
 
        response.mustcontain(' 47 Entries')
 

	
 
    def test_filter_journal_filter_on_date_2(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter='date:20121020'))
 
        response.mustcontain('17 Entries')
 
        response.mustcontain(' 17 Entries')
 

	
 
    @parametrize('filter,hit', [
 
        #### "repository:" filtering
 
        # "/" is used for grouping
 
        ('repository:group/test', 0),
 
        # "-" is often used for "-fork"
 
        ('repository:fork-test1', 0),
 
        # using "stop words"
 
        ('repository:this', 2036),
 
        ('repository:this/is-it', 2036),
 

	
 
        ## additional tests to quickly find out regression in the future
 
        ## (and check case-insensitive search, too)
 
        # non-ascii character "." and "-"
 
        ('repository:TESTIES1.2.3', 4),
 
        ('repository:test_git_repo', 2),
 
        # combination with wildcard "*"
 
        ('repository:GROUP/*', 182),
 
        ('repository:*/test', 7),
 
        ('repository:fork-*', 273),
 
        ('repository:*-Test1', 5),
 

	
 
        #### "username:" filtering
 
        # "-" is valid character
 
        ('username:peso-xxx', 0),
 
        # using "stop words"
 
        ('username:this-is-it', 2036),
 

	
 
        ## additional tests to quickly find out regression in the future
 
        ## (and check case-insensitive search, too)
 
        # non-ascii character "." and "-"
 
        ('username:ADMIN_xanroot', 6),
 
        ('username:robert.Zaremba', 3),
 
        # combination with wildcard "*"
 
        ('username:THIS-*', 2),
 
        ('username:*-IT', 2),
 
    ])
 
    def test_filter_journal_filter_tokenization(self, filter, hit):
 
        self.log_user()
 

	
 
        response = self.app.get(url(controller='admin/admin', action='index',
 
                                    filter=filter))
 
        if hit != 1:
 
            response.mustcontain(' %s Entries' % hit)
 
        else:
 
            response.mustcontain(' 1 Entry')
kallithea/tests/functional/test_search_indexing.py
Show inline comments
 
new file 100644
 
import mock
 

	
 
from kallithea import CONFIG
 
from kallithea.config.conf import INDEX_FILENAMES
 
from kallithea.model.meta import Session
 
from kallithea.model.repo import RepoModel
 
from kallithea.model.repo_group import RepoGroupModel
 
from kallithea.tests.base import *
 
from kallithea.tests.fixture import create_test_index, Fixture
 

	
 
fixture = Fixture()
 

	
 
def init_indexing_test(repo):
 
    prev = fixture.commit_change(repo.repo_name,
 
                                 filename='this_should_be_unique_filename.txt',
 
                                 content='this_should_be_unique_content\n',
 
                                 message='this_should_be_unique_commit_log',
 
                                 vcs_type='hg',
 
                                 newfile=True)
 

	
 
def init_stopword_test(repo):
 
    prev = fixture.commit_change(repo.repo_name,
 
                                 filename='this/is/it',
 
                                 content='def test\n',
 
                                 message='bother to ask where - in folder',
 
                                 vcs_type='hg',
 
                                 newfile=True)
 
    prev = fixture.commit_change(repo.repo_name,
 
                                 filename='join.us',
 
                                 content='def test\n',
 
                                 message='bother to ask where - top level',
 
                                 author='this is it <this-is-it@foo.bar.com>',
 
                                 vcs_type='hg',
 
                                 parent=prev,
 
                                 newfile=True)
 

	
 
repos = [
 
    # reponame,              init func or fork base, groupname
 
    (u'indexing_test',       init_indexing_test,     None),
 
    (u'indexing_test-fork',  u'indexing_test',       None),
 
    (u'group/indexing_test', u'indexing_test',       u'group'),
 
    (u'this-is-it',          u'indexing_test',       None),
 
    (u'indexing_test-foo',   u'indexing_test',       None),
 
    (u'indexing_test-FOO',   u'indexing_test',       None),
 
    (u'stopword_test',       init_stopword_test,     None),
 
]
 

	
 
# map: name => id
 
repoids = {}
 
groupids = {}
 

	
 
def rebuild_index(full_index):
 
    with mock.patch('kallithea.lib.indexers.daemon.log.debug',
 
                    lambda *args, **kwargs: None):
 
        # The more revisions managed repositories have, the more
 
        # memory capturing "log.debug()" output in "indexers.daemon"
 
        # requires. This may cause unintentional failure of subsequent
 
        # tests, if ENOMEM at forking "git" prevents from rebuilding
 
        # index for search.
 
        # Therefore, "log.debug()" is disabled regardless of logging
 
        # level while rebuilding index.
 
        # (FYI, ENOMEM occurs at forking "git" with python 2.7.3,
 
        # Linux 3.2.78-1 x86_64, 3GB memory, and no ulimit
 
        # configuration for memory)
 
        create_test_index(TESTS_TMP_PATH, CONFIG, full_index=full_index)
 

	
 

	
 
class TestSearchControllerIndexing(TestController):
 
    @classmethod
 
    def setup_class(cls):
 
        for reponame, init_or_fork, groupname in repos:
 
            if groupname and groupname not in groupids:
 
                group = fixture.create_repo_group(groupname)
 
                groupids[groupname] = group.group_id
 
            if callable(init_or_fork):
 
                repo = fixture.create_repo(reponame,
 
                                           repo_group=groupname)
 
                init_or_fork(repo)
 
            else:
 
                repo = fixture.create_fork(init_or_fork, reponame,
 
                                           repo_group=groupname)
 
            repoids[reponame] = repo.repo_id
 

	
 
        # treat "it" as indexable filename
 
        filenames_mock = list(INDEX_FILENAMES)
 
        filenames_mock.append('it')
 
        with mock.patch('kallithea.lib.indexers.daemon.INDEX_FILENAMES',
 
                        filenames_mock):
 
            rebuild_index(full_index=False) # only for newly added repos
 

	
 
    @classmethod
 
    def teardown_class(cls):
 
        # delete in reversed order, to delete fork destination at first
 
        for reponame, init_or_fork, groupname in reversed(repos):
 
            RepoModel().delete(repoids[reponame])
 

	
 
        for reponame, init_or_fork, groupname in reversed(repos):
 
            if groupname in groupids:
 
                RepoGroupModel().delete(groupids.pop(groupname),
 
                                        force_delete=True)
 

	
 
        Session().commit()
 
        Session.remove()
 

	
 
        rebuild_index(full_index=True) # rebuild fully for subsequent tests
 

	
 
    @parametrize('reponame', [
 
        (u'indexing_test'),
 
        (u'indexing_test-fork'),
 
        (u'group/indexing_test'),
 
        (u'this-is-it'),
 
        (u'*-fork'),
 
        (u'group/*'),
 
    ])
 
    @parametrize('searchtype,query,hit', [
 
        #('content', 'this_should_be_unique_content', 1),
 
        ('commit', 'this_should_be_unique_commit_log', 1),
 
        #('path', 'this_should_be_unique_filename.txt', 1),
 
    ])
 
    def test_repository_tokenization(self, reponame, searchtype, query, hit):
 
        self.log_user()
 

	
 
        q = 'repository:%s %s' % (reponame, query)
 
        response = self.app.get(url(controller='search', action='index'),
 
                                {'q': q, 'type': searchtype})
 
        response.mustcontain('>%d results' % hit)
 

	
 
    @parametrize('searchtype,query,hit', [
 
        ('content', 'this_should_be_unique_content', 2),
 
        ('commit', 'this_should_be_unique_commit_log', 1),
 
        ('path', 'this_should_be_unique_filename.txt', 2),
 
    ])
 
    def test_repository_case_sensitivity(self, searchtype, query, hit):
 
        self.log_user()
 

	
 
        lname = u'indexing_test-foo'
 
        uname = u'indexing_test-FOO'
 

	
 
        # (1) "repository:REPONAME" condition should match against
 
        # repositories case-insensitively
 
        q = 'repository:%s %s' % (lname, query)
 
        response = self.app.get(url(controller='search', action='index'),
 
                                {'q': q, 'type': searchtype})
 

	
 
        response.mustcontain('>%d results' % hit)
 

	
 
        # (2) on the other hand, searching under the specific
 
        # repository should return results only for that repository,
 
        # even if specified name matches against another repository
 
        # case-insensitively.
 
        response = self.app.get(url(controller='search', action='index',
 
                                    repo_name=uname),
 
                                {'q': query, 'type': searchtype})
 

	
 
        response.mustcontain('>%d results' % hit)
 

	
 
        # confirm that there is no matching against lower name repository
 
        assert uname in response
 
        #assert lname not in response
 

	
 
    @parametrize('searchtype,query,hit', [
 
        ('content', 'path:this/is/it def test', 37),
 
        ('commit', 'added:this/is/it bother to ask where', 4),
 
        # this condition matches against files below, because
 
        # "path:" condition is also applied on "repository path".
 
        # - "this/is/it" in "stopword_test" repo
 
        # - "this_should_be_unique_filename.txt" in "this-is-it" repo
 
        ('path', 'this/is/it', 0),
 

	
 
        ('content', 'extension:us', 0),
 
        ('path', 'extension:us', 0),
 
    ])
 
    def test_filename_stopword(self, searchtype, query, hit):
 
        response = self.app.get(url(controller='search', action='index'),
 
                                {'q': query, 'type': searchtype})
 

	
 
        response.mustcontain('>%d results' % hit)
 

	
 
    @parametrize('searchtype,query,hit', [
 
        # matching against both 2 files
 
        ('content', 'owner:"this is it"', 0),
 
        ('content', 'owner:this-is-it', 0),
 
        ('path', 'owner:"this is it"', 0),
 
        ('path', 'owner:this-is-it', 0),
 

	
 
        # matching against both 2 revisions
 
        ('commit', 'owner:"this is it"', 0),
 
        ('commit', 'owner:"this-is-it"', 0),
 

	
 
        # matching against only 1 revision
 
        ('commit', 'author:"this is it"', 0),
 
        ('commit', 'author:"this-is-it"', 0),
 
    ])
 
    def test_mailaddr_stopword(self, searchtype, query, hit):
 
        response = self.app.get(url(controller='search', action='index'),
 
                                {'q': query, 'type': searchtype})
 

	
 
        response.mustcontain('>%d results' % hit)
0 comments (0 inline, 0 general)