diff --git a/kallithea/lib/indexers/__init__.py b/kallithea/lib/indexers/__init__.py --- a/kallithea/lib/indexers/__init__.py +++ b/kallithea/lib/indexers/__init__.py @@ -33,7 +33,7 @@ from os.path import dirname # Add location of top level folder to sys.path sys.path.append(dirname(dirname(dirname(os.path.realpath(__file__))))) -from whoosh.analysis import RegexTokenizer, LowercaseFilter +from whoosh.analysis import RegexTokenizer, LowercaseFilter, IDTokenizer from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME from whoosh.formats import Characters from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter @@ -44,11 +44,20 @@ log = logging.getLogger(__name__) # CUSTOM ANALYZER wordsplit + lowercase filter ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() +# CUSTOM ANALYZER raw-string + lowercase filter +# +# This is useful to: +# - avoid tokenization +# - avoid removing "stop words" from text +# - search case-insensitively +# +ICASEIDANALYZER = IDTokenizer() | LowercaseFilter() + #INDEX SCHEMA DEFINITION SCHEMA = Schema( fileid=ID(unique=True), owner=TEXT(), - repository=TEXT(stored=True), + repository=TEXT(stored=True, analyzer=ICASEIDANALYZER), path=TEXT(stored=True), content=FieldType(format=Characters(), analyzer=ANALYZER, scorable=True, stored=True), diff --git a/kallithea/tests/functional/test_search_indexing.py b/kallithea/tests/functional/test_search_indexing.py --- a/kallithea/tests/functional/test_search_indexing.py +++ b/kallithea/tests/functional/test_search_indexing.py @@ -113,9 +113,9 @@ class TestSearchControllerIndexing(TestC (u'group/*'), ]) @parametrize('searchtype,query,hit', [ - #('content', 'this_should_be_unique_content', 1), + ('content', 'this_should_be_unique_content', 1), ('commit', 'this_should_be_unique_commit_log', 1), - #('path', 'this_should_be_unique_filename.txt', 1), + ('path', 'this_should_be_unique_filename.txt', 1), ]) def test_repository_tokenization(self, reponame, searchtype, query, hit): self.log_user()