diff --git a/kallithea/lib/indexers/__init__.py b/kallithea/lib/indexers/__init__.py --- a/kallithea/lib/indexers/__init__.py +++ b/kallithea/lib/indexers/__init__.py @@ -44,6 +44,14 @@ log = logging.getLogger(__name__) # CUSTOM ANALYZER wordsplit + lowercase filter ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() +# CUSTOM ANALYZER wordsplit + lowercase filter, for emailaddr-like text +# +# This is useful to: +# - avoid removing "stop words" from text +# - search case-insensitively +# +EMAILADDRANALYZER = RegexTokenizer() | LowercaseFilter() + # CUSTOM ANALYZER raw-string + lowercase filter # # This is useful to: @@ -72,7 +80,7 @@ PATHANALYZER = RegexTokenizer() | Lowerc #INDEX SCHEMA DEFINITION SCHEMA = Schema( fileid=ID(unique=True), - owner=TEXT(), + owner=TEXT(analyzer=EMAILADDRANALYZER), # this field preserves case of repository name for exact matching repository_rawname=TEXT(analyzer=IDANALYZER), repository=TEXT(stored=True, analyzer=ICASEIDANALYZER), @@ -91,12 +99,12 @@ CHGSETS_SCHEMA = Schema( raw_id=ID(unique=True, stored=True), date=NUMERIC(stored=True), last=BOOLEAN(), - owner=TEXT(), + owner=TEXT(analyzer=EMAILADDRANALYZER), # this field preserves case of repository name for exact matching # and unique-ness in index table repository_rawname=ID(unique=True), repository=ID(stored=True, analyzer=ICASEIDANALYZER), - author=TEXT(stored=True), + author=TEXT(stored=True, analyzer=EMAILADDRANALYZER), message=FieldType(format=Characters(), analyzer=ANALYZER, scorable=True, stored=True), parents=TEXT(), diff --git a/kallithea/tests/functional/test_search_indexing.py b/kallithea/tests/functional/test_search_indexing.py --- a/kallithea/tests/functional/test_search_indexing.py +++ b/kallithea/tests/functional/test_search_indexing.py @@ -188,8 +188,8 @@ class TestSearchControllerIndexing(TestC ('commit', 'owner:"this-is-it"', 0), # matching against only 1 revision - ('commit', 'author:"this is it"', 0), - ('commit', 'author:"this-is-it"', 0), + ('commit', 'author:"this is it"', 1), + ('commit', 'author:"this-is-it"', 1), ]) def test_mailaddr_stopword(self, searchtype, query, hit): response = self.app.get(url(controller='search', action='index'),