kallithea Changeset - 2ff913970025

Changeset - 2ff913970025

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

FUJIWARA Katsunori - 9 years ago 2017-01-22 18:17:38
foozy@lares.dti.ne.jp

journal: make "username:" filtering condition work as expected

As described in the previous revision, using TEXT in JOURNAL_SCHEMA causes
unexpected results for "username:", too, due to:

- tokenization by non-alphanumeric characters
- removing "stop words"

To make the "username:" filtering condition work as expected, this
revision uses ID instead of TEXT for the "username" field of JOURNAL_SCHEMA.

2 files changed with 3 insertions and 3 deletions:

kallithea/lib/indexers/__init__.py

kallithea/tests/functional/test_admin.py

0 comments (0 inline, 0 general)

kallithea/lib/indexers/__init__.py

➞

Show inline comments

@@ @@ -34,97 +34,97 @@ from os.path import dirname @@
 sys.path.append(dirname(dirname(dirname(os.path.realpath(__file__)))))
 from whoosh.analysis import RegexTokenizer, LowercaseFilter
 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME
 from whoosh.formats import Characters
 from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter
 from kallithea.lib.utils2 import LazyProperty
 log = logging.getLogger(__name__)
 # CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
     fileid=ID(unique=True),
     owner=TEXT(),
     repository=TEXT(stored=True),
     path=TEXT(stored=True),
     content=FieldType(format=Characters(), analyzer=ANALYZER,
                       scorable=True, stored=True),
     modtime=STORED(),
     extension=TEXT(stored=True)
+)
 IDX_NAME = 'HG_INDEX'
 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 FRAGMENTER = ContextFragmenter(200)
 CHGSETS_SCHEMA = Schema(
     raw_id=ID(unique=True, stored=True),
     date=NUMERIC(stored=True),
     last=BOOLEAN(),
     owner=TEXT(),
     repository=ID(unique=True, stored=True),
     author=TEXT(stored=True),
     message=FieldType(format=Characters(), analyzer=ANALYZER,
                       scorable=True, stored=True),
     parents=TEXT(),
     added=TEXT(),
     removed=TEXT(),
     changed=TEXT(),
+)
 CHGSET_IDX_NAME = 'CHGSET_INDEX'
 # used only to generate queries in journal
 JOURNAL_SCHEMA = Schema(
-    username=TEXT(),
+    username=ID(),
     date=DATETIME(),
     action=TEXT(),
     repository=ID(),
     ip=TEXT(),
+)
 class WhooshResultWrapper(object):
     def __init__(self, search_type, searcher, matcher, highlight_items,
                  repo_location):
         self.search_type = search_type
         self.searcher = searcher
         self.matcher = matcher
         self.highlight_items = highlight_items
         self.fragment_size = 200
         self.repo_location = repo_location
     @LazyProperty
     def doc_ids(self):
         docs_id = []
         while self.matcher.is_active():
             docnum = self.matcher.id()
             chunks = [offsets for offsets in self.get_chunks()]
             docs_id.append([docnum, chunks])
             self.matcher.next()
         return docs_id
     def __str__(self):
         return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
     def __repr__(self):
         return self.__str__()
     def __len__(self):
         return len(self.doc_ids)
     def __iter__(self):
         """
         Allows Iteration over results,and lazy generate content
         *Requires* implementation of ``__getitem__`` method.
         """
         for docid in self.doc_ids:
             yield self.get_full_content(docid)
     def __getitem__(self, key):
         """
         Slicing of resultWrapper

kallithea/tests/functional/test_admin.py

➞

Show inline comments

@@ @@ -126,70 +126,70 @@ class TestAdminController(TestController @@
     def test_filter_journal_filter_prefix_on_user_or_other_user(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='username:demo OR username:volcan'))
         response.mustcontain(' 1095 Entries')  # 1087 + 8
     def test_filter_journal_filter_wildcard_on_action(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='action:*pull_request*'))
         response.mustcontain(' 187 Entries')
     def test_filter_journal_filter_on_date(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='date:20121010'))
         response.mustcontain(' 47 Entries')
     def test_filter_journal_filter_on_date_2(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='date:20121020'))
         response.mustcontain(' 17 Entries')
     @parametrize('filter,hit', [
         #### "repository:" filtering
         # "/" is used for grouping
         ('repository:group/test', 4),
         # "-" is often used for "-fork"
         ('repository:fork-test1', 5),
         # using "stop words"
         ('repository:this', 1),
         ('repository:this/is-it', 1),
         ## additional tests to quickly find out regression in the future
         ## (and check case-insensitive search, too)
         # non-ascii character "." and "-"
         ('repository:TESTIES1.2.3', 4),
         ('repository:test_git_repo', 2),
         # combination with wildcard "*"
         ('repository:GROUP/*', 182),
         ('repository:*/test', 7),
         ('repository:fork-*', 273),
         ('repository:*-Test1', 5),
         #### "username:" filtering
         # "-" is valid character
-        ('username:peso-xxx', 0),
+        ('username:peso-xxx', 4),
         # using "stop words"
-        ('username:this-is-it', 2036),
+        ('username:this-is-it', 2),
         ## additional tests to quickly find out regression in the future
         ## (and check case-insensitive search, too)
         # non-ascii character "." and "-"
         ('username:ADMIN_xanroot', 6),
         ('username:robert.Zaremba', 3),
         # combination with wildcard "*"
         ('username:THIS-*', 2),
         ('username:*-IT', 2),
     ])
     def test_filter_journal_filter_tokenization(self, filter, hit):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter=filter))
         if hit != 1:
             response.mustcontain(' %s Entries' % hit)
         else:
             response.mustcontain(' 1 Entry')

0 comments (0 inline, 0 general)