Files @ 168cc92c1b53
Branch filter:

Location: kallithea/scripts/logformat.py

FUJIWARA Katsunori
search: prevent pathname related conditions from removing "stop words"

Before this revision, the pathname related conditions below caused
"stop words" to be ignored unintentionally.

- path:, extension: (for "File contents" or "File names")
- added:, removed:, changed: (for "Commit messages")

Therefore, pathname related conditions with "this", "a", "you", and so
on are completely ignored, even if they are valid pathname components.

To prevent pathname related conditions from removing "stop words",
this revision explicitly specifies "analyzer" for pathname related
fields of SCHEMA and CHGSETS_SCHEMA.

Difference between PATHANALYZER and default analyzer of TEXT is
whether "stop words" are preserved or not. Tokenization is still
applied on pathnames.

This revision requires a full rebuild of the index tables, because the
indexing schemas have changed.
#!/usr/bin/env python2

import re
import sys

# No file arguments were given: show what the tool is for and how to run it,
# then stop with a failing exit status.
if not sys.argv[1:]:
    usage_lines = (
        'Cleanup of superfluous % formatting of log statements.',
        'Usage:',
        '''  hg revert `hg loc '*.py'|grep -v logformat.py` && scripts/logformat.py `hg loc '*.py'` && hg diff''',
    )
    for usage_line in usage_lines:
        sys.stdout.write(usage_line + '\n')
    sys.exit(1)


# Regex template, written for re.VERBOSE, that wraps an inner pattern in a
# log call. The inner pattern is inserted at %s with the % operator.
# - The first group captures "log.<level>(" for error/info/warning/debug,
#   including any spaces/newlines that follow the opening parenthesis.
# - The last group captures optional spaces/newlines and the closing ")".
logre = r'''
(log\.(?:error|info|warning|debug)
[(][ \n]*
)
%s
(
[ \n]*[)]
)
'''
# Ordered (compiled pattern, replacement template) pairs. The main loop runs
# them one after another over each file's text, so the later entries operate
# on the output of the earlier ones.
#
# Group numbering once an inner pattern has been inserted into logre:
#   \1      "log.<level>(" prefix (from logre)
#   \2      the quoted format string ("..." or '...', no embedded same quote)
#   \3, \4  whitespace (spaces/newlines), or an empty placeholder group
#   \5      the arguments: text without parentheses, optionally containing
#           parenthesized parts that themselves contain no parentheses
#   \6      closing whitespace and ")" (from logre)
# Every entry uses the same replacement; text matched outside a group
# (the "%", surrounding parentheses, extra spaces/commas) is what gets dropped
# or replaced by the literal ",".
res = [
    # handle % () - keeping spaces around the old %
    # '"fmt" % (args)' becomes '"fmt",' + whitespace from both sides of the
    # % + 'args'; the parentheses around the arguments are not captured.
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*) \( ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) \) ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # handle % without () - keeping spaces around the old %
    # Same as above, for arguments that are not wrapped in parentheses.
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on next line
    # NOTE(review): apart from the "," that the replacement re-inserts, every
    # part of this match is inside a group that the replacement emits again,
    # so this substitution looks like it leaves the text unchanged - confirm
    # whether one of the spaces was meant to sit outside the groups.
    (re.compile(logre % r'''("[^"]*"|'[^']*') , (\n [ ]) ([ ][\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on same line
    # The run of spaces directly after the comma is matched outside any group
    # and dropped; \3 is an empty placeholder and \4 keeps the remaining
    # whitespace (at least one character).
    (re.compile(logre % r'''("[^"]*"|'[^']*') , [ ]+  () (   [\n ]+)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove trailing , and space
    # \5 must end in a character that is not a parenthesis, comma, space or
    # newline; any spaces/commas after it are matched outside the groups and
    # dropped.
    (re.compile(logre % r'''("[^"]*"|'[^']*') ,       () (   [\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* [^(), \n] ) [ ,]*''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    ]

# Rewrite every file named on the command line in place: read the whole
# file, apply each (pattern, replacement) pair from ``res`` in order, and
# write the result back to the same path.
for f in sys.argv[1:]:
    # Context managers close each handle deterministically; the read handle
    # is closed before the file is reopened (and truncated) for writing.
    with open(f) as infile:
        s = infile.read()
    for r, t in res:
        s = r.sub(t, s)
    with open(f, 'w') as outfile:
        outfile.write(s)