Files @ e965ff6f8cb3
Branch filter:

Location: kallithea/scripts/logformat.py

Mads Kiilerich
setup: avoid bleach 3.1.4 for now - it seems to deliberately cause regressions

See https://github.com/mozilla/bleach/blob/master/CHANGES and
https://github.com/mozilla/bleach/commit/d6018f2539d271963c3e7f54f36ef11900363c69
... which adds xfails for use cases similar to how we use bleach.

It would completely remove style attributes instead of dropping bad parts of
them, as shown by the markup_renderer.py doctest it made fail:

>>> MarkupRenderer.render('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''&apos;, '.md')
Expected:
'<p><img id="a" src="http://example.com/test.jpg"; style="color: red;"></p>'
Got:
'<p><img id="a" src="http://example.com/test.jpg"; style=""></p>'

Until a better solution is found, stick to 3.1.3 and accept the potential
ReDoS.
#!/usr/bin/env python3

import re
import sys


logre = r'''
(log\.(?:error|info|warning|debug)
[(][ \n]*
)
%s
(
[ \n]*[)]
)
'''


res = [
    # handle % () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*) \( ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) \) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # handle % without () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on next line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , (\n [ ]) ([ ][\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on same line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , [ ]+  () (   [\n ]+)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove trailing , and space
    (re.compile(logre % r'''("[^"]*"|'[^']*') ,       () (   [\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* [^(), \n] ) [ ,]*''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    ]


def rewrite(f):
    s = open(f).read()
    for r, t in res:
        s = r.sub(t, s)
    open(f, 'w').write(s)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Cleanup of superfluous % formatting of log statements.')
        print('Usage:')
        print('''  hg revert `hg loc '*.py'|grep -v logformat.py` && scripts/logformat.py `hg loc '*.py'` && hg diff''')
        raise SystemExit(1)

    for f in sys.argv[1:]:
        rewrite(f)