Files @ c7728c5736fd
Branch filter:

Location: kallithea/scripts/logformat.py

Thomas De Schampheleire
templates: narrow down scope of webhelpers.html.literal for HTML injection

When using webhelpers.html.literal to inject some explicit HTML code with
some variable data, there are two approaches:
h.literal('some <html> code with %s data' % foobar)
or
h.literal('some <html> code with %s data') % foobar

In the first case, the literal also applies to the contents of variable
'foobar' which may be influenceable by users and thus potentially malicious.
In the second case, this term will be escaped by webhelpers.

See also the documentation:
https://docs.pylonsproject.org/projects/webhelpers/en/latest/modules/html/builder.html#webhelpers.html.builder.literal
"Also, if you add another string to this string, the other string will
be quoted and you will get back another literal object. Also
literal(...) % obj will quote any value(s) from obj."

In files_browser.html, the correction of this scope of literal() also means
that explicit escaping of node.name can be removed. The escaping is now done
automatically by webhelpers as mentioned above.
#!/usr/bin/env python2

import re
import sys

if len(sys.argv) < 2:
    print 'Cleanup of superfluous % formatting of log statements.'
    print 'Usage:'
    print '''  hg revert `hg loc '*.py'|grep -v logformat.py` && scripts/logformat.py `hg loc '*.py'` && hg diff'''
    raise SystemExit(1)


logre = r'''
(log\.(?:error|info|warning|debug)
[(][ \n]*
)
%s
(
[ \n]*[)]
)
'''
res = [
    # handle % () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*) \( ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) \) ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # handle % without () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on next line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , (\n [ ]) ([ ][\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on same line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , [ ]+  () (   [\n ]+)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove trailing , and space
    (re.compile(logre % r'''("[^"]*"|'[^']*') ,       () (   [\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* [^(), \n] ) [ ,]*''', flags=re.MULTILINE|re.VERBOSE), r'\1\2,\3\4\5\6'),
    ]

for f in sys.argv[1:]:
    s = open(f).read()
    for r, t in res:
        s = r.sub(t, s)
    open(f, 'w').write(s)