# HG changeset patch # User Mads Kiilerich # Date 2017-10-03 00:14:40 # Node ID e708b26819cd5ffb7c7a28a986a47dacac810cc4 # Parent 22074446ac5b9f8beb737e6c481c7e52a3b31b31 diffs: move _escaper to a pure function It used to count diff size - now it is a clean function. diff --git a/kallithea/lib/diffs.py b/kallithea/lib/diffs.py --- a/kallithea/lib/diffs.py +++ b/kallithea/lib/diffs.py @@ -305,8 +305,6 @@ class DiffProcessor(object): (?:^\+\+\+[ ](b/(?P.+?)|/dev/null)\t?(?:\n|$))? """, re.VERBOSE | re.MULTILINE) - _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)') - def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True): """ :param diff: a text in diff format @@ -326,28 +324,6 @@ class DiffProcessor(object): self.vcs = vcs self.parsed = self._parse_gitdiff(inline_diff=inline_diff) - def _escaper(self, string): - """ - Do HTML escaping/markup - """ - def substitute(m): - groups = m.groups() - if groups[0]: - return '&' - if groups[1]: - return '<' - if groups[2]: - return '>' - if groups[3]: - return '\t' - if groups[4]: - return '' - if groups[5]: - return ' ' - assert False - - return self._escape_re.sub(substitute, safe_unicode(string)) - def _get_header(self, diff_chunk): """ Parses a Git diff for a single file (header and chunks) and returns a tuple with: @@ -371,7 +347,7 @@ class DiffProcessor(object): rest = diff_chunk[match.end():] if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '): raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000])) - diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do + diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do return meta_info, diff_lines def _parse_gitdiff(self, inline_diff): @@ -624,6 +600,33 @@ class DiffProcessor(object): return self.adds, self.removes +_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)') + + +def _escaper(string): + """ + Do HTML escaping/markup + """ + + def substitute(m): + groups = m.groups() + if groups[0]: + return '&' + if groups[1]: + return '<' + if groups[2]: + return '>' + if groups[3]: + return '\t' + if groups[4]: + return '' + if groups[5]: + return ' ' + assert False + + return _escape_re.sub(substitute, safe_unicode(string)) + + # Used for inline highlighter word split, must match the substitutions in _escaper _token_re = re.compile(r'()(&|<|>|\t|| |\W+?)')