kallithea Changeset - 84d2df525238

Changeset - 84d2df525238

Parent rev.

Child rev.

[Not reviewed]

default

0 1 0

Mads Kiilerich - 5 years ago 2020-10-28 16:08:04
mads@kiilerich.com

Grafted from: 5afdce6eb7d4

diff: polish _escaper function - clarify purpose and pass it strings

1 file changed with 4 insertions and 4 deletions:

kallithea/lib/diffs.py

0 comments (0 inline, 0 general)

kallithea/lib/diffs.py

➞

Show inline comments

@@ -403,185 +403,185 @@ class DiffProcessor(object):
             })
         if not inline_diff:
             return _files
         # highlight inline changes when one del is followed by one add
         for diff_data in _files:
             for chunk in diff_data['chunks']:
                 lineiter = iter(chunk)
                 try:
                     peekline = next(lineiter)
                     while True:
                         # find a first del line
                         while peekline['action'] != 'del':
                             peekline = next(lineiter)
                         delline = peekline
                         peekline = next(lineiter)
                         # if not followed by add, eat all following del lines
                         if peekline['action'] != 'add':
                             while peekline['action'] == 'del':
                                 peekline = next(lineiter)
                             continue
                         # found an add - make sure it is the only one
                         addline = peekline
                         try:
                             peekline = next(lineiter)
                         except StopIteration:
                             # add was last line - ok
                             _highlight_inline_diff(delline, addline)
                             raise
                         if peekline['action'] != 'add':
                             # there was only one add line - ok
                             _highlight_inline_diff(delline, addline)
                 except StopIteration:
                     pass
         return _files
     def stat(self):
         """
         Return the ``(adds, removes)`` pair stored on this instance.

         The values are read from ``self.adds`` and ``self.removes``;
         presumably these are the added/removed line totals collected while
         the diff was parsed (they are assigned outside this view - confirm).
         """
         return self.adds, self.removes
 # Single-pass pattern used by _escaper. Capture groups, in order:
 #   1  '&'   2  '<'   3  '>'    HTML-special characters
 #   4  tab
 #   5  carriage return
 #   6  space followed by newline, or space at end; the (?<=.) lookbehind
 #      requires a preceding character, so a space in the very first column
 #      (presumably the diff marker column of an empty context line) is
 #      never reported as trailing whitespace
 #   7  tab followed by newline, or tab at end
 # NOTE(review): alternatives are tried left to right and the first one that
 # matches wins, so a tab always matches group 4 before group 7 is tried -
 # group 7 looks unreachable as written; confirm against the tests.
 _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)|(\t\n|\t$)')
-def _escaper(string):
+def _escaper(diff_line):
     """
-    Do HTML escaping/markup
+    Do HTML escaping/markup of a single diff line (including first +/- column)
     """
     def substitute(m):
         # Each alternative of _escape_re is its own capture group, so exactly
         # one entry of groups is set per match; map it to its replacement.
         groups = m.groups()
         if groups[0]:
             return '&amp;'
         if groups[1]:
             return '&lt;'
         if groups[2]:
             return '&gt;'
         if groups[3]:
             return '<u>\t</u>'  # Note: trailing tabs will get a longer match later
         if groups[4]:
             # the carriage return itself is dropped; only the marker remains
             return '<u class="cr"></u>'
         if groups[5]:
             # trailing space: keep the space, add an empty marker element; a
             # newline consumed by the match is not emitted again
             return ' <i></i>'
         if groups[6]:
             # trailing tab: tab markup plus the same empty marker element
             return '<u>\t</u><i></i>'
         # not reachable as long as every alternative of the pattern captures
         assert False
     # After this change the argument is used as given; converting with
     # safe_str is left to the caller.
-    return _escape_re.sub(substitute, safe_str(string))
+    return _escape_re.sub(substitute, diff_line)
 # Bytes pattern for the header of one file's section in a Git diff: the
 # mandatory "diff --git a/... b/..." line followed by the optional mode,
 # similarity/rename, new/deleted file mode, index, binary patch marker and
 # ---/+++ lines. Written in verbose mode, so literal spaces are spelled [ ].
 _git_header_re = re.compile(br"""
     ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
        ^rename[ ]from[ ](?P<rename_from>.+)\n
        ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
     (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
     (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
     (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 # Counterpart used when vcs == 'hg'. Compared with _git_header_re, the
 # similarity index line is optional on its own (not tied to rename from/to)
 # and "copy from"/"copy to" lines are recognised as well.
 _hg_header_re = re.compile(br"""
     ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
     (?:^rename[ ]from[ ](?P<rename_from>.+)\n
        ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
     (?:^copy[ ]from[ ](?P<copy_from>.+)\n
        ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
     (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
     (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
     (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 # Zero-width check that succeeds when the text does NOT start with '@',
 # 'literal ' or 'delta '. _get_header applies it to whatever follows the
 # matched header and raises if it succeeds on a non-empty remainder.
 _header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 def _get_header(vcs, diff_chunk):
     """
     Parses a Git diff for a single file (header and chunks) and returns a tuple with:
     1. A dict with meta info:
         a_path, b_path, similarity_index, rename_from, rename_to,
         old_mode, new_mode, new_file_mode, deleted_file_mode,
         a_blob_id, b_blob_id, b_mode, a_file, b_file
     2. An iterator yielding lines with simple HTML markup.
     """
     # Pick the header pattern for the given vcs; any other vcs value leaves
     # match as None and ends in the "not recognized" exception below.
     match = None
     if vcs == 'git':
         match = _git_header_re.match(diff_chunk)
     elif vcs == 'hg':
         match = _hg_header_re.match(diff_chunk)
     if match is None:
         raise Exception('diff not recognized as valid %s diff' % vcs)
     # Named groups that did not take part in the match stay None; all other
     # values are passed through safe_str.
     meta_info = {k: None if v is None else safe_str(v) for k, v in match.groupdict().items()}
     rest = diff_chunk[match.end():]
     # A non-empty remainder has to start with '@', 'literal ' or 'delta ';
     # otherwise report the parsed header and at most 1000 bytes of what follows.
     if rest and _header_next_check.match(rest):
         raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
     # Lazily split the remainder into lines; each line goes through safe_str
     # before it reaches _escaper.
-    diff_lines = (_escaper(m.group(0)) for m in re.finditer(br'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
+    diff_lines = (_escaper(safe_str(m.group(0))) for m in re.finditer(br'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
     return meta_info, diff_lines
 # Hunk header "@@ -old[,count] +new[,count] @@rest". Groups: old start,
 # optional old count, new start, optional new count, and the text after the
 # closing "@@".
 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 # Matches the literal "\ No newline at end of file" marker at the start of a
 # line.
 _newline_marker = re.compile(r'^\\ No newline at end of file')
 def _parse_lines(diff_lines):
     """
     Given an iterator of diff body lines, parse them and return a dict per
     line and added/removed totals.
     """
     added = deleted = 0
     old_line = old_end = new_line = new_end = None
     chunks = []
     try:
         line = next(diff_lines)
         while True:
             lines = []
             chunks.append(lines)
             match = _chunk_re.match(line)
             if not match:
                 raise Exception('error parsing diff @@ line %r' % line)
             gr = match.groups()
             (old_line, old_end,
              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
             old_line -= 1
             new_line -= 1
             context = len(gr) == 5
             old_end += old_line
             new_end += new_line
             if context:
                 # skip context only if it's first line
                 if int(gr[0]) > 1:
                     lines.append({
                         'old_lineno': '',
                         'new_lineno': '',
                         'action':     'context',
                         'line':       line,
                     })

0 comments (0 inline, 0 general)