kallithea Changeset - 0c19e4661b71

Changeset - 0c19e4661b71

Parent rev.

Child rev.

[Not reviewed]

default

0 1 0

Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com

diffs: inline / remove _clean_line

1 file changed with 2 insertions and 8 deletions:

kallithea/lib/diffs.py

0 comments (0 inline, 0 general)

kallithea/lib/diffs.py

➞

Show inline comments

@@ @@ -121,587 +121,581 @@ def get_gitdiff(filenode_old, filenode_n @@
         if not isinstance(filenode, FileNode):
             raise VCSError("Given object should be FileNode object, not %s"
                 % filenode.__class__)
     repo = filenode_new.changeset.repository
     old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
     new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
     vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                                 ignore_whitespace, context)
     return vcs_gitdiff
 # Codes for the kind of change made to a file; used as keys of the per-file
 # ``stats['ops']`` dict built while parsing a diff.
 (
     NEW_FILENODE,
     DEL_FILENODE,
     MOD_FILENODE,
     RENAMED_FILENODE,
     COPIED_FILENODE,
     CHMOD_FILENODE,
     BIN_FILENODE,
 ) = range(1, 8)
 class DiffLimitExceeded(Exception):
     """Raised when the amount of processed diff text goes over the diff limit."""
 class LimitedDiffContainer(object):
     """Iterable wrapper around a diff result that also records the diff limit
     and the diff size that had been reached."""

     def __init__(self, diff_limit, cur_diff_size, diff):
         self.diff_limit, self.cur_diff_size = diff_limit, cur_diff_size
         self.diff = diff

     def __iter__(self):
         # hand out the entries of the wrapped diff one by one
         for entry in self.diff:
             yield entry
 class DiffProcessor(object):
     """
     Give it a unified or git diff and it returns a list of the files that were
     mentioned in the diff together with a dict of meta information that
     can be used to render it in a HTML template.
     """
     # Hunk header line: "@@ -<old start>[,<old count>] +<new start>[,<new count>] @@<trailing text>".
     # Both counts are optional; the trailing text is captured as the last group.
     _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
     # The "\ No newline at end of file" marker line of a diff.
     _newline_marker = re.compile(r'^\\ No newline at end of file')
     # Per-file header of a git diff, matched against the text that follows
     # "diff --git". Every header line after the a/b paths is optional; named
     # groups that do not occur come out as None in groupdict().
     _git_header_re = re.compile(r"""
         # has already been split on this:
         # ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
         (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
            ^rename[ ]from[ ](?P<rename_from>.+)\n
            ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
         (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
         (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
         (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
             \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
         (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
         (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
         (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     # Same as _git_header_re, with two differences: the "similarity index"
     # line is matched independently of the "rename from/to" lines, and
     # "copy from/to" lines are recognized (groups copy_from and copy_to).
     _hg_header_re = re.compile(r"""
         # has already been split on this:
         # ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
         (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
         (?:^rename[ ]from[ ](?P<rename_from>.+)\n
            ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
         (?:^copy[ ]from[ ](?P<copy_from>.+)\n
            ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
         (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
         (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
         (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
             \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
         (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
         (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
         (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     # Used for inline highlighter word split, must match the substitutions in _escaper
     _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
     # What _escaper replaces, one capture group per case: "&", "<", ">", tab,
     # carriage return, and a space directly before a newline or the end of
     # the string (only when some character precedes that space).
     _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
     def __init__(self, diff, vcs='hg', diff_limit=None):
         """
         Set up a processor for one diff text; nothing is parsed yet.

         :param diff: the text in diff format to process
         :param vcs: type of version control the diff comes from, hg or git
         :param diff_limit: size above which the diff is considered "big" and
             processing is cut off; None means the full diff is shown
         """
         if not isinstance(diff, basestring):
             raise Exception('Diff must be a basestring got %s instead' % type(diff))

         # input and configuration
         self._diff = diff
         self.vcs = vcs
         self.diff_limit = diff_limit
         self.diff_size = len(diff)  # size of the complete diff text

         # state that changes while the diff is processed
         self.cur_diff_size = 0
         self.adds = self.removes = 0
         self.parsed = False
         self.parsed_diff = []
     def _escaper(self, string):
         """
         Do HTML escaping/markup and check the diff limit
         """
         self.cur_diff_size += len(string)
         # escaper gets iterated on each .next() call and it checks if each
         # parsed line doesn't exceed the diff limit
         if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
             raise DiffLimitExceeded('Diff Limit Exceeded')
         def substitute(m):
             groups = m.groups()
             if groups[0]:
                 return '&amp;'
             if groups[1]:
                 return '&lt;'
             if groups[2]:
                 return '&gt;'
             if groups[3]:
                 return '<u>\t</u>'
             if groups[4]:
                 return '<u class="cr"></u>'
             if groups[5]:
                 return ' <i></i>'
             assert False
         return self._escape_re.sub(substitute, safe_unicode(string))
     def _highlight_inline_diff(self, old, new):
         """
         Highlight simple add/remove in two lines given as info dicts. They are
         modified in place and given markup with <del>/<ins>.
         """
         assert old['action'] == 'del'
         assert new['action'] == 'add'
         oldwords = self._token_re.split(old['line'])
         newwords = self._token_re.split(new['line'])
         sequence = difflib.SequenceMatcher(None, oldwords, newwords)
         oldfragments, newfragments = [], []
         for tag, i1, i2, j1, j2 in sequence.get_opcodes():
             oldfrag = ''.join(oldwords[i1:i2])
             newfrag = ''.join(newwords[j1:j2])
             if tag != 'equal':
                 if oldfrag:
                     oldfrag = '<del>%s</del>' % oldfrag
                 if newfrag:
                     newfrag = '<ins>%s</ins>' % newfrag
             oldfragments.append(oldfrag)
             newfragments.append(newfrag)
         old['line'] = "".join(oldfragments)
         new['line'] = "".join(newfragments)
     def _get_header(self, diff_chunk):
         """
         Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 . A dict with meta info:
             a_path, b_path, similarity_index, rename_from, rename_to,
             old_mode, new_mode, new_file_mode, deleted_file_mode,
             a_blob_id, b_blob_id, b_mode, a_file, b_file
 . An iterator yielding lines with simple HTML markup.
         """
         match = None
         if self.vcs == 'git':
             match = self._git_header_re.match(diff_chunk)
         elif self.vcs == 'hg':
             match = self._hg_header_re.match(diff_chunk)
         if match is None:
             raise Exception('diff not recognized as valid %s diff' % self.vcs)
         meta_info = match.groupdict()
         rest = diff_chunk[match.end():]
         if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
         difflines = imap(self._escaper, re.findall(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
         return meta_info, difflines
     def _clean_line(self, line, command):
         """Given a diff line, strip the leading character if it is a plus/minus/context line."""
         if command in ['+', '-', ' ']:
             line = line[1:]
         return line
     def _parse_gitdiff(self, inline_diff=True):
         """Parse self._diff and return a list of dicts with meta info and chunks for each file.
         If diff is truncated, wrap it in LimitedDiffContainer.
         Optionally, do an extra pass and to extra markup of one-liner changes.
         """
         _files = [] # list of dicts with meta info and chunks
         diff_container = lambda arg: arg
         # split the diff in chunks of separate --git a/file b/file chunks
         for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
             head, diff = self._get_header(raw_diff)
             op = None
             stats = {
                 'added': 0,
                 'deleted': 0,
                 'binary': False,
                 'ops': {},
+            }
             if head['deleted_file_mode']:
                 op = 'D'
                 stats['binary'] = True
                 stats['ops'][DEL_FILENODE] = 'deleted file'
             elif head['new_file_mode']:
                 op = 'A'
                 stats['binary'] = True
                 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
             else:  # modify operation, can be cp, rename, chmod
                 # CHMOD
                 if head['new_mode'] and head['old_mode']:
                     op = 'M'
                     stats['binary'] = True
                     stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
                                         % (head['old_mode'], head['new_mode']))
                 # RENAME
                 if (head['rename_from'] and head['rename_to']
                       and head['rename_from'] != head['rename_to']):
                     op = 'R'
                     stats['binary'] = True
                     stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
                                     % (head['rename_from'], head['rename_to']))
                 # COPY
                 if head.get('copy_from') and head.get('copy_to'):
                     op = 'M'
                     stats['binary'] = True
                     stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                         % (head['copy_from'], head['copy_to']))
                 # FALL BACK: detect missed old style add or remove
                 if op is None:
                     if not head['a_file'] and head['b_file']:
                         op = 'A'
                         stats['binary'] = True
                         stats['ops'][NEW_FILENODE] = 'new file'
                     elif head['a_file'] and not head['b_file']:
                         op = 'D'
                         stats['binary'] = True
                         stats['ops'][DEL_FILENODE] = 'deleted file'
                 # it's not ADD not DELETE
                 if op is None:
                     op = 'M'
                     stats['binary'] = True
                     stats['ops'][MOD_FILENODE] = 'modified file'
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
                 try:
                     chunks, added, deleted = self._parse_lines(diff)
                     stats['binary'] = False
                     stats['added'] = added
                     stats['deleted'] = deleted
                     # explicit mark that it's a modified file
                     if op == 'M':
                         stats['ops'][MOD_FILENODE] = 'modified file'
                 except DiffLimitExceeded:
                     diff_container = lambda _diff: \
                         LimitedDiffContainer(self.diff_limit,
                                             self.cur_diff_size, _diff)
                     break
             else:  # Git binary patch (or empty diff)
                 # Git binary patch
                 if head['bin_patch']:
                     stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                 chunks = []
             if op == 'D' and chunks:
                 # a way of seeing deleted content could perhaps be nice - but
                 # not with the current UI
                 chunks = []
             chunks.insert(0, [{
                 'old_lineno': '',
                 'new_lineno': '',
                 'action':     'context',
                 'line':       msg,
                 } for _op, msg in stats['ops'].iteritems()
                   if _op not in [MOD_FILENODE]])
             _files.append({
                 'old_filename':     head['a_path'],
                 'filename':         head['b_path'],
                 'old_revision':     head['a_blob_id'],
                 'new_revision':     head['b_blob_id'],
                 'chunks':           chunks,
                 'operation':        op,
                 'stats':            stats,
             })
         if not inline_diff:
             return diff_container(_files)
         # highlight inline changes when one del is followed by one add
         for diff_data in _files:
             for chunk in diff_data['chunks']:
                 lineiter = iter(chunk)
                 try:
                     peekline = lineiter.next()
                     while True:
                         # find a first del line
                         while peekline['action'] != 'del':
                             peekline = lineiter.next()
                         delline = peekline
                         peekline = lineiter.next()
                         # if not followed by add, eat all following del lines
                         if peekline['action'] != 'add':
                             while peekline['action'] == 'del':
                                 peekline = lineiter.next()
                             continue
                         # found an add - make sure it is the only one
                         addline = peekline
                         try:
                             peekline = lineiter.next()
                         except StopIteration:
                             # add was last line - ok
                             self._highlight_inline_diff(delline, addline)
                             raise
                         if peekline['action'] != 'add':
                             # there was only one add line - ok
                             self._highlight_inline_diff(delline, addline)
                 except StopIteration:
                     pass
         return diff_container(_files)
     def _parse_lines(self, diff):
         """
         Given an iterator of diff body lines, parse them and return a dict per
         line and added/removed totals.
         """
         added = deleted = 0
         old_line = old_end = new_line = new_end = None
         try:
             chunks = []
             line = diff.next()
             while True:
                 lines = []
                 chunks.append(lines)
                 match = self._chunk_re.match(line)
                 if not match:
                     raise Exception('error parsing diff @@ line %r' % line)
                 gr = match.groups()
                 (old_line, old_end,
                  new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                 old_line -= 1
                 new_line -= 1
                 context = len(gr) == 5
                 old_end += old_line
                 new_end += new_line
                 if context:
                     # skip context only if it's first line
                     if int(gr[0]) > 1:
                         lines.append({
                             'old_lineno': '...',
                             'new_lineno': '...',
                             'action':     'context',
                             'line':       line,
                         })
                 line = diff.next()
                 while old_line < old_end or new_line < new_end:
                     if not line:
                         raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
                     affects_old = affects_new = False
                     command = line[0]
                     if command == '+':
                         affects_new = True
                         action = 'add'
                         added += 1
                     elif command == '-':
                         affects_old = True
                         action = 'del'
                         deleted += 1
                     elif command == ' ':
                         affects_old = affects_new = True
                         action = 'unmod'
                     else:
                         raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
                     if not self._newline_marker.match(line):
                         old_line += affects_old
                         new_line += affects_new
                         lines.append({
                             'old_lineno':   affects_old and old_line or '',
                             'new_lineno':   affects_new and new_line or '',
                             'action':       action,
-                            'line':         self._clean_line(line, command)
+                            'line':         line[1:],
                         })
                     line = diff.next()
                     if self._newline_marker.match(line):
                         # we need to append to lines, since this is not
                         # counted in the line specs of diff
                         lines.append({
                             'old_lineno':   '...',
                             'new_lineno':   '...',
                             'action':       'context',
-                            'line':         self._clean_line(line, command)
                             'line':         line,
                         })
                         line = diff.next()
                 if old_line > old_end:
                     raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
                 if new_line > new_end:
                     raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
         except StopIteration:
             pass
         if old_line != old_end or new_line != new_end:
             raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
         return chunks, added, deleted
     def _safe_id(self, idstring):
         """Make a string safe for including in an id attribute.
         The HTML spec says that id attributes 'must begin with
         a letter ([A-Za-z]) and may be followed by any number
         of letters, digits ([0-9]), hyphens ("-"), underscores
         ("_"), colons (":"), and periods (".")'. These regexps
         are slightly over-zealous, in that they remove colons
         and periods unnecessarily.
         Whitespace is transformed into underscores, and then
         anything which is not a hyphen or a character that
         matches \w (alphanumerics and underscore) is removed.
         """
         # Transform all whitespace to underscore
         idstring = re.sub(r'\s', "_", idstring)
         # Remove everything that is not a hyphen or a member of \w
         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
         return idstring
     def prepare(self, inline_diff=True):
         """
         Prepare the passed udiff for HTML rendering. It'll return a list
         of dicts with diff information
         """
         parsed = self._parse_gitdiff(inline_diff=inline_diff)
         self.parsed = True
         self.parsed_diff = parsed
         return parsed
     def as_html(self, table_class='code-difftable', line_class='line',
                 old_lineno_class='lineno old', new_lineno_class='lineno new',
                 no_lineno_class='lineno',
                 code_class='code', enable_comments=False, parsed_lines=None):
         """
         Return given diff as html table with customized css classes
         """
         def _link_to_if(condition, label, url):
             """
             Generates a link if condition is meet or just the label if not.
             """
             if condition:
                 return '''<a href="%(url)s">%(label)s</a>''' % {
                     'url': url,
                     'label': label
+                }
             else:
                 return label
         if not self.parsed:
             self.prepare()
         diff_lines = self.parsed_diff
         if parsed_lines:
             diff_lines = parsed_lines
         _html_empty = True
         _html = []
         _html.append('''<table class="%(table_class)s">\n''' % {
             'table_class': table_class
         })
         for diff in diff_lines:
             for line in diff['chunks']:
                 _html_empty = False
                 for change in line:
                     _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                         'lc': line_class,
                         'action': change['action']
                     })
                     anchor_old_id = ''
                     anchor_new_id = ''
                     anchor_old = "%(filename)s_o%(oldline_no)s" % {
                         'filename': self._safe_id(diff['filename']),
                         'oldline_no': change['old_lineno']
+                    }
                     anchor_new = "%(filename)s_n%(oldline_no)s" % {
                         'filename': self._safe_id(diff['filename']),
                         'oldline_no': change['new_lineno']
+                    }
                     cond_old = (change['old_lineno'] != '...' and
                                 change['old_lineno'])
                     cond_new = (change['new_lineno'] != '...' and
                                 change['new_lineno'])
                     no_lineno = (change['old_lineno'] == '...' and
                                  change['new_lineno'] == '...')
                     if cond_old:
                         anchor_old_id = 'id="%s"' % anchor_old
                     if cond_new:
                         anchor_new_id = 'id="%s"' % anchor_new
                     ###########################################################
                     # OLD LINE NUMBER
                     ###########################################################
                     _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
                         'a_id': anchor_old_id,
                         'olc': no_lineno_class if no_lineno else old_lineno_class,
                         'colspan': 'colspan="2"' if no_lineno else ''
                     })
                     _html.append('''%(link)s''' % {
                         'link': _link_to_if(not no_lineno, change['old_lineno'],
                                             '#%s' % anchor_old)
                     })
                     _html.append('''</td>\n''')
                     ###########################################################
                     # NEW LINE NUMBER
                     ###########################################################
                     if not no_lineno:
                         _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                             'a_id': anchor_new_id,
                             'nlc': new_lineno_class
                         })
                         _html.append('''%(link)s''' % {
                             'link': _link_to_if(True, change['new_lineno'],
                                                 '#%s' % anchor_new)
                         })
                         _html.append('''</td>\n''')
                     ###########################################################
                     # CODE
                     ###########################################################
                     comments = '' if enable_comments else 'no-comment'
                     _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                         'cc': code_class,
                         'inc': comments
                     })
                     _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
                         'code': change['line']
                     })
                     _html.append('''\t</td>''')
                     _html.append('''\n</tr>\n''')
         _html.append('''</table>''')
         if _html_empty:
             return None
         return ''.join(_html)
     def stat(self):
         """
         Returns tuple of added, and removed lines for this instance
         """
         return self.adds, self.removes

0 comments (0 inline, 0 general)