kallithea Changeset - 46d5811262b6

Changeset - 46d5811262b6

Parent rev.

Child rev.

[Not reviewed]

beta

0 1 0

Mads Kiilerich - 13 years ago 2013-04-10 02:58:36
madski@unity3d.com

diff parser: show multiple messages ... and fix stats for non-git chunks

1 file changed with 18 insertions and 15 deletions:

rhodecode/lib/diffs.py

0 comments (0 inline, 0 general)

rhodecode/lib/diffs.py

➞

Show inline comments

@@ @@ -169,439 +169,442 @@ class DiffProcessor(object): @@
         (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
         (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     # Header pattern applied by _get_header() when ``self.vcs == 'hg'``.
     # It is compiled with re.VERBOSE, so whitespace inside the pattern is
     # ignored and the leading ``#^diff[ ]--git`` line is a regex comment, not
     # part of the match: _parse_gitdiff() splits the input on '\ndiff --git',
     # so every chunk handed to this pattern starts right after that marker.
     # The named groups are what _get_header() returns via groupdict(); every
     # section after the ``a/... b/...`` path line is optional.
     _hg_header_re = re.compile(r"""
         #^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
         (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
         (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
            ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
         (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
         (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
         (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
             \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
         (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
         (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     #used for inline highlighter word split
     # Separators are the HTML entities produced by _escaper() (&gt; &lt;
     # &amp;) or a single non-word character (``\W+?`` is lazy). Both groups
     # are capturing, so re.split() keeps each separator in its output,
     # preceded by an empty string contributed by the empty first group.
     _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
     def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
         """
         Set up a processor for one diff text.

         :param diff: the diff text; must be a ``basestring``
         :param vcs: version control system that produced the diff,
             ``hg`` or ``git``
         :param format: format of the passed diff; ``gitdiff`` selects the
             git-style parser and the difflib highlighter, any other value
             selects the udiff parser and highlighter
         :param diff_limit: size above which the diff counts as "big" and
             parsing is cut off; ``None`` means no limit
         """
         if not isinstance(diff, basestring):
             raise Exception('Diff must be a basestring got %s instead' % type(diff))

         # pick the highlighter/parser pair that belongs to the format
         if format == 'gitdiff':
             highlighter, parser = (self._highlight_line_difflib,
                                    self._parse_gitdiff)
         else:
             highlighter, parser = (self._highlight_line_udiff,
                                    self._parse_udiff)

         # input and configuration
         self._diff = diff
         self._format = format
         self.vcs = vcs
         self.diff_limit = diff_limit

         # counters and size bookkeeping
         self.adds = 0
         self.removes = 0
         self.diff_size = len(diff)
         self.cur_diff_size = 0

         # parse state, filled in by prepare()
         self.parsed = False
         self.parsed_diff = []

         self.differ = highlighter
         self._parser = parser
     def _copy_iterator(self):
         """
         make a fresh copy of generator, we should not iterate thru
         an original as it's needed for repeating operations on
         this instance of DiffProcessor
         """
         self.__udiff, iterator_copy = tee(self.__udiff)
         return iterator_copy
     def _escaper(self, string):
         """
         HTML-escape one piece of diff text and enforce the diff limit.

         The length of ``string`` is added to ``self.cur_diff_size`` first;
         when a limit is set and the running total exceeds it,
         ``DiffLimitExceeded`` is raised before any escaping happens.

         :param string: raw text to escape
         """
         self.cur_diff_size += len(string)
         limit = self.diff_limit
         if limit is not None and self.cur_diff_size > limit:
             raise DiffLimitExceeded('Diff Limit Exceeded')

         escaped = safe_unicode(string)
         # '&' has to go first, otherwise the entities added for '<' and '>'
         # would get their ampersand escaped a second time
         for plain, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
             escaped = escaped.replace(plain, entity)
         return escaped
     def _line_counter(self, l):
         """
         Update ``self.adds`` / ``self.removes`` for one diff line.

         Lines starting with ``+++`` or ``---`` are not counted.

         :param l: one line of the diff
         :returns: the line passed through ``safe_unicode``
         """
         if not l.startswith(('+++', '---')):
             if l.startswith('+'):
                 self.adds += 1
             elif l.startswith('-'):
                 self.removes += 1
         return safe_unicode(l)
     def _highlight_line_difflib(self, line, next_):
         """
         Highlight inline changes in both lines.
         """
         if line['action'] == 'del':
             old, new = line, next_
         else:
             old, new = next_, line
         oldwords = self._token_re.split(old['line'])
         newwords = self._token_re.split(new['line'])
         sequence = difflib.SequenceMatcher(None, oldwords, newwords)
         oldfragments, newfragments = [], []
         for tag, i1, i2, j1, j2 in sequence.get_opcodes():
             oldfrag = ''.join(oldwords[i1:i2])
             newfrag = ''.join(newwords[j1:j2])
             if tag != 'equal':
                 if oldfrag:
                     oldfrag = '<del>%s</del>' % oldfrag
                 if newfrag:
                     newfrag = '<ins>%s</ins>' % newfrag
             oldfragments.append(oldfrag)
             newfragments.append(newfrag)
         old['line'] = "".join(oldfragments)
         new['line'] = "".join(newfragments)
     def _highlight_line_udiff(self, line, next_):
         """
         Highlight inline changes in both lines.
         """
         start = 0
         limit = min(len(line['line']), len(next_['line']))
         while start < limit and line['line'][start] == next_['line'][start]:
             start += 1
         end = -1
         limit -= start
         while -end <= limit and line['line'][end] == next_['line'][end]:
             end -= 1
         end += 1
         if start or end:
             def do(l):
                 last = end + len(l['line'])
                 if l['action'] == 'add':
                     tag = 'ins'
                 else:
                     tag = 'del'
                 l['line'] = '%s<%s>%s</%s>%s' % (
                     l['line'][:start],
                     tag,
                     l['line'][start:last],
                     tag,
                     l['line'][last:]
+                )
             do(line)
             do(next_)
     def _get_header(self, diff_chunk):
         """
         Split one per-file diff chunk into its parsed header and its body.

         The header pattern is chosen by ``self.vcs`` (``git`` or ``hg``).
         The first return value is the pattern's ``groupdict()`` with these
         14 keys::

             a_path, b_path, similarity_index, rename_from, rename_to,
             old_mode, new_mode, new_file_mode, deleted_file_mode,
             a_blob_id, b_blob_id, b_mode, a_file, b_file

         The second return value lazily yields the remaining lines (line
         endings kept), each passed through ``self._escaper``.

         :param diff_chunk: text of one file's diff, starting right after
             the ``diff --git`` marker
         :raises Exception: when ``self.vcs`` is neither ``git`` nor ``hg``
         """
         vcs = self.vcs
         if vcs == 'git':
             header_re = self._git_header_re
         elif vcs == 'hg':
             header_re = self._hg_header_re
         else:
             raise Exception('VCS type %s is not supported' % self.vcs)

         found = header_re.match(diff_chunk)
         remainder = diff_chunk[found.end():]
         escaped_lines = imap(self._escaper, remainder.splitlines(1))
         return found.groupdict(), escaped_lines
     def _clean_line(self, line, command):
         if command in ['+', '-', ' ']:
             #only modify the line if it's actually a diff thing
             line = line[1:]
         return line
     def _parse_gitdiff(self, inline_diff=True):
         """
         Parse ``self._diff`` as a git-style diff, one entry per file.

         The text is split on ``diff --git`` markers and every chunk is run
         through ``_get_header``. Each file yields a dict with the keys
         ``filename``, ``old_revision``, ``new_revision``, ``chunks``,
         ``operation`` (``A``, ``M`` or ``D``) and ``stats``. All messages
         collected for a file (new/deleted file, chmod, rename, binary) are
         shown together as one extra leading chunk of ``binary`` lines.

         :param inline_diff: when true, neighbouring changed lines with
             different actions are additionally passed to ``self.differ``
             for inline highlighting
         :returns: the file dicts sorted added, modified, deleted. When the
             diff limit is hit while parsing, the file being parsed is
             dropped, parsing stops and the result is wrapped in a
             ``LimitedDiffContainer``.
         """
         _files = []
         diff_container = lambda arg: arg
         ##split the diff in chunks of separate --git a/file b/file chunks
         for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
             head, diff = self._get_header(raw_diff)
             op = None
             stats = None
             msgs = []
             if not head['a_file'] and head['b_file']:
                 op = 'A'
                 stats = ['b', NEW_FILENODE]
                 msgs.append('new file')
             elif head['a_file'] and head['b_file']:
                 op = 'M'
                 stats = ['b', MOD_FILENODE]
             elif head['a_file'] and not head['b_file']:
                 op = 'D'
                 stats = ['b', DEL_FILENODE]
                 msgs.append('deleted file')
             else:
                 # neither ---/+++ file line matched: decide from the other
                 # header fields
                 if head['deleted_file_mode']:
                     op = 'D'
                     stats = ['b', DEL_FILENODE]
                     msgs.append('deleted file')
                 elif head['new_file_mode']:
                     op = 'A'
                     stats = ['b', NEW_FILENODE]
                     msgs.append('new file %s' % head['new_file_mode'])
                 else:
                     # chmod and rename can both apply to the same file, so
                     # they are checked independently and both reported
                     if head['new_mode'] and head['old_mode']:
                         op = 'M'
                         stats = ['b', CHMOD_FILENODE]
                         msgs.append('modified file chmod %s => %s'
                                       % (head['old_mode'], head['new_mode']))
                     if (head['rename_from'] and head['rename_to']
                           and head['rename_from'] != head['rename_to']):
                         op = 'M'
                         stats = ['b', RENAMED_FILENODE] # might overwrite CHMOD_FILENODE
                         msgs.append('file renamed from %s to %s'
                                       % (head['rename_from'], head['rename_to']))
                     if op is None:
                         op = 'M'
                         stats = ['b', MOD_FILENODE]
             if head['a_file'] or head['b_file']: # a real diff
                 try:
                     # for real diffs the placeholder stats set above are
                     # replaced by the add/remove counts
                     chunks, stats = self._parse_lines(diff)
                 except DiffLimitExceeded:
                     diff_container = lambda _diff: LimitedDiffContainer(
                                                 self.diff_limit,
                                                 self.cur_diff_size,
                                                 _diff)
                     break
             else: # GIT binary patch (or empty diff)
                 chunks = []
                 msgs.append('binary diff not shown') # or no diff because it was a rename or chmod or add/remove of empty file
             if msgs:
                 chunks.insert(0, [{
                     'old_lineno': '',
                     'new_lineno': '',
                     'action':     'binary',
                     'line':       msg,
                     } for msg in msgs])
             _files.append({
                 'filename':         head['b_path'],
                 'old_revision':     head['a_blob_id'],
                 'new_revision':     head['b_blob_id'],
                 'chunks':           chunks,
                 'operation':        op,
                 'stats':            stats,
             })
         sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
         if not inline_diff:
             return diff_container(sorted(_files, key=sorter))
         # highlight inline changes
         for diff_data in _files:
             for chunk in diff_data['chunks']:
                 lineiter = iter(chunk)
                 try:
                     while 1:
                         line = lineiter.next()
                         if line['action'] not in ['unmod', 'context']:
                             # a changed line is only highlighted against the
                             # line right after it, and only when that line
                             # is a change of a different kind
                             nextline = lineiter.next()
                             if nextline['action'] in ['unmod', 'context'] or \
                                nextline['action'] == line['action']:
                                 continue
                             self.differ(line, nextline)
                 except StopIteration:
                     pass
         return diff_container(sorted(_files, key=sorter))
     def _parse_udiff(self, inline_diff=True):
         """
         Placeholder parser for the udiff format; always raises
         ``NotImplementedError``. ``__init__`` installs it as
         ``self._parser`` whenever ``format`` is not ``'gitdiff'``.
         """
         raise NotImplementedError()
     def _parse_lines(self, diff):
         """
         Parse the diff an return data for the template.
         """
         lineiter = iter(diff)
         stats = [0, 0]
         try:
             chunks = []
             line = lineiter.next()
             while line:
                 lines = []
                 chunks.append(lines)
                 match = self._chunk_re.match(line)
                 if not match:
                     break
                 gr = match.groups()
                 (old_line, old_end,
                  new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                 old_line -= 1
                 new_line -= 1
                 context = len(gr) == 5
                 old_end += old_line
                 new_end += new_line
                 if context:
                     # skip context only if it's first line
                     if int(gr[0]) > 1:
                         lines.append({
                             'old_lineno': '...',
                             'new_lineno': '...',
                             'action':     'context',
                             'line':       line,
                         })
                 line = lineiter.next()
                 while old_line < old_end or new_line < new_end:
                     command = ' '
                     if line:
                         command = line[0]
                     affects_old = affects_new = False
                     # ignore those if we don't expect them
                     if command in '#@':
                         continue
                     elif command == '+':
                         affects_new = True
                         action = 'add'
                         stats[0] += 1
                     elif command == '-':
                         affects_old = True
                         action = 'del'
                         stats[1] += 1
                     else:
                         affects_old = affects_new = True
                         action = 'unmod'
                     if not self._newline_marker.match(line):
                         old_line += affects_old
                         new_line += affects_new
                         lines.append({
                             'old_lineno':   affects_old and old_line or '',
                             'new_lineno':   affects_new and new_line or '',
                             'action':       action,
                             'line':         self._clean_line(line, command)
                         })
                     line = lineiter.next()
                     if self._newline_marker.match(line):
                         # we need to append to lines, since this is not
                         # counted in the line specs of diff
                         lines.append({
                             'old_lineno':   '...',
                             'new_lineno':   '...',
                             'action':       'context',
                             'line':         self._clean_line(line, command)
                         })
         except StopIteration:
             pass
         return chunks, stats
     def _safe_id(self, idstring):
         """Make a string safe for including in an id attribute.
         The HTML spec says that id attributes 'must begin with
         a letter ([A-Za-z]) and may be followed by any number
         of letters, digits ([0-9]), hyphens ("-"), underscores
         ("_"), colons (":"), and periods (".")'. These regexps
         are slightly over-zealous, in that they remove colons
         and periods unnecessarily.
         Whitespace is transformed into underscores, and then
         anything which is not a hyphen or a character that
         matches \w (alphanumerics and underscore) is removed.
         """
         # Transform all whitespace to underscore
         idstring = re.sub(r'\s', "_", '%s' % idstring)
         # Remove everything that is not a hyphen or a member of \w
         idstring = re.sub(r'(?!-)\W', "", idstring).lower()
         return idstring
     def prepare(self, inline_diff=True):
         """
         Run the parser chosen in ``__init__`` and cache what it returns.

         Sets ``self.parsed`` to True and stores the parser's result in
         ``self.parsed_diff``.

         :param inline_diff: forwarded to the parser
         :returns: the parser's result
         """
         result = self._parser(inline_diff=inline_diff)
         self.parsed, self.parsed_diff = True, result
         return result
     def as_raw(self, diff_lines=None):
         """
         Return the diff text exactly as it was passed to the constructor.

         :param diff_lines: accepted but not used
         """
         raw_diff = self._diff
         return raw_diff
     def as_html(self, table_class='code-difftable', line_class='line',
                 old_lineno_class='lineno old', new_lineno_class='lineno new',
                 code_class='code', enable_comments=False, parsed_lines=None):
         """
         Return given diff as html table with customized css classes
         """
         def _link_to_if(condition, label, url):
             """
             Generates a link if condition is meet or just the label if not.
             """
             if condition:
                 return '''<a href="%(url)s">%(label)s</a>''' % {
                     'url': url,
                     'label': label
+                }
             else:
                 return label
         if not self.parsed:
             self.prepare()
         diff_lines = self.parsed_diff
         if parsed_lines:
             diff_lines = parsed_lines
         _html_empty = True
         _html = []

0 comments (0 inline, 0 general)