Changeset - 9cdb81c7b855
[Not reviewed]
default
0 1 0
Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com
diffs: extract _parse_lines as a pure function
1 file changed with 102 insertions and 100 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -259,26 +259,24 @@ RENAMED_FILENODE = 4
 
COPIED_FILENODE = 5
 
CHMOD_FILENODE = 6
 
BIN_FILENODE = 7
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _diff_git_re = re.compile('^diff --git', re.MULTILINE)
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
@@ -355,25 +353,25 @@ class DiffProcessor(object):
 
                        op = 'D'
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                chunks, added, deleted = self._parse_lines(diff_lines)
 
                chunks, added, deleted = _parse_lines(diff_lines)
 
                stats['binary'] = False
 
                stats['added'] = added
 
                stats['deleted'] = deleted
 
                # explicit mark that it's a modified file
 
                if op == 'M':
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 
            else:  # Git binary patch (or empty diff)
 
                # Git binary patch
 
                if head['bin_patch']:
 
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
 
                chunks = []
 

	
 
@@ -427,121 +425,24 @@ class DiffProcessor(object):
 
                        except StopIteration:
 
                            # add was last line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                            raise
 
                        if peekline['action'] != 'add':
 
                            # there was only one add line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                except StopIteration:
 
                    pass
 

	
 
        return _files
 

	
 
    def _parse_lines(self, diff_lines):
 
        """
 
        Given an iterator of diff body lines, parse them and return a dict per
 
        line and added/removed totals.
 
        """
 
        added = deleted = 0
 
        old_line = old_end = new_line = new_end = None
 

	
 
        try:
 
            chunks = []
 
            line = diff_lines.next()
 

	
 
            while True:
 
                lines = []
 
                chunks.append(lines)
 

	
 
                match = self._chunk_re.match(line)
 

	
 
                if not match:
 
                    raise Exception('error parsing diff @@ line %r' % line)
 

	
 
                gr = match.groups()
 
                (old_line, old_end,
 
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
                old_line -= 1
 
                new_line -= 1
 

	
 
                context = len(gr) == 5
 
                old_end += old_line
 
                new_end += new_line
 

	
 
                if context:
 
                    # skip context only if it's first line
 
                    if int(gr[0]) > 1:
 
                        lines.append({
 
                            'old_lineno': '...',
 
                            'new_lineno': '...',
 
                            'action':     'context',
 
                            'line':       line,
 
                        })
 

	
 
                line = diff_lines.next()
 

	
 
                while old_line < old_end or new_line < new_end:
 
                    if not line:
 
                        raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                    affects_old = affects_new = False
 

	
 
                    command = line[0]
 
                    if command == '+':
 
                        affects_new = True
 
                        action = 'add'
 
                        added += 1
 
                    elif command == '-':
 
                        affects_old = True
 
                        action = 'del'
 
                        deleted += 1
 
                    elif command == ' ':
 
                        affects_old = affects_new = True
 
                        action = 'unmod'
 
                    else:
 
                        raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                    if not self._newline_marker.match(line):
 
                        old_line += affects_old
 
                        new_line += affects_new
 
                        lines.append({
 
                            'old_lineno':   affects_old and old_line or '',
 
                            'new_lineno':   affects_new and new_line or '',
 
                            'action':       action,
 
                            'line':         line[1:],
 
                        })
 

	
 
                    line = diff_lines.next()
 

	
 
                    if self._newline_marker.match(line):
 
                        # we need to append to lines, since this is not
 
                        # counted in the line specs of diff
 
                        lines.append({
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         line,
 
                        })
 
                        line = diff_lines.next()
 
                if old_line > old_end:
 
                    raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
                if new_line > new_end:
 
                    raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
        except StopIteration:
 
            pass
 
        if old_line != old_end or new_line != new_end:
 
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
        return chunks, added, deleted
 

	
 
    def stat(self):
 
        """
 
        Returns tuple of added, and removed lines for this instance
 
        """
 
        return self.adds, self.removes
 

	
 

	
 
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 

	
 
def _escaper(string):
 
    """
 
@@ -621,24 +522,125 @@ def _get_header(vcs, diff_chunk):
 
    elif vcs == 'hg':
 
        match = _hg_header_re.match(diff_chunk)
 
    if match is None:
 
        raise Exception('diff not recognized as valid %s diff' % vcs)
 
    meta_info = match.groupdict()
 
    rest = diff_chunk[match.end():]
 
    if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000]))
 
    diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
    return meta_info, diff_lines
 

	
 

	
 
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
_newline_marker = re.compile(r'^\\ No newline at end of file')
 

	
 

	
 
def _parse_lines(diff_lines):
 
    """
 
    Given an iterator of diff body lines, parse them and return a dict per
 
    line and added/removed totals.
 
    """
 
    added = deleted = 0
 
    old_line = old_end = new_line = new_end = None
 

	
 
    try:
 
        chunks = []
 
        line = diff_lines.next()
 

	
 
        while True:
 
            lines = []
 
            chunks.append(lines)
 

	
 
            match = _chunk_re.match(line)
 

	
 
            if not match:
 
                raise Exception('error parsing diff @@ line %r' % line)
 

	
 
            gr = match.groups()
 
            (old_line, old_end,
 
             new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
            old_line -= 1
 
            new_line -= 1
 

	
 
            context = len(gr) == 5
 
            old_end += old_line
 
            new_end += new_line
 

	
 
            if context:
 
                # skip context only if it's first line
 
                if int(gr[0]) > 1:
 
                    lines.append({
 
                        'old_lineno': '...',
 
                        'new_lineno': '...',
 
                        'action':     'context',
 
                        'line':       line,
 
                    })
 

	
 
            line = diff_lines.next()
 

	
 
            while old_line < old_end or new_line < new_end:
 
                if not line:
 
                    raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                affects_old = affects_new = False
 

	
 
                command = line[0]
 
                if command == '+':
 
                    affects_new = True
 
                    action = 'add'
 
                    added += 1
 
                elif command == '-':
 
                    affects_old = True
 
                    action = 'del'
 
                    deleted += 1
 
                elif command == ' ':
 
                    affects_old = affects_new = True
 
                    action = 'unmod'
 
                else:
 
                    raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                if not _newline_marker.match(line):
 
                    old_line += affects_old
 
                    new_line += affects_new
 
                    lines.append({
 
                        'old_lineno':   affects_old and old_line or '',
 
                        'new_lineno':   affects_new and new_line or '',
 
                        'action':       action,
 
                        'line':         line[1:],
 
                    })
 

	
 
                line = diff_lines.next()
 

	
 
                if _newline_marker.match(line):
 
                    # we need to append to lines, since this is not
 
                    # counted in the line specs of diff
 
                    lines.append({
 
                        'old_lineno':   '...',
 
                        'new_lineno':   '...',
 
                        'action':       'context',
 
                        'line':         line,
 
                    })
 
                    line = diff_lines.next()
 
            if old_line > old_end:
 
                raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
            if new_line > new_end:
 
                raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
    except StopIteration:
 
        pass
 
    if old_line != old_end or new_line != new_end:
 
        raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
    return chunks, added, deleted
 

	
 
# Used for inline highlighter word split, must match the substitutions in _escaper
 
_token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 

	
 
def _highlight_inline_diff(old, new):
 
    """
 
    Highlight simple add/remove in two lines given as info dicts. They are
 
    modified in place and given markup with <del>/<ins>.
 
    """
 
    assert old['action'] == 'del'
 
    assert new['action'] == 'add'
 

	
0 comments (0 inline, 0 general)