Changeset - e708b26819cd
[Not reviewed]
default
0 1 0
Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com
diffs: move _escaper to a pure function

It used to count diff size - now it is a clean function.
1 file changed with 28 insertions and 25 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -284,115 +284,91 @@ class DiffProcessor(object):
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
           ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
        self.removes = 0
 
        self.diff_limit = diff_limit
 
        self.limited_diff = False
 
        self.vcs = vcs
 
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 

	
 
    def _escaper(self, string):
 
        """
 
        Do HTML escaping/markup
 
        """
 
        def substitute(m):
 
            groups = m.groups()
 
            if groups[0]:
 
                return '&amp;'
 
            if groups[1]:
 
                return '&lt;'
 
            if groups[2]:
 
                return '&gt;'
 
            if groups[3]:
 
                return '<u>\t</u>'
 
            if groups[4]:
 
                return '<u class="cr"></u>'
 
            if groups[5]:
 
                return ' <i></i>'
 
            assert False
 

	
 
        return self._escape_re.sub(substitute, safe_unicode(string))
 

	
 
    def _get_header(self, diff_chunk):
 
        """
 
        Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 

	
 
        1. A dict with meta info:
 

	
 
            a_path, b_path, similarity_index, rename_from, rename_to,
 
            old_mode, new_mode, new_file_mode, deleted_file_mode,
 
            a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
        2. An iterator yielding lines with simple HTML markup.
 
        """
 
        match = None
 
        if self.vcs == 'git':
 
            match = self._git_header_re.match(diff_chunk)
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
        if match is None:
 
            raise Exception('diff not recognized as valid %s diff' % self.vcs)
 
        meta_info = match.groupdict()
 
        rest = diff_chunk[match.end():]
 
        if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
            raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
 
        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        return meta_info, diff_lines
 

	
 
    def _parse_gitdiff(self, inline_diff):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        Might set limited_diff.
 
        Optionally, do an extra pass and to extra markup of one-liner changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            if self.diff_limit and end > self.diff_limit:
 
                self.limited_diff = True
 
                continue
 

	
 
            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
@@ -603,48 +579,75 @@ class DiffProcessor(object):
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         line,
 
                        })
 
                        line = diff_lines.next()
 
                if old_line > old_end:
 
                    raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
                if new_line > new_end:
 
                    raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
        except StopIteration:
 
            pass
 
        if old_line != old_end or new_line != new_end:
 
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
        return chunks, added, deleted
 

	
 
    def stat(self):
 
        """
 
        Returns tuple of added, and removed lines for this instance
 
        """
 
        return self.adds, self.removes
 

	
 

	
 
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 

	
 
def _escaper(string):
 
    """
 
    Do HTML escaping/markup
 
    """
 

	
 
    def substitute(m):
 
        groups = m.groups()
 
        if groups[0]:
 
            return '&amp;'
 
        if groups[1]:
 
            return '&lt;'
 
        if groups[2]:
 
            return '&gt;'
 
        if groups[3]:
 
            return '<u>\t</u>'
 
        if groups[4]:
 
            return '<u class="cr"></u>'
 
        if groups[5]:
 
            return ' <i></i>'
 
        assert False
 

	
 
    return _escape_re.sub(substitute, safe_unicode(string))
 

	
 

	
 
# Used for inline highlighter word split, must match the substitutions in _escaper
 
_token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 

	
 
def _highlight_inline_diff(old, new):
 
    """
 
    Highlight simple add/remove in two lines given as info dicts. They are
 
    modified in place and given markup with <del>/<ins>.
 
    """
 
    assert old['action'] == 'del'
 
    assert new['action'] == 'add'
 

	
 
    oldwords = _token_re.split(old['line'])
 
    newwords = _token_re.split(new['line'])
 
    sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
    oldfragments, newfragments = [], []
 
    for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
        oldfrag = ''.join(oldwords[i1:i2])
 
        newfrag = ''.join(newwords[j1:j2])
 
        if tag != 'equal':
 
            if oldfrag:
 
                oldfrag = '<del>%s</del>' % oldfrag
 
            if newfrag:
0 comments (0 inline, 0 general)