Changeset - 1886705c4a8c
[Not reviewed]
default
0 1 0
Mads Kiilerich - 6 years ago 2019-11-25 01:48:22
mads@kiilerich.com
Grafted from: 60ff536e2c95
py3: use memoryview ... almost, but not entirely similar to buffer
1 file changed with 2 insertions and 2 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -103,408 +103,408 @@ def as_html(table_class='code-difftable'
 
                    'filename': _safe_id(diff['filename']),
 
                    'oldline_no': change['old_lineno']
 
                }
 
                anchor_new = "%(filename)s_n%(oldline_no)s" % {
 
                    'filename': _safe_id(diff['filename']),
 
                    'oldline_no': change['new_lineno']
 
                }
 
                cond_old = (change['old_lineno'] != '...' and
 
                            change['old_lineno'])
 
                cond_new = (change['new_lineno'] != '...' and
 
                            change['new_lineno'])
 
                no_lineno = (change['old_lineno'] == '...' and
 
                             change['new_lineno'] == '...')
 
                if cond_old:
 
                    anchor_old_id = 'id="%s"' % anchor_old
 
                if cond_new:
 
                    anchor_new_id = 'id="%s"' % anchor_new
 
                ###########################################################
 
                # OLD LINE NUMBER
 
                ###########################################################
 
                _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
 
                    'a_id': anchor_old_id,
 
                    'olc': no_lineno_class if no_lineno else old_lineno_class,
 
                    'colspan': 'colspan="2"' if no_lineno else ''
 
                })
 

	
 
                _html.append('''%(link)s''' % {
 
                    'link': _link_to_if(not no_lineno, change['old_lineno'],
 
                                        '#%s' % anchor_old)
 
                })
 
                _html.append('''</td>\n''')
 
                ###########################################################
 
                # NEW LINE NUMBER
 
                ###########################################################
 

	
 
                if not no_lineno:
 
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
 
                        'a_id': anchor_new_id,
 
                        'nlc': new_lineno_class
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(True, change['new_lineno'],
 
                                            '#%s' % anchor_new)
 
                    })
 
                    _html.append('''</td>\n''')
 
                ###########################################################
 
                # CODE
 
                ###########################################################
 
                comments = '' if enable_comments else 'no-comment'
 
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
 
                    'cc': code_class,
 
                    'inc': comments
 
                })
 
                _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
 
                    'code': change['line']
 
                })
 

	
 
                _html.append('''\t</td>''')
 
                _html.append('''\n</tr>\n''')
 
    _html.append('''</table>''')
 
    if _html_empty:
 
        return None
 
    return ''.join(_html)
 

	
 

	
 
def wrap_to_table(html):
 
    """Given a string with html, return it wrapped in a table, similar to what
 
    DiffProcessor returns."""
 
    return '''\
 
              <table class="code-difftable">
 
                <tr class="line no-comment">
 
                <td class="lineno new"></td>
 
                <td class="code no-comment"><pre>%s</pre></td>
 
                </tr>
 
              </table>''' % html
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
 
                ignore_whitespace=True, line_context=3,
 
                enable_comments=False):
 
    """
 
    Returns a file diff wrapped into a table.
 
    Checks for diff_limit and presents a message if the diff is too big.
 
    """
 
    if filenode_old is None:
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    op = None
 
    a_path = filenode_old.path # default, might be overriden by actual rename in diff
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        html_diff = wrap_to_table(_('Binary file'))
 
        stats = (0, 0)
 

	
 
    elif diff_limit != -1 and (
 
            diff_limit is None or
 
            (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
 

	
 
        raw_diff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(raw_diff)
 
        if diff_processor.parsed: # there should be exactly one element, for the specified file
 
            f = diff_processor.parsed[0]
 
            op = f['operation']
 
            a_path = f['old_filename']
 

	
 
        html_diff = as_html(parsed_lines=diff_processor.parsed, enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 

	
 
    else:
 
        html_diff = wrap_to_table(_('Changeset was too big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 

	
 
    if not html_diff:
 
        submodules = [o for o in [filenode_new, filenode_old] if isinstance(o, SubModuleNode)]
 
        if submodules:
 
            html_diff = wrap_to_table(h.escape('Submodule %r' % submodules[0]))
 
        else:
 
            html_diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
    return cs1, cs2, a_path, html_diff, stats, op
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = [o for o in [filenode_new, filenode_old] if isinstance(o, SubModuleNode)]
 
    if submodules:
 
        return b''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = get_diff(repo, old_raw_id, new_raw_id, filenode_new.path,
 
                           ignore_whitespace, context)
 
    return vcs_gitdiff
 

	
 

	
 
def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
 
    """
 
    A thin wrapper around vcs lib get_diff.
 
    """
 
    try:
 
        return scm_instance.get_diff(rev1, rev2, path=path,
 
                                     ignore_whitespace=ignore_whitespace, context=context)
 
    except MemoryError:
 
        h.flash('MemoryError: Diff is too big', category='error')
 
        return b''
 

	
 

	
 
NEW_FILENODE = 1
 
DEL_FILENODE = 2
 
MOD_FILENODE = 3
 
RENAMED_FILENODE = 4
 
COPIED_FILENODE = 5
 
CHMOD_FILENODE = 6
 
BIN_FILENODE = 7
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _diff_git_re = re.compile(b'^diff --git', re.MULTILINE)
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, bytes):
 
            raise Exception('Diff must be bytes - got %s' % type(diff))
 

	
 
        self._diff = diff
 
        self._diff = memoryview(diff)
 
        self.adds = 0
 
        self.removes = 0
 
        self.diff_limit = diff_limit
 
        self.limited_diff = False
 
        self.vcs = vcs
 
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 

	
 
    def _parse_gitdiff(self, inline_diff):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        Might set limited_diff.
 
        Optionally, do an extra pass and to extra markup of one-liner changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            if self.diff_limit and end > self.diff_limit:
 
                self.limited_diff = True
 
                continue
 

	
 
            head, diff_lines = _get_header(self.vcs, buffer(self._diff, start, end - start))
 
            head, diff_lines = _get_header(self.vcs, self._diff[start:end])
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
                'ops': {},
 
            }
 

	
 
            if head['deleted_file_mode']:
 
                op = 'removed'
 
                stats['binary'] = True
 
                stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
            elif head['new_file_mode']:
 
                op = 'added'
 
                stats['binary'] = True
 
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
 
            else:  # modify operation, can be cp, rename, chmod
 
                # CHMOD
 
                if head['new_mode'] and head['old_mode']:
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
 
                                        % (head['old_mode'], head['new_mode']))
 
                # RENAME
 
                if (head['rename_from'] and head['rename_to']
 
                      and head['rename_from'] != head['rename_to']):
 
                    op = 'renamed'
 
                    stats['binary'] = True
 
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
 
                                    % (head['rename_from'], head['rename_to']))
 
                # COPY
 
                if head.get('copy_from') and head.get('copy_to'):
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
 
                                        % (head['copy_from'], head['copy_to']))
 
                # FALL BACK: detect missed old style add or remove
 
                if op is None:
 
                    if not head['a_file'] and head['b_file']:
 
                        op = 'added'
 
                        stats['binary'] = True
 
                        stats['ops'][NEW_FILENODE] = 'new file'
 

	
 
                    elif head['a_file'] and not head['b_file']:
 
                        op = 'removed'
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                chunks, added, deleted = _parse_lines(diff_lines)
 
                stats['binary'] = False
 
                stats['added'] = added
 
                stats['deleted'] = deleted
 
                # explicit mark that it's a modified file
 
                if op == 'modified':
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 
            else:  # Git binary patch (or empty diff)
 
                # Git binary patch
 
                if head['bin_patch']:
 
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
 
                chunks = []
 

	
 
            if op == 'removed' and chunks:
 
                # a way of seeing deleted content could perhaps be nice - but
 
                # not with the current UI
 
                chunks = []
 

	
 
            chunks.insert(0, [{
 
                'old_lineno': '',
 
                'new_lineno': '',
 
                'action':     'context',
 
                'line':       msg,
 
                } for _op, msg in stats['ops'].iteritems()
 
                  if _op not in [MOD_FILENODE]])
 

	
 
            _files.append({
 
                'old_filename':     head['a_path'],
 
                'filename':         head['b_path'],
 
                'old_revision':     head['a_blob_id'],
 
                'new_revision':     head['b_blob_id'],
 
                'chunks':           chunks,
 
                'operation':        op,
 
                'stats':            stats,
 
            })
 

	
 
        if not inline_diff:
 
            return _files
 

	
 
        # highlight inline changes when one del is followed by one add
 
        for diff_data in _files:
 
            for chunk in diff_data['chunks']:
 
                lineiter = iter(chunk)
 
                try:
 
                    peekline = next(lineiter)
 
                    while True:
 
                        # find a first del line
 
                        while peekline['action'] != 'del':
 
                            peekline = next(lineiter)
 
                        delline = peekline
 
                        peekline = next(lineiter)
 
                        # if not followed by add, eat all following del lines
 
                        if peekline['action'] != 'add':
 
                            while peekline['action'] == 'del':
 
                                peekline = next(lineiter)
 
                            continue
 
                        # found an add - make sure it is the only one
 
                        addline = peekline
 
                        try:
 
                            peekline = next(lineiter)
 
                        except StopIteration:
 
                            # add was last line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                            raise
 
                        if peekline['action'] != 'add':
 
                            # there was only one add line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                except StopIteration:
 
                    pass
 

	
 
        return _files
 

	
 
    def stat(self):
 
        """
 
        Returns tuple of added, and removed lines for this instance
 
        """
 
        return self.adds, self.removes
 

	
 

	
 
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 

	
 
def _escaper(string):
 
    """
 
    Do HTML escaping/markup
 
    """
 

	
 
    def substitute(m):
 
        groups = m.groups()
 
        if groups[0]:
 
            return '&amp;'
 
        if groups[1]:
 
            return '&lt;'
 
        if groups[2]:
 
            return '&gt;'
 
        if groups[3]:
 
            return '<u>\t</u>'
 
        if groups[4]:
 
            return '<u class="cr"></u>'
 
        if groups[5]:
 
            return ' <i></i>'
 
        assert False
 

	
 
    return _escape_re.sub(substitute, safe_unicode(string))
 

	
 

	
 
_git_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
       ^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_hg_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
    (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
       ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
0 comments (0 inline, 0 general)