Changeset - 60dfc369df1d
[Not reviewed]
codereview
0 1 0
Marcin Kuzminski - 13 years ago 2012-06-09 18:10:59
marcin@python-works.com
Improved cross-repo diff using bundlerepo
1 file changed with 4 insertions and 1 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/diffs.py
Show inline comments
 
@@ -236,389 +236,392 @@ class DiffProcessor(object):
 

	
 
        if line['action'] == 'del':
 
            old, new = line, next_
 
        else:
 
            old, new = next_, line
 

	
 
        oldwords = re.split(r'(\W)', old['line'])
 
        newwords = re.split(r'(\W)', new['line'])
 

	
 
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
        oldfragments, newfragments = [], []
 
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
            oldfrag = ''.join(oldwords[i1:i2])
 
            newfrag = ''.join(newwords[j1:j2])
 
            if tag != 'equal':
 
                if oldfrag:
 
                    oldfrag = '<del>%s</del>' % oldfrag
 
                if newfrag:
 
                    newfrag = '<ins>%s</ins>' % newfrag
 
            oldfragments.append(oldfrag)
 
            newfragments.append(newfrag)
 

	
 
        old['line'] = "".join(oldfragments)
 
        new['line'] = "".join(newfragments)
 

	
 
    def _highlight_line_udiff(self, line, next_):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 
        start = 0
 
        limit = min(len(line['line']), len(next_['line']))
 
        while start < limit and line['line'][start] == next_['line'][start]:
 
            start += 1
 
        end = -1
 
        limit -= start
 
        while -end <= limit and line['line'][end] == next_['line'][end]:
 
            end -= 1
 
        end += 1
 
        if start or end:
 
            def do(l):
 
                last = end + len(l['line'])
 
                if l['action'] == 'add':
 
                    tag = 'ins'
 
                else:
 
                    tag = 'del'
 
                l['line'] = '%s<%s>%s</%s>%s' % (
 
                    l['line'][:start],
 
                    tag,
 
                    l['line'][start:last],
 
                    tag,
 
                    l['line'][last:]
 
                )
 
            do(line)
 
            do(next_)
 

	
 
    def _parse_udiff(self, inline_diff=True):
 
        """
 
        Parse the diff an return data for the template.
 
        """
 
        lineiter = self.lines
 
        files = []
 
        try:
 
            line = lineiter.next()
 
            while 1:
 
                # continue until we found the old file
 
                if not line.startswith('--- '):
 
                    line = lineiter.next()
 
                    continue
 

	
 
                chunks = []
 
                stats = [0, 0]
 
                operation, filename, old_rev, new_rev = \
 
                    self._extract_rev(line, lineiter.next())
 
                files.append({
 
                    'filename':         filename,
 
                    'old_revision':     old_rev,
 
                    'new_revision':     new_rev,
 
                    'chunks':           chunks,
 
                    'operation':        operation,
 
                    'stats':            stats,
 
                })
 

	
 
                line = lineiter.next()
 
                while line:
 
                    match = self._chunk_re.match(line)
 
                    if not match:
 
                        break
 

	
 
                    lines = []
 
                    chunks.append(lines)
 

	
 
                    old_line, old_end, new_line, new_end = \
 
                        [int(x or 1) for x in match.groups()[:-1]]
 
                    old_line -= 1
 
                    new_line -= 1
 
                    gr = match.groups()
 
                    context = len(gr) == 5
 
                    old_end += old_line
 
                    new_end += new_line
 

	
 
                    if context:
 
                        # skip context only if it's first line
 
                        if int(gr[0]) > 1:
 
                            lines.append({
 
                                'old_lineno': '...',
 
                                'new_lineno': '...',
 
                                'action':     'context',
 
                                'line':       line,
 
                            })
 

	
 
                    line = lineiter.next()
 
                    while old_line < old_end or new_line < new_end:
 
                        if line:
 
                            command, line = line[0], line[1:]
 
                        else:
 
                            command = ' '
 
                        affects_old = affects_new = False
 

	
 
                        # ignore those if we don't expect them
 
                        if command in '#@':
 
                            continue
 
                        elif command == '+':
 
                            affects_new = True
 
                            action = 'add'
 
                            stats[0] += 1
 
                        elif command == '-':
 
                            affects_old = True
 
                            action = 'del'
 
                            stats[1] += 1
 
                        else:
 
                            affects_old = affects_new = True
 
                            action = 'unmod'
 

	
 
                        if line.find('No newline at end of file') != -1:
 
                            lines.append({
 
                                'old_lineno':   '...',
 
                                'new_lineno':   '...',
 
                                'action':       'context',
 
                                'line':         line
 
                            })
 

	
 
                        else:
 
                            old_line += affects_old
 
                            new_line += affects_new
 
                            lines.append({
 
                                'old_lineno':   affects_old and old_line or '',
 
                                'new_lineno':   affects_new and new_line or '',
 
                                'action':       action,
 
                                'line':         line
 
                            })
 

	
 
                        line = lineiter.next()
 

	
 
        except StopIteration:
 
            pass
 

	
 
        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
 
        if inline_diff is False:
 
            return sorted(files, key=sorter)
 

	
 
        # highlight inline changes
 
        for diff_data in files:
 
            for chunk in diff_data['chunks']:
 
                lineiter = iter(chunk)
 
                try:
 
                    while 1:
 
                        line = lineiter.next()
 
                        if line['action'] != 'unmod':
 
                            nextline = lineiter.next()
 
                            if nextline['action'] in ['unmod', 'context'] or \
 
                               nextline['action'] == line['action']:
 
                                continue
 
                            self.differ(line, nextline)
 
                except StopIteration:
 
                    pass
 

	
 
        return sorted(files, key=sorter)
 

	
 
    def prepare(self, inline_diff=True):
 
        """
 
        Prepare the passed udiff for HTML rendering. It'l return a list
 
        of dicts
 
        """
 
        return self._parse_udiff(inline_diff=inline_diff)
 

	
 
    def _safe_id(self, idstring):
 
        """Make a string safe for including in an id attribute.
 

	
 
        The HTML spec says that id attributes 'must begin with
 
        a letter ([A-Za-z]) and may be followed by any number
 
        of letters, digits ([0-9]), hyphens ("-"), underscores
 
        ("_"), colons (":"), and periods (".")'. These regexps
 
        are slightly over-zealous, in that they remove colons
 
        and periods unnecessarily.
 

	
 
        Whitespace is transformed into underscores, and then
 
        anything which is not a hyphen or a character that
 
        matches \w (alphanumerics and underscore) is removed.
 

	
 
        """
 
        # Transform all whitespace to underscore
 
        idstring = re.sub(r'\s', "_", '%s' % idstring)
 
        # Remove everything that is not a hyphen or a member of \w
 
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
        return idstring
 

	
 
    def raw_diff(self):
 
        """
 
        Returns raw string as udiff
 
        """
 
        udiff_copy = self.copy_iterator()
 
        if self.__format == 'gitdiff':
 
            udiff_copy = self._parse_gitdiff(udiff_copy)
 
        return u''.join(udiff_copy)
 

	
 
    def as_html(self, table_class='code-difftable', line_class='line',
 
                new_lineno_class='lineno old', old_lineno_class='lineno new',
 
                code_class='code', enable_comments=False, diff_lines=None):
 
        """
 
        Return given diff as html table with customized css classes
 
        """
 
        def _link_to_if(condition, label, url):
 
            """
 
            Generates a link if condition is meet or just the label if not.
 
            """
 

	
 
            if condition:
 
                return '''<a href="%(url)s">%(label)s</a>''' % {
 
                    'url': url,
 
                    'label': label
 
                }
 
            else:
 
                return label
 
        if diff_lines is None:
 
            diff_lines = self.prepare()
 
        _html_empty = True
 
        _html = []
 
        _html.append('''<table class="%(table_class)s">\n''' % {
 
            'table_class': table_class
 
        })
 
        for diff in diff_lines:
 
            for line in diff['chunks']:
 
                _html_empty = False
 
                for change in line:
 
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
 
                        'lc': line_class,
 
                        'action': change['action']
 
                    })
 
                    anchor_old_id = ''
 
                    anchor_new_id = ''
 
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['old_lineno']
 
                    }
 
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['new_lineno']
 
                    }
 
                    cond_old = (change['old_lineno'] != '...' and
 
                                change['old_lineno'])
 
                    cond_new = (change['new_lineno'] != '...' and
 
                                change['new_lineno'])
 
                    if cond_old:
 
                        anchor_old_id = 'id="%s"' % anchor_old
 
                    if cond_new:
 
                        anchor_new_id = 'id="%s"' % anchor_new
 
                    ###########################################################
 
                    # OLD LINE NUMBER
 
                    ###########################################################
 
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
 
                        'a_id': anchor_old_id,
 
                        'olc': old_lineno_class
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(True, change['old_lineno'],
 
                                            '#%s' % anchor_old)
 
                    })
 
                    _html.append('''</td>\n''')
 
                    ###########################################################
 
                    # NEW LINE NUMBER
 
                    ###########################################################
 

	
 
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
 
                        'a_id': anchor_new_id,
 
                        'nlc': new_lineno_class
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(True, change['new_lineno'],
 
                                            '#%s' % anchor_new)
 
                    })
 
                    _html.append('''</td>\n''')
 
                    ###########################################################
 
                    # CODE
 
                    ###########################################################
 
                    comments = '' if enable_comments else 'no-comment'
 
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
 
                        'cc': code_class,
 
                        'inc': comments
 
                    })
 
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
 
                        'code': change['line']
 
                    })
 
                    _html.append('''\t</td>''')
 
                    _html.append('''\n</tr>\n''')
 
        _html.append('''</table>''')
 
        if _html_empty:
 
            return None
 
        return ''.join(_html)
 

	
 
    def stat(self):
 
        """
 
        Returns tuple of added, and removed lines for this instance
 
        """
 
        return self.adds, self.removes
 

	
 

	
 
class InMemoryBundleRepo(bundlerepository):
 
    def __init__(self, ui, path, bundlestream):
 
        self._tempparent = None
 
        localrepo.localrepository.__init__(self, ui, path)
 
        self.ui.setconfig('phases', 'publish', False)
 

	
 
        self.bundle = bundlestream
 

	
 
        # dict with the mapping 'filename' -> position in the bundle
 
        self.bundlefilespos = {}
 

	
 

	
 
def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
 
    """
 
    General differ between branches, bookmarks or separate but releated 
 
    repositories
 

	
 
    :param org_repo:
 
    :type org_repo:
 
    :param org_ref:
 
    :type org_ref:
 
    :param other_repo:
 
    :type other_repo:
 
    :param other_ref:
 
    :type other_ref:
 
    """
 

	
 
    bundlerepo = None
 
    ignore_whitespace = False
 
    context = 3
 
    org_repo = org_repo.scm_instance._repo
 
    other_repo = other_repo.scm_instance._repo
 
    opts = diffopts(git=True, ignorews=ignore_whitespace, context=context)
 
    org_ref = org_ref[1]
 
    other_ref = other_ref[1]
 

	
 
    if org_repo != other_repo:
 

	
 
        common, incoming, rheads = discovery_data
 

	
 
        # create a bundle (uncompressed if other repo is not local)
 
        if other_repo.capable('getbundle') and incoming:
 
            # disable repo hooks here since it's just bundle !
 
            # patch and reset hooks section of UI config to not run any
 
            # hooks on fetching archives with subrepos
 
            for k, _ in other_repo.ui.configitems('hooks'):
 
                other_repo.ui.setconfig('hooks', k, None)
 

	
 
            unbundle = other_repo.getbundle('incoming', common=common,
 
                                            heads=rheads)
 

	
 
            buf = io.BytesIO()
 
            while True:
 
                chunk = unbundle._stream.read(1024 * 4)
 
                if not chunk:
 
                    break
 
                buf.write(chunk)
 

	
 
            buf.seek(0)
 
            # replace chunked _stream with data that can do tell() and seek()
 
            unbundle._stream = buf
 

	
 
            ui = make_ui('db')
 
            bundlerepo = InMemoryBundleRepo(ui, path=org_repo.root,
 
                                            bundlestream=unbundle)
 
        return ''.join(patch.diff(bundlerepo or org_repo, node2=other_ref,
 

	
 
        return ''.join(patch.diff(bundlerepo or org_repo,
 
                                  node1=org_repo[org_ref].node(),
 
                                  node2=other_repo[other_ref].node(),
 
                                  opts=opts))
 
    else:
 
        return ''.join(patch.diff(org_repo, node1=org_ref, node2=other_ref,
 
                                  opts=opts))
0 comments (0 inline, 0 general)