# -*- coding: utf-8 -*-
"""
    rhodecode.lib.diffs
    ~~~~~~~~~~~~~~~~~~~

    Set of diffing helpers, previously part of vcs


    :created_on: Dec 4, 2011
    :author: marcink
    :copyright: (C) 2009-2011 Marcin Kuzminski
    :original copyright: 2007-2008 by Armin Ronacher
    :license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import re
import difflib

from itertools import tee, imap

from mercurial.match import match

from vcs.exceptions import VCSError
from vcs.nodes import FileNode


def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True):
    """
    Returns git style diff between given ``filenode_old`` and
    ``filenode_new``.

    :param filenode_old: ``FileNode`` with the old version of the file
    :param filenode_new: ``FileNode`` with the new version of the file
    :param ignore_whitespace: ignore whitespaces in diff
    :raises VCSError: if any of the given nodes is not a ``FileNode``
    """
    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    # a changeset without ``raw_id`` is represented by the all-zero id
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', '0' * 40)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', '0' * 40)

    repo = filenode_new.changeset.repository
    return repo._get_diff(old_raw_id, new_raw_id, filenode_new.path,
                          ignore_whitespace)


class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header: "@@ -old_start[,old_len] +new_start[,new_len] @@ trailer"
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: inline highlighter to use, ``difflib`` selects the
            word based one, anything else the prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            if format == 'gitdiff':
                # _parse_gitdiff() wants the whole diff text as one item
                diff = [diff]
            else:
                # the udiff parser works line by line
                diff = diff.splitlines(True)

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Escape HTML special characters of a single diff line.

        ``&`` has to be replaced first, otherwise the entities produced
        for ``<`` and ``>`` would get escaped a second time.
        """
        return (string.replace('&', '&amp;')
                      .replace('<', '&lt;')
                      .replace('>', '&gt;'))

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the filename and revision hint from a pair of header lines.

        :param line1: the ``--- `` line of a file header
        :param line2: the ``+++ `` line of a file header
        :returns: tuple ``(filename, new_rev, old_rev)`` or
            ``(None, None, None)`` when the lines are not a file header
        """
        def _without_prefix(name, prefix):
            # str.lstrip() removes a *set of characters*, which would turn
            # ``api.py`` into ``pi.py``; only drop a real leading prefix
            if name.startswith(prefix):
                return name[len(prefix):]
            return name

        if not (line1.startswith('--- ') and line2.startswith('+++ ')):
            return None, None, None

        l1 = line1[4:].split(None, 1)
        old_filename = _without_prefix(l1[0], 'a/') if len(l1) >= 1 else None
        old_rev = l1[1] if len(l1) == 2 else 'old'

        l2 = line2[4:].split(None, 1)
        new_filename = _without_prefix(l2[0], 'b/') if len(l2) >= 1 else None
        new_rev = l2[1] if len(l2) == 2 else 'new'

        # an added file has no old name, use the new one instead
        if old_filename != '/dev/null':
            filename = old_filename
        else:
            filename = new_filename

        return filename, new_rev, old_rev

    def _parse_gitdiff(self, diffiterator):
        """
        Turn the items of a git diff into a list of decoded lines and
        count added and removed lines on the way.

        :param diffiterator: iterable with zero, one or two items; the last
            item is the diff text, an optional first item is passed through
            as a line of its own
        :returns: list of unicode lines
        :raises Exception: if the iterable holds more than two items
        """
        # start from scratch so parsing the same diff again (``__init__``
        # and ``raw_diff`` both do) does not add up the statistics
        self.adds = 0
        self.removes = 0

        def line_decoder(l):
            if l.startswith('+') and not l.startswith('+++'):
                self.adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                self.removes += 1
            return l.decode('utf8', 'replace')

        output = list(diffiterator)
        size = len(output)

        if size == 0:
            return []
        if size == 1:
            raw_lines = output[0].splitlines(1)
        elif size == 2:
            raw_lines = [output[0]]
            raw_lines.extend(output[1].splitlines(1))
        else:
            raise Exception('wrong size of diff %s' % size)

        return [line_decoder(l) for l in raw_lines]

    def _highlight_line_difflib(self, line, next):
        """
        Highlight inline changes in both lines, word by word.

        Both line dicts are modified in place.
        """
        if line['action'] == 'del':
            old, new = line, next
        else:
            old, new = next, line

        # the capturing group keeps the separators, so joining the pieces
        # gives back the original text
        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next):
        """
        Highlight inline changes in both lines.

        Everything between the common prefix and the common suffix of the
        two lines gets wrapped in ``<ins>`` or ``<del>``. Both line dicts
        are modified in place.
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next['line']))
        while start < limit and line['line'][start] == next['line'][start]:
            start += 1

        # common suffix as a non-positive offset from the end; it may not
        # reach into the common prefix
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next['line'][end]:
            end -= 1
        end += 1

        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next)

    def _parse_udiff(self):
        """
        Parse the diff and return data for the template.

        :returns: list of dicts with the keys ``filename``,
            ``old_revision``, ``new_revision`` and ``chunks``; every chunk
            is a list of line dicts (``old_lineno``, ``new_lineno``,
            ``action``, ``line``)
        """
        lineiter = self.lines
        files = []
        try:
            line = lineiter.next()
            # no '...' separator in front of the very first hunk
            skipfirst = True
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = lineiter.next()
                    continue

                chunks = []
                # unpack in the order _extract_rev() returns the values
                filename, new_rev, old_rev = \
                    self._extract_rev(line, lineiter.next())
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks
                })

                line = lineiter.next()
                while line:
                    header = self._chunk_re.match(line)
                    if not header:
                        break

                    lines = []
                    chunks.append(lines)

                    # a missing length in the hunk header means 1
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in header.groups()[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    if not skipfirst:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action': 'context',
                            'line': line,
                        })
                    else:
                        skipfirst = False

                    line = lineiter.next()
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them; a leading
                        # backslash is the "\ No newline at end of file"
                        # marker which is no line of either file
                        if command in '#@\\':
                            line = lineiter.next()
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': old_line if affects_old else '',
                            'new_lineno': new_line if affects_new else '',
                            'action': action,
                            'line': line
                        })
                        line = lineiter.next()

        except StopIteration:
            pass

        # highlight inline changes
        changed = ('add', 'del')
        for diff_file in files:
            for chunk in diff_file['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = lineiter.next()
                        if line['action'] not in changed:
                            continue
                        # only a removal directly followed by an addition
                        # (or the other way round) is compared
                        nextline = lineiter.next()
                        if nextline['action'] not in changed or \
                                nextline['action'] == line['action']:
                            continue
                        self.differ(line, nextline)
                except StopIteration:
                    pass

        return files

    def prepare(self):
        """
        Prepare the passed udiff for HTML rendering. It'll return a list
        of dicts.

        The parser consumes ``self.lines``, which is a one-shot iterator.
        """
        return self._parse_udiff()

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    # NOTE(review): the defaults pair ``new_lineno_class`` with 'lineno old'
    # and ``old_lineno_class`` with 'lineno new'. They look swapped but are
    # part of the public interface and stylesheets may rely on them - confirm
    # before changing.
    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code'):
        """
        Return udiff as html table with customized css classes

        :param table_class: css class of the ``<table>``
        :param line_class: css class of every ``<tr>``, the action of the
            line is added as a second class
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the cell holding the code
        :returns: html as string or None if the diff has no chunks
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """
            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            return label

        diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n'''
                     % {'table_class': table_class})
        for diff in diff_lines:
            file_id = self._safe_id(diff['filename'])
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append(
                        '''<tr class="%(line_class)s %(action)s">\n'''
                        % {'line_class': line_class,
                           'action': change['action']})
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % \
                        {'filename': file_id,
                         'oldline_no': change['old_lineno']}
                    anchor_new = "%(filename)s_n%(oldline_no)s" % \
                        {'filename': file_id,
                         'oldline_no': change['new_lineno']}
                    # '...' separators and blank numbers get no anchor
                    cond_old = change['old_lineno'] != '...' and \
                        change['old_lineno']
                    cond_new = change['new_lineno'] != '...' and \
                        change['new_lineno']
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append(
                        '''\t<td %(a_id)s class="%(old_lineno_cls)s">'''
                        % {'a_id': anchor_old_id,
                           'old_lineno_cls': old_lineno_class})
                    _html.append(
                        '''<pre>%(link)s</pre>'''
                        % {'link': _link_to_if(cond_old,
                                               change['old_lineno'],
                                               '#%s' % anchor_old)})
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################
                    _html.append(
                        '''\t<td %(a_id)s class="%(new_lineno_cls)s">'''
                        % {'a_id': anchor_new_id,
                           'new_lineno_cls': new_lineno_class})
                    _html.append(
                        '''<pre>%(link)s</pre>'''
                        % {'link': _link_to_if(cond_new,
                                               change['new_lineno'],
                                               '#%s' % anchor_new)})
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    _html.append('''\t<td class="%(code_class)s">'''
                                 % {'code_class': code_class})
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n'''
                                 % {'code': change['line']})
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes