diff --git a/rhodecode/lib/vcs/utils/diffs.py b/rhodecode/lib/vcs/utils/diffs.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/vcs/utils/diffs.py
@@ -0,0 +1,460 @@
+# -*- coding: utf-8 -*-
+# original copyright: 2007-2008 by Armin Ronacher
+# licensed under the BSD license.
+
+import re
+import difflib
+import logging
+
+from difflib import unified_diff
+from itertools import tee, imap
+
+from mercurial.match import match
+
+from rhodecode.lib.vcs.exceptions import VCSError
+from rhodecode.lib.vcs.nodes import FileNode, NodeError
+
+
+def get_udiff(filenode_old, filenode_new, show_whitespace=True):
+    """
+    Returns unified diff between given ``filenode_old`` and ``filenode_new``.
+
+    :param filenode_old: ``FileNode`` used as the "from" side of the diff
+    :param filenode_new: ``FileNode`` used as the "to" side of the diff
+    :param show_whitespace: accepted for backward compatibility; it is not
+        used by this function
+    :returns: the generator created by ``difflib.unified_diff``
+    :raises VCSError: if any of the given objects is not a ``FileNode``
+    """
+    # Validate before touching any attribute, so a wrong argument is always
+    # reported as VCSError and not as a failing attribute lookup.
+    for filenode in (filenode_old, filenode_new):
+        if not isinstance(filenode, FileNode):
+            raise VCSError("Given object should be FileNode object, not %s"
+                           % filenode.__class__)
+
+    def changeset_date(filenode):
+        # a node whose last changeset cannot be resolved simply has no date
+        try:
+            return filenode.last_changeset.date
+        except NodeError:
+            return None
+
+    filenode_old_date = changeset_date(filenode_old)
+    filenode_new_date = changeset_date(filenode_new)
+
+    if filenode_old_date and filenode_new_date:
+        if not filenode_old_date < filenode_new_date:
+            logging.debug("Generating udiff for filenodes with not increasing "
+                          "dates")
+
+    # The last two arguments are the "from" and the "to" file dates; the new
+    # node's date must be used for the latter.
+    vcs_udiff = unified_diff(filenode_old.content.splitlines(True),
+                             filenode_new.content.splitlines(True),
+                             filenode_old.name,
+                             filenode_new.name,
+                             filenode_old_date,
+                             filenode_new_date)
+    return vcs_udiff
+
+
+def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True):
+    """
+    Build a git style diff of ``filenode_old`` against ``filenode_new``.
+
+    The diff itself is produced by the ``_get_diff`` method of the
+    repository that the changeset of ``filenode_new`` belongs to.
+
+    :param filenode_old: ``FileNode`` for the old side of the diff
+    :param filenode_new: ``FileNode`` for the new side of the diff
+    :param ignore_whitespace: ignore whitespaces in diff
+    :raises VCSError: if any of the given objects is not a ``FileNode``
+    """
+    for candidate in (filenode_old, filenode_new):
+        if isinstance(candidate, FileNode):
+            continue
+        raise VCSError("Given object should be FileNode object, not %s"
+                       % candidate.__class__)
+
+    # a changeset without ``raw_id`` is represented by the all-zero id
+    null_id = '0' * 40
+    old_raw_id = getattr(filenode_old.changeset, 'raw_id', null_id)
+    new_raw_id = getattr(filenode_new.changeset, 'raw_id', null_id)
+
+    return filenode_new.changeset.repository._get_diff(
+        old_raw_id, new_raw_id, filenode_new.path, ignore_whitespace)
+
+
+class DiffProcessor(object):
+ """
+ Give it a unified diff and it returns a list of the files that were
+ mentioned in the diff together with a dict of meta information that
+ can be used to render it in a HTML template.
+ """
+ _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
+
+ def __init__(self, diff, differ='diff', format='udiff'):
+     """
+     :param diff: a text in diff format or generator
+     :param differ: ``'difflib'`` selects the word based
+         ``_highlight_line_difflib`` highlighter, any other value selects
+         the character based ``_highlight_line_udiff``
+     :param format: format of diff passed, `udiff` or `gitdiff`
+     """
+     if isinstance(diff, basestring):
+         if format == 'gitdiff':
+             # _parse_gitdiff splits a single text item into lines itself
+             diff = [diff]
+         else:
+             # The udiff parser consumes one line per item. Wrapping the
+             # whole text into a one element list would hand it over as a
+             # single "line" and nothing would ever be parsed.
+             diff = diff.splitlines(1)
+
+     self.__udiff = diff
+     self.__format = format
+     self.adds = 0
+     self.removes = 0
+
+     if self.__format == 'gitdiff':
+         source = self._parse_gitdiff(self.copy_iterator())
+     else:
+         source = self.copy_iterator()
+     # lines are escaped lazily, while the parser pulls them
+     self.lines = (self.escaper(item) for item in source)
+
+     # Select a differ.
+     if differ == 'difflib':
+         self.differ = self._highlight_line_difflib
+     else:
+         self.differ = self._highlight_line_udiff
+
+ def escaper(self, string):
+     """
+     Replace ``<`` and ``>`` in ``string`` with their HTML entities, so
+     that diff content is not interpreted as markup when rendered.
+
+     Only these two characters are touched; ``&`` is left as it is.
+
+     :param string: a single line of the diff
+     :returns: the escaped line
+     """
+     return string.replace('<', '&lt;').replace('>', '&gt;')
+
+ def copy_iterator(self):
+     """
+     Hand out an independent iterator over the stored diff.
+
+     The stored diff is split in two with ``tee``: one half replaces the
+     stored one, so this instance can repeat the operation later, the
+     other half is returned to the caller for consumption.
+     """
+     kept, duplicate = tee(self.__udiff)
+     self.__udiff = kept
+     return duplicate
+
+ def _extract_rev(self, line1, line2):
+     """
+     Extract the filename and the revision hints from the two header
+     lines of a file diff.
+
+     :param line1: header of the old file, ``--- <name> [<revision>]``
+     :param line2: header of the new file, ``+++ <name> [<revision>]``
+     :returns: ``(filename, old_rev, new_rev)``. A missing revision is
+         reported as ``'old'`` / ``'new'``. When the lines are not such
+         a header pair the result is ``(None, None, None)``.
+
+     Name and revision are separated at the first run of whitespace, so
+     a file name containing whitespace is cut at that point.
+     """
+     if not (line1.startswith('--- ') and line2.startswith('+++ ')):
+         return None, None, None
+
+     def split_header(line, prefix, default_rev):
+         parts = line[4:].split(None, 1)
+         if not parts:
+             return None, default_rev
+         name = parts[0]
+         # Drop the "a/" or "b/" prefix only. str.lstrip() takes a set of
+         # characters and would also eat leading letters of the name
+         # itself ("a/aardvark.py" would become "rdvark.py").
+         if name.startswith(prefix):
+             name = name[len(prefix):]
+         rev = parts[1] if len(parts) == 2 else default_rev
+         return name, rev
+
+     old_filename, old_rev = split_header(line1, 'a/', 'old')
+     new_filename, new_rev = split_header(line2, 'b/', 'new')
+
+     # an added file has no old name, use the new one instead
+     if old_filename == '/dev/null':
+         filename = new_filename
+     else:
+         filename = old_filename
+
+     # same order as the caller in _parse_udiff unpacks it
+     return filename, old_rev, new_rev
+
+ def _parse_gitdiff(self, diffiterator):
+     """
+     Turn the items of a git diff into a list of decoded lines and count
+     the added and removed lines.
+
+     :param diffiterator: iterable with at most two items. A single item
+         is split into lines. Of two items the first is kept as one line
+         and the second is split into lines (presumably a header followed
+         by the diff body -- confirm against the producer of the diff).
+     :returns: list of lines decoded as utf8, undecodable bytes replaced
+     :raises Exception: if more than two items are given
+
+     ``self.adds`` and ``self.removes`` are set to the totals of the
+     parsed diff. They are assigned, not incremented, so parsing the
+     same diff again (``raw_diff`` does that) does not count twice.
+     """
+     output = list(diffiterator)
+     size = len(output)
+     if size > 2:
+         raise Exception('wrong size of diff %s' % size)
+
+     if size == 2:
+         raw_lines = [output[0]] + output[1].splitlines(1)
+     elif size == 1:
+         raw_lines = output[0].splitlines(1)
+     else:
+         raw_lines = []
+
+     adds = removes = 0
+     decoded = []
+     for raw_line in raw_lines:
+         text = raw_line.decode('utf8', 'replace')
+         # "+++" and "---" are the file headers, not changed lines
+         if text.startswith('+') and not text.startswith('+++'):
+             adds += 1
+         elif text.startswith('-') and not text.startswith('---'):
+             removes += 1
+         decoded.append(text)
+
+     self.adds = adds
+     self.removes = removes
+     return decoded
+
+ def _highlight_line_difflib(self, line, next):
+     """
+     Highlight inline changes in both lines.
+
+     Both lines are split into words and separators and compared with
+     ``difflib.SequenceMatcher``. Every fragment that is not equal on
+     both sides is wrapped in ``<del>`` (old line) or ``<ins>`` (new
+     line), the same tags ``_highlight_line_udiff`` uses.
+
+     :param line: dict of a changed line, with ``action`` and ``line``
+     :param next: dict of the line following it
+
+     The ``line`` value of both dicts is rewritten in place.
+     """
+     # whichever of the two is the deletion is the old side
+     if line['action'] == 'del':
+         old, new = line, next
+     else:
+         old, new = next, line
+
+     # the capturing group keeps the separators, so joining the pieces
+     # gives back the original text
+     oldwords = re.split(r'(\W)', old['line'])
+     newwords = re.split(r'(\W)', new['line'])
+
+     sequence = difflib.SequenceMatcher(None, oldwords, newwords)
+
+     oldfragments, newfragments = [], []
+     for tag, i1, i2, j1, j2 in sequence.get_opcodes():
+         oldfrag = ''.join(oldwords[i1:i2])
+         newfrag = ''.join(newwords[j1:j2])
+         if tag != 'equal':
+             # empty fragments (pure insert or delete) get no empty tag
+             if oldfrag:
+                 oldfrag = '<del>%s</del>' % oldfrag
+             if newfrag:
+                 newfrag = '<ins>%s</ins>' % newfrag
+         oldfragments.append(oldfrag)
+         newfragments.append(newfrag)
+
+     old['line'] = "".join(oldfragments)
+     new['line'] = "".join(newfragments)
+
+ def _highlight_line_udiff(self, line, next):
+     """
+     Highlight inline changes in both lines.
+
+     The common prefix and the common suffix of the two lines are left
+     alone, the part in between is wrapped in ``<ins>`` for a line whose
+     action is ``add`` and in ``<del>`` for any other line.
+
+     :param line: dict of a changed line, with ``action`` and ``line``
+     :param next: dict of the line following it
+
+     The ``line`` value of both dicts is rewritten in place. Lines that
+     share neither a prefix nor a suffix are left untouched.
+     """
+     # length of the common prefix
+     start = 0
+     limit = min(len(line['line']), len(next['line']))
+     while start < limit and line['line'][start] == next['line'][start]:
+         start += 1
+     # negative index of the first differing character seen from the
+     # end; the suffix may not reach into the common prefix
+     end = -1
+     limit -= start
+     while -end <= limit and line['line'][end] == next['line'][end]:
+         end -= 1
+     end += 1
+     if start or end:
+         def do(l):
+             last = end + len(l['line'])
+             if l['action'] == 'add':
+                 tag = 'ins'
+             else:
+                 tag = 'del'
+             l['line'] = '%s<%s>%s</%s>%s' % (
+                 l['line'][:start],
+                 tag,
+                 l['line'][start:last],
+                 tag,
+                 l['line'][last:]
+             )
+         do(line)
+         do(next)
+
+ def _parse_udiff(self):
+     """
+     Parse the diff and return data for the template.
+
+     Lines are pulled from ``self.lines``, so the parser can run only
+     once per instance.
+
+     :returns: list with one dict per file, holding ``filename``,
+         ``old_revision``, ``new_revision`` and ``chunks``. ``chunks`` is
+         a list of hunks, every hunk a list of dicts with ``old_lineno``,
+         ``new_lineno``, ``action`` (``add``, ``del``, ``unmod`` or
+         ``context`` for a hunk header) and ``line``.
+
+     After parsing, every ``add``/``del`` line directly followed by a
+     line of the opposite kind is passed with it to ``self.differ``.
+     """
+     lineiter = self.lines
+     files = []
+     try:
+         line = lineiter.next()
+         # the header of the very first hunk is not emitted as a row
+         skipfirst = True
+         while 1:
+             # continue until we found the old file
+             if not line.startswith('--- '):
+                 line = lineiter.next()
+                 continue
+
+             chunks = []
+             filename, old_rev, new_rev = \
+                 self._extract_rev(line, lineiter.next())
+             files.append({
+                 'filename': filename,
+                 'old_revision': old_rev,
+                 'new_revision': new_rev,
+                 'chunks': chunks
+             })
+
+             line = lineiter.next()
+             while line:
+                 header = self._chunk_re.match(line)
+                 if not header:
+                     break
+
+                 lines = []
+                 chunks.append(lines)
+
+                 # a missing line count in the hunk header means 1
+                 old_line, old_end, new_line, new_end = \
+                     [int(x or 1) for x in header.groups()[:-1]]
+                 # counters hold the number of the last emitted line
+                 old_line -= 1
+                 new_line -= 1
+                 old_end += old_line
+                 new_end += new_line
+
+                 if not skipfirst:
+                     lines.append({
+                         'old_lineno': '...',
+                         'new_lineno': '...',
+                         'action': 'context',
+                         'line': line,
+                     })
+                 else:
+                     skipfirst = False
+
+                 line = lineiter.next()
+                 while old_line < old_end or new_line < new_end:
+                     if line:
+                         command, line = line[0], line[1:]
+                     else:
+                         command = ' '
+                     affects_old = affects_new = False
+
+                     # ignore those if we don't expect them
+                     if command in '#@':
+                         continue
+                     elif command == '+':
+                         affects_new = True
+                         action = 'add'
+                     elif command == '-':
+                         affects_old = True
+                         action = 'del'
+                     else:
+                         affects_old = affects_new = True
+                         action = 'unmod'
+
+                     old_line += affects_old
+                     new_line += affects_new
+                     lines.append({
+                         'old_lineno': affects_old and old_line or '',
+                         'new_lineno': affects_new and new_line or '',
+                         'action': action,
+                         'line': line
+                     })
+                     line = lineiter.next()
+
+     except StopIteration:
+         # the diff is exhausted, whatever was collected so far is kept
+         pass
+
+     # highlight inline changes
+     for parsed_file in files:
+         # Every file owns its hunks. The local ``chunks`` left over from
+         # the parse loop only refers to those of the last file.
+         for chunk in parsed_file['chunks']:
+             rowiter = iter(chunk)
+             try:
+                 while 1:
+                     row = rowiter.next()
+                     # hunk headers and unmodified lines are no changes
+                     # and must not use up the line following them
+                     if row['action'] not in ('add', 'del'):
+                         continue
+                     nextrow = rowiter.next()
+                     if nextrow['action'] == 'unmod' or \
+                             nextrow['action'] == row['action']:
+                         continue
+                     self.differ(row, nextrow)
+             except StopIteration:
+                 pass
+
+     return files
+
+ def prepare(self):
+     """
+     Prepare the passed udiff for HTML rendering.
+
+     :returns: the list of dicts built by ``_parse_udiff``
+     """
+     parsed_files = self._parse_udiff()
+     return parsed_files
+
+ def _safe_id(self, idstring):
+     """Turn ``idstring`` into a value usable as an HTML id attribute.
+
+     The HTML spec allows an id to start with a letter and to continue
+     with letters, digits, hyphens, underscores, colons and periods.
+     This method is stricter than that: colons and periods are dropped
+     as well.
+
+     Three steps are applied to the string form of ``idstring``: every
+     whitespace character becomes an underscore, every character that
+     is neither a hyphen nor a word character (alphanumerics and
+     underscore) is removed, and the result is lower-cased.
+
+     """
+     as_text = '%s' % idstring
+     # whitespace first, so that it survives the clean-up as underscore
+     underscored = re.sub(r'\s', "_", as_text)
+     # the lookahead spares the hyphen from the removal of non-word chars
+     cleaned = re.sub(r'(?!-)\W', "", underscored)
+     return cleaned.lower()
+
+ def raw_diff(self):
+     """
+     Return the stored diff joined into a single unicode string.
+
+     A fresh copy of the stored iterator is used, so the call can be
+     repeated. For the ``gitdiff`` format the items first go through
+     ``_parse_gitdiff``.
+     """
+     pieces = self.copy_iterator()
+     if self.__format != 'gitdiff':
+         return u''.join(pieces)
+     return u''.join(self._parse_gitdiff(pieces))
+
+ def as_html(self, table_class='code-difftable', line_class='line',
+ new_lineno_class='lineno old', old_lineno_class='lineno new',
+ code_class='code'):
+ """
+ Return udiff as html table with customized css classes
+ """
+ def _link_to_if(condition, label, url):
+ """
+ Generates a link if condition is meet or just the label if not.
+ """
+
+ if condition:
+ return '''%(label)s''' % {'url': url,
+ 'label': label}
+ else:
+ return label
+ diff_lines = self.prepare()
+ _html_empty = True
+ _html = []
+ _html.append('''
''' \
+ % {'a_id': anchor_old_id,
+ 'old_lineno_cls': old_lineno_class})
+
+ _html.append('''%(link)s''' \ + % {'link': + _link_to_if(cond_old, change['old_lineno'], '#%s' \ + % anchor_old)}) + _html.append(''' | \n''')
+ ###########################################################
+ # NEW LINE NUMBER
+ ###########################################################
+
+ _html.append('''\t''' \
+ % {'a_id': anchor_new_id,
+ 'new_lineno_cls': new_lineno_class})
+
+ _html.append('''%(link)s''' \ + % {'link': + _link_to_if(cond_new, change['new_lineno'], '#%s' \ + % anchor_new)}) + _html.append(''' | \n''')
+ ###########################################################
+ # CODE
+ ###########################################################
+ _html.append('''\t''' \
+ % {'code_class': code_class})
+ _html.append('''\n\t\t%(code)s\n''' \ + % {'code': change['line']}) + _html.append('''\t | ''')
+ _html.append('''\n