Changeset - 7b8cbcb927b2
[Not reviewed]
default
0 4 0
Mads Kiilerich - 11 years ago 2014-12-15 13:47:36
madski@unity3d.com
diff: refactor diff parsing - fail on any parse error instead of showing an incorrect diff
4 files changed with 25 insertions and 18 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.diffs
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Set of diffing helpers, previously part of vcs
 

	
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 4, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 
import re
 
import difflib
 
import logging
 

	
 
from itertools import tee, imap
 

	
 
from pylons.i18n.translation import _
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.helpers import escape
 
from kallithea.lib.utils2 import safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def wrap_to_table(str_):
 
    return '''<table class="code-difftable">
 
                <tr class="line no-comment">
 
                <td class="lineno new"></td>
 
                <td class="code no-comment"><pre>%s</pre></td>
 
                </tr>
 
              </table>''' % str_
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
 
                ignore_whitespace=True, line_context=3,
 
                enable_comments=False):
 
    """
 
    returns a wrapped diff into a table, checks for cut_off_limit and presents
 
    proper message
 
    """
 

	
 
    if filenode_old is None:
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        diff = wrap_to_table(_('Binary file'))
 
        stats = (0, 0)
 
        size = 0
 

	
 
    elif cut_off_limit != -1 and (cut_off_limit is None or
 
    (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
 

	
 
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
 

	
 
        diff = diff_processor.as_html(enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 
        size = len(diff or '')
 
    else:
 
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 
        size = 0
 
    if not diff:
 
        submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                            [filenode_new, filenode_old])
 
        if submodules:
 
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
 
        else:
 
            diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
    return size, cs1, cs2, diff, stats
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 

	
 
    :param ignore_whitespace: ignore whitespaces in diff
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                        [filenode_new, filenode_old])
 
    if submodules:
 
        return ''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
 
                                ignore_whitespace, context)
 
    return vcs_gitdiff
 

	
 
NEW_FILENODE = 1
 
DEL_FILENODE = 2
 
MOD_FILENODE = 3
 
RENAMED_FILENODE = 4
 
COPIED_FILENODE = 5
 
CHMOD_FILENODE = 6
 
BIN_FILENODE = 7
 

	
 

	
 
class DiffLimitExceeded(Exception):
 
    pass
 

	
 

	
 
class LimitedDiffContainer(object):
 

	
 
    def __init__(self, diff_limit, cur_diff_size, diff):
 
        self.diff = diff
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = cur_diff_size
 

	
 
    def __iter__(self):
 
        for l in self.diff:
 
            yield l
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        #^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>\S+)\n
 
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        #^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
 
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
 
        (?:^copy[ ]from[ ](?P<copy_from>\S+)\n
 
           ^copy[ ]to[ ](?P<copy_to>\S+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    #used for inline highlighter word split
 
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|<u>\t</u>| <i></i>|\W+?)')
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(?<=.)( \n| $)')
 

	
 

	
 
    def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version controll hg or git
 
        :param format: format of diff passed, `udiff` or `gitdiff`
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
        self._diff = diff
 
        self._format = format
 
        self.adds = 0
 
        self.removes = 0
 
        # calculate diff size
 
        self.diff_size = len(diff)
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = 0
 
        self.parsed = False
 
        self.parsed_diff = []
 
        self.vcs = vcs
 

	
 
        if format == 'gitdiff':
 
            self.differ = self._highlight_line_difflib
 
            self._parser = self._parse_gitdiff
 
        else:
 
            self.differ = self._highlight_line_udiff
 
            self._parser = self._parse_udiff
 

	
 
    def _copy_iterator(self):
 
        """
 
        make a fresh copy of generator, we should not iterate thru
 
        an original as it's needed for repeating operations on
 
        this instance of DiffProcessor
 
        """
 
        self.__udiff, iterator_copy = tee(self.__udiff)
 
        return iterator_copy
 

	
 
    def _escaper(self, string):
 
        """
 
        Escaper for diff escapes special chars and checks the diff limit
 

	
 
        :param string:
 
        """
 

	
 
        self.cur_diff_size += len(string)
 

	
 
        # escaper gets iterated on each .next() call and it checks if each
 
        # parsed line doesn't exceed the diff limit
 
        if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
 
            raise DiffLimitExceeded('Diff Limit Exceeded')
 

	
 
        def substitute(m):
 
            groups = m.groups()
 
            if groups[0]:
 
                return '&amp;'
 
            if groups[1]:
 
                return '&lt;'
 
            if groups[2]:
 
                return '&gt;'
 
            if groups[3]:
 
                return '<u>\t</u>'
 
            if groups[4]:
 
                return ' <i></i>'
 
            assert False
 

	
 
        return self._escape_re.sub(substitute, safe_unicode(string))
 

	
 
    def _line_counter(self, l):
 
        """
 
        Checks each line and bumps total adds/removes for this diff
 

	
 
        :param l:
 
        """
 
        if l.startswith('+') and not l.startswith('+++'):
 
            self.adds += 1
 
        elif l.startswith('-') and not l.startswith('---'):
 
            self.removes += 1
 
        return safe_unicode(l)
 

	
 
    def _highlight_line_difflib(self, line, next_):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 

	
 
        if line['action'] == 'del':
 
            old, new = line, next_
 
        else:
 
            old, new = next_, line
 

	
 
        oldwords = self._token_re.split(old['line'])
 
        newwords = self._token_re.split(new['line'])
 
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
        oldfragments, newfragments = [], []
 
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
            oldfrag = ''.join(oldwords[i1:i2])
 
            newfrag = ''.join(newwords[j1:j2])
 
            if tag != 'equal':
 
                if oldfrag:
 
                    oldfrag = '<del>%s</del>' % oldfrag
 
                if newfrag:
 
                    newfrag = '<ins>%s</ins>' % newfrag
 
            oldfragments.append(oldfrag)
 
            newfragments.append(newfrag)
 

	
 
        old['line'] = "".join(oldfragments)
 
        new['line'] = "".join(newfragments)
 

	
 
    def _highlight_line_udiff(self, line, next_):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 
        start = 0
 
        limit = min(len(line['line']), len(next_['line']))
 
        while start < limit and line['line'][start] == next_['line'][start]:
 
            start += 1
 
        end = -1
 
        limit -= start
 
        while -end <= limit and line['line'][end] == next_['line'][end]:
 
            end -= 1
 
        end += 1
 
        if start or end:
 
            def do(l):
 
                last = end + len(l['line'])
 
                if l['action'] == 'add':
 
                    tag = 'ins'
 
                else:
 
                    tag = 'del'
 
                l['line'] = '%s<%s>%s</%s>%s' % (
 
                    l['line'][:start],
 
                    tag,
 
                    l['line'][start:last],
 
                    tag,
 
                    l['line'][last:]
 
                )
 
            do(line)
 
            do(next_)
 

	
 
    def _get_header(self, diff_chunk):
 
        """
 
        parses the diff header, and returns parts, and leftover diff
 
        parts consists of 14 elements::
 

	
 
            a_path, b_path, similarity_index, rename_from, rename_to,
 
            old_mode, new_mode, new_file_mode, deleted_file_mode,
 
            a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
        :param diff_chunk:
 
        """
 

	
 
        match = None
 
        if self.vcs == 'git':
 
            match = self._git_header_re.match(diff_chunk)
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
        if match is None:
 
            raise Exception('VCS type %s is not supported' % self.vcs)
 
        groups = match.groupdict()
 
        rest = diff_chunk[match.end():]
 
        if rest and not rest.startswith('@') and not rest.startswith('literal '):
 
            raise Exception('cannot parse diff header: %r followed by %r' % (diff_chunk[:match.end()], rest[:1000]))
 
        difflines = imap(self._escaper, rest.splitlines(True))
 
        return groups, difflines
 

	
 
    def _clean_line(self, line, command):
 
        if command in ['+', '-', ' ']:
 
            #only modify the line if it's actually a diff thing
 
            line = line[1:]
 
        return line
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
        _files = []
 
        diff_container = lambda arg: arg
 

	
 
        ##split the diff in chunks of separate --git a/file b/file chunks
 
        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
 
            head, diff = self._get_header(raw_diff)
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
                'ops': {},
 
            }
 

	
 
            if head['deleted_file_mode']:
 
                op = 'D'
 
                stats['binary'] = True
 
                stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
            elif head['new_file_mode']:
 
                op = 'A'
 
                stats['binary'] = True
 
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
 
            else:  # modify operation, can be cp, rename, chmod
 
                # CHMOD
 
                if head['new_mode'] and head['old_mode']:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
 
                                        % (head['old_mode'], head['new_mode']))
 
                # RENAME
 
                if (head['rename_from'] and head['rename_to']
 
                      and head['rename_from'] != head['rename_to']):
 
                    op = 'R'
 
                    stats['binary'] = True
 
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
 
                                    % (head['rename_from'], head['rename_to']))
 
                # COPY
 
                if head.get('copy_from') and head.get('copy_to'):
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
 
                                        % (head['copy_from'], head['copy_to']))
 
                # FALL BACK: detect missed old style add or remove
 
                if op is None:
 
                    if not head['a_file'] and head['b_file']:
 
                        op = 'A'
 
                        stats['binary'] = True
 
                        stats['ops'][NEW_FILENODE] = 'new file'
 

	
 
                    elif head['a_file'] and not head['b_file']:
 
                        op = 'D'
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                try:
 
                    chunks, _stats = self._parse_lines(diff)
 
                    stats['binary'] = False
 
                    stats['added'] = _stats[0]
 
                    stats['deleted'] = _stats[1]
 
                    # explicit mark that it's a modified file
 
                    if op == 'M':
 
                        stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
                except DiffLimitExceeded:
 
                    diff_container = lambda _diff: \
 
                        LimitedDiffContainer(self.diff_limit,
 
                                            self.cur_diff_size, _diff)
 
                    break
 
            else:  # GIT binary patch (or empty diff)
 
                # GIT Binary patch
 
                if head['bin_patch']:
 
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
 
                chunks = []
 

	
 
            if op == 'D' and chunks:
 
                # a way of seeing deleted content could perhaps be nice - but
 
                # not with the current UI
 
                chunks = []
 

	
 
            chunks.insert(0, [{
 
                'old_lineno': '',
 
                'new_lineno': '',
 
                'action':     'context',
 
                'line':       msg,
 
                } for _op, msg in stats['ops'].iteritems()
 
                  if _op not in [MOD_FILENODE]])
 

	
 
            _files.append({
 
                'filename':         head['b_path'],
 
                'old_revision':     head['a_blob_id'],
 
                'new_revision':     head['b_blob_id'],
 
                'chunks':           chunks,
 
                'operation':        op,
 
                'stats':            stats,
 
            })
 

	
 
        if not inline_diff:
 
            return diff_container(_files)
 

	
 
        # highlight inline changes
 
        for diff_data in _files:
 
            for chunk in diff_data['chunks']:
 
                lineiter = iter(chunk)
 
                try:
 
                    while 1:
 
                        line = lineiter.next()
 
                        if line['action'] not in ['unmod', 'context']:
 
                            nextline = lineiter.next()
 
                            if nextline['action'] in ['unmod', 'context'] or \
 
                               nextline['action'] == line['action']:
 
                                continue
 
                            self.differ(line, nextline)
 
                except StopIteration:
 
                    pass
 

	
 
        return diff_container(_files)
 

	
 
    def _parse_udiff(self, inline_diff=True):
 
        raise NotImplementedError()
 

	
 
    def _parse_lines(self, diff):
 
        """
 
        Parse the diff and return data for the template.
 
        """
 

	
 
        lineiter = iter(diff)
 
        stats = [0, 0]
 
        (old_line, old_end, new_line, new_end) = (None, None, None, None)
 

	
 
        try:
 
            chunks = []
 
            line = lineiter.next()
 
            line = diff.next()
 

	
 
            while line:
 
            while True:
 
                lines = []
 
                chunks.append(lines)
 

	
 
                match = self._chunk_re.match(line)
 

	
 
                if not match:
 
                    break
 
                    raise Exception('error parsing diff @@ line %r' % line)
 

	
 
                gr = match.groups()
 
                (old_line, old_end,
 
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
                old_line -= 1
 
                new_line -= 1
 

	
 
                context = len(gr) == 5
 
                old_end += old_line
 
                new_end += new_line
 

	
 
                if context:
 
                    # skip context only if it's first line
 
                    if int(gr[0]) > 1:
 
                        lines.append({
 
                            'old_lineno': '...',
 
                            'new_lineno': '...',
 
                            'action':     'context',
 
                            'line':       line,
 
                        })
 

	
 
                line = lineiter.next()
 
                line = diff.next()
 

	
 
                while old_line < old_end or new_line < new_end:
 
                    command = ' '
 
                    if line:
 
                        command = line[0]
 
                    if not line:
 
                        raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                    affects_old = affects_new = False
 

	
 
                    # ignore those if we don't expect them
 
                    if command in '#@':
 
                        continue
 
                    elif command == '+':
 
                    command = line[0]
 
                    if command == '+':
 
                        affects_new = True
 
                        action = 'add'
 
                        stats[0] += 1
 
                    elif command == '-':
 
                        affects_old = True
 
                        action = 'del'
 
                        stats[1] += 1
 
                    else:
 
                    elif command == ' ':
 
                        affects_old = affects_new = True
 
                        action = 'unmod'
 
                    else:
 
                        raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                    if not self._newline_marker.match(line):
 
                        old_line += affects_old
 
                        new_line += affects_new
 
                        lines.append({
 
                            'old_lineno':   affects_old and old_line or '',
 
                            'new_lineno':   affects_new and new_line or '',
 
                            'action':       action,
 
                            'line':         self._clean_line(line, command)
 
                        })
 

	
 
                    line = lineiter.next()
 
                    line = diff.next()
 

	
 
                    if self._newline_marker.match(line):
 
                        # we need to append to lines, since this is not
 
                        # counted in the line specs of diff
 
                        lines.append({
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         self._clean_line(line, command)
 
                        })
 

	
 
                        line = diff.next()
 
                if old_line > old_end:
 
                        raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
                if new_line > new_end:
 
                        raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
        except StopIteration:
 
            pass
 
        if old_line != old_end or new_line != new_end:
 
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
        return chunks, stats
 

	
 
    def _safe_id(self, idstring):
 
        """Make a string safe for including in an id attribute.
 

	
 
        The HTML spec says that id attributes 'must begin with
 
        a letter ([A-Za-z]) and may be followed by any number
 
        of letters, digits ([0-9]), hyphens ("-"), underscores
 
        ("_"), colons (":"), and periods (".")'. These regexps
 
        are slightly over-zealous, in that they remove colons
 
        and periods unnecessarily.
 

	
 
        Whitespace is transformed into underscores, and then
 
        anything which is not a hyphen or a character that
 
        matches \w (alphanumerics and underscore) is removed.
 

	
 
        """
 
        # Transform all whitespace to underscore
 
        idstring = re.sub(r'\s', "_", '%s' % idstring)
 
        # Remove everything that is not a hyphen or a member of \w
 
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
        return idstring
 

	
 
    def prepare(self, inline_diff=True):
 
        """
 
        Prepare the passed udiff for HTML rendering. It'l return a list
 
        of dicts with diff information
 
        """
 
        parsed = self._parser(inline_diff=inline_diff)
 
        self.parsed = True
 
        self.parsed_diff = parsed
 
        return parsed
 

	
 
    def as_raw(self, diff_lines=None):
 
        """
 
        Returns raw string diff
 
        """
 
        return self._diff
 
        #return u''.join(imap(self._line_counter, self._diff.splitlines(1)))
 

	
 
    def as_html(self, table_class='code-difftable', line_class='line',
 
                old_lineno_class='lineno old', new_lineno_class='lineno new',
 
                code_class='code', enable_comments=False, parsed_lines=None):
 
        """
 
        Return given diff as html table with customized css classes
 
        """
 
        def _link_to_if(condition, label, url):
 
            """
 
            Generates a link if condition is meet or just the label if not.
 
            """
 

	
 
            if condition:
 
                return '''<a href="%(url)s">%(label)s</a>''' % {
 
                    'url': url,
 
                    'label': label
 
                }
 
            else:
 
                return label
 
        if not self.parsed:
 
            self.prepare()
 

	
 
        diff_lines = self.parsed_diff
 
        if parsed_lines:
 
            diff_lines = parsed_lines
 

	
 
        _html_empty = True
 
        _html = []
 
        _html.append('''<table class="%(table_class)s">\n''' % {
 
            'table_class': table_class
 
        })
 

	
 
        for diff in diff_lines:
 
            for line in diff['chunks']:
 
                _html_empty = False
 
                for change in line:
 
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
 
                        'lc': line_class,
 
                        'action': change['action']
 
                    })
 
                    anchor_old_id = ''
 
                    anchor_new_id = ''
 
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['old_lineno']
 
                    }
 
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['new_lineno']
 
                    }
 
                    cond_old = (change['old_lineno'] != '...' and
 
                                change['old_lineno'])
 
                    cond_new = (change['new_lineno'] != '...' and
 
                                change['new_lineno'])
 
                    if cond_old:
 
                        anchor_old_id = 'id="%s"' % anchor_old
 
                    if cond_new:
 
                        anchor_new_id = 'id="%s"' % anchor_new
 
                    ###########################################################
 
                    # OLD LINE NUMBER
 
                    ###########################################################
 
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
 
                        'a_id': anchor_old_id,
 
                        'olc': old_lineno_class
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(True, change['old_lineno'],
 
                                            '#%s' % anchor_old)
 
                    })
 
                    _html.append('''</td>\n''')
 
                    ###########################################################
 
                    # NEW LINE NUMBER
 
                    ###########################################################
 

	
 
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
 
                        'a_id': anchor_new_id,
 
                        'nlc': new_lineno_class
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(True, change['new_lineno'],
 
                                            '#%s' % anchor_new)
 
                    })
 
                    _html.append('''</td>\n''')
 
                    ###########################################################
 
                    # CODE
 
                    ###########################################################
 
                    comments = '' if enable_comments else 'no-comment'
 
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
 
                        'cc': code_class,
 
                        'inc': comments
 
                    })
 
                    _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
 
                        'code': change['line']
 
                    })
 

	
 
                    _html.append('''\t</td>''')
 
                    _html.append('''\n</tr>\n''')
 
        _html.append('''</table>''')
 
        if _html_empty:
 
            return None
 
        return ''.join(_html)
 

	
 
    def stat(self):
 
        """
 
        Returns tuple of added, and removed lines for this instance
 
        """
 
        return self.adds, self.removes
kallithea/tests/fixtures/diff_with_diff_data.diff
Show inline comments
 
diff --git a/vcs/backends/base.py b/vcs/backends/base.py
 
index 212267ca23949807b8d89fa8ca495827dcfab3b1..ad17f16634da602503ed4ddd7cdd2e1ccdf4bed4 100644
 
--- a/vcs/backends/base.py
 
+++ b/vcs/backends/base.py
 
@@ -54,6 +54,7 @@ class BaseRepository(object):
 
     """
 
     scm = None
 
     DEFAULT_BRANCH_NAME = None
 
+    EMPTY_CHANGESET = '0' * 40
 
 
 
     def __init__(self, repo_path, create=False, **kwargs):
 
         """
 
@@ -204,6 +205,23 @@ class BaseRepository(object):
 
         """
 
         raise NotImplementedError
 
 
 
+    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
+            context=3):
 
+        """
 
+        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
+        ``rev2`` since ``rev1``.
 
+
 
+        :param rev1: Entry point from which diff is shown. Can be
 
+          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
+          the changes since empty state of the repository until ``rev2``
 
+        :param rev2: Until which revision changes should be shown.
 
+        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
+          changes. Defaults to ``False``.
 
+        :param context: How many lines before/after changed lines should be
 
+          shown. Defaults to ``3``.
 
+        """
 
+        raise NotImplementedError
 
+
 
     # ========== #
 
     # COMMIT API #
 
     # ========== #
 
@@ -341,7 +359,6 @@ class BaseChangeset(object):
 
             otherwise; trying to access this attribute while there is no
 
             changesets would raise ``EmptyRepositoryError``
 
     """
 
-
 
     def __str__(self):
 
         return '<%s at %s:%s>' % (self.__class__.__name__, self.revision,
 
             self.short_id)
 
@@ -591,7 +608,6 @@ class BaseChangeset(object):
 
         return data
 
 
 
 
 
-
 
 class BaseWorkdir(object):
 
     """
 
     Working directory representation of single repository.
 
diff --git a/vcs/backends/git/repository.py b/vcs/backends/git/repository.py
 
index 8b9d1247fdee44e7a021b80e4965d8609cfd5720..e9f04e74dedd2f57417eb91dd2f4f7c61ec7e097 100644
 
--- a/vcs/backends/git/repository.py
 
+++ b/vcs/backends/git/repository.py
 
@@ -12,6 +12,7 @@
 
 import os
 
 import re
 
 import time
 
+import inspect
 
 import posixpath
 
 from dulwich.repo import Repo, NotGitRepository
 
 #from dulwich.config import ConfigFile
 
@@ -101,21 +102,6 @@ class GitRepository(BaseRepository):
 
                 "stderr:\n%s" % (cmd, se))
 
         return so, se
 
 
 
-    def _get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
-            context=3):
 
-        rev1 = self._get_revision(rev1)
 
-        rev2 = self._get_revision(rev2)
 
-        
 
-        if ignore_whitespace:
 
-            cmd = 'diff -U%s -w %s %s' % (context, rev1, rev2)
 
-        else:
 
-            cmd = 'diff -U%s %s %s' % (context, rev1, rev2)
 
-        if path:
 
-            cmd += ' -- "%s"' % path
 
-        so, se = self.run_git_command(cmd)
 
-
 
-        return so
 
-
 
     def _check_url(self, url):
 
         """
 
         Functon will check given url and try to verify if it's a valid
 
@@ -322,6 +308,8 @@ class GitRepository(BaseRepository):
 
         Returns ``GitChangeset`` object representing commit from git repository
 
         at the given revision or head (most recent commit) if None given.
 
         """
 
+        if isinstance(revision, GitChangeset):
 
+            return revision
 
         revision = self._get_revision(revision)
 
         changeset = GitChangeset(repository=self, revision=revision)
 
         return changeset
 
@@ -398,6 +386,49 @@ class GitRepository(BaseRepository):
 
         for rev in revs:
 
             yield self.get_changeset(rev)
 
 
 
+    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
+            context=3):
 
+        """
 
+        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
+        ``rev2`` since ``rev1``.
 
+
 
+        :param rev1: Entry point from which diff is shown. Can be
 
+          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
+          the changes since empty state of the repository until ``rev2``
 
+        :param rev2: Until which revision changes should be shown.
 
+        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
+          changes. Defaults to ``False``.
 
+        :param context: How many lines before/after changed lines should be
 
+          shown. Defaults to ``3``.
 
+        """
 
+        flags = ['-U%s' % context]
 
+        if ignore_whitespace:
 
+            flags.append('-w')
 
+
 
+        if rev1 == self.EMPTY_CHANGESET:
 
+            rev2 = self.get_changeset(rev2).raw_id
 
+            cmd = ' '.join(['show'] + flags + [rev2])
 
+        else:
 
+            rev1 = self.get_changeset(rev1).raw_id
 
+            rev2 = self.get_changeset(rev2).raw_id
 
+            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
 
+
 
+        if path:
 
+            cmd += ' -- "%s"' % path
 
+        stdout, stderr = self.run_git_command(cmd)
 
+        # If we used 'show' command, strip first few lines (until actual diff
 
+        # starts)
 
+        if rev1 == self.EMPTY_CHANGESET:
 
+            lines = stdout.splitlines()
 
+            x = 0
 
+            for line in lines:
 
+                if line.startswith('diff'):
 
+                    break
 
+                x += 1
 
+            # Append new line just like 'diff' command do
 
+            stdout = '\n'.join(lines[x:]) + '\n'
 
+        return stdout
 
+
 
     @LazyProperty
 
     def in_memory_changeset(self):
 
         """
 
diff --git a/vcs/backends/hg.py b/vcs/backends/hg.py
 
index f1f9f95e4d476ab01d8e7b02a8b59034c0740a3b..b7d63c552c39b2f8aaec17ef46551369c8b8e793 100644
 
--- a/vcs/backends/hg.py
 
+++ b/vcs/backends/hg.py
 
@@ -256,13 +256,32 @@ class MercurialRepository(BaseRepository):
 
 
 
         return map(lambda x: hex(x[7]), self._repo.changelog.index)[:-1]
 
 
 
-    def _get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
+    def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
 
                   context=3):
 
+        """
 
+        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
+        ``rev2`` since ``rev1``.
 
+
 
+        :param rev1: Entry point from which diff is shown. Can be
 
+          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
+          the changes since empty state of the repository until ``rev2``
 
+        :param rev2: Until which revision changes should be shown.
 
+        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
+          changes. Defaults to ``False``.
 
+        :param context: How many lines before/after changed lines should be
 
+          shown. Defaults to ``3``.
 
+        """
 
+        # Check if given revisions are present at repository (may raise
 
+        # ChangesetDoesNotExistError)
 
+        if rev1 != self.EMPTY_CHANGESET:
 
+            self.get_changeset(rev1)
 
+        self.get_changeset(rev2)
 
+
 
         file_filter = match(self.path, '', [path])
 
-        return patch.diff(self._repo, rev1, rev2, match=file_filter,
 
+        return ''.join(patch.diff(self._repo, rev1, rev2, match=file_filter,
 
                           opts=diffopts(git=True,
 
                                         ignorews=ignore_whitespace,
 
-                                        context=context))
 
+                                        context=context)))
 
 
 
     def _check_url(self, url):
 
         """
 
diff --git a/vcs/tests/test_git.py b/vcs/tests/test_git.py
 
index 30da035a2a35c3dca14064778e97188b6d4ce5d6..d4b82b9e612af8bb5bf490a827377c7c2567735a 100644
 
--- a/vcs/tests/test_git.py
 
+++ b/vcs/tests/test_git.py
 
@@ -639,19 +639,19 @@ class GitSpecificWithRepoTest(BackendTestMixin, unittest.TestCase):
 
 
 
     def test_get_diff_runs_git_command_with_hashes(self):
 
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
-        self.repo._get_diff(0, 1)
 
+        self.repo.get_diff(0, 1)
 
         self.repo.run_git_command.assert_called_once_with('diff -U%s %s %s' %
 
             (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
 
 
     def test_get_diff_runs_git_command_with_str_hashes(self):
 
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
-        self.repo._get_diff('0' * 40, 1)
 
-        self.repo.run_git_command.assert_called_once_with('diff -U%s %s %s' %
 
-            (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
+        self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1)
 
+        self.repo.run_git_command.assert_called_once_with('show -U%s %s' %
 
+            (3, self.repo._get_revision(1)))
 
 
 
     def test_get_diff_runs_git_command_with_path_if_its_given(self):
 
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
-        self.repo._get_diff(0, 1, 'foo')
 
+        self.repo.get_diff(0, 1, 'foo')
 
         self.repo.run_git_command.assert_called_once_with('diff -U%s %s %s -- "foo"'
 
             % (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
 
 
diff --git a/vcs/tests/test_repository.py b/vcs/tests/test_repository.py
 
index e34033e29fa9b3d3366b723beab129cee73869b9..b6e3f419778d6009229e9108824acaf83eea1784 100644
 
--- a/vcs/tests/test_repository.py
 
+++ b/vcs/tests/test_repository.py
 
@@ -1,9 +1,12 @@
 
 from __future__ import with_statement
 
+import datetime
 
 from base import BackendTestMixin
 
 from conf import SCM_TESTS
 
+from conf import TEST_USER_CONFIG_FILE
 
+from vcs.nodes import FileNode
 
 from vcs.utils.compat import unittest
 
+from vcs.exceptions import ChangesetDoesNotExistError
 
 
 
-from conf import TEST_USER_CONFIG_FILE
 
 
 
 class RepositoryBaseTest(BackendTestMixin):
 
     recreate_repo_per_test = False
 
@@ -29,6 +32,176 @@ class RepositoryBaseTest(BackendTestMixin):
 
             'foo.bar@example.com')
 
 
 
 
 
+
 
+class RepositoryGetDiffTest(BackendTestMixin):
 
+
 
+    @classmethod
 
+    def _get_commits(cls):
 
+        commits = [
 
+            {
 
+                'message': 'Initial commit',
 
+                'author': 'Joe Doe <joe.doe@example.com>',
 
+                'date': datetime.datetime(2010, 1, 1, 20),
 
+                'added': [
 
+                    FileNode('foobar', content='foobar'),
 
+                    FileNode('foobar2', content='foobar2'),
 
+                ],
 
+            },
 
+            {
 
+                'message': 'Changed foobar, added foobar3',
 
+                'author': 'Jane Doe <jane.doe@example.com>',
 
+                'date': datetime.datetime(2010, 1, 1, 21),
 
+                'added': [
 
+                    FileNode('foobar3', content='foobar3'),
 
+                ],
 
+                'changed': [
 
+                    FileNode('foobar', 'FOOBAR'),
 
+                ],
 
+            },
 
+            {
 
+                'message': 'Removed foobar, changed foobar3',
 
+                'author': 'Jane Doe <jane.doe@example.com>',
 
+                'date': datetime.datetime(2010, 1, 1, 22),
 
+                'changed': [
 
+                    FileNode('foobar3', content='FOOBAR\nFOOBAR\nFOOBAR\n'),
 
+                ],
 
+                'removed': [FileNode('foobar')],
 
+            },
 
+        ]
 
+        return commits
 
+
 
+    def test_raise_for_wrong(self):
 
+        with self.assertRaises(ChangesetDoesNotExistError):
 
+            self.repo.get_diff('a' * 40, 'b' * 40)
 
+
 
+class GitRepositoryGetDiffTest(RepositoryGetDiffTest, unittest.TestCase):
 
+    backend_alias = 'git'
 
+
 
+    def test_initial_commit_diff(self):
 
+        initial_rev = self.repo.revisions[0]
 
+        self.assertEqual(self.repo.get_diff(self.repo.EMPTY_CHANGESET, initial_rev), '''diff --git a/foobar b/foobar
 
+new file mode 100644
 
+index 0000000..f6ea049
 
+--- /dev/null
 
++++ b/foobar
 
+@@ -0,0 +1 @@
 
++foobar
 
+\ No newline at end of file
 
+diff --git a/foobar2 b/foobar2
 
+new file mode 100644
 
+index 0000000..e8c9d6b
 
+--- /dev/null
 
++++ b/foobar2
 
+@@ -0,0 +1 @@
 
++foobar2
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_second_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[0], revs[1]), '''diff --git a/foobar b/foobar
 
+index f6ea049..389865b 100644
 
+--- a/foobar
 
++++ b/foobar
 
+@@ -1 +1 @@
 
+-foobar
 
+\ No newline at end of file
 
++FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+new file mode 100644
 
+index 0000000..c11c37d
 
+--- /dev/null
 
++++ b/foobar3
 
+@@ -0,0 +1 @@
 
++foobar3
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_third_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[1], revs[2]), '''diff --git a/foobar b/foobar
 
+deleted file mode 100644
 
+index 389865b..0000000
 
+--- a/foobar
 
++++ /dev/null
 
+@@ -1 +0,0 @@
 
+-FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+index c11c37d..f932447 100644
 
+--- a/foobar3
 
++++ b/foobar3
 
+@@ -1 +1,3 @@
 
+-foobar3
 
+\ No newline at end of file
 
++FOOBAR
 
++FOOBAR
 
++FOOBAR
 
+''')
 
+
 
+
 
+class HgRepositoryGetDiffTest(RepositoryGetDiffTest, unittest.TestCase):
 
+    backend_alias = 'hg'
 
+
 
+    def test_initial_commit_diff(self):
 
+        initial_rev = self.repo.revisions[0]
 
+        self.assertEqual(self.repo.get_diff(self.repo.EMPTY_CHANGESET, initial_rev), '''diff --git a/foobar b/foobar
 
+new file mode 100755
 
+--- /dev/null
 
++++ b/foobar
 
+@@ -0,0 +1,1 @@
 
++foobar
 
+\ No newline at end of file
 
+diff --git a/foobar2 b/foobar2
 
+new file mode 100755
 
+--- /dev/null
 
++++ b/foobar2
 
+@@ -0,0 +1,1 @@
 
++foobar2
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_second_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[0], revs[1]), '''diff --git a/foobar b/foobar
 
+--- a/foobar
 
++++ b/foobar
 
+@@ -1,1 +1,1 @@
 
+-foobar
 
+\ No newline at end of file
 
++FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+new file mode 100755
 
+--- /dev/null
 
++++ b/foobar3
 
+@@ -0,0 +1,1 @@
 
++foobar3
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_third_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[1], revs[2]), '''diff --git a/foobar b/foobar
 
+deleted file mode 100755
 
+--- a/foobar
 
++++ /dev/null
 
+@@ -1,1 +0,0 @@
 
+-FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+--- a/foobar3
 
++++ b/foobar3
 
+@@ -1,1 +1,3 @@
 
+-foobar3
 
+\ No newline at end of file
 
++FOOBAR
 
++FOOBAR
 
++FOOBAR
 
+''')
 
+
 
+
 
 # For each backend create test case class
 
 for alias in SCM_TESTS:
 
     attrs = {
 
@@ -38,7 +211,6 @@ for alias in SCM_TESTS:
 
     bases = (RepositoryBaseTest, unittest.TestCase)
 
     globals()[cls_name] = type(cls_name, bases, attrs)
 
 
 
-
 
 if __name__ == '__main__':
 
     unittest.main()
 
 
kallithea/tests/fixtures/hg_diff_mod_single_file_and_rename_and_chmod.diff
Show inline comments
 
diff --git a/README.rst b/README
 
old mode 100755
 
new mode 100644
 
rename from README.rst
 
rename to README
 
--- a/README.rst
 
+++ b/README
 
@@ -1,4 +1,7 @@
 
 readme2
 
 line 1
 
  line2
 

	
 
 
 
+line 1
 
+ line2
 
+
 
\ No newline at end of file
 
+
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
from __future__ import with_statement
 
from kallithea.tests import *
 
from kallithea.lib.diffs import DiffProcessor, NEW_FILENODE, DEL_FILENODE, \
 
    MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE, COPIED_FILENODE
 
from kallithea.tests.fixture import Fixture
 

	
 
fixture = Fixture()
 

	
 

	
 
DIFF_FIXTURES = {
 
    'hg_diff_add_single_binary_file.diff': [
 
        ('US Warszawa.jpg', 'A',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {NEW_FILENODE: 'new file 100755',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
    ],
 
    'hg_diff_mod_single_binary_file.diff': [
 
        ('US Warszawa.jpg', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
    ],
 

	
 
    'hg_diff_mod_single_file_and_rename_and_chmod.diff': [
 
        ('README', 'R',
 
         {'added': 3,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from README.rst to README',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
 
    ],
 
    'hg_diff_mod_file_and_rename.diff': [
 
        ('README.rst', 'R',
 
         {'added': 3,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from README to README.rst'}}),
 
    ],
 
    'hg_diff_del_single_binary_file.diff': [
 
        ('US Warszawa.jpg', 'D',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {DEL_FILENODE: 'deleted file',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
    ],
 
    'hg_diff_chmod_and_mod_single_binary_file.diff': [
 
        ('gravatar.png', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
    ],
 
    'hg_diff_chmod.diff': [
 
        ('file', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {CHMOD_FILENODE: 'modified file chmod 100755 => 100644'}}),
 
    ],
 
    'hg_diff_rename_file.diff': [
 
        ('file_renamed', 'R',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {RENAMED_FILENODE: 'file renamed from file to file_renamed'}}),
 
    ],
 
    'hg_diff_rename_and_chmod_file.diff': [
 
        ('README', 'R',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755',
 
                  RENAMED_FILENODE: 'file renamed from README.rst to README'}}),
 
    ],
 
    'hg_diff_binary_and_normal.diff': [
 
        ('img/baseline-10px.png', 'A',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {NEW_FILENODE: 'new file 100644',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('img/baseline-20px.png', 'D',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {DEL_FILENODE: 'deleted file',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('index.html', 'M',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('js/global.js', 'D',
 
         {'added': 0,
 
          'deleted': 75,
 
          'binary': False,
 
          'ops': {DEL_FILENODE: 'deleted file'}}),
 
        ('js/jquery/hashgrid.js', 'A',
 
         {'added': 340,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {NEW_FILENODE: 'new file 100755'}}),
 
        ('less/docs.less', 'M',
 
         {'added': 34,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('less/scaffolding.less', 'M',
 
         {'added': 1,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('readme.markdown', 'M',
 
         {'added': 1,
 
          'deleted': 10,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'git_diff_chmod.diff': [
 
        ('work-horus.xls', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
 
    ],
 
    'git_diff_rename_file.diff': [
 
        ('file.xls', 'R',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}})
 
    ],
 
    'git_diff_mod_single_binary_file.diff': [
 
        ('US Warszawa.jpg', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'git_diff_binary_and_normal.diff': [
 
        ('img/baseline-10px.png', 'A',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {NEW_FILENODE: 'new file 100644',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('img/baseline-20px.png', 'D',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {DEL_FILENODE: 'deleted file',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('index.html', 'M',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('js/global.js', 'D',
 
         {'added': 0,
 
          'deleted': 75,
 
          'binary': False,
 
          'ops': {DEL_FILENODE: 'deleted file'}}),
 
        ('js/jquery/hashgrid.js', 'A',
 
         {'added': 340,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {NEW_FILENODE: 'new file 100755'}}),
 
        ('less/docs.less', 'M',
 
         {'added': 34,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('less/scaffolding.less', 'M',
 
         {'added': 1,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('readme.markdown', 'M',
 
         {'added': 1,
 
          'deleted': 10,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'diff_with_diff_data.diff': [
 
        ('vcs/backends/base.py', 'M',
 
         {'added': 18,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/git/repository.py', 'M',
 
         {'added': 46,
 
          'deleted': 15,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/hg.py', 'M',
 
         {'added': 22,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_git.py', 'M',
 
         {'added': 5,
 
          'deleted': 5,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_repository.py', 'M',
 
         {'added': 174,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_file.diff': [
 
        ('file2', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
 
    ],
 
    'hg_diff_copy_and_modify_file.diff': [
 
        ('file3', 'M',
 
         {'added': 1,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_and_chmod_file.diff': [
 
        ('file4', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
 
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
 
    ],
 
    'hg_diff_copy_chmod_and_edit_file.diff': [
 
        ('file5', 'M',
 
         {'added': 2,
 
          'deleted': 1,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ]
 
}
 

	
 

	
 
class DiffLibTest(BaseTestCase):
 

	
 
    @parameterized.expand([(x,) for x in DIFF_FIXTURES])
 
    def test_diff(self, diff_fixture):
 

	
 
        diff = fixture.load_resource(diff_fixture)
 
        diff = fixture.load_resource(diff_fixture, strip=False)
 

	
 
        diff_proc = DiffProcessor(diff)
 
        diff_proc_d = diff_proc.prepare()
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
 
        expected_data = DIFF_FIXTURES[diff_fixture]
 
        self.assertListEqual(expected_data, data)
0 comments (0 inline, 0 general)