Changeset - c417ef1f43b1
[Not reviewed]
default
0 2 1
Mads Kiilerich - 10 years ago 2015-06-09 22:50:20
madski@unity3d.com
diffs: avoid conflicts between inline diff mechanism and special markup

It would sometimes emit markup like
<pre><ins><u</ins> <ins>class</ins><ins>=</ins><ins>"cr</ins><ins>"></u></ins></pre>
instead of
<pre><ins><u class="cr"></u></ins></pre>
3 files changed with 48 insertions and 2 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -100,194 +100,194 @@ def wrapped_diff(filenode_old, filenode_
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Return a git style diff between ``filenode_old`` and ``filenode_new``.

    :param filenode_old: ``FileNode`` for the old side of the comparison
    :param filenode_new: ``FileNode`` for the new side; its changeset's
        repository and its path are what the diff is requested for
    :param ignore_whitespace: ignore whitespaces in diff (passed through to
        ``repo.get_diff``)
    :param context: context value passed to ``repo.get_diff`` (presumably the
        number of context lines); falsy values fall back to 3
    :returns: ``''`` if either node is a ``SubModuleNode``, otherwise the
        result of ``repo.get_diff``
    :raises VCSError: if either node is not a ``FileNode``
    """
    # make sure we pass in default context
    context = context or 3

    # any() rather than filter(): a filter object is always truthy on
    # Python 3, which would turn this guard into an unconditional return.
    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    for filenode in (filenode_old, filenode_new):
        if not isinstance(filenode, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                           % filenode.__class__)

    repo = filenode_new.changeset.repository
    # A changeset without ``raw_id`` is treated as the repository's empty one.
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)

    return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                         ignore_whitespace, context)
 

	
 
# Numeric codes naming the kinds of change a diff can record for a file.
# NOTE(review): the diff parser tests key each file's 'ops' dict by these
# values (e.g. MOD_FILENODE -> 'modified file'); confirm before renumbering.
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
COPIED_FILENODE = 5
CHMOD_FILENODE = 6
BIN_FILENODE = 7
 

	
 

	
 
class DiffLimitExceeded(Exception):
    """Raised when the amount of processed diff text exceeds the diff limit."""
 

	
 

	
 
class LimitedDiffContainer(object):
    """
    Iterable wrapper that carries a diff together with its limit bookkeeping.

    Iterating the container yields the items of the wrapped ``diff`` unchanged.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        # Plain attribute storage; nothing is validated or copied here.
        self.cur_diff_size = cur_diff_size
        self.diff_limit = diff_limit
        self.diff = diff

    def __iter__(self):
        # Delegate item by item to whatever iterable was wrapped.
        for item in self.diff:
            yield item
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        # has already been split on this:
 
        # ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        # has already been split on this:
 
        # ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
           ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    #used for inline highlighter word split
 
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|<u>\t</u>| <i></i>|\W+?)')
 
    # Used for inline highlighter word split, must match the substitutions in _escaper
 
    _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 

	
 
    def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
        """
        Set up processing state for one diff text.

        :param diff: the diff as a string; anything else is rejected
        :param vcs: type of version control, hg or git
        :param format: format of the passed diff, `udiff` or `gitdiff`;
            selects both the parser and the inline highlighter
        :param diff_limit: size above which the diff is considered "big" and
            cut off; None means show the full diff
        """
        if not isinstance(diff, basestring):
            raise Exception('Diff must be a basestring got %s instead' % type(diff))

        # input and how to interpret it
        self._diff = diff
        self._format = format
        self.vcs = vcs

        # size bookkeeping used for the diff limit check
        self.diff_limit = diff_limit
        self.diff_size = len(diff)
        self.cur_diff_size = 0

        # results, filled in while parsing
        self.adds = 0
        self.removes = 0
        self.parsed = False
        self.parsed_diff = []

        # anything that is not 'gitdiff' is handled as a plain unified diff
        is_gitdiff = format == 'gitdiff'
        self.differ = (self._highlight_line_difflib if is_gitdiff
                       else self._highlight_line_udiff)
        self._parser = (self._parse_gitdiff if is_gitdiff
                        else self._parse_udiff)
 

	
 
    def _copy_iterator(self):
 
        """
 
        make a fresh copy of generator, we should not iterate thru
 
        an original as it's needed for repeating operations on
 
        this instance of DiffProcessor
 
        """
 
        self.__udiff, iterator_copy = tee(self.__udiff)
 
        return iterator_copy
 

	
 
    def _escaper(self, string):
        """
        Escape special characters in a piece of diff text, enforcing the
        diff limit along the way.

        The length of ``string`` is added to ``self.cur_diff_size`` before the
        limit is checked.

        :param string: diff text to escape
        :returns: ``safe_unicode(string)`` with every ``_escape_re`` match
            replaced by its markup
        :raises DiffLimitExceeded: if a limit is set and the accumulated size
            is above it
        """
        self.cur_diff_size += len(string)

        # checked on every call, so processing stops as soon as the running
        # total goes over the limit
        limit = self.diff_limit
        if limit is not None and self.cur_diff_size > limit:
            raise DiffLimitExceeded('Diff Limit Exceeded')

        # replacement for each capture group of _escape_re, in group order
        markup = (
            '&amp;',
            '&lt;',
            '&gt;',
            '<u>\t</u>',
            '<u class="cr"></u>',
            ' <i></i>',
        )

        def substitute(match):
            # the groups are alternatives, so exactly one of them matched and
            # lastindex is its (1-based) number
            return markup[match.lastindex - 1]

        return self._escape_re.sub(substitute, safe_unicode(string))
 

	
 
    def _line_counter(self, l):
        """
        Update the add/remove totals for one diff line.

        A line starting with ``+`` (but not ``+++``) counts as an addition and
        one starting with ``-`` (but not ``---``) as a removal; any other line
        leaves the totals alone.

        :param l: a single line of the diff
        :returns: ``safe_unicode(l)``
        """
        is_added = l.startswith('+') and not l.startswith('+++')
        is_removed = l.startswith('-') and not l.startswith('---')

        if is_added:
            self.adds += 1
        elif is_removed:
            self.removes += 1

        return safe_unicode(l)
 

	
 
    def _highlight_line_difflib(self, line, next_):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 

	
 
        if line['action'] == 'del':
 
            old, new = line, next_
 
        else:
kallithea/tests/fixtures/markuptest.diff
Show inline comments
 
new file 100644
 
diff --git a/f b/f
 
--- a/f	
 
+++ b/f	
 
@@ -51,5 +51,12 @@
 
 	begin();
 
 	
 
+	int foo;
 
+	int bar; 
 
+	int baz;	
 
+	int space; 
 
+	int tab;	
 
+	
 
  
 
-	#define MAX_STEPS (48)
 
+	
 
+	#define MAX_STEPS (64)
 
 
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -182,96 +182,125 @@ DIFF_FIXTURES = {
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('readme.markdown', 'M',
 
         {'added': 1,
 
          'deleted': 10,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'diff_with_diff_data.diff': [
 
        ('vcs/backends/base.py', 'M',
 
         {'added': 18,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/git/repository.py', 'M',
 
         {'added': 46,
 
          'deleted': 15,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/hg.py', 'M',
 
         {'added': 22,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_git.py', 'M',
 
         {'added': 5,
 
          'deleted': 5,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_repository.py', 'M',
 
         {'added': 174,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'git_diff_modify_binary_file.diff': [
 
        ('file.name', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'hg_diff_copy_file.diff': [
 
        ('file2', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
 
    ],
 
    'hg_diff_copy_and_modify_file.diff': [
 
        ('file3', 'M',
 
         {'added': 1,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_and_chmod_file.diff': [
 
        ('file4', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
 
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
 
    ],
 
    'hg_diff_copy_chmod_and_edit_file.diff': [
 
        ('file5', 'M',
 
         {'added': 2,
 
          'deleted': 1,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_rename_space_cr.diff': [
 
        ('oh yes', 'R',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
}
 

	
 

	
 
class DiffLibTest(BaseTestCase):
    # Exercises DiffProcessor against the diff files shipped as test fixtures.

    @parameterized.expand([(x,) for x in DIFF_FIXTURES])
    def test_diff(self, diff_fixture):
        # Filename, operation and stats of every parsed file must match the
        # expectations recorded in DIFF_FIXTURES for this fixture.
        diff = fixture.load_resource(diff_fixture, strip=False)
        diff_proc = DiffProcessor(diff)
        diff_proc_d = diff_proc.prepare()
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
        expected_data = DIFF_FIXTURES[diff_fixture]
        self.assertListEqual(expected_data, data)

    def test_diff_markup(self):
        # The special markup (<u>, <u class="cr">, <i>) has to come out
        # intact, also where inline <ins>/<del> highlighting is applied.
        diff = fixture.load_resource('markuptest.diff', strip=False)
        diff_proc = DiffProcessor(diff)
        diff_proc_d = diff_proc.prepare()
        chunks = diff_proc_d[0]['chunks']
        self.assertFalse(chunks[0])
        # Render one line per parsed diff line so a failure shows a readable
        # text diff; the leading '\n' matches the raw string's first newline.
        rendered = ['\n']
        for d in chunks[1]:
            rendered.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
        s = ''.join(rendered)
        self.assertEqual(s, r'''
context ... ... u'@@ -51,5 +51,12 @@\n'
unmod    51  51 u'<u>\t</u>begin();\n'
unmod    52  52 u'<u>\t</u>\n'
add      53     u'<u>\t</u>int foo;<u class="cr"></u>\n'
add      54     u'<u>\t</u>int bar; <u class="cr"></u>\n'
add      55     u'<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
add      56     u'<u>\t</u>int space; <i></i>'
add      57     u'<u>\t</u>int tab;<u>\t</u>\n'
add      58     u'<u>\t</u>\n'
unmod    59  53 u' <i></i>'
del          54 u'<u>\t</u><del>#define MAX_STEPS (48)</del>\n'
add      60     u'<u>\t</u><ins><u class="cr"></u></ins>\n'
add      61     u'<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
unmod    62  55 u'\n'
''')
0 comments (0 inline, 0 general)