Changeset - 0ed42ca7ff9e
[Not reviewed]
beta
0 2 1
Marcin Kuzminski - 13 years ago 2012-11-23 23:01:27
marcin@python-works.com
Fixed issue with inproper handling of diff parsing that could lead to infinit loops.
This was an edge case when diff contained diff data inside. Regresion test was
added
3 files changed with 436 insertions and 12 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/diffs.py
Show inline comments
 
@@ -157,14 +157,14 @@ class LimitedDiffContainer(object):
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = '\\ No newline at end of file\n'
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        #^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>\S+)\n
 
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
 
@@ -345,12 +345,18 @@ class DiffProcessor(object):
 
            match = self._hg_header_re.match(diff_chunk)
 
            diff = diff_chunk[match.end():]
 
            return match.groupdict(), imap(self._escaper, diff.splitlines(1))
 
        else:
 
            raise Exception('VCS type %s is not supported' % self.vcs)
 

	
 
    def _clean_line(self, line, command):
 
        if command in ['+', '-', ' ']:
 
            #only modify the line if it's actually a diff thing
 
            line = line[1:]
 
        return line
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
        _files = []
 
        diff_container = lambda arg: arg
 

	
 
        ##split the diff in chunks of separate --git a/file b/file chunks
 
        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
 
@@ -486,20 +492,15 @@ class DiffProcessor(object):
 
                            'line':       line,
 
                        })
 

	
 
                line = lineiter.next()
 

	
 
                while old_line < old_end or new_line < new_end:
 
                    command = ' '
 
                    if line:
 
                        command = line[0]
 
                        if command in ['+', '-', ' ']:
 
                            #only modify the line if it's actually a diff
 
                            # thing
 
                            line = line[1:]
 
                    else:
 
                        command = ' '
 

	
 
                    affects_old = affects_new = False
 

	
 
                    # ignore those if we don't expect them
 
                    if command in '#@':
 
                        continue
 
@@ -512,32 +513,32 @@ class DiffProcessor(object):
 
                        action = 'del'
 
                        stats[1] += 1
 
                    else:
 
                        affects_old = affects_new = True
 
                        action = 'unmod'
 

	
 
                    if line != self._newline_marker:
 
                    if not self._newline_marker.match(line):
 
                        old_line += affects_old
 
                        new_line += affects_new
 
                        lines.append({
 
                            'old_lineno':   affects_old and old_line or '',
 
                            'new_lineno':   affects_new and new_line or '',
 
                            'action':       action,
 
                            'line':         line
 
                            'line':         self._clean_line(line, command)
 
                        })
 

	
 
                    line = lineiter.next()
 

	
 
                    if line == self._newline_marker:
 
                    if self._newline_marker.match(line):
 
                        # we need to append to lines, since this is not
 
                        # counted in the line specs of diff
 
                        lines.append({
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         line
 
                            'line':         self._clean_line(line, command)
 
                        })
 

	
 
        except StopIteration:
 
            pass
 
        return chunks, stats
 

	
rhodecode/tests/fixtures/diff_with_diff_data.diff
Show inline comments
 
new file 100644
 
diff --git a/vcs/backends/base.py b/vcs/backends/base.py
 
index 212267ca23949807b8d89fa8ca495827dcfab3b1..ad17f16634da602503ed4ddd7cdd2e1ccdf4bed4 100644
 
--- a/vcs/backends/base.py
 
+++ b/vcs/backends/base.py
 
@@ -54,6 +54,7 @@ class BaseRepository(object):
 
     """
 
     scm = None
 
     DEFAULT_BRANCH_NAME = None
 
+    EMPTY_CHANGESET = '0' * 40
 
 
 
     def __init__(self, repo_path, create=False, **kwargs):
 
         """
 
@@ -204,6 +205,23 @@ class BaseRepository(object):
 
         """
 
         raise NotImplementedError
 
 
 
+    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
+            context=3):
 
+        """
 
+        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
+        ``rev2`` since ``rev1``.
 
+
 
+        :param rev1: Entry point from which diff is shown. Can be
 
+          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
+          the changes since empty state of the repository until ``rev2``
 
+        :param rev2: Until which revision changes should be shown.
 
+        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
+          changes. Defaults to ``False``.
 
+        :param context: How many lines before/after changed lines should be
 
+          shown. Defaults to ``3``.
 
+        """
 
+        raise NotImplementedError
 
+
 
     # ========== #
 
     # COMMIT API #
 
     # ========== #
 
@@ -341,7 +359,6 @@ class BaseChangeset(object):
 
             otherwise; trying to access this attribute while there is no
 
             changesets would raise ``EmptyRepositoryError``
 
     """
 
-
 
     def __str__(self):
 
         return '<%s at %s:%s>' % (self.__class__.__name__, self.revision,
 
             self.short_id)
 
@@ -591,7 +608,6 @@ class BaseChangeset(object):
 
         return data
 
 
 
 
 
-
 
 class BaseWorkdir(object):
 
     """
 
     Working directory representation of single repository.
 
diff --git a/vcs/backends/git/repository.py b/vcs/backends/git/repository.py
 
index 8b9d1247fdee44e7a021b80e4965d8609cfd5720..e9f04e74dedd2f57417eb91dd2f4f7c61ec7e097 100644
 
--- a/vcs/backends/git/repository.py
 
+++ b/vcs/backends/git/repository.py
 
@@ -12,6 +12,7 @@
 
 import os
 
 import re
 
 import time
 
+import inspect
 
 import posixpath
 
 from dulwich.repo import Repo, NotGitRepository
 
 #from dulwich.config import ConfigFile
 
@@ -101,21 +102,6 @@ class GitRepository(BaseRepository):
 
                 "stderr:\n%s" % (cmd, se))
 
         return so, se
 
 
 
-    def _get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
-            context=3):
 
-        rev1 = self._get_revision(rev1)
 
-        rev2 = self._get_revision(rev2)
 
-        
 
-        if ignore_whitespace:
 
-            cmd = 'diff -U%s -w %s %s' % (context, rev1, rev2)
 
-        else:
 
-            cmd = 'diff -U%s %s %s' % (context, rev1, rev2)
 
-        if path:
 
-            cmd += ' -- "%s"' % path
 
-        so, se = self.run_git_command(cmd)
 
-
 
-        return so
 
-
 
     def _check_url(self, url):
 
         """
 
         Functon will check given url and try to verify if it's a valid
 
@@ -322,6 +308,8 @@ class GitRepository(BaseRepository):
 
         Returns ``GitChangeset`` object representing commit from git repository
 
         at the given revision or head (most recent commit) if None given.
 
         """
 
+        if isinstance(revision, GitChangeset):
 
+            return revision
 
         revision = self._get_revision(revision)
 
         changeset = GitChangeset(repository=self, revision=revision)
 
         return changeset
 
@@ -398,6 +386,49 @@ class GitRepository(BaseRepository):
 
         for rev in revs:
 
             yield self.get_changeset(rev)
 
 
 
+    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
+            context=3):
 
+        """
 
+        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
+        ``rev2`` since ``rev1``.
 
+
 
+        :param rev1: Entry point from which diff is shown. Can be
 
+          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
+          the changes since empty state of the repository until ``rev2``
 
+        :param rev2: Until which revision changes should be shown.
 
+        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
+          changes. Defaults to ``False``.
 
+        :param context: How many lines before/after changed lines should be
 
+          shown. Defaults to ``3``.
 
+        """
 
+        flags = ['-U%s' % context]
 
+        if ignore_whitespace:
 
+            flags.append('-w')
 
+
 
+        if rev1 == self.EMPTY_CHANGESET:
 
+            rev2 = self.get_changeset(rev2).raw_id
 
+            cmd = ' '.join(['show'] + flags + [rev2])
 
+        else:
 
+            rev1 = self.get_changeset(rev1).raw_id
 
+            rev2 = self.get_changeset(rev2).raw_id
 
+            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
 
+
 
+        if path:
 
+            cmd += ' -- "%s"' % path
 
+        stdout, stderr = self.run_git_command(cmd)
 
+        # If we used 'show' command, strip first few lines (until actual diff
 
+        # starts)
 
+        if rev1 == self.EMPTY_CHANGESET:
 
+            lines = stdout.splitlines()
 
+            x = 0
 
+            for line in lines:
 
+                if line.startswith('diff'):
 
+                    break
 
+                x += 1
 
+            # Append new line just like 'diff' command do
 
+            stdout = '\n'.join(lines[x:]) + '\n'
 
+        return stdout
 
+
 
     @LazyProperty
 
     def in_memory_changeset(self):
 
         """
 
diff --git a/vcs/backends/hg.py b/vcs/backends/hg.py
 
index f1f9f95e4d476ab01d8e7b02a8b59034c0740a3b..b7d63c552c39b2f8aaec17ef46551369c8b8e793 100644
 
--- a/vcs/backends/hg.py
 
+++ b/vcs/backends/hg.py
 
@@ -256,13 +256,32 @@ class MercurialRepository(BaseRepository):
 
 
 
         return map(lambda x: hex(x[7]), self._repo.changelog.index)[:-1]
 
 
 
-    def _get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
+    def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
 
                   context=3):
 
+        """
 
+        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
+        ``rev2`` since ``rev1``.
 
+
 
+        :param rev1: Entry point from which diff is shown. Can be
 
+          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
+          the changes since empty state of the repository until ``rev2``
 
+        :param rev2: Until which revision changes should be shown.
 
+        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
+          changes. Defaults to ``False``.
 
+        :param context: How many lines before/after changed lines should be
 
+          shown. Defaults to ``3``.
 
+        """
 
+        # Check if given revisions are present at repository (may raise
 
+        # ChangesetDoesNotExistError)
 
+        if rev1 != self.EMPTY_CHANGESET:
 
+            self.get_changeset(rev1)
 
+        self.get_changeset(rev2)
 
+
 
         file_filter = match(self.path, '', [path])
 
-        return patch.diff(self._repo, rev1, rev2, match=file_filter,
 
+        return ''.join(patch.diff(self._repo, rev1, rev2, match=file_filter,
 
                           opts=diffopts(git=True,
 
                                         ignorews=ignore_whitespace,
 
-                                        context=context))
 
+                                        context=context)))
 
 
 
     def _check_url(self, url):
 
         """
 
diff --git a/vcs/tests/test_git.py b/vcs/tests/test_git.py
 
index 30da035a2a35c3dca14064778e97188b6d4ce5d6..d4b82b9e612af8bb5bf490a827377c7c2567735a 100644
 
--- a/vcs/tests/test_git.py
 
+++ b/vcs/tests/test_git.py
 
@@ -639,19 +639,19 @@ class GitSpecificWithRepoTest(BackendTestMixin, unittest.TestCase):
 
 
 
     def test_get_diff_runs_git_command_with_hashes(self):
 
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
-        self.repo._get_diff(0, 1)
 
+        self.repo.get_diff(0, 1)
 
         self.repo.run_git_command.assert_called_once_with('diff -U%s %s %s' %
 
             (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
 
 
     def test_get_diff_runs_git_command_with_str_hashes(self):
 
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
-        self.repo._get_diff('0' * 40, 1)
 
-        self.repo.run_git_command.assert_called_once_with('diff -U%s %s %s' %
 
-            (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
+        self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1)
 
+        self.repo.run_git_command.assert_called_once_with('show -U%s %s' %
 
+            (3, self.repo._get_revision(1)))
 
 
 
     def test_get_diff_runs_git_command_with_path_if_its_given(self):
 
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
-        self.repo._get_diff(0, 1, 'foo')
 
+        self.repo.get_diff(0, 1, 'foo')
 
         self.repo.run_git_command.assert_called_once_with('diff -U%s %s %s -- "foo"'
 
             % (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
 
 
diff --git a/vcs/tests/test_repository.py b/vcs/tests/test_repository.py
 
index e34033e29fa9b3d3366b723beab129cee73869b9..b6e3f419778d6009229e9108824acaf83eea1784 100644
 
--- a/vcs/tests/test_repository.py
 
+++ b/vcs/tests/test_repository.py
 
@@ -1,9 +1,12 @@
 
 from __future__ import with_statement
 
+import datetime
 
 from base import BackendTestMixin
 
 from conf import SCM_TESTS
 
+from conf import TEST_USER_CONFIG_FILE
 
+from vcs.nodes import FileNode
 
 from vcs.utils.compat import unittest
 
+from vcs.exceptions import ChangesetDoesNotExistError
 
 
 
-from conf import TEST_USER_CONFIG_FILE
 
 
 
 class RepositoryBaseTest(BackendTestMixin):
 
     recreate_repo_per_test = False
 
@@ -29,6 +32,176 @@ class RepositoryBaseTest(BackendTestMixin):
 
             'foo.bar@example.com')
 
 
 
 
 
+
 
+class RepositoryGetDiffTest(BackendTestMixin):
 
+
 
+    @classmethod
 
+    def _get_commits(cls):
 
+        commits = [
 
+            {
 
+                'message': 'Initial commit',
 
+                'author': 'Joe Doe <joe.doe@example.com>',
 
+                'date': datetime.datetime(2010, 1, 1, 20),
 
+                'added': [
 
+                    FileNode('foobar', content='foobar'),
 
+                    FileNode('foobar2', content='foobar2'),
 
+                ],
 
+            },
 
+            {
 
+                'message': 'Changed foobar, added foobar3',
 
+                'author': 'Jane Doe <jane.doe@example.com>',
 
+                'date': datetime.datetime(2010, 1, 1, 21),
 
+                'added': [
 
+                    FileNode('foobar3', content='foobar3'),
 
+                ],
 
+                'changed': [
 
+                    FileNode('foobar', 'FOOBAR'),
 
+                ],
 
+            },
 
+            {
 
+                'message': 'Removed foobar, changed foobar3',
 
+                'author': 'Jane Doe <jane.doe@example.com>',
 
+                'date': datetime.datetime(2010, 1, 1, 22),
 
+                'changed': [
 
+                    FileNode('foobar3', content='FOOBAR\nFOOBAR\nFOOBAR\n'),
 
+                ],
 
+                'removed': [FileNode('foobar')],
 
+            },
 
+        ]
 
+        return commits
 
+
 
+    def test_raise_for_wrong(self):
 
+        with self.assertRaises(ChangesetDoesNotExistError):
 
+            self.repo.get_diff('a' * 40, 'b' * 40)
 
+
 
+class GitRepositoryGetDiffTest(RepositoryGetDiffTest, unittest.TestCase):
 
+    backend_alias = 'git'
 
+
 
+    def test_initial_commit_diff(self):
 
+        initial_rev = self.repo.revisions[0]
 
+        self.assertEqual(self.repo.get_diff(self.repo.EMPTY_CHANGESET, initial_rev), '''diff --git a/foobar b/foobar
 
+new file mode 100644
 
+index 0000000..f6ea049
 
+--- /dev/null
 
++++ b/foobar
 
+@@ -0,0 +1 @@
 
++foobar
 
+\ No newline at end of file
 
+diff --git a/foobar2 b/foobar2
 
+new file mode 100644
 
+index 0000000..e8c9d6b
 
+--- /dev/null
 
++++ b/foobar2
 
+@@ -0,0 +1 @@
 
++foobar2
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_second_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[0], revs[1]), '''diff --git a/foobar b/foobar
 
+index f6ea049..389865b 100644
 
+--- a/foobar
 
++++ b/foobar
 
+@@ -1 +1 @@
 
+-foobar
 
+\ No newline at end of file
 
++FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+new file mode 100644
 
+index 0000000..c11c37d
 
+--- /dev/null
 
++++ b/foobar3
 
+@@ -0,0 +1 @@
 
++foobar3
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_third_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[1], revs[2]), '''diff --git a/foobar b/foobar
 
+deleted file mode 100644
 
+index 389865b..0000000
 
+--- a/foobar
 
++++ /dev/null
 
+@@ -1 +0,0 @@
 
+-FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+index c11c37d..f932447 100644
 
+--- a/foobar3
 
++++ b/foobar3
 
+@@ -1 +1,3 @@
 
+-foobar3
 
+\ No newline at end of file
 
++FOOBAR
 
++FOOBAR
 
++FOOBAR
 
+''')
 
+
 
+
 
+class HgRepositoryGetDiffTest(RepositoryGetDiffTest, unittest.TestCase):
 
+    backend_alias = 'hg'
 
+
 
+    def test_initial_commit_diff(self):
 
+        initial_rev = self.repo.revisions[0]
 
+        self.assertEqual(self.repo.get_diff(self.repo.EMPTY_CHANGESET, initial_rev), '''diff --git a/foobar b/foobar
 
+new file mode 100755
 
+--- /dev/null
 
++++ b/foobar
 
+@@ -0,0 +1,1 @@
 
++foobar
 
+\ No newline at end of file
 
+diff --git a/foobar2 b/foobar2
 
+new file mode 100755
 
+--- /dev/null
 
++++ b/foobar2
 
+@@ -0,0 +1,1 @@
 
++foobar2
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_second_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[0], revs[1]), '''diff --git a/foobar b/foobar
 
+--- a/foobar
 
++++ b/foobar
 
+@@ -1,1 +1,1 @@
 
+-foobar
 
+\ No newline at end of file
 
++FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+new file mode 100755
 
+--- /dev/null
 
++++ b/foobar3
 
+@@ -0,0 +1,1 @@
 
++foobar3
 
+\ No newline at end of file
 
+''')
 
+
 
+    def test_third_changeset_diff(self):
 
+        revs = self.repo.revisions
 
+        self.assertEqual(self.repo.get_diff(revs[1], revs[2]), '''diff --git a/foobar b/foobar
 
+deleted file mode 100755
 
+--- a/foobar
 
++++ /dev/null
 
+@@ -1,1 +0,0 @@
 
+-FOOBAR
 
+\ No newline at end of file
 
+diff --git a/foobar3 b/foobar3
 
+--- a/foobar3
 
++++ b/foobar3
 
+@@ -1,1 +1,3 @@
 
+-foobar3
 
+\ No newline at end of file
 
++FOOBAR
 
++FOOBAR
 
++FOOBAR
 
+''')
 
+
 
+
 
 # For each backend create test case class
 
 for alias in SCM_TESTS:
 
     attrs = {
 
@@ -38,7 +211,6 @@ for alias in SCM_TESTS:
 
     bases = (RepositoryBaseTest, unittest.TestCase)
 
     globals()[cls_name] = type(cls_name, bases, attrs)
 
 
 
-
 
 if __name__ == '__main__':
 
     unittest.main()
rhodecode/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -50,12 +50,19 @@ DIFF_FIXTURES = {
 
        (u'less/docs.less',        'M', [34, 0]),
 
        (u'less/scaffolding.less', 'M', [1, 3]),
 
        (u'readme.markdown',       'M', [1, 10]),
 
        (u'img/baseline-20px.png', 'D', ['b', DEL_FILENODE]),
 
        (u'js/global.js',          'D', [0, 75])
 
    ],
 
    'diff_with_diff_data.diff': [
 
        (u'vcs/backends/base.py', 'M', [18, 2]),
 
        (u'vcs/backends/git/repository.py', 'M', [46, 15]),
 
        (u'vcs/backends/hg.py', 'M', [22, 3]),
 
        (u'vcs/tests/test_git.py', 'M', [5, 5]),
 
        (u'vcs/tests/test_repository.py', 'M', [174, 2])
 
    ],
 
#    'large_diff.diff': [
 
#
 
#    ],
 

	
 

	
 
}
0 comments (0 inline, 0 general)