Changeset - b4a5632733d9
[Not reviewed]
stable
0 2 0
Branko Majic (branko) - 8 years ago 2018-02-13 16:23:55
branko@majic.rs
vcs: Fix internal server error when trying to get diff from Mercurial for paths that include globbing patterns (Issue #308):

- Treat passed-in paths as exact matches, dissallowing any type of
globbing/regex/pattern matching.
- Added accompanying tests.
2 files changed with 37 insertions and 1 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/vcs/backends/hg/repository.py
Show inline comments
 
@@ -76,385 +76,385 @@ class MercurialRepository(BaseRepository
 
    @property
 
    def _empty(self):
 
        """
 
        Checks if repository is empty ie. without any changesets
 
        """
 
        # TODO: Following raises errors when using InMemoryChangeset...
 
        # return len(self._repo.changelog) == 0
 
        return len(self.revisions) == 0
 

	
 
    @LazyProperty
 
    def revisions(self):
 
        """
 
        Returns list of revisions' ids, in ascending order.  Being lazy
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    @LazyProperty
 
    def name(self):
 
        return os.path.basename(self.path)
 

	
 
    @LazyProperty
 
    def branches(self):
 
        return self._get_branches()
 

	
 
    @LazyProperty
 
    def closed_branches(self):
 
        return self._get_branches(normal=False, closed=True)
 

	
 
    @LazyProperty
 
    def allbranches(self):
 
        """
 
        List all branches, including closed branches.
 
        """
 
        return self._get_branches(closed=True)
 

	
 
    def _get_branches(self, normal=True, closed=False):
 
        """
 
        Gets branches for this repository
 
        Returns only not closed branches by default
 

	
 
        :param closed: return also closed branches for mercurial
 
        :param normal: return also normal branches
 
        """
 

	
 
        if self._empty:
 
            return {}
 

	
 
        bt = OrderedDict()
 
        for bn, _heads, tip, isclosed in sorted(self._repo.branchmap().iterbranches()):
 
            if isclosed:
 
                if closed:
 
                    bt[safe_unicode(bn)] = hex(tip)
 
            else:
 
                if normal:
 
                    bt[safe_unicode(bn)] = hex(tip)
 

	
 
        return bt
 

	
 
    @LazyProperty
 
    def tags(self):
 
        """
 
        Gets tags for this repository
 
        """
 
        return self._get_tags()
 

	
 
    def _get_tags(self):
 
        if self._empty:
 
            return {}
 

	
 
        sortkey = lambda ctx: ctx[0]  # sort by name
 
        _tags = [(safe_unicode(n), hex(h),) for n, h in
 
                 self._repo.tags().items()]
 

	
 
        return OrderedDict(sorted(_tags, key=sortkey, reverse=True))
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None,
 
            **kwargs):
 
        """
 
        Creates and returns a tag for the given ``revision``.
 

	
 
        :param name: name for new tag
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param revision: changeset id for which new tag would be created
 
        :param message: message of the tag's commit
 
        :param date: date of tag's commit
 

	
 
        :raises TagAlreadyExistError: if tag with same name already exists
 
        """
 
        if name in self.tags:
 
            raise TagAlreadyExistError("Tag %s already exists" % name)
 
        changeset = self.get_changeset(revision)
 
        local = kwargs.setdefault('local', False)
 

	
 
        if message is None:
 
            message = "Added tag %s for changeset %s" % (name,
 
                changeset.short_id)
 

	
 
        if date is None:
 
            date = datetime.datetime.now().ctime()
 

	
 
        try:
 
            self._repo.tag(name, changeset._ctx.node(), message, local, user,
 
                date)
 
        except Abort as e:
 
            raise RepositoryError(e.message)
 

	
 
        # Reinitialize tags
 
        self.tags = self._get_tags()
 
        tag_id = self.tags[name]
 

	
 
        return self.get_changeset(revision=tag_id)
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
 
        """
 
        Removes tag with the given ``name``.
 

	
 
        :param name: name of the tag to be removed
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param message: message of the tag's removal commit
 
        :param date: date of tag's removal commit
 

	
 
        :raises TagDoesNotExistError: if tag with given name does not exists
 
        """
 
        if name not in self.tags:
 
            raise TagDoesNotExistError("Tag %s does not exist" % name)
 
        if message is None:
 
            message = "Removed tag %s" % name
 
        if date is None:
 
            date = datetime.datetime.now().ctime()
 
        local = False
 

	
 
        try:
 
            self._repo.tag(name, nullid, message, local, user, date)
 
            self.tags = self._get_tags()
 
        except Abort as e:
 
            raise RepositoryError(e.message)
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        """
 
        Gets bookmarks for this repository
 
        """
 
        return self._get_bookmarks()
 

	
 
    def _get_bookmarks(self):
 
        if self._empty:
 
            return {}
 

	
 
        sortkey = lambda ctx: ctx[0]  # sort by name
 
        _bookmarks = [(safe_unicode(n), hex(h),) for n, h in
 
                 self._repo._bookmarks.items()]
 
        return OrderedDict(sorted(_bookmarks, key=sortkey, reverse=True))
 

	
 
    def _get_all_revisions(self):
 

	
 
        return [self._repo[x].hex() for x in self._repo.filtered('visible').changelog.revs()]
 

	
 
    def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
 
                  context=3):
 
        """
 
        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
        ``rev2`` since ``rev1``.
 

	
 
        :param rev1: Entry point from which diff is shown. Can be
 
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
          the changes since empty state of the repository until ``rev2``
 
        :param rev2: Until which revision changes should be shown.
 
        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
          changes. Defaults to ``False``.
 
        :param context: How many lines before/after changed lines should be
 
          shown. Defaults to ``3``. If negative value is passed-in, it will be
 
          set to ``0`` instead.
 
        """
 

	
 
        # Negative context values make no sense, and will result in
 
        # errors. Ensure this does not happen.
 
        if context < 0:
 
            context = 0
 

	
 
        if hasattr(rev1, 'raw_id'):
 
            rev1 = getattr(rev1, 'raw_id')
 

	
 
        if hasattr(rev2, 'raw_id'):
 
            rev2 = getattr(rev2, 'raw_id')
 

	
 
        # Check if given revisions are present at repository (may raise
 
        # ChangesetDoesNotExistError)
 
        if rev1 != self.EMPTY_CHANGESET:
 
            self.get_changeset(rev1)
 
        self.get_changeset(rev2)
 
        if path:
 
            file_filter = match(self.path, '', [path])
 
            file_filter = match(self.path, '', [path], exact=True)
 
        else:
 
            file_filter = None
 

	
 
        return ''.join(patch.diff(self._repo, rev1, rev2, match=file_filter,
 
                          opts=diffopts(git=True,
 
                                        showfunc=True,
 
                                        ignorews=ignore_whitespace,
 
                                        context=context)))
 

	
 
    @classmethod
 
    def _check_url(cls, url, repoui=None):
 
        """
 
        Function will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that mercurial will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError, exception is also thrown
 
        when the return code is non 200
 
        """
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if url.startswith('ssh:'):
 
            # in case of invalid uri or authentication issues, sshpeer will
 
            # throw an exception.
 
            sshpeer(repoui or ui.ui(), url).lookup('tip')
 
            return True
 

	
 
        url_prefix = None
 
        if '+' in url[:url.find('://')]:
 
            url_prefix, url = url.split('+', 1)
 

	
 
        handlers = []
 
        url_obj = hg_url(url)
 
        test_uri, authinfo = url_obj.authinfo()
 
        url_obj.passwd = '*****'
 
        cleaned_uri = str(url_obj)
 

	
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
 
                        ('Accept', 'application/mercurial-0.1')]
 

	
 
        q = {"cmd": 'between'}
 
        q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            if resp.code != 200:
 
                raise Exception('Return Code is not 200')
 
        except Exception as e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
 

	
 
        if not url_prefix: # skip svn+http://... (and git+... too)
 
            # now check if it's a proper hg repo
 
            try:
 
                httppeer(repoui or ui.ui(), url).lookup('tip')
 
            except Exception as e:
 
                raise urllib2.URLError(
 
                    "url [%s] does not look like an hg repo org_exc: %s"
 
                    % (cleaned_uri, e))
 

	
 
        return True
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
 
        """
 
        Function will check for mercurial repository in given path and return
 
        a localrepo object. If there is no repository in that path it will
 
        raise an exception unless ``create`` parameter is set to True - in
 
        that case repository would be created and returned.
 
        If ``src_url`` is given, would try to clone repository from the
 
        location at given clone_point. Additionally it'll make update to
 
        working copy accordingly to ``update_after_clone`` flag
 
        """
 

	
 
        try:
 
            if src_url:
 
                url = str(self._get_url(src_url))
 
                opts = {}
 
                if not update_after_clone:
 
                    opts.update({'noupdate': True})
 
                MercurialRepository._check_url(url, self.baseui)
 
                clone(self.baseui, url, self.path, **opts)
 

	
 
                # Don't try to create if we've already cloned repo
 
                create = False
 
            return localrepository(self.baseui, self.path, create=create)
 
        except (Abort, RepoError) as err:
 
            if create:
 
                msg = "Cannot create repository at %s. Original error was %s"\
 
                    % (self.path, err)
 
            else:
 
                msg = "Not valid repository at %s. Original error was %s"\
 
                    % (self.path, err)
 
            raise RepositoryError(msg)
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        return MercurialInMemoryChangeset(self)
 

	
 
    @LazyProperty
 
    def description(self):
 
        undefined_description = u'unknown'
 
        _desc = self._repo.ui.config('web', 'description', None, untrusted=True)
 
        return safe_unicode(_desc or undefined_description)
 

	
 
    @LazyProperty
 
    def contact(self):
 
        undefined_contact = u'Unknown'
 
        return safe_unicode(get_contact(self._repo.ui.config)
 
                            or undefined_contact)
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            #fallback to filesystem
 
            cl_path = os.path.join(self.path, '.hg', "00changelog.i")
 
            st_path = os.path.join(self.path, '.hg', "store")
 
            if os.path.exists(cl_path):
 
                return os.stat(cl_path).st_mtime
 
            else:
 
                return os.stat(st_path).st_mtime
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Gets an ID revision given as str. This will always return a fill
 
        40 char revision number
 

	
 
        :param revision: str or int or None
 
        """
 
        if isinstance(revision, unicode):
 
            revision = safe_str(revision)
 

	
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in [-1, 'tip', None]:
 
            revision = 'tip'
 

	
 
        try:
 
            revision = hex(self._repo.lookup(revision))
 
        except (LookupError, ):
 
            msg = ("Ambiguous identifier `%s` for %s" % (revision, self))
 
            raise ChangesetDoesNotExistError(msg)
 
        except (IndexError, ValueError, RepoLookupError, TypeError):
 
            msg = ("Revision %s does not exist for %s" % (revision, self))
 
            raise ChangesetDoesNotExistError(msg)
 

	
 
        return revision
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns revision number for the given reference.
 
        """
 
        ref_name = safe_str(ref_name)
 
        if ref_type == 'rev' and not ref_name.strip('0'):
 
            return self.EMPTY_CHANGESET
 
        # lookup up the exact node id
 
        _revset_predicates = {
 
                'branch': 'branch',
 
                'book': 'bookmark',
 
                'tag': 'tag',
 
                'rev': 'id',
 
            }
 
        # avoid expensive branch(x) iteration over whole repo
 
        rev_spec = "%%s & %s(%%s)" % _revset_predicates[ref_type]
 
        try:
 
            revs = self._repo.revs(rev_spec, ref_name, ref_name)
 
        except LookupError:
 
            msg = ("Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name))
 
            raise ChangesetDoesNotExistError(msg)
kallithea/tests/vcs/test_repository.py
Show inline comments
 
import datetime
 
from kallithea.tests.vcs.base import _BackendTestMixin
 
from kallithea.tests.vcs.conf import SCM_TESTS
 
from kallithea.tests.vcs.conf import TEST_USER_CONFIG_FILE
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.lib.vcs.utils.compat import unittest
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 

	
 

	
 
class RepositoryBaseTest(_BackendTestMixin):
 
    recreate_repo_per_test = False
 

	
 
    @classmethod
 
    def _get_commits(cls):
 
        return super(RepositoryBaseTest, cls)._get_commits()[:1]
 

	
 
    def test_get_config_value(self):
 
        self.assertEqual(self.repo.get_config_value('universal', 'foo',
 
            TEST_USER_CONFIG_FILE), 'bar')
 

	
 
    def test_get_config_value_defaults_to_None(self):
 
        self.assertEqual(self.repo.get_config_value('universal', 'nonexist',
 
            TEST_USER_CONFIG_FILE), None)
 

	
 
    def test_get_user_name(self):
 
        self.assertEqual(self.repo.get_user_name(TEST_USER_CONFIG_FILE),
 
            'Foo Bar')
 

	
 
    def test_get_user_email(self):
 
        self.assertEqual(self.repo.get_user_email(TEST_USER_CONFIG_FILE),
 
            'foo.bar@example.com')
 

	
 
    def test_repo_equality(self):
 
        self.assertTrue(self.repo == self.repo)
 

	
 
    def test_repo_equality_broken_object(self):
 
        import copy
 
        _repo = copy.copy(self.repo)
 
        delattr(_repo, 'path')
 
        self.assertTrue(self.repo != _repo)
 

	
 
    def test_repo_equality_other_object(self):
 
        class dummy(object):
 
            path = self.repo.path
 
        self.assertTrue(self.repo != dummy())
 

	
 

	
 
class RepositoryGetDiffTest(_BackendTestMixin):
 

	
 
    @classmethod
 
    def _get_commits(cls):
 
        commits = [
 
            {
 
                'message': 'Initial commit',
 
                'author': 'Joe Doe <joe.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 20),
 
                'added': [
 
                    FileNode('foobar', content='foobar'),
 
                    FileNode('foobar2', content='foobar2'),
 
                ],
 
            },
 
            {
 
                'message': 'Changed foobar, added foobar3',
 
                'author': 'Jane Doe <jane.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 21),
 
                'added': [
 
                    FileNode('foobar3', content='foobar3'),
 
                ],
 
                'changed': [
 
                    FileNode('foobar', 'FOOBAR'),
 
                ],
 
            },
 
            {
 
                'message': 'Removed foobar, changed foobar3',
 
                'author': 'Jane Doe <jane.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 22),
 
                'changed': [
 
                    FileNode('foobar3', content='FOOBAR\nFOOBAR\nFOOBAR\n'),
 
                ],
 
                'removed': [FileNode('foobar')],
 
            },
 
            {
 
                'message': u'Commit that contains glob pattern in filename',
 
                'author': 'Jane Doe <jane.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 22),
 
                'added': [
 
                    FileNode('README{', content='Strangely-named README file'),
 
                ],
 
            },
 
        ]
 
        return commits
 

	
 
    def test_raise_for_wrong(self):
 
        with self.assertRaises(ChangesetDoesNotExistError):
 
            self.repo.get_diff('a' * 40, 'b' * 40)
 

	
 
    def test_glob_patterns_in_filename_do_not_raise_exception(self):
 
        revs = self.repo.revisions
 

	
 
        diff = self.repo.get_diff(revs[2], revs[3], path='README{') # should not raise
 

	
 

	
 
class GitRepositoryGetDiffTest(RepositoryGetDiffTest, unittest.TestCase):
 
    backend_alias = 'git'
 

	
 
    def test_initial_commit_diff(self):
 
        initial_rev = self.repo.revisions[0]
 
        self.assertEqual(self.repo.get_diff(self.repo.EMPTY_CHANGESET, initial_rev), '''diff --git a/foobar b/foobar
 
new file mode 100644
 
index 0000000000000000000000000000000000000000..f6ea0495187600e7b2288c8ac19c5886383a4632
 
--- /dev/null
 
+++ b/foobar
 
@@ -0,0 +1 @@
 
+foobar
 
\ No newline at end of file
 
diff --git a/foobar2 b/foobar2
 
new file mode 100644
 
index 0000000000000000000000000000000000000000..e8c9d6b98e3dce993a464935e1a53f50b56a3783
 
--- /dev/null
 
+++ b/foobar2
 
@@ -0,0 +1 @@
 
+foobar2
 
\ No newline at end of file
 
''')
 

	
 
    def test_second_changeset_diff(self):
 
        revs = self.repo.revisions
 
        self.assertEqual(self.repo.get_diff(revs[0], revs[1]), '''diff --git a/foobar b/foobar
 
index f6ea0495187600e7b2288c8ac19c5886383a4632..389865bb681b358c9b102d79abd8d5f941e96551 100644
 
--- a/foobar
 
+++ b/foobar
 
@@ -1 +1 @@
 
-foobar
 
\ No newline at end of file
 
+FOOBAR
 
\ No newline at end of file
 
diff --git a/foobar3 b/foobar3
 
new file mode 100644
 
index 0000000000000000000000000000000000000000..c11c37d41d33fb47741cff93fa5f9d798c1535b0
 
--- /dev/null
 
+++ b/foobar3
 
@@ -0,0 +1 @@
 
+foobar3
 
\ No newline at end of file
 
''')
 

	
 
    def test_third_changeset_diff(self):
 
        revs = self.repo.revisions
 
        self.assertEqual(self.repo.get_diff(revs[1], revs[2]), '''diff --git a/foobar b/foobar
 
deleted file mode 100644
 
index 389865bb681b358c9b102d79abd8d5f941e96551..0000000000000000000000000000000000000000
 
--- a/foobar
 
+++ /dev/null
 
@@ -1 +0,0 @@
 
-FOOBAR
 
\ No newline at end of file
 
diff --git a/foobar3 b/foobar3
 
index c11c37d41d33fb47741cff93fa5f9d798c1535b0..f9324477362684ff692aaf5b9a81e01b9e9a671c 100644
 
--- a/foobar3
 
+++ b/foobar3
 
@@ -1 +1,3 @@
 
-foobar3
 
\ No newline at end of file
 
+FOOBAR
 
+FOOBAR
 
+FOOBAR
 
''')
 

	
 
    def test_fourth_changeset_diff(self):
 
        revs = self.repo.revisions
 
        self.assertEqual(self.repo.get_diff(revs[2], revs[3]), '''diff --git a/README{ b/README{
 
new file mode 100644
 
index 0000000000000000000000000000000000000000..cdc0c1b5d234feedb37bbac19cd1b6442061102d
 
--- /dev/null
 
+++ b/README{
 
@@ -0,0 +1 @@
 
+Strangely-named README file
 
\ No newline at end of file
 
''')
 

	
 

	
 
class HgRepositoryGetDiffTest(RepositoryGetDiffTest, unittest.TestCase):
 
    backend_alias = 'hg'
 

	
 
    def test_initial_commit_diff(self):
 
        initial_rev = self.repo.revisions[0]
 
        self.assertEqual(self.repo.get_diff(self.repo.EMPTY_CHANGESET, initial_rev), '''diff --git a/foobar b/foobar
 
new file mode 100644
 
--- /dev/null
 
+++ b/foobar
 
@@ -0,0 +1,1 @@
 
+foobar
 
\ No newline at end of file
 
diff --git a/foobar2 b/foobar2
 
new file mode 100644
 
--- /dev/null
 
+++ b/foobar2
 
@@ -0,0 +1,1 @@
 
+foobar2
 
\ No newline at end of file
 
''')
 

	
 
    def test_second_changeset_diff(self):
 
        revs = self.repo.revisions
 
        self.assertEqual(self.repo.get_diff(revs[0], revs[1]), '''diff --git a/foobar b/foobar
 
--- a/foobar
 
+++ b/foobar
 
@@ -1,1 +1,1 @@
 
-foobar
 
\ No newline at end of file
 
+FOOBAR
 
\ No newline at end of file
 
diff --git a/foobar3 b/foobar3
 
new file mode 100644
 
--- /dev/null
 
+++ b/foobar3
 
@@ -0,0 +1,1 @@
 
+foobar3
 
\ No newline at end of file
 
''')
 

	
 
    def test_third_changeset_diff(self):
 
        revs = self.repo.revisions
 
        self.assertEqual(self.repo.get_diff(revs[1], revs[2]), '''diff --git a/foobar b/foobar
 
deleted file mode 100644
 
--- a/foobar
 
+++ /dev/null
 
@@ -1,1 +0,0 @@
 
-FOOBAR
 
\ No newline at end of file
 
diff --git a/foobar3 b/foobar3
 
--- a/foobar3
 
+++ b/foobar3
 
@@ -1,1 +1,3 @@
 
-foobar3
 
\ No newline at end of file
 
+FOOBAR
 
+FOOBAR
 
+FOOBAR
 
''')
 

	
 
    def test_fourth_changeset_diff(self):
 
        revs = self.repo.revisions
 
        self.assertEqual(self.repo.get_diff(revs[2], revs[3]), '''diff --git a/README{ b/README{
 
new file mode 100644
 
--- /dev/null
 
+++ b/README{
 
@@ -0,0 +1,1 @@
 
+Strangely-named README file
 
\ No newline at end of file
 
''')
 

	
 

	
 
# For each backend create test case class
 
for alias in SCM_TESTS:
 
    attrs = {
 
        'backend_alias': alias,
 
    }
 
    cls_name = alias.capitalize() + RepositoryBaseTest.__name__
 
    bases = (RepositoryBaseTest, unittest.TestCase)
 
    globals()[cls_name] = type(cls_name, bases, attrs)
 

	
 
if __name__ == '__main__':
 
    unittest.main()
0 comments (0 inline, 0 general)