Changeset - 182f46d62ab4
[Not reviewed]
default
0 3 0
Mads Kiilerich - 8 years ago 2017-05-30 02:59:45
mads@kiilerich.com
repository: fix crash when forking repositories with unicode names
3 files changed with 26 insertions and 5 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git.repository
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    Git repository implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import os
 
import re
 
import time
 
import errno
 
import urllib
 
import urllib2
 
import logging
 
import posixpath
 

	
 
from dulwich.objects import Tag
 
from dulwich.repo import Repo, NotGitRepository
 
from dulwich.config import ConfigFile
 

	
 
from kallithea.lib.vcs import subprocessio
 
from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
 
from kallithea.lib.vcs.conf import settings
 

	
 
from kallithea.lib.vcs.exceptions import (
 
    BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError,
 
    RepositoryError, TagAlreadyExistError, TagDoesNotExistError
 
)
 
from kallithea.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 
from kallithea.lib.vcs.utils import safe_str, safe_unicode, makedate, date_fromtimestamp
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.ordered_dict import OrderedDict
 
from kallithea.lib.vcs.utils.paths import abspath, get_user_home
 

	
 
from kallithea.lib.vcs.utils.hgcompat import (
 
    hg_url, httpbasicauthhandler, httpdigestauthhandler
 
)
 

	
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .workdir import GitWorkdir
 

	
 
# Matches exactly a 12-character abbreviated or a full 40-character
# hexadecimal SHA. The alternatives are wrapped in a group; the previous
# pattern used square brackets, which created character classes and let any
# string that merely started with 12 hex (or '[') characters match.
SHA_PATTERN = re.compile(r'^([0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
                 update_after_clone=False, bare=False):
        """
        Bind this object to the repository at ``repo_path``.

        The absolute form of ``repo_path`` is stored as unicode in
        ``self.path``. The remaining arguments are handed unchanged to
        ``_get_repo``, and ``self.bare`` is read back from the repository
        object it returns.
        """
        absolute_path = abspath(repo_path)
        self.path = safe_unicode(absolute_path)
        opened = self._get_repo(create, src_url, update_after_clone, bare)
        self.bare = opened.bare
 

	
 
    @property
    def _config_files(self):
        """
        Return the git config file paths for this repository: first the
        repository's own ``config`` (directly under ``self.path`` when bare,
        under ``.git`` otherwise), then ``.gitconfig`` in the user's home.
        """
        # Explicit branches instead of ``cond and a or b``, which would
        # silently pick the second operand if the first were ever falsy.
        if self.bare:
            repo_config = abspath(self.path, 'config')
        else:
            repo_config = abspath(self.path, '.git', 'config')
        user_config = abspath(get_user_home(), '.gitconfig')
        return [repo_config, user_config]
 

	
 
    @property
    def _repo(self):
        """Open a dulwich ``Repo`` for ``self.path`` on every access."""
        dulwich_repo = Repo(self.path)
        return dulwich_repo
 

	
 
    @property
 
    def head(self):
 
        try:
 
            return self._repo.head()
 
        except KeyError:
 
@@ -283,97 +283,97 @@ class GitRepository(BaseRepository):
 

	
 
        is_bstr = isinstance(revision, (str, unicode))
 
        if ((is_bstr and revision.isdigit() and len(revision) < 12)
 
            or isinstance(revision, int) or is_null(revision)):
 
            try:
 
                revision = self.revisions[int(revision)]
 
            except IndexError:
 
                msg = ("Revision %s does not exist for %s" % (revision, self))
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        elif is_bstr:
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(revision)
 
            if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
 
                return _ref_revision[0]
 

	
 
            _tags_shas = self.tags.values()
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            if revision in _tags_shas:
 
                return _tags_shas[_tags_shas.index(revision)]
 

	
 
            elif not SHA_PATTERN.match(revision) or revision not in self.revisions:
 
                msg = ("Revision %s does not exist for %s" % (revision, self))
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        # Ensure we return full id
 
        if not SHA_PATTERN.match(str(revision)):
 
            raise ChangesetDoesNotExistError("Given revision %s not recognized"
 
                % revision)
 
        return revision
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
        """
        Resolve ``ref_name`` to a revision by delegating to
        ``_get_revision`` and returning its result.

        :param ref_type: kind of reference; accepted but not used by this
            implementation
        :param ref_name: name to look up, passed unchanged to
            ``_get_revision``
        """
        return self._get_revision(ref_name)
 

	
 
    def _get_archives(self, archive_name='tip'):
 

	
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
                yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
        """
        Returns normalized url. If schema is not given, would fall to
        filesystem (``file:///``) schema.

        :param url: url or filesystem path, as byte string or unicode
        :returns: ``url`` passed through ``safe_str``, prefixed with
            ``file:///`` unless it is ``'default'`` or already contains
            ``://``
        """
        # Use safe_str only: under Python 2 a plain str() on a unicode value
        # with non-ASCII characters raises UnicodeEncodeError, which is the
        # crash seen when forking repositories with unicode names.
        url = safe_str(url)
        if url != 'default' and '://' not in url:
            url = ':///'.join(('file', url))
        return url
 

	
 
    def get_hook_location(self):
        """
        Return the path of the directory holding this repository's hooks:
        ``<path>/hooks`` for a bare repository, ``<path>/.git/hooks``
        otherwise.
        """
        if self.bare:
            return os.path.join(self.path, 'hooks')
        return os.path.join(self.path, '.git', 'hooks')
 

	
 
    @LazyProperty
    def name(self):
        """Final path component of ``self.path``."""
        _parent, leaf = os.path.split(self.path)
        return leaf
 

	
 
    @LazyProperty
    def last_change(self):
        """
        Returns last change made on this repository as datetime object,
        built by ``date_fromtimestamp`` from ``_get_mtime()``.
        """
        mtime = self._get_mtime()
        # presumably the local timezone offset — confirm against makedate
        offset = makedate()[1]
        return date_fromtimestamp(mtime, offset)
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            idx_loc = '' if self.bare else '.git'
 
            # fallback to filesystem
 
            in_path = os.path.join(self.path, idx_loc, "index")
 
            he_path = os.path.join(self.path, idx_loc, "HEAD")
 
            if os.path.exists(in_path):
 
                return os.stat(in_path).st_mtime
 
            else:
 
                return os.stat(he_path).st_mtime
 

	
 
    @LazyProperty
    def description(self):
        """
        Description reported by the underlying repo object, passed through
        ``safe_unicode``; ``u'unknown'`` when it is empty.
        """
        text = self._repo.get_description()
        if not text:
            text = u'unknown'
        return safe_unicode(text)
 

	
 
    @LazyProperty
    def contact(self):
        """Contact for the repository; this backend always gives ``u'Unknown'``."""
        return u'Unknown'
 

	
kallithea/lib/vcs/backends/hg/repository.py
Show inline comments
 
@@ -304,97 +304,97 @@ class MercurialRepository(BaseRepository
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
 
                        ('Accept', 'application/mercurial-0.1')]
 

	
 
        q = {"cmd": 'between'}
 
        q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            if resp.code != 200:
 
                raise Exception('Return Code is not 200')
 
        except Exception as e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
 

	
 
        if not url_prefix: # skip svn+http://... (and git+... too)
 
            # now check if it's a proper hg repo
 
            try:
 
                httppeer(repoui or ui.ui(), url).lookup('tip')
 
            except Exception as e:
 
                raise urllib2.URLError(
 
                    "url [%s] does not look like an hg repo org_exc: %s"
 
                    % (cleaned_uri, e))
 

	
 
        return True
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
        """
        Function will check for mercurial repository in given path and return
        a localrepo object. If there is no repository in that path it will
        raise an exception unless ``create`` parameter is set to True - in
        that case repository would be created and returned.
        If ``src_url`` is given, would try to clone repository from the
        location at given clone_point. Additionally it'll make update to
        working copy accordingly to ``update_after_clone`` flag

        :raises RepositoryError: when mercurial raises ``Abort`` or
            ``RepoError`` while cloning, creating or opening the repository
        """

        try:
            if src_url:
                # Normalize exactly once with safe_str: under Python 2 a
                # plain str() raises UnicodeEncodeError for unicode urls
                # with non-ASCII characters (e.g. unicode repository names).
                url = safe_str(self._get_url(src_url))
                opts = {}
                if not update_after_clone:
                    opts.update({'noupdate': True})
                MercurialRepository._check_url(url, self.baseui)
                clone(self.baseui, url, self.path, **opts)

                # Don't try to create if we've already cloned repo
                create = False
            return localrepository(self.baseui, self.path, create=create)
        except (Abort, RepoError) as err:
            if create:
                msg = "Cannot create repository at %s. Original error was %s" \
                    % (self.path, err)
            else:
                msg = "Not valid repository at %s. Original error was %s" \
                    % (self.path, err)
            raise RepositoryError(msg)
 

	
 
    @LazyProperty
    def in_memory_changeset(self):
        """A ``MercurialInMemoryChangeset`` constructed for this repository."""
        changeset = MercurialInMemoryChangeset(self)
        return changeset
 

	
 
    @LazyProperty
    def description(self):
        """
        The ``web.description`` config value of the repository, passed
        through ``safe_unicode``; ``u'unknown'`` when it is not set.
        """
        text = self._repo.ui.config('web', 'description', None, untrusted=True)
        if not text:
            text = u'unknown'
        return safe_unicode(text)
 

	
 
    @LazyProperty
    def contact(self):
        """
        Contact obtained by ``get_contact`` from the repository ui config,
        passed through ``safe_unicode``; ``u'Unknown'`` when none is found.
        """
        found = get_contact(self._repo.ui.config)
        if not found:
            found = u'Unknown'
        return safe_unicode(found)
 

	
 
    @LazyProperty
    def last_change(self):
        """
        Returns last change made on this repository as datetime object,
        built by ``date_fromtimestamp`` from ``_get_mtime()``.
        """
        mtime = self._get_mtime()
        # presumably the local timezone offset — confirm against makedate
        offset = makedate()[1]
        return date_fromtimestamp(mtime, offset)
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            #fallback to filesystem
 
            cl_path = os.path.join(self.path, '.hg', "00changelog.i")
 
            st_path = os.path.join(self.path, '.hg', "store")
 
@@ -436,97 +436,97 @@ class MercurialRepository(BaseRepository
 
        """
 
        ref_name = safe_str(ref_name)
 
        if ref_type == 'rev' and not ref_name.strip('0'):
 
            return self.EMPTY_CHANGESET
 
        # lookup up the exact node id
 
        _revset_predicates = {
 
                'branch': 'branch',
 
                'book': 'bookmark',
 
                'tag': 'tag',
 
                'rev': 'id',
 
            }
 
        # avoid expensive branch(x) iteration over whole repo
 
        rev_spec = "%%s & %s(%%s)" % _revset_predicates[ref_type]
 
        try:
 
            revs = self._repo.revs(rev_spec, ref_name, ref_name)
 
        except LookupError:
 
            msg = ("Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name))
 
            raise ChangesetDoesNotExistError(msg)
 
        except RepoLookupError:
 
            msg = ("Revision %s:%s does not exist for %s" % (ref_type, ref_name, self.name))
 
            raise ChangesetDoesNotExistError(msg)
 
        if revs:
 
            try:
 
                revision = revs.last()
 
            except AttributeError:
 
                # removed in hg 3.2
 
                revision = revs[-1]
 
        else:
 
            # TODO: just report 'not found'?
 
            revision = ref_name
 

	
 
        return self._get_revision(revision)
 

	
 
    def _get_archives(self, archive_name='tip'):
 
        allowed = self.baseui.configlist("web", "allow_archive",
 
                                         untrusted=True)
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
            if i[0] in allowed or self._repo.ui.configbool("web",
 
                                                           "allow" + i[0],
 
                                                           untrusted=True):
 
                yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
        """
        Returns normalized url. If schema is not given, would fall
        to filesystem
        (``file:///``) schema.

        :param url: url or filesystem path, as byte string or unicode
        :returns: ``url`` passed through ``safe_str``; unless it is
            ``'default'`` or already contains ``://`` it is converted with
            ``urllib.pathname2url`` and prefixed with ``file:``
        """
        # Use safe_str only: under Python 2 a plain str() on a unicode value
        # with non-ASCII characters raises UnicodeEncodeError, which is the
        # crash seen when forking repositories with unicode names.
        url = safe_str(url)
        if url != 'default' and '://' not in url:
            url = "file:" + urllib.pathname2url(url)
        return url
 

	
 
    def get_hook_location(self):
        """
        Return ``<path>/.hg/.hgrc``, the file this backend reports as the
        location where hooks are stored.
        """
        # NOTE(review): Mercurial's per-repository config is usually
        # ``.hg/hgrc`` (no leading dot) — confirm ``.hgrc`` is intended.
        hg_dir = os.path.join(self.path, '.hg')
        return os.path.join(hg_dir, '.hgrc')
 

	
 
    def get_changeset(self, revision=None):
        """
        Build the ``MercurialChangeset`` of this repository for ``revision``
        after resolving it through ``_get_revision``.
        """
        resolved = self._get_revision(revision)
        return MercurialChangeset(repository=self, revision=resolved)
 

	
 
    def get_changesets(self, start=None, end=None, start_date=None,
 
                       end_date=None, branch_name=None, reverse=False):
 
        """
 
        Returns iterator of ``MercurialChangeset`` objects from start to end
 
        (both are inclusive)
 

	
 
        :param start: None, str, int or mercurial lookup format
 
        :param end:  None, str, int or mercurial lookup format
 
        :param start_date:
 
        :param end_date:
 
        :param branch_name:
 
        :param reversed: return changesets in reversed order
 
        """
 

	
 
        start_raw_id = self._get_revision(start)
 
        start_pos = self.revisions.index(start_raw_id) if start else None
 
        end_raw_id = self._get_revision(end)
 
        end_pos = self.revisions.index(end_raw_id) if end else None
 

	
 
        if None not in [start, end] and start_pos > end_pos:
 
            raise RepositoryError("Start revision '%s' cannot be "
 
                                  "after end revision '%s'" % (start, end))
 

	
 
        if branch_name and branch_name not in self.allbranches.keys():
 
            msg = ("Branch %s not found in %s" % (branch_name, self))
 
            raise BranchDoesNotExistError(msg)
 
        if end_pos is not None:
 
            end_pos += 1
 
        #filter branches
kallithea/tests/functional/test_forks.py
Show inline comments
 
@@ -116,97 +116,118 @@ class _BaseTestCase(TestController):
 
        response = self.app.get(url('repo_check_home', repo_name=fork_name_full))
 
        #test if we have a message that fork is ok
 
        self.checkSessionFlash(response,
 
                'Forked repository %s as <a href="/%s">%s</a>'
 
                % (repo_name, fork_name_full, fork_name_full))
 

	
 
        #test if the fork was created in the database
 
        fork_repo = Session().query(Repository) \
 
            .filter(Repository.repo_name == fork_name_full).one()
 

	
 
        assert fork_repo.repo_name == fork_name_full
 
        assert fork_repo.fork.repo_name == repo_name
 

	
 
        # test if the repository is visible in the list ?
 
        response = self.app.get(url('summary_home', repo_name=fork_name_full))
 
        response.mustcontain(fork_name_full)
 
        response.mustcontain(self.REPO_TYPE)
 
        response.mustcontain('Fork of "<a href="/%s">%s</a>"' % (repo_name, repo_name))
 

	
 
        fixture.destroy_repo(fork_name_full)
 
        fixture.destroy_repo_group(group_id)
 

	
 
    def test_fork_unicode(self):
        # Forks the test repository under a non-ASCII name, then forks that
        # fork under a second non-ASCII name, checking after each step that
        # the new fork is linked from the parent's forks page.
        self.log_user()

        # create a fork
        repo_name = self.REPO
        org_repo = Repository.get_by_repo_name(repo_name)
        fork_name = safe_str(self.REPO_FORK + u'-rødgrød')
        creation_args = {
            'repo_name': fork_name,
            'repo_group': u'-1',
            'fork_parent_id': org_repo.repo_id,
            'repo_type': self.REPO_TYPE,
            'description': 'unicode repo 1',
            'private': 'False',
            'landing_rev': 'rev:tip',
            '_authentication_token': self.authentication_token()}
        self.app.post(url(controller='forks', action='fork_create',
                          repo_name=repo_name), creation_args)
        response = self.app.get(url(controller='forks', action='forks',
                                    repo_name=repo_name))
        # the link target is url-quoted while the link text is the raw name
        response.mustcontain(
            """<a href="/%s">%s</a>""" % (urllib.quote(fork_name), fork_name)
        )
        # the database lookup is done with the unicode form of the name
        fork_repo = Repository.get_by_repo_name(safe_unicode(fork_name))
        assert fork_repo

        # fork the fork
        fork_name_2 = safe_str(self.REPO_FORK + u'-blåbærgrød')
        creation_args = {
            'repo_name': fork_name_2,
            'repo_group': u'-1',
            'fork_parent_id': fork_repo.repo_id,
            'repo_type': self.REPO_TYPE,
            'description': 'unicode repo 2',
            'private': 'False',
            'landing_rev': 'rev:tip',
            '_authentication_token': self.authentication_token()}
        self.app.post(url(controller='forks', action='fork_create',
                          repo_name=fork_name), creation_args)
        response = self.app.get(url(controller='forks', action='forks',
                                    repo_name=fork_name))
        response.mustcontain(
            """<a href="/%s">%s</a>""" % (urllib.quote(fork_name_2), fork_name_2)
        )

        # remove these forks, the second-level fork first
        response = self.app.post(url('delete_repo', repo_name=fork_name_2),
            params={'_authentication_token': self.authentication_token()})
        response = self.app.post(url('delete_repo', repo_name=fork_name),
            params={'_authentication_token': self.authentication_token()})
 

	
 
    def test_z_fork_create(self):
        # Forks the test repository under the plain REPO_FORK name and
        # checks the flash message, the database row and the summary page.
        self.log_user()
        fork_name = self.REPO_FORK
        description = 'fork of vcs test'
        repo_name = self.REPO
        org_repo = Repository.get_by_repo_name(repo_name)
        creation_args = {
            'repo_name': fork_name,
            'repo_group': u'-1',
            'fork_parent_id': org_repo.repo_id,
            'repo_type': self.REPO_TYPE,
            'description': description,
            'private': 'False',
            'landing_rev': 'rev:tip',
            '_authentication_token': self.authentication_token()}
        self.app.post(url(controller='forks', action='fork_create',
                          repo_name=repo_name), creation_args)
        repo = Repository.get_by_repo_name(self.REPO_FORK)
        assert repo.fork.repo_name == self.REPO

        # run the check page that triggers the flash message
        response = self.app.get(url('repo_check_home', repo_name=fork_name))
        # test if we have a message that fork is ok
        self.checkSessionFlash(response,
                'Forked repository %s as <a href="/%s">%s</a>'
                % (repo_name, fork_name, fork_name))

        # test if the fork was created in the database
        fork_repo = Session().query(Repository) \
            .filter(Repository.repo_name == fork_name).one()

        assert fork_repo.repo_name == fork_name
        assert fork_repo.fork.repo_name == repo_name

        # test if the summary page of the fork shows its name, its type and
        # a link back to the parent
        response = self.app.get(url('summary_home', repo_name=fork_name))
        response.mustcontain(fork_name)
        response.mustcontain(self.REPO_TYPE)
        response.mustcontain('Fork of "<a href="/%s">%s</a>"' % (repo_name, repo_name))
 

	
 
    def test_zz_fork_permission_page(self):
 
        usr = self.log_user(self.username, self.password)['user_id']
 
        repo_name = self.REPO
 

	
 
        forks = Repository.query() \
0 comments (0 inline, 0 general)