Changeset - 959d0daa44da
[Not reviewed]
beta
0 2 0
Marcin Kuzminski - 13 years ago 2012-08-08 21:32:05
marcin@python-works.com
Added url validator for git
2 files changed with 47 insertions and 7 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git
 
    ~~~~~~~~~~~~~~~~
 

	
 
    Git backend implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import os
 
import re
 
import time
 
import posixpath
 
import logging
 
import traceback
 
import urllib
 
import urllib2
 
from dulwich.repo import Repo, NotGitRepository
 
#from dulwich.config import ConfigFile
 
from string import Template
 
from subprocess import Popen, PIPE
 
from rhodecode.lib.vcs.backends.base import BaseRepository
 
from rhodecode.lib.vcs.exceptions import BranchDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
 
from rhodecode.lib.vcs.exceptions import RepositoryError
 
from rhodecode.lib.vcs.exceptions import TagAlreadyExistError
 
from rhodecode.lib.vcs.exceptions import TagDoesNotExistError
 
from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 
from rhodecode.lib.vcs.utils.lazy import LazyProperty
 
from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
 
from rhodecode.lib.vcs.utils.paths import abspath
 
from rhodecode.lib.vcs.utils.paths import get_user_home
 
from .workdir import GitWorkdir
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .config import ConfigFile
 
from rhodecode.lib import subprocessio
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
 
                 update_after_clone=False, bare=False):
 

	
 
        self.path = abspath(repo_path)
 
        self._repo = self._get_repo(create, src_url, update_after_clone, bare)
 
        #temporary set that to now at later we will move it to constructor
 
        baseui = None
 
        if baseui is None:
 
            from mercurial.ui import ui
 
            baseui = ui()
 
        # patch the instance of GitRepo with an "FAKE" ui object to add
 
        # compatibility layer with Mercurial
 
        setattr(self._repo, 'ui', baseui)
 

	
 
        try:
 
            self.head = self._repo.head()
 
        except KeyError:
 
            self.head = None
 

	
 
        self._config_files = [
 
            bare and abspath(self.path, 'config') or abspath(self.path, '.git',
 
                'config'),
 
            abspath(get_user_home(), '.gitconfig'),
 
        ]
 
        self.bare = self._repo.bare
 

	
 
    @LazyProperty
 
    def revisions(self):
 
        """
 
        Returns list of revisions' ids, in ascending order.  Being lazy
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    def run_git_command(self, cmd):
 
        """
 
        Runs given ``cmd`` as git command and returns tuple
 
        (returncode, stdout, stderr).
 

	
 
        .. note::
 
           This method exists only until log/blame functionality is implemented
 
           at Dulwich (see https://bugs.launchpad.net/bugs/645142). Parsing
 
           os command's output is road to hell...
 

	
 
        :param cmd: git command to be executed
 
        """
 

	
 
        _copts = ['-c', 'core.quotepath=false', ]
 
        _str_cmd = False
 
        if isinstance(cmd, basestring):
 
            cmd = [cmd]
 
            _str_cmd = True
 

	
 
        gitenv = os.environ
 
        # need to clean fix GIT_DIR !
 
        if 'GIT_DIR' in gitenv:
 
            del gitenv['GIT_DIR']
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 

	
 
        cmd = ['git'] + _copts + cmd
 
        if _str_cmd:
 
            cmd = ' '.join(cmd)
 
        try:
 
            opts = dict(
 
                env=gitenv,
 
            )
 
            if os.path.isdir(self.path):
 
                opts['cwd'] = self.path
 
            p = subprocessio.SubprocessIOChunker(cmd, **opts)
 
        except (EnvironmentError, OSError), err:
 
            log.error(traceback.format_exc())
 
            raise RepositoryError("Couldn't run git command (%s).\n"
 
                                  "Original error was:%s" % (cmd, err))
 

	
 
        so = ''.join(p)
 
        se = None
 
        return so, se
 

	
 
    def _check_url(self, url):
 
    @classmethod
 
    def _check_url(cls, url):
 
        """
 
        Functon will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that mercurial will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError
 
        """
 
        from mercurial.util import url as Url
 

	
 
        #TODO: implement this
 
        pass
 
        # those authnadlers are patched for python 2.6.5 bug an
 
        # infinit looping when given invalid resources
 
        from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
 

	
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if('+' in url[:url.find('://')]):
 
            url = url[url.find('+') + 1:]
 

	
 
        handlers = []
 
        test_uri, authinfo = Url(url).authinfo()
 
        if not test_uri.endswith('info/refs'):
 
            test_uri = test_uri.rstrip('/') + '/info/refs'
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
 

	
 
        q = {"service": 'git-upload-pack'}
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            return resp.code == 200
 
        except Exception, e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError(e)
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False,
 
            bare=False):
 
        if create and os.path.exists(self.path):
 
            raise RepositoryError("Location already exist")
 
        if src_url and not create:
 
            raise RepositoryError("Create should be set to True if src_url is "
 
                                  "given (clone operation creates repository)")
 
        try:
 
            if create and src_url:
 
                self._check_url(src_url)
 
                GitRepository._check_url(src_url)
 
                self.clone(src_url, update_after_clone, bare)
 
                return Repo(self.path)
 
            elif create:
 
                os.mkdir(self.path)
 
                if bare:
 
                    return Repo.init_bare(self.path)
 
                else:
 
                    return Repo.init(self.path)
 
            else:
 
                return Repo(self.path)
 
        except (NotGitRepository, OSError), err:
 
            raise RepositoryError(err)
 

	
 
    def _get_all_revisions(self):
 
        # we must check if this repo is not empty, since later command
 
        # fails if it is. And it's cheaper to ask than throw the subprocess
 
        # errors
 
        try:
 
            self._repo.head()
 
        except KeyError:
 
            return []
 
        cmd = 'rev-list --all --reverse --date-order'
 
        try:
 
            so, se = self.run_git_command(cmd)
 
        except RepositoryError:
 
            # Can be raised for empty repositories
 
            return []
 
        return so.splitlines()
 

	
 
    def _get_all_revisions2(self):
 
        #alternate implementation using dulwich
 
        includes = [x[1][0] for x in self._parsed_refs.iteritems()
 
                    if x[1][1] != 'T']
 
        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        For git backend we always return integer here. This way we ensure
 
        that changset's revision attribute would become integer.
 
        """
 
        pattern = re.compile(r'^[[0-9a-fA-F]{12}|[0-9a-fA-F]{40}]$')
 
        is_bstr = lambda o: isinstance(o, (str, unicode))
 
        is_null = lambda o: len(o) == revision.count('0')
 

	
 
        if len(self.revisions) == 0:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
 
            revision = self.revisions[-1]
 

	
 
        if ((is_bstr(revision) and revision.isdigit() and len(revision) < 12)
 
            or isinstance(revision, int) or is_null(revision)):
 
            try:
 
                revision = self.revisions[int(revision)]
 
            except:
 
                raise ChangesetDoesNotExistError("Revision %r does not exist "
 
                    "for this repository %s" % (revision, self))
 

	
 
        elif is_bstr(revision):
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(revision)
 
            _tags_shas = self.tags.values()
 
            if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
 
                return _ref_revision[0]
 

	
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            elif revision in _tags_shas:
 
                return _tags_shas[_tags_shas.index(revision)]
 

	
 
            elif not pattern.match(revision) or revision not in self.revisions:
 
                raise ChangesetDoesNotExistError("Revision %r does not exist "
 
                    "for this repository %s" % (revision, self))
 

	
 
        # Ensure we return full id
 
        if not pattern.match(str(revision)):
 
            raise ChangesetDoesNotExistError("Given revision %r not recognized"
 
                % revision)
 
        return revision
 

	
 
    def _get_archives(self, archive_name='tip'):
 

	
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
                yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall to
 
        filesystem (``file:///``) schema.
 
        """
 
        url = str(url)
 
        if url != 'default' and not '://' in url:
 
            url = ':///'.join(('file', url))
 
        return url
 

	
 
    @LazyProperty
 
    def name(self):
rhodecode/lib/vcs/backends/hg/repository.py
Show inline comments
 
@@ -156,262 +156,263 @@ class MercurialRepository(BaseRepository
 
            message = "Added tag %s for changeset %s" % (name,
 
                changeset.short_id)
 

	
 
        if date is None:
 
            date = datetime.datetime.now().ctime()
 

	
 
        try:
 
            self._repo.tag(name, changeset._ctx.node(), message, local, user,
 
                date)
 
        except Abort, e:
 
            raise RepositoryError(e.message)
 

	
 
        # Reinitialize tags
 
        self.tags = self._get_tags()
 
        tag_id = self.tags[name]
 

	
 
        return self.get_changeset(revision=tag_id)
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
 
        """
 
        Removes tag with the given ``name``.
 

	
 
        :param name: name of the tag to be removed
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param message: message of the tag's removal commit
 
        :param date: date of tag's removal commit
 

	
 
        :raises TagDoesNotExistError: if tag with given name does not exists
 
        """
 
        if name not in self.tags:
 
            raise TagDoesNotExistError("Tag %s does not exist" % name)
 
        if message is None:
 
            message = "Removed tag %s" % name
 
        if date is None:
 
            date = datetime.datetime.now().ctime()
 
        local = False
 

	
 
        try:
 
            self._repo.tag(name, nullid, message, local, user, date)
 
            self.tags = self._get_tags()
 
        except Abort, e:
 
            raise RepositoryError(e.message)
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        """
 
        Get's bookmarks for this repository
 
        """
 
        return self._get_bookmarks()
 

	
 
    def _get_bookmarks(self):
 
        if self._empty:
 
            return {}
 

	
 
        sortkey = lambda ctx: ctx[0]  # sort by name
 
        _bookmarks = [(safe_unicode(n), hex(h),) for n, h in
 
                 self._repo._bookmarks.items()]
 
        return OrderedDict(sorted(_bookmarks, key=sortkey, reverse=True))
 

	
 
    def _get_all_revisions(self):
 

	
 
        return map(lambda x: hex(x[7]), self._repo.changelog.index)[:-1]
 

	
 
    def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
 
                  context=3):
 
        """
 
        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
        ``rev2`` since ``rev1``.
 

	
 
        :param rev1: Entry point from which diff is shown. Can be
 
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
          the changes since empty state of the repository until ``rev2``
 
        :param rev2: Until which revision changes should be shown.
 
        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
          changes. Defaults to ``False``.
 
        :param context: How many lines before/after changed lines should be
 
          shown. Defaults to ``3``.
 
        """
 
        if hasattr(rev1, 'raw_id'):
 
            rev1 = getattr(rev1, 'raw_id')
 

	
 
        if hasattr(rev2, 'raw_id'):
 
            rev2 = getattr(rev2, 'raw_id')
 

	
 
        # Check if given revisions are present at repository (may raise
 
        # ChangesetDoesNotExistError)
 
        if rev1 != self.EMPTY_CHANGESET:
 
            self.get_changeset(rev1)
 
        self.get_changeset(rev2)
 

	
 
        file_filter = match(self.path, '', [path])
 
        return ''.join(patch.diff(self._repo, rev1, rev2, match=file_filter,
 
                          opts=diffopts(git=True,
 
                                        ignorews=ignore_whitespace,
 
                                        context=context)))
 

	
 
    def _check_url(self, url):
 
    @classmethod
 
    def _check_url(cls, url):
 
        """
 
        Function will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that mercurial will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError, return code 200 if url
 
        is valid or True if it's a local path
 
        """
 

	
 
        from mercurial.util import url as Url
 

	
 
        # those authnadlers are patched for python 2.6.5 bug an
 
        # infinit looping when given invalid resources
 
        from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
 

	
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if('+' in url[:url.find('://')]):
 
            url = url[url.find('+')+1:]
 
            url = url[url.find('+') + 1:]
 

	
 
        handlers = []
 
        test_uri, authinfo = Url(url).authinfo()
 

	
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
 
                        ('Accept', 'application/mercurial-0.1')]
 

	
 
        q = {"cmd": 'between'}
 
        q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            return resp.code == 200
 
        except Exception, e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError(e)
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
 
        """
 
        Function will check for mercurial repository in given path and return
 
        a localrepo object. If there is no repository in that path it will
 
        raise an exception unless ``create`` parameter is set to True - in
 
        that case repository would be created and returned.
 
        If ``src_url`` is given, would try to clone repository from the
 
        location at given clone_point. Additionally it'll make update to
 
        working copy accordingly to ``update_after_clone`` flag
 
        """
 
        try:
 
            if src_url:
 
                url = str(self._get_url(src_url))
 
                opts = {}
 
                if not update_after_clone:
 
                    opts.update({'noupdate': True})
 
                try:
 
                    self._check_url(url)
 
                    MercurialRepository._check_url(url)
 
                    clone(self.baseui, url, self.path, **opts)
 
#                except urllib2.URLError:
 
#                    raise Abort("Got HTTP 404 error")
 
                except Exception:
 
                    raise
 
                # Don't try to create if we've already cloned repo
 
                create = False
 
            return localrepository(self.baseui, self.path, create=create)
 
        except (Abort, RepoError), err:
 
            if create:
 
                msg = "Cannot create repository at %s. Original error was %s"\
 
                    % (self.path, err)
 
            else:
 
                msg = "Not valid repository at %s. Original error was %s"\
 
                    % (self.path, err)
 
            raise RepositoryError(msg)
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        return MercurialInMemoryChangeset(self)
 

	
 
    @LazyProperty
 
    def description(self):
 
        undefined_description = u'unknown'
 
        return safe_unicode(self._repo.ui.config('web', 'description',
 
                                   undefined_description, untrusted=True))
 

	
 
    @LazyProperty
 
    def contact(self):
 
        undefined_contact = u'Unknown'
 
        return safe_unicode(get_contact(self._repo.ui.config)
 
                            or undefined_contact)
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            #fallback to filesystem
 
            cl_path = os.path.join(self.path, '.hg', "00changelog.i")
 
            st_path = os.path.join(self.path, '.hg', "store")
 
            if os.path.exists(cl_path):
 
                return os.stat(cl_path).st_mtime
 
            else:
 
                return os.stat(st_path).st_mtime
 

	
 
    def _get_hidden(self):
 
        return self._repo.ui.configbool("web", "hidden", untrusted=True)
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Get's an ID revision given as str. This will always return a fill
 
        40 char revision number
 

	
 
        :param revision: str or int or None
 
        """
 

	
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in [-1, 'tip', None]:
 
            revision = 'tip'
 

	
 
        try:
 
            revision = hex(self._repo.lookup(revision))
 
        except (IndexError, ValueError, RepoLookupError, TypeError):
 
            raise ChangesetDoesNotExistError("Revision %r does not "
 
                                    "exist for this repository %s" \
 
                                    % (revision, self))
 
        return revision
 

	
 
    def _get_archives(self, archive_name='tip'):
 
        allowed = self.baseui.configlist("web", "allow_archive",
 
                                         untrusted=True)
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
            if i[0] in allowed or self._repo.ui.configbool("web",
 
                                                           "allow" + i[0],
 
                                                           untrusted=True):
 
                yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall
 
        to filesystem
 
        (``file:///``) schema.
 
        """
 
        url = str(url)
 
        if url != 'default' and not '://' in url:
 
            url = "file:" + urllib.pathname2url(url)
 
        return url
0 comments (0 inline, 0 general)