Changeset - 959d0daa44da
[Not reviewed]
beta
0 2 0
Marcin Kuzminski - 13 years ago 2012-08-08 21:32:05
marcin@python-works.com
Added url validator for git
2 files changed with 47 insertions and 7 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git
 
    ~~~~~~~~~~~~~~~~
 

	
 
    Git backend implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import os
 
import re
 
import time
 
import posixpath
 
import logging
 
import traceback
 
import urllib
 
import urllib2
 
from dulwich.repo import Repo, NotGitRepository
 
#from dulwich.config import ConfigFile
 
from string import Template
 
from subprocess import Popen, PIPE
 
from rhodecode.lib.vcs.backends.base import BaseRepository
 
from rhodecode.lib.vcs.exceptions import BranchDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
 
from rhodecode.lib.vcs.exceptions import RepositoryError
 
from rhodecode.lib.vcs.exceptions import TagAlreadyExistError
 
from rhodecode.lib.vcs.exceptions import TagDoesNotExistError
 
from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 
from rhodecode.lib.vcs.utils.lazy import LazyProperty
 
from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
 
from rhodecode.lib.vcs.utils.paths import abspath
 
from rhodecode.lib.vcs.utils.paths import get_user_home
 
from .workdir import GitWorkdir
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .config import ConfigFile
 
from rhodecode.lib import subprocessio
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
 
                 update_after_clone=False, bare=False):
 

	
 
        self.path = abspath(repo_path)
 
        self._repo = self._get_repo(create, src_url, update_after_clone, bare)
 
        #temporary set that to now at later we will move it to constructor
 
        baseui = None
 
        if baseui is None:
 
            from mercurial.ui import ui
 
            baseui = ui()
 
        # patch the instance of GitRepo with an "FAKE" ui object to add
 
        # compatibility layer with Mercurial
 
        setattr(self._repo, 'ui', baseui)
 

	
 
        try:
 
@@ -81,119 +83,156 @@ class GitRepository(BaseRepository):
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    def run_git_command(self, cmd):
 
        """
 
        Runs given ``cmd`` as git command and returns tuple
 
        (returncode, stdout, stderr).
 

	
 
        .. note::
 
           This method exists only until log/blame functionality is implemented
 
           at Dulwich (see https://bugs.launchpad.net/bugs/645142). Parsing
 
           os command's output is road to hell...
 

	
 
        :param cmd: git command to be executed
 
        """
 

	
 
        _copts = ['-c', 'core.quotepath=false', ]
 
        _str_cmd = False
 
        if isinstance(cmd, basestring):
 
            cmd = [cmd]
 
            _str_cmd = True
 

	
 
        gitenv = os.environ
 
        # need to clean fix GIT_DIR !
 
        if 'GIT_DIR' in gitenv:
 
            del gitenv['GIT_DIR']
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 

	
 
        cmd = ['git'] + _copts + cmd
 
        if _str_cmd:
 
            cmd = ' '.join(cmd)
 
        try:
 
            opts = dict(
 
                env=gitenv,
 
            )
 
            if os.path.isdir(self.path):
 
                opts['cwd'] = self.path
 
            p = subprocessio.SubprocessIOChunker(cmd, **opts)
 
        except (EnvironmentError, OSError), err:
 
            log.error(traceback.format_exc())
 
            raise RepositoryError("Couldn't run git command (%s).\n"
 
                                  "Original error was:%s" % (cmd, err))
 

	
 
        so = ''.join(p)
 
        se = None
 
        return so, se
 

	
 
    def _check_url(self, url):
 
    @classmethod
 
    def _check_url(cls, url):
 
        """
 
        Functon will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that mercurial will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError
 
        """
 
        from mercurial.util import url as Url
 

	
 
        #TODO: implement this
 
        pass
 
        # those authnadlers are patched for python 2.6.5 bug an
 
        # infinit looping when given invalid resources
 
        from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
 

	
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if('+' in url[:url.find('://')]):
 
            url = url[url.find('+') + 1:]
 

	
 
        handlers = []
 
        test_uri, authinfo = Url(url).authinfo()
 
        if not test_uri.endswith('info/refs'):
 
            test_uri = test_uri.rstrip('/') + '/info/refs'
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
 

	
 
        q = {"service": 'git-upload-pack'}
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            return resp.code == 200
 
        except Exception, e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError(e)
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False,
 
            bare=False):
 
        if create and os.path.exists(self.path):
 
            raise RepositoryError("Location already exist")
 
        if src_url and not create:
 
            raise RepositoryError("Create should be set to True if src_url is "
 
                                  "given (clone operation creates repository)")
 
        try:
 
            if create and src_url:
 
                self._check_url(src_url)
 
                GitRepository._check_url(src_url)
 
                self.clone(src_url, update_after_clone, bare)
 
                return Repo(self.path)
 
            elif create:
 
                os.mkdir(self.path)
 
                if bare:
 
                    return Repo.init_bare(self.path)
 
                else:
 
                    return Repo.init(self.path)
 
            else:
 
                return Repo(self.path)
 
        except (NotGitRepository, OSError), err:
 
            raise RepositoryError(err)
 

	
 
    def _get_all_revisions(self):
 
        # we must check if this repo is not empty, since later command
 
        # fails if it is. And it's cheaper to ask than throw the subprocess
 
        # errors
 
        try:
 
            self._repo.head()
 
        except KeyError:
 
            return []
 
        cmd = 'rev-list --all --reverse --date-order'
 
        try:
 
            so, se = self.run_git_command(cmd)
 
        except RepositoryError:
 
            # Can be raised for empty repositories
 
            return []
 
        return so.splitlines()
 

	
 
    def _get_all_revisions2(self):
 
        #alternate implementation using dulwich
 
        includes = [x[1][0] for x in self._parsed_refs.iteritems()
 
                    if x[1][1] != 'T']
 
        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        For git backend we always return integer here. This way we ensure
 
        that changset's revision attribute would become integer.
 
        """
 
        pattern = re.compile(r'^[[0-9a-fA-F]{12}|[0-9a-fA-F]{40}]$')
 
        is_bstr = lambda o: isinstance(o, (str, unicode))
 
        is_null = lambda o: len(o) == revision.count('0')
 

	
 
        if len(self.revisions) == 0:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
rhodecode/lib/vcs/backends/hg/repository.py
Show inline comments
 
@@ -204,166 +204,167 @@ class MercurialRepository(BaseRepository
 
        return self._get_bookmarks()
 

	
 
    def _get_bookmarks(self):
 
        if self._empty:
 
            return {}
 

	
 
        sortkey = lambda ctx: ctx[0]  # sort by name
 
        _bookmarks = [(safe_unicode(n), hex(h),) for n, h in
 
                 self._repo._bookmarks.items()]
 
        return OrderedDict(sorted(_bookmarks, key=sortkey, reverse=True))
 

	
 
    def _get_all_revisions(self):
 

	
 
        return map(lambda x: hex(x[7]), self._repo.changelog.index)[:-1]
 

	
 
    def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
 
                  context=3):
 
        """
 
        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
        ``rev2`` since ``rev1``.
 

	
 
        :param rev1: Entry point from which diff is shown. Can be
 
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
          the changes since empty state of the repository until ``rev2``
 
        :param rev2: Until which revision changes should be shown.
 
        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
          changes. Defaults to ``False``.
 
        :param context: How many lines before/after changed lines should be
 
          shown. Defaults to ``3``.
 
        """
 
        if hasattr(rev1, 'raw_id'):
 
            rev1 = getattr(rev1, 'raw_id')
 

	
 
        if hasattr(rev2, 'raw_id'):
 
            rev2 = getattr(rev2, 'raw_id')
 

	
 
        # Check if given revisions are present at repository (may raise
 
        # ChangesetDoesNotExistError)
 
        if rev1 != self.EMPTY_CHANGESET:
 
            self.get_changeset(rev1)
 
        self.get_changeset(rev2)
 

	
 
        file_filter = match(self.path, '', [path])
 
        return ''.join(patch.diff(self._repo, rev1, rev2, match=file_filter,
 
                          opts=diffopts(git=True,
 
                                        ignorews=ignore_whitespace,
 
                                        context=context)))
 

	
 
    def _check_url(self, url):
 
    @classmethod
 
    def _check_url(cls, url):
 
        """
 
        Function will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that mercurial will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError, return code 200 if url
 
        is valid or True if it's a local path
 
        """
 

	
 
        from mercurial.util import url as Url
 

	
 
        # those authnadlers are patched for python 2.6.5 bug an
 
        # infinit looping when given invalid resources
 
        from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
 

	
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if('+' in url[:url.find('://')]):
 
            url = url[url.find('+')+1:]
 
            url = url[url.find('+') + 1:]
 

	
 
        handlers = []
 
        test_uri, authinfo = Url(url).authinfo()
 

	
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
 
                        ('Accept', 'application/mercurial-0.1')]
 

	
 
        q = {"cmd": 'between'}
 
        q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            return resp.code == 200
 
        except Exception, e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError(e)
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
 
        """
 
        Function will check for mercurial repository in given path and return
 
        a localrepo object. If there is no repository in that path it will
 
        raise an exception unless ``create`` parameter is set to True - in
 
        that case repository would be created and returned.
 
        If ``src_url`` is given, would try to clone repository from the
 
        location at given clone_point. Additionally it'll make update to
 
        working copy accordingly to ``update_after_clone`` flag
 
        """
 
        try:
 
            if src_url:
 
                url = str(self._get_url(src_url))
 
                opts = {}
 
                if not update_after_clone:
 
                    opts.update({'noupdate': True})
 
                try:
 
                    self._check_url(url)
 
                    MercurialRepository._check_url(url)
 
                    clone(self.baseui, url, self.path, **opts)
 
#                except urllib2.URLError:
 
#                    raise Abort("Got HTTP 404 error")
 
                except Exception:
 
                    raise
 
                # Don't try to create if we've already cloned repo
 
                create = False
 
            return localrepository(self.baseui, self.path, create=create)
 
        except (Abort, RepoError), err:
 
            if create:
 
                msg = "Cannot create repository at %s. Original error was %s"\
 
                    % (self.path, err)
 
            else:
 
                msg = "Not valid repository at %s. Original error was %s"\
 
                    % (self.path, err)
 
            raise RepositoryError(msg)
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        return MercurialInMemoryChangeset(self)
 

	
 
    @LazyProperty
 
    def description(self):
 
        undefined_description = u'unknown'
 
        return safe_unicode(self._repo.ui.config('web', 'description',
 
                                   undefined_description, untrusted=True))
 

	
 
    @LazyProperty
 
    def contact(self):
 
        undefined_contact = u'Unknown'
 
        return safe_unicode(get_contact(self._repo.ui.config)
 
                            or undefined_contact)
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            #fallback to filesystem
 
            cl_path = os.path.join(self.path, '.hg', "00changelog.i")
 
            st_path = os.path.join(self.path, '.hg', "store")
 
            if os.path.exists(cl_path):
0 comments (0 inline, 0 general)