Changeset - b61824e61e68
[Not reviewed]
beta
0 1 0
Marcin Kuzminski - 13 years ago 2012-11-29 19:29:33
marcin@python-works.com
Don't cache dulwich Repos, in pararell multithreaded evniroment dulwich pack file openers
can break badly. We need to create Repo() objects always for each call, even when it's
taken from cache.
Fixed issue with dulwich filedescriptor leak ref #573
1 file changed with 20 insertions and 13 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git
 
    ~~~~~~~~~~~~~~~~
 

	
 
    Git backend implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import os
 
import re
 
import time
 
import posixpath
 
import logging
 
import traceback
 
import urllib
 
import urllib2
 
from dulwich.repo import Repo, NotGitRepository
 
from dulwich.objects import Tag
 
from string import Template
 
from subprocess import Popen, PIPE
 
from rhodecode.lib.vcs.backends.base import BaseRepository
 
from rhodecode.lib.vcs.exceptions import BranchDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
 
from rhodecode.lib.vcs.exceptions import RepositoryError
 
from rhodecode.lib.vcs.exceptions import TagAlreadyExistError
 
from rhodecode.lib.vcs.exceptions import TagDoesNotExistError
 
from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 
from rhodecode.lib.vcs.utils.lazy import LazyProperty
 
from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
 
from rhodecode.lib.vcs.utils.paths import abspath
 
from rhodecode.lib.vcs.utils.paths import get_user_home
 
from .workdir import GitWorkdir
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .config import ConfigFile
 
from rhodecode.lib import subprocessio
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
 
                 update_after_clone=False, bare=False):
 

	
 
        self.path = abspath(repo_path)
 
        self._repo = self._get_repo(create, src_url, update_after_clone, bare)
 
        repo = self._get_repo(create, src_url, update_after_clone, bare)
 
        self.bare = repo.bare
 

	
 
        self._config_files = [
 
            bare and abspath(self.path, 'config') or abspath(self.path, '.git',
 
                'config'),
 
            abspath(get_user_home(), '.gitconfig'),
 
        ]
 

	
 
    @property
 
    def _repo(self):
 
        repo = Repo(self.path)
 
        #temporary set that to now at later we will move it to constructor
 
        baseui = None
 
        if baseui is None:
 
            from mercurial.ui import ui
 
            baseui = ui()
 
        # patch the instance of GitRepo with an "FAKE" ui object to add
 
        # compatibility layer with Mercurial
 
        setattr(self._repo, 'ui', baseui)
 

	
 
        try:
 
            self.head = self._repo.head()
 
        except KeyError:
 
            self.head = None
 
        setattr(repo, 'ui', baseui)
 
        return repo
 

	
 
        self._config_files = [
 
            bare and abspath(self.path, 'config') or abspath(self.path, '.git',
 
                'config'),
 
            abspath(get_user_home(), '.gitconfig'),
 
        ]
 
        self.bare = self._repo.bare
 
    @property
 
    def head(self):
 
        try:
 
            return self._repo.head()
 
        except KeyError:
 
            return None
 

	
 
    @LazyProperty
 
    def revisions(self):
 
        """
 
        Returns list of revisions' ids, in ascending order.  Being lazy
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    def run_git_command(self, cmd):
 
        """
 
        Runs given ``cmd`` as git command and returns tuple
 
        (returncode, stdout, stderr).
 

	
 
        .. note::
 
           This method exists only until log/blame functionality is implemented
 
           at Dulwich (see https://bugs.launchpad.net/bugs/645142). Parsing
 
           os command's output is road to hell...
 

	
 
        :param cmd: git command to be executed
 
        """
 

	
 
        _copts = ['-c', 'core.quotepath=false', ]
 
        _str_cmd = False
 
        if isinstance(cmd, basestring):
 
            cmd = [cmd]
 
            _str_cmd = True
 

	
 
        gitenv = os.environ
 
        # need to clean fix GIT_DIR !
 
        if 'GIT_DIR' in gitenv:
 
            del gitenv['GIT_DIR']
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 

	
 
        cmd = ['git'] + _copts + cmd
 
        if _str_cmd:
 
            cmd = ' '.join(cmd)
 
        try:
 
            opts = dict(
 
                env=gitenv,
 
                shell=False,
 
            )
 
            if os.path.isdir(self.path):
 
                opts['cwd'] = self.path
 
            p = subprocessio.SubprocessIOChunker(cmd, **opts)
 
        except (EnvironmentError, OSError), err:
 
            log.error(traceback.format_exc())
 
            raise RepositoryError("Couldn't run git command (%s).\n"
 
                                  "Original error was:%s" % (cmd, err))
 

	
 
        return ''.join(p.output), ''.join(p.error)
 

	
 
    @classmethod
 
    def _check_url(cls, url):
 
        """
 
        Functon will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that mercurial will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError
 
        """
 
        from mercurial.util import url as Url
 

	
 
        # those authnadlers are patched for python 2.6.5 bug an
 
        # infinit looping when given invalid resources
 
        from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
 

	
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if('+' in url[:url.find('://')]):
 
            url = url[url.find('+') + 1:]
 

	
 
        handlers = []
 
        test_uri, authinfo = Url(url).authinfo()
 
        if not test_uri.endswith('info/refs'):
 
            test_uri = test_uri.rstrip('/') + '/info/refs'
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
 

	
 
        q = {"service": 'git-upload-pack'}
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            return resp.code == 200
 
        except Exception, e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError("[%s] %s" % (url, e))
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False,
 
                  bare=False):
 
        if create and os.path.exists(self.path):
 
            raise RepositoryError("Location already exist")
 
        if src_url and not create:
 
            raise RepositoryError("Create should be set to True if src_url is "
 
                                  "given (clone operation creates repository)")
 
        try:
 
            if create and src_url:
 
                GitRepository._check_url(src_url)
 
                self.clone(src_url, update_after_clone, bare)
 
                return Repo(self.path)
 
            elif create:
 
                os.mkdir(self.path)
 
                if bare:
 
                    return Repo.init_bare(self.path)
 
                else:
 
                    return Repo.init(self.path)
 
            else:
 
                return Repo(self.path)
 
        except (NotGitRepository, OSError), err:
 
            raise RepositoryError(err)
 

	
 
    def _get_all_revisions(self):
 
        # we must check if this repo is not empty, since later command
 
        # fails if it is. And it's cheaper to ask than throw the subprocess
 
        # errors
 
        try:
 
            self._repo.head()
 
        except KeyError:
 
            return []
 
        cmd = 'rev-list --all --reverse --date-order'
 
        try:
 
            so, se = self.run_git_command(cmd)
 
        except RepositoryError:
 
            # Can be raised for empty repositories
 
            return []
 
        return so.splitlines()
 

	
 
    def _get_all_revisions2(self):
 
        #alternate implementation using dulwich
 
        includes = [x[1][0] for x in self._parsed_refs.iteritems()
 
                    if x[1][1] != 'T']
 
        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        For git backend we always return integer here. This way we ensure
 
        that changset's revision attribute would become integer.
 
        """
 
        pattern = re.compile(r'^[[0-9a-fA-F]{12}|[0-9a-fA-F]{40}]$')
 
        is_bstr = lambda o: isinstance(o, (str, unicode))
 
        is_null = lambda o: len(o) == revision.count('0')
 

	
 
        if len(self.revisions) == 0:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
 
            revision = self.revisions[-1]
 

	
 
        if ((is_bstr(revision) and revision.isdigit() and len(revision) < 12)
 
            or isinstance(revision, int) or is_null(revision)):
 
            try:
 
                revision = self.revisions[int(revision)]
 
            except:
 
                raise ChangesetDoesNotExistError("Revision %r does not exist "
 
                    "for this repository %s" % (revision, self))
 

	
 
        elif is_bstr(revision):
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(revision)
 
            _tags_shas = self.tags.values()
 
            if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
 
                return _ref_revision[0]
 

	
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            elif revision in _tags_shas:
 
                return _tags_shas[_tags_shas.index(revision)]
 

	
 
            elif not pattern.match(revision) or revision not in self.revisions:
 
                raise ChangesetDoesNotExistError("Revision %r does not exist "
 
                    "for this repository %s" % (revision, self))
 

	
 
        # Ensure we return full id
 
        if not pattern.match(str(revision)):
 
            raise ChangesetDoesNotExistError("Given revision %r not recognized"
 
                % revision)
 
        return revision
 

	
 
    def _get_archives(self, archive_name='tip'):
0 comments (0 inline, 0 general)