Changeset - 0e2d450feb03
[Not reviewed]
default
0 8 0
Mads Kiilerich - 10 years ago 2015-06-09 22:53:24
madski@unity3d.com
git: run external commands as list of strings so we really get correct quoting (Issue #135)

a6dfd14d4b20 from
https://bitbucket.org/conservancy/kallithea/pull-request/17/add-quotes-to-repo-urls-for-git-backend
fixed that issue but did not make it "safe". The vcs git backend still used
command strings but tried to quote them correctly ... but that approach is
almost impossible to get right.

Instead, pass a string list all the way to the subprocess module and let it do
the quoting. This also makes some of the code more simple.
8 files changed with 57 insertions and 91 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/compare.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.compare
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
compare controller for pylons showing differences between two
 
repos, branches, bookmarks or tips
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: May 6, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 
import re
 

	
 
from webob.exc import HTTPBadRequest
 
from pylons import request, tmpl_context as c, url
 
from pylons.controllers.util import redirect
 
from pylons.i18n.translation import _
 

	
 
from kallithea.lib.vcs.utils.hgcompat import unionrepo
 
from kallithea.lib import helpers as h
 
from kallithea.lib.base import BaseRepoController, render
 
from kallithea.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
 
from kallithea.lib import diffs
 
from kallithea.model.db import Repository
 
from kallithea.lib.diffs import LimitedDiffContainer
 
from kallithea.controllers.changeset import _ignorews_url,\
 
    _context_url, get_line_ctx, get_ignore_ws
 
from kallithea.lib.graphmod import graph_data
 
from kallithea.lib.compat import json
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class CompareController(BaseRepoController):
 

	
 
    def __before__(self):
 
        super(CompareController, self).__before__()
 

	
 
    @staticmethod
 
    def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev):
 
        """
 
        Returns lists of changesets that can be merged from org_repo@org_rev
 
        to other_repo@other_rev
 
        ... and the other way
 
        ... and the ancestor that would be used for merge
 

	
 
        :param org_repo: repo object, that is most likely the original repo we forked from
 
        :param org_rev: the revision we want our compare to be made
 
        :param other_repo: repo object, most likely the fork of org_repo. It has
 
            all changesets that we need to obtain
 
        :param other_rev: revision we want out compare to be made on other_repo
 
        """
 
        ancestor = None
 
        if org_rev == other_rev or org_repo.EMPTY_CHANGESET in (org_rev, other_rev):
 
            org_changesets = []
 
            other_changesets = []
 
            ancestor = org_rev
 

	
 
        elif alias == 'hg':
 
            #case two independent repos
 
            if org_repo != other_repo:
 
                hgrepo = unionrepo.unionrepository(other_repo.baseui,
 
                                                   other_repo.path,
 
                                                   org_repo.path)
 
                # all ancestors of other_rev will be in other_repo and
 
                # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot
 

	
 
            #no remote compare do it on the same repository
 
            else:
 
                hgrepo = other_repo._repo
 

	
 
            ancestors = hgrepo.revs("ancestor(id(%s), id(%s))", org_rev, other_rev)
 
            if ancestors:
 
                # FIXME: picks arbitrary ancestor - but there is usually only one
 
                try:
 
                    ancestor = hgrepo[ancestors.first()].hex()
 
                except AttributeError:
 
                    # removed in hg 3.2
 
                    ancestor = hgrepo[ancestors[0]].hex()
 

	
 
            other_revs = hgrepo.revs("ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
 
                                     other_rev, org_rev, org_rev)
 
            other_changesets = [other_repo.get_changeset(rev) for rev in other_revs]
 
            org_revs = hgrepo.revs("ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
 
                                   org_rev, other_rev, other_rev)
 

	
 
            org_changesets = [org_repo.get_changeset(hgrepo[rev].hex()) for rev in org_revs]
 

	
 
        elif alias == 'git':
 
            if org_repo != other_repo:
 
                from dulwich.repo import Repo
 
                from dulwich.client import SubprocessGitClient
 

	
 
                gitrepo = Repo(org_repo.path)
 
                SubprocessGitClient(thin_packs=False).fetch(other_repo.path, gitrepo)
 

	
 
                gitrepo_remote = Repo(other_repo.path)
 
                SubprocessGitClient(thin_packs=False).fetch(org_repo.path, gitrepo_remote)
 

	
 
                revs = []
 
                for x in gitrepo_remote.get_walker(include=[other_rev],
 
                                                   exclude=[org_rev]):
 
                    revs.append(x.commit.id)
 

	
 
                other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
 
                if other_changesets:
 
                    ancestor = other_changesets[0].parents[0].raw_id
 
                else:
 
                    # no changesets from other repo, ancestor is the other_rev
 
                    ancestor = other_rev
 

	
 
            else:
 
                so, se = org_repo.run_git_command(
 
                    'log --reverse --pretty="format: %%H" -s %s..%s'
 
                        % (org_rev, other_rev)
 
                    ['log', '--reverse', '--pretty=format:%H',
 
                     '-s', '%s..%s' % (org_rev, other_rev)]
 
                )
 
                other_changesets = [org_repo.get_changeset(cs)
 
                              for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
 
                so, se = org_repo.run_git_command(
 
                    'merge-base %s %s' % (org_rev, other_rev)
 
                    ['merge-base', org_rev, other_rev]
 
                )
 
                ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0]
 
            org_changesets = []
 

	
 
        else:
 
            raise Exception('Bad alias only git and hg is allowed')
 

	
 
        return other_changesets, org_changesets, ancestor
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
 
                                   'repository.admin')
 
    def index(self, repo_name):
 
        c.compare_home = True
 
        org_repo = c.db_repo.repo_name
 
        other_repo = request.GET.get('other_repo', org_repo)
 
        c.a_repo = Repository.get_by_repo_name(org_repo)
 
        c.cs_repo = Repository.get_by_repo_name(other_repo)
 
        c.a_ref_name = c.cs_ref_name = _('Select changeset')
 
        return render('compare/compare_diff.html')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
 
                                   'repository.admin')
 
    def compare(self, repo_name, org_ref_type, org_ref_name, other_ref_type, other_ref_name):
 
        org_repo = c.db_repo.repo_name
 
        other_repo = request.GET.get('other_repo', org_repo)
 
        # If merge is True:
 
        #   Show what org would get if merged with other:
 
        #   List changesets that are ancestors of other but not of org.
 
        #   New changesets in org is thus ignored.
 
        #   Diff will be from common ancestor, and merges of org to other will thus be ignored.
 
        # If merge is False:
 
        #   Make a raw diff from org to other, no matter if related or not.
 
        #   Changesets in one and not in the other will be ignored
 
        merge = bool(request.GET.get('merge'))
 
        # fulldiff disables cut_off_limit
 
        c.fulldiff = request.GET.get('fulldiff')
 
        # partial uses compare_cs.html template directly
 
        partial = request.environ.get('HTTP_X_PARTIAL_XHR')
 
        # as_form puts hidden input field with changeset revisions
 
        c.as_form = partial and request.GET.get('as_form')
 
        # swap url for compare_diff page - never partial and never as_form
 
        c.swap_url = h.url('compare_url',
 
            repo_name=other_repo,
 
            org_ref_type=other_ref_type, org_ref_name=other_ref_name,
 
            other_repo=org_repo,
 
            other_ref_type=org_ref_type, other_ref_name=org_ref_name,
 
            merge=merge or '')
 

	
 
        # set callbacks for generating markup for icons
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = request.GET.get('context', 3)
 

	
 
        org_repo = Repository.get_by_repo_name(org_repo)
 
        other_repo = Repository.get_by_repo_name(other_repo)
 

	
 
        if org_repo is None:
 
            msg = 'Could not find org repo %s' % org_repo
 
            log.error(msg)
 
            h.flash(msg, category='error')
 
            return redirect(url('compare_home', repo_name=c.repo_name))
 

	
 
        if other_repo is None:
 
            msg = 'Could not find other repo %s' % other_repo
 
            log.error(msg)
 
            h.flash(msg, category='error')
 
            return redirect(url('compare_home', repo_name=c.repo_name))
 

	
 
        if org_repo.scm_instance.alias != other_repo.scm_instance.alias:
 
            msg = 'compare of two different kind of remote repos not available'
 
            log.error(msg)
 
            h.flash(msg, category='error')
 
            return redirect(url('compare_home', repo_name=c.repo_name))
 

	
 
        c.a_rev = self._get_ref_rev(org_repo, org_ref_type, org_ref_name,
 
            returnempty=True)
 
        c.cs_rev = self._get_ref_rev(other_repo, other_ref_type, other_ref_name)
 

	
 
        c.compare_home = False
 
        c.a_repo = org_repo
 
        c.a_ref_name = org_ref_name
 
        c.a_ref_type = org_ref_type
 
        c.cs_repo = other_repo
 
        c.cs_ref_name = other_ref_name
 
        c.cs_ref_type = other_ref_type
 

	
 
        c.cs_ranges, c.cs_ranges_org, c.ancestor = self._get_changesets(
 
            org_repo.scm_instance.alias, org_repo.scm_instance, c.a_rev,
 
            other_repo.scm_instance, c.cs_rev)
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = other_repo.get_comments(raw_ids)
 
        c.statuses = other_repo.statuses(raw_ids)
 

	
 
        revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
 
        c.jsdata = json.dumps(graph_data(c.cs_repo.scm_instance, revs))
 

	
 
        if partial:
 
            return render('compare/compare_cs.html')
 
        if merge and c.ancestor:
 
            # case we want a simple diff without incoming changesets,
 
            # previewing what will be merged.
 
            # Make the diff on the other repo (which is known to have other_rev)
 
            log.debug('Using ancestor %s as rev1 instead of %s'
 
                      % (c.ancestor, c.a_rev))
 
            rev1 = c.ancestor
 
            org_repo = other_repo
 
        else: # comparing tips, not necessarily linearly related
 
            if merge:
 
                log.error('Unable to find ancestor revision')
 
            if org_repo != other_repo:
 
                # TODO: we could do this by using hg unionrepo
 
                log.error('cannot compare across repos %s and %s', org_repo, other_repo)
 
                h.flash(_('Cannot compare repositories without using common ancestor'), category='error')
 
                raise HTTPBadRequest
 
            rev1 = c.a_rev
 

	
 
        diff_limit = self.cut_off_limit if not c.fulldiff else None
 

	
 
        log.debug('running diff between %s and %s in %s'
 
                  % (rev1, c.cs_rev, org_repo.scm_instance.path))
 
        txtdiff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=line_context)
 

	
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', format='gitdiff',
 
                                             diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.files = []
 
        c.changes = {}
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 
        for f in _parsed:
 
            st = f['stats']
 
            if not st['binary']:
 
                c.lines_added += st['added']
 
                c.lines_deleted += st['deleted']
 
            fid = h.FID('', f['filename'])
 
            c.files.append([fid, f['operation'], f['filename'], f['stats']])
 
            htmldiff = diff_processor.as_html(enable_comments=False, parsed_lines=[f])
 
            c.changes[fid] = [f['operation'], f['filename'], htmldiff]
 

	
 
        return render('compare/compare_diff.html')
kallithea/lib/hooks.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.hooks
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Hooks run by Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Aug 6, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import os
 
import sys
 
import time
 
import binascii
 

	
 
from kallithea.lib.vcs.utils.hgcompat import nullrev, revrange
 
from kallithea.lib import helpers as h
 
from kallithea.lib.utils import action_logger
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.exceptions import HTTPLockedRC, UserCreationError
 
from kallithea.lib.utils2 import safe_str, _extract_extras
 
from kallithea.model.db import Repository, User
 

	
 

	
 
def _get_scm_size(alias, root_path):
 

	
 
    if not alias.startswith('.'):
 
        alias += '.'
 

	
 
    size_scm, size_root = 0, 0
 
    for path, dirs, files in os.walk(safe_str(root_path)):
 
        if path.find(alias) != -1:
 
            for f in files:
 
                try:
 
                    size_scm += os.path.getsize(os.path.join(path, f))
 
                except OSError:
 
                    pass
 
        else:
 
            for f in files:
 
                try:
 
                    size_root += os.path.getsize(os.path.join(path, f))
 
                except OSError:
 
                    pass
 

	
 
    size_scm_f = h.format_byte_size(size_scm)
 
    size_root_f = h.format_byte_size(size_root)
 
    size_total_f = h.format_byte_size(size_root + size_scm)
 

	
 
    return size_scm_f, size_root_f, size_total_f
 

	
 

	
 
def repo_size(ui, repo, hooktype=None, **kwargs):
 
    """
 
    Presents size of repository after push
 

	
 
    :param ui:
 
    :param repo:
 
    :param hooktype:
 
    """
 

	
 
    size_hg_f, size_root_f, size_total_f = _get_scm_size('.hg', repo.root)
 

	
 
    last_cs = repo[len(repo) - 1]
 

	
 
    msg = ('Repository size .hg:%s repo:%s total:%s\n'
 
           'Last revision is now r%s:%s\n') % (
 
        size_hg_f, size_root_f, size_total_f, last_cs.rev(), last_cs.hex()[:12]
 
    )
 

	
 
    sys.stdout.write(msg)
 

	
 

	
 
def pre_push(ui, repo, **kwargs):
 
    # pre push function, currently used to ban pushing when
 
    # repository is locked
 
    ex = _extract_extras()
 

	
 
    usr = User.get_by_username(ex.username)
 
    if ex.locked_by[0] and usr.user_id != int(ex.locked_by[0]):
 
        locked_by = User.get(ex.locked_by[0]).username
 
        # this exception is interpreted in git/hg middlewares and based
 
        # on that proper return code is server to client
 
        _http_ret = HTTPLockedRC(ex.repository, locked_by)
 
        if str(_http_ret.code).startswith('2'):
 
            #2xx Codes don't raise exceptions
 
            sys.stdout.write(_http_ret.title)
 
        else:
 
            raise _http_ret
 

	
 

	
 
def pre_pull(ui, repo, **kwargs):
 
    # pre push function, currently used to ban pushing when
 
    # repository is locked
 
    ex = _extract_extras()
 
    if ex.locked_by[0]:
 
        locked_by = User.get(ex.locked_by[0]).username
 
        # this exception is interpreted in git/hg middlewares and based
 
        # on that proper return code is server to client
 
        _http_ret = HTTPLockedRC(ex.repository, locked_by)
 
        if str(_http_ret.code).startswith('2'):
 
            #2xx Codes don't raise exceptions
 
            sys.stdout.write(_http_ret.title)
 
        else:
 
            raise _http_ret
 

	
 

	
 
def log_pull_action(ui, repo, **kwargs):
 
    """
 
    Logs user last pull action
 

	
 
    :param ui:
 
    :param repo:
 
    """
 
    ex = _extract_extras()
 

	
 
    user = User.get_by_username(ex.username)
 
    action = 'pull'
 
    action_logger(user, action, ex.repository, ex.ip, commit=True)
 
    # extension hook call
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'PULL_HOOK', None)
 
    if callable(callback):
 
        kw = {}
 
        kw.update(ex)
 
        callback(**kw)
 

	
 
    if ex.make_lock is not None and ex.make_lock:
 
        Repository.lock(Repository.get_by_repo_name(ex.repository), user.user_id)
 
        #msg = 'Made lock on repo `%s`' % repository
 
        #sys.stdout.write(msg)
 

	
 
    if ex.locked_by[0]:
 
        locked_by = User.get(ex.locked_by[0]).username
 
        _http_ret = HTTPLockedRC(ex.repository, locked_by)
 
        if str(_http_ret.code).startswith('2'):
 
            #2xx Codes don't raise exceptions
 
            sys.stdout.write(_http_ret.title)
 
    return 0
 

	
 

	
 
def log_push_action(ui, repo, **kwargs):
 
    """
 
    Maps user last push action to new changeset id, from mercurial
 

	
 
    :param ui:
 
    :param repo: repo object containing the `ui` object
 
    """
 

	
 
    ex = _extract_extras()
 

	
 
    action_tmpl = ex.action + ':%s'
 
    revs = []
 
    if ex.scm == 'hg':
 
        node = kwargs['node']
 

	
 
        def get_revs(repo, rev_opt):
 
            if rev_opt:
 
                revs = revrange(repo, rev_opt)
 

	
 
                if len(revs) == 0:
 
                    return (nullrev, nullrev)
 
                return max(revs), min(revs)
 
            else:
 
                return len(repo) - 1, 0
 

	
 
        stop, start = get_revs(repo, [node + ':'])
 
        _h = binascii.hexlify
 
        revs = [_h(repo[r].node()) for r in xrange(start, stop + 1)]
 
    elif ex.scm == 'git':
 
        revs = kwargs.get('_git_revs', [])
 
        if '_git_revs' in kwargs:
 
            kwargs.pop('_git_revs')
 

	
 
    action = action_tmpl % ','.join(revs)
 
    action_logger(ex.username, action, ex.repository, ex.ip, commit=True)
 

	
 
    # extension hook call
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'PUSH_HOOK', None)
 
    if callable(callback):
 
        kw = {'pushed_revs': revs}
 
        kw.update(ex)
 
        callback(**kw)
 

	
 
    if ex.make_lock is not None and not ex.make_lock:
 
        Repository.unlock(Repository.get_by_repo_name(ex.repository))
 
        msg = 'Released lock on repo `%s`\n' % ex.repository
 
        sys.stdout.write(msg)
 

	
 
    if ex.locked_by[0]:
 
        locked_by = User.get(ex.locked_by[0]).username
 
        _http_ret = HTTPLockedRC(ex.repository, locked_by)
 
        if str(_http_ret.code).startswith('2'):
 
            #2xx Codes don't raise exceptions
 
            sys.stdout.write(_http_ret.title)
 

	
 
    return 0
 

	
 

	
 
def log_create_repository(repository_dict, created_by, **kwargs):
 
    """
 
    Post create repository Hook.
 

	
 
    :param repository: dict dump of repository object
 
    :param created_by: username who created repository
 

	
 
    available keys of repository_dict:
 

	
 
     'repo_type',
 
     'description',
 
     'private',
 
     'created_on',
 
     'enable_downloads',
 
     'repo_id',
 
     'user_id',
 
     'enable_statistics',
 
     'clone_uri',
 
     'fork_id',
 
     'group_id',
 
     'repo_name'
 

	
 
    """
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'CREATE_REPO_HOOK', None)
 
    if callable(callback):
 
        kw = {}
 
        kw.update(repository_dict)
 
        kw.update({'created_by': created_by})
 
        kw.update(kwargs)
 
        return callback(**kw)
 

	
 
    return 0
 

	
 

	
 
def check_allowed_create_user(user_dict, created_by, **kwargs):
 
    # pre create hooks
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'PRE_CREATE_USER_HOOK', None)
 
    if callable(callback):
 
        allowed, reason = callback(created_by=created_by, **user_dict)
 
        if not allowed:
 
            raise UserCreationError(reason)
 

	
 

	
 
def log_create_user(user_dict, created_by, **kwargs):
 
    """
 
    Post create user Hook.
 

	
 
    :param user_dict: dict dump of user object
 

	
 
    available keys for user_dict:
 

	
 
     'username',
 
     'full_name_or_username',
 
     'full_contact',
 
     'user_id',
 
     'name',
 
     'firstname',
 
     'short_contact',
 
     'admin',
 
     'lastname',
 
     'ip_addresses',
 
     'ldap_dn',
 
     'email',
 
     'api_key',
 
     'last_login',
 
     'full_name',
 
     'active',
 
     'password',
 
     'emails',
 
     'inherit_default_permissions'
 

	
 
    """
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'CREATE_USER_HOOK', None)
 
    if callable(callback):
 
        return callback(created_by=created_by, **user_dict)
 

	
 
    return 0
 

	
 

	
 
def log_delete_repository(repository_dict, deleted_by, **kwargs):
 
    """
 
    Post delete repository Hook.
 

	
 
    :param repository: dict dump of repository object
 
    :param deleted_by: username who deleted the repository
 

	
 
    available keys of repository_dict:
 

	
 
     'repo_type',
 
     'description',
 
     'private',
 
     'created_on',
 
     'enable_downloads',
 
     'repo_id',
 
     'user_id',
 
     'enable_statistics',
 
     'clone_uri',
 
     'fork_id',
 
     'group_id',
 
     'repo_name'
 

	
 
    """
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'DELETE_REPO_HOOK', None)
 
    if callable(callback):
 
        kw = {}
 
        kw.update(repository_dict)
 
        kw.update({'deleted_by': deleted_by,
 
                   'deleted_on': time.time()})
 
        kw.update(kwargs)
 
        return callback(**kw)
 

	
 
    return 0
 

	
 

	
 
def log_delete_user(user_dict, deleted_by, **kwargs):
 
    """
 
    Post delete user Hook.
 

	
 
    :param user_dict: dict dump of user object
 

	
 
    available keys for user_dict:
 

	
 
     'username',
 
     'full_name_or_username',
 
     'full_contact',
 
     'user_id',
 
     'name',
 
     'firstname',
 
     'short_contact',
 
     'admin',
 
     'lastname',
 
     'ip_addresses',
 
     'ldap_dn',
 
     'email',
 
     'api_key',
 
     'last_login',
 
     'full_name',
 
     'active',
 
     'password',
 
     'emails',
 
     'inherit_default_permissions'
 

	
 
    """
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'DELETE_USER_HOOK', None)
 
    if callable(callback):
 
        return callback(deleted_by=deleted_by, **user_dict)
 

	
 
    return 0
 

	
 

	
 
handle_git_pre_receive = (lambda repo_path, revs, env:
 
    handle_git_receive(repo_path, revs, env, hook_type='pre'))
 
handle_git_post_receive = (lambda repo_path, revs, env:
 
    handle_git_receive(repo_path, revs, env, hook_type='post'))
 

	
 

	
 
def handle_git_receive(repo_path, revs, env, hook_type='post'):
 
    """
 
    A really hacky method that is run by git post-receive hook and logs
 
    an push action together with pushed revisions. It's executed by subprocess
 
    thus needs all info to be able to create a on the fly pylons environment,
 
    connect to database and run the logging code. Hacky as sh*t but works.
 

	
 
    :param repo_path:
 
    :param revs:
 
    :param env:
 
    """
 
    from paste.deploy import appconfig
 
    from sqlalchemy import engine_from_config
 
    from kallithea.config.environment import load_environment
 
    from kallithea.model import init_model
 
    from kallithea.model.db import Ui
 
    from kallithea.lib.utils import make_ui
 
    extras = _extract_extras(env)
 

	
 
    path, ini_name = os.path.split(extras['config'])
 
    conf = appconfig('config:%s' % ini_name, relative_to=path)
 
    load_environment(conf.global_conf, conf.local_conf, test_env=False,
 
                     test_index=False)
 

	
 
    engine = engine_from_config(conf, 'sqlalchemy.db1.')
 
    init_model(engine)
 

	
 
    baseui = make_ui('db')
 
    # fix if it's not a bare repo
 
    if repo_path.endswith(os.sep + '.git'):
 
        repo_path = repo_path[:-5]
 

	
 
    repo = Repository.get_by_full_path(repo_path)
 
    if not repo:
 
        raise OSError('Repository %s not found in database'
 
                      % (safe_str(repo_path)))
 

	
 
    _hooks = dict(baseui.configitems('hooks')) or {}
 

	
 
    if hook_type == 'pre':
 
        repo = repo.scm_instance
 
    else:
 
        #post push shouldn't use the cached instance never
 
        repo = repo.scm_instance_no_cache()
 

	
 
    if hook_type == 'pre':
 
        pre_push(baseui, repo)
 

	
 
    # if push hook is enabled via web interface
 
    elif hook_type == 'post' and _hooks.get(Ui.HOOK_PUSH):
 
        rev_data = []
 
        for l in revs:
 
            old_rev, new_rev, ref = l.split(' ')
 
            _ref_data = ref.split('/')
 
            if _ref_data[1] in ['tags', 'heads']:
 
                rev_data.append({'old_rev': old_rev,
 
                                 'new_rev': new_rev,
 
                                 'ref': ref,
 
                                 'type': _ref_data[1],
 
                                 'name': _ref_data[2].strip()})
 

	
 
        git_revs = []
 

	
 
        for push_ref in rev_data:
 
            _type = push_ref['type']
 
            if _type == 'heads':
 
                if push_ref['old_rev'] == EmptyChangeset().raw_id:
 
                    # update the symbolic ref if we push new repo
 
                    if repo.is_empty():
 
                        repo._repo.refs.set_symbolic_ref('HEAD',
 
                                            'refs/heads/%s' % push_ref['name'])
 

	
 
                    cmd = "for-each-ref --format='%(refname)' 'refs/heads/*'"
 
                    cmd = ['for-each-ref', '--format=%(refname)','refs/heads/*']
 
                    heads = repo.run_git_command(cmd)[0]
 
                    cmd = ['log', push_ref['new_rev'],
 
                           '--reverse', '--pretty=format:%H', '--not']
 
                    heads = heads.replace(push_ref['ref'], '')
 
                    heads = ' '.join(map(lambda c: c.strip('\n').strip(),
 
                                         heads.splitlines()))
 
                    cmd = (('log %(new_rev)s' % push_ref) +
 
                           ' --reverse --pretty=format:"%H" --not ' + heads)
 
                    for l in heads.splitlines():
 
                        cmd.append(l.strip())
 
                    git_revs += repo.run_git_command(cmd)[0].splitlines()
 

	
 
                elif push_ref['new_rev'] == EmptyChangeset().raw_id:
 
                    #delete branch case
 
                    git_revs += ['delete_branch=>%s' % push_ref['name']]
 
                else:
 
                    cmd = (('log %(old_rev)s..%(new_rev)s' % push_ref) +
 
                           ' --reverse --pretty=format:"%H"')
 
                    cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref,
 
                           '--reverse', '--pretty=format:%H']
 
                    git_revs += repo.run_git_command(cmd)[0].splitlines()
 

	
 
            elif _type == 'tags':
 
                git_revs += ['tag=>%s' % push_ref['name']]
 

	
 
        log_push_action(baseui, repo, _git_revs=git_revs)
kallithea/lib/middleware/pygrack.py
Show inline comments
 
import os
 
import socket
 
import logging
 
import traceback
 

	
 
from webob import Request, Response, exc
 

	
 
import kallithea
 
from kallithea.lib.vcs import subprocessio
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class FileWrapper(object):
 

	
 
    def __init__(self, fd, content_length):
 
        self.fd = fd
 
        self.content_length = content_length
 
        self.remain = content_length
 

	
 
    def read(self, size):
 
        if size <= self.remain:
 
            try:
 
                data = self.fd.read(size)
 
            except socket.error:
 
                raise IOError(self)
 
            self.remain -= size
 
        elif self.remain:
 
            data = self.fd.read(self.remain)
 
            self.remain = 0
 
        else:
 
            data = None
 
        return data
 

	
 
    def __repr__(self):
 
        return '<FileWrapper %s len: %s, read: %s>' % (
 
            self.fd, self.content_length, self.content_length - self.remain
 
        )
 

	
 

	
 
class GitRepository(object):
 
    git_folder_signature = set(['config', 'head', 'info', 'objects', 'refs'])
 
    commands = ['git-upload-pack', 'git-receive-pack']
 

	
 
    def __init__(self, repo_name, content_path, extras):
 
        files = set([f.lower() for f in os.listdir(content_path)])
 
        if  not (self.git_folder_signature.intersection(files)
 
                == self.git_folder_signature):
 
            raise OSError('%s missing git signature' % content_path)
 
        self.content_path = content_path
 
        self.valid_accepts = ['application/x-%s-result' %
 
                              c for c in self.commands]
 
        self.repo_name = repo_name
 
        self.extras = extras
 

	
 
    def _get_fixedpath(self, path):
 
        """
 
        Small fix for repo_path
 

	
 
        :param path:
 
        """
 
        return path.split(self.repo_name, 1)[-1].strip('/')
 

	
 
    def inforefs(self, request, environ):
 
        """
 
        WSGI Response producer for HTTP GET Git Smart
 
        HTTP /info/refs request.
 
        """
 

	
 
        git_command = request.GET.get('service')
 
        if git_command not in self.commands:
 
            log.debug('command %s not allowed' % git_command)
 
            return exc.HTTPMethodNotAllowed()
 

	
 
        # note to self:
 
        # please, resist the urge to add '\n' to git capture and increment
 
        # line count by 1.
 
        # The code in Git client not only does NOT need '\n', but actually
 
        # blows up if you sprinkle "flush" (0000) as "0001\n".
 
        # It reads binary, per number of bytes specified.
 
        # if you do add '\n' as part of data, count it.
 
        server_advert = '# service=%s' % git_command
 
        packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
 
        _git_path = kallithea.CONFIG.get('git_path', 'git')
 
        cmd = [_git_path, git_command[4:],
 
               '--stateless-rpc', '--advertise-refs', self.content_path]
 
        log.debug('handling cmd %s', cmd)
 
        try:
 
            out = subprocessio.SubprocessIOChunker(
 
                r'%s %s --stateless-rpc --advertise-refs "%s"' % (
 
                    _git_path, git_command[4:], self.content_path),
 
                starting_values=[
 
                    packet_len + server_advert + '0000'
 
                ]
 
            out = subprocessio.SubprocessIOChunker(cmd,
 
                starting_values=[packet_len + server_advert + '0000']
 
            )
 
        except EnvironmentError, e:
 
            log.error(traceback.format_exc())
 
            raise exc.HTTPExpectationFailed()
 
        resp = Response()
 
        resp.content_type = 'application/x-%s-advertisement' % str(git_command)
 
        resp.charset = None
 
        resp.app_iter = out
 
        return resp
 

	
 
    def backend(self, request, environ):
 
        """
 
        WSGI Response producer for HTTP POST Git Smart HTTP requests.
 
        Reads commands and data from HTTP POST's body.
 
        returns an iterator obj with contents of git command's
 
        response to stdout
 
        """
 
        _git_path = kallithea.CONFIG.get('git_path', 'git')
 
        git_command = self._get_fixedpath(request.path_info)
 
        if git_command not in self.commands:
 
            log.debug('command %s not allowed' % git_command)
 
            return exc.HTTPMethodNotAllowed()
 

	
 
        if 'CONTENT_LENGTH' in environ:
 
            inputstream = FileWrapper(environ['wsgi.input'],
 
                                      request.content_length)
 
        else:
 
            inputstream = environ['wsgi.input']
 

	
 
        try:
 
            gitenv = os.environ
 
        gitenv = dict(os.environ)
 
            # forget all configs
 
            gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 
            opts = dict(
 
                env=gitenv,
 
                cwd=self.content_path,
 
            )
 
            cmd = r'%s %s --stateless-rpc "%s"' % (_git_path, git_command[4:],
 
                                                   self.content_path),
 
            log.debug('handling cmd %s' % cmd)
 
        cmd = [_git_path, git_command[4:], '--stateless-rpc', self.content_path]
 
        log.debug('handling cmd %s', cmd)
 
        try:
 
            out = subprocessio.SubprocessIOChunker(
 
                cmd,
 
                inputstream=inputstream,
 
                **opts
 
                env=gitenv,
 
                cwd=self.content_path,
 
            )
 
        except EnvironmentError, e:
 
            log.error(traceback.format_exc())
 
            raise exc.HTTPExpectationFailed()
 

	
 
        if git_command in [u'git-receive-pack']:
 
            # updating refs manually after each push.
 
            # Needed for pre-1.7.0.4 git clients using regular HTTP mode.
 
            from kallithea.lib.vcs import get_repo
 
            from dulwich.server import update_server_info
 
            repo = get_repo(self.content_path)
 
            if repo:
 
                update_server_info(repo._repo)
 

	
 
        resp = Response()
 
        resp.content_type = 'application/x-%s-result' % git_command.encode('utf8')
 
        resp.charset = None
 
        resp.app_iter = out
 
        return resp
 

	
 
    def __call__(self, environ, start_response):
 
        request = Request(environ)
 
        _path = self._get_fixedpath(request.path_info)
 
        if _path.startswith('info/refs'):
 
            app = self.inforefs
 
        elif [a for a in self.valid_accepts if a in request.accept]:
 
            app = self.backend
 
        try:
 
            resp = app(request, environ)
 
        except exc.HTTPException, e:
 
            resp = e
 
            log.error(traceback.format_exc())
 
        except Exception, e:
 
            log.error(traceback.format_exc())
 
            resp = exc.HTTPInternalServerError()
 
        return resp(environ, start_response)
 

	
 

	
 
class GitDirectory(object):
 

	
 
    def __init__(self, repo_root, repo_name, extras):
 
        repo_location = os.path.join(repo_root, repo_name)
 
        if not os.path.isdir(repo_location):
 
            raise OSError(repo_location)
 

	
 
        self.content_path = repo_location
 
        self.repo_name = repo_name
 
        self.repo_location = repo_location
 
        self.extras = extras
 

	
 
    def __call__(self, environ, start_response):
 
        content_path = self.content_path
 
        try:
 
            app = GitRepository(self.repo_name, content_path, self.extras)
 
        except (AssertionError, OSError):
 
            content_path = os.path.join(content_path, '.git')
 
            if os.path.isdir(content_path):
 
                app = GitRepository(self.repo_name, content_path, self.extras)
 
            else:
 
                return exc.HTTPNotFound()(environ, start_response)
 
        return app(environ, start_response)
 

	
 

	
 
def make_wsgi_app(repo_name, repo_root, extras):
 
    from dulwich.web import LimitedInputFilter, GunzipFilter
 
    app = GitDirectory(repo_root, repo_name, extras)
 
    return GunzipFilter(LimitedInputFilter(app))
kallithea/lib/utils.py
Show inline comments
 
@@ -34,839 +34,839 @@ import paste
 
import beaker
 
import tarfile
 
import shutil
 
import decorator
 
import warnings
 
from os.path import abspath
 
from os.path import dirname as dn, join as jn
 

	
 
from paste.script.command import Command, BadCommand
 

	
 
from webhelpers.text import collapse, remove_formatting, strip_tags
 
from beaker.cache import _cache_decorate
 

	
 
from kallithea import BRAND
 

	
 
from kallithea.lib.vcs.utils.hgcompat import ui, config
 
from kallithea.lib.vcs.utils.helpers import get_scm
 
from kallithea.lib.vcs.exceptions import VCSError
 

	
 
from kallithea.lib.caching_query import FromCache
 

	
 
from kallithea.model import meta
 
from kallithea.model.db import Repository, User, Ui, \
 
    UserLog, RepoGroup, Setting, CacheInvalidation, UserGroup
 
from kallithea.model.meta import Session
 
from kallithea.model.repo_group import RepoGroupModel
 
from kallithea.lib.utils2 import safe_str, safe_unicode, get_current_authuser
 
from kallithea.lib.vcs.utils.fakemod import create_module
 

	
 
log = logging.getLogger(__name__)
 

	
 
REMOVED_REPO_PAT = re.compile(r'rm__\d{8}_\d{6}_\d{6}_.*')
 

	
 

	
 
def recursive_replace(str_, replace=' '):
 
    """
 
    Recursive replace of given sign to just one instance
 

	
 
    :param str_: given string
 
    :param replace: char to find and replace multiple instances
 

	
 
    Examples::
 
    >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
 
    'Mighty-Mighty-Bo-sstones'
 
    """
 

	
 
    if str_.find(replace * 2) == -1:
 
        return str_
 
    else:
 
        str_ = str_.replace(replace * 2, replace)
 
        return recursive_replace(str_, replace)
 

	
 

	
 
def repo_name_slug(value):
 
    """
 
    Return slug of name of repository
 
    This function is called on each creation/modification
 
    of repository to prevent bad names in repo
 
    """
 

	
 
    slug = remove_formatting(value)
 
    slug = strip_tags(slug)
 

	
 
    for c in """`?=[]\;'"<>,/~!@#$%^&*()+{}|: """:
 
        slug = slug.replace(c, '-')
 
    slug = recursive_replace(slug, '-')
 
    slug = collapse(slug, '-')
 
    return slug
 

	
 

	
 
#==============================================================================
 
# PERM DECORATOR HELPERS FOR EXTRACTING NAMES FOR PERM CHECKS
 
#==============================================================================
 
def get_repo_slug(request):
 
    _repo = request.environ['pylons.routes_dict'].get('repo_name')
 
    if _repo:
 
        _repo = _repo.rstrip('/')
 
    return _repo
 

	
 

	
 
def get_repo_group_slug(request):
 
    _group = request.environ['pylons.routes_dict'].get('group_name')
 
    if _group:
 
        _group = _group.rstrip('/')
 
    return _group
 

	
 

	
 
def get_user_group_slug(request):
 
    _group = request.environ['pylons.routes_dict'].get('id')
 
    _group = UserGroup.get(_group)
 
    if _group:
 
        return _group.users_group_name
 
    return None
 

	
 

	
 
def _extract_id_from_repo_name(repo_name):
 
    if repo_name.startswith('/'):
 
        repo_name = repo_name.lstrip('/')
 
    by_id_match = re.match(r'^_(\d{1,})', repo_name)
 
    if by_id_match:
 
        return by_id_match.groups()[0]
 

	
 

	
 
def get_repo_by_id(repo_name):
 
    """
 
    Extracts repo_name by id from special urls. Example url is _11/repo_name
 

	
 
    :param repo_name:
 
    :return: repo_name if matched else None
 
    """
 
    _repo_id = _extract_id_from_repo_name(repo_name)
 
    if _repo_id:
 
        from kallithea.model.db import Repository
 
        repo = Repository.get(_repo_id)
 
        if repo:
 
            # TODO: return repo instead of reponame? or would that be a layering violation?
 
            return repo.repo_name
 
    return None
 

	
 

	
 
def action_logger(user, action, repo, ipaddr='', sa=None, commit=False):
 
    """
 
    Action logger for various actions made by users
 

	
 
    :param user: user that made this action, can be a unique username string or
 
        object containing user_id attribute
 
    :param action: action to log, should be on of predefined unique actions for
 
        easy translations
 
    :param repo: string name of repository or object containing repo_id,
 
        that action was made on
 
    :param ipaddr: optional IP address from what the action was made
 
    :param sa: optional sqlalchemy session
 

	
 
    """
 

	
 
    if not sa:
 
        sa = meta.Session()
 
    # if we don't get explicit IP address try to get one from registered user
 
    # in tmpl context var
 
    if not ipaddr:
 
        ipaddr = getattr(get_current_authuser(), 'ip_addr', '')
 

	
 
    if getattr(user, 'user_id', None):
 
        user_obj = User.get(user.user_id)
 
    elif isinstance(user, basestring):
 
        user_obj = User.get_by_username(user)
 
    else:
 
        raise Exception('You have to provide a user object or a username')
 

	
 
    if getattr(repo, 'repo_id', None):
 
        repo_obj = Repository.get(repo.repo_id)
 
        repo_name = repo_obj.repo_name
 
    elif isinstance(repo, basestring):
 
        repo_name = repo.lstrip('/')
 
        repo_obj = Repository.get_by_repo_name(repo_name)
 
    else:
 
        repo_obj = None
 
        repo_name = ''
 

	
 
    user_log = UserLog()
 
    user_log.user_id = user_obj.user_id
 
    user_log.username = user_obj.username
 
    user_log.action = safe_unicode(action)
 

	
 
    user_log.repository = repo_obj
 
    user_log.repository_name = repo_name
 

	
 
    user_log.action_date = datetime.datetime.now()
 
    user_log.user_ip = ipaddr
 
    sa.add(user_log)
 

	
 
    log.info('Logging action:%s on %s by user:%s ip:%s' %
 
             (action, safe_unicode(repo), user_obj, ipaddr))
 
    if commit:
 
        sa.commit()
 

	
 

	
 
def get_filesystem_repos(path, recursive=False, skip_removed_repos=True):
 
    """
 
    Scans given path for repos and return (name,(type,path)) tuple
 

	
 
    :param path: path to scan for repositories
 
    :param recursive: recursive search and return names with subdirs in front
 
    """
 

	
 
    # remove ending slash for better results
 
    path = path.rstrip(os.sep)
 
    log.debug('now scanning in %s location recursive:%s...' % (path, recursive))
 

	
 
    def _get_repos(p):
 
        if not os.access(p, os.R_OK) or not os.access(p, os.X_OK):
 
            log.warning('ignoring repo path without access: %s' % (p,))
 
            return
 
        if not os.access(p, os.W_OK):
 
            log.warning('repo path without write access: %s' % (p,))
 
        for dirpath in os.listdir(p):
 
            if os.path.isfile(os.path.join(p, dirpath)):
 
                continue
 
            cur_path = os.path.join(p, dirpath)
 

	
 
            # skip removed repos
 
            if skip_removed_repos and REMOVED_REPO_PAT.match(dirpath):
 
                continue
 

	
 
            #skip .<somethin> dirs
 
            if dirpath.startswith('.'):
 
                continue
 

	
 
            try:
 
                scm_info = get_scm(cur_path)
 
                yield scm_info[1].split(path, 1)[-1].lstrip(os.sep), scm_info
 
            except VCSError:
 
                if not recursive:
 
                    continue
 
                #check if this dir containts other repos for recursive scan
 
                rec_path = os.path.join(p, dirpath)
 
                if not os.path.islink(rec_path) and os.path.isdir(rec_path):
 
                    for inner_scm in _get_repos(rec_path):
 
                        yield inner_scm
 

	
 
    return _get_repos(path)
 

	
 

	
 
def is_valid_repo(repo_name, base_path, scm=None):
 
    """
 
    Returns True if given path is a valid repository False otherwise.
 
    If scm param is given also compare if given scm is the same as expected
 
    from scm parameter
 

	
 
    :param repo_name:
 
    :param base_path:
 
    :param scm:
 

	
 
    :return True: if given path is a valid repository
 
    """
 
    full_path = os.path.join(safe_str(base_path), safe_str(repo_name))
 

	
 
    try:
 
        scm_ = get_scm(full_path)
 
        if scm:
 
            return scm_[0] == scm
 
        return True
 
    except VCSError:
 
        return False
 

	
 

	
 
def is_valid_repo_group(repo_group_name, base_path, skip_path_check=False):
 
    """
 
    Returns True if given path is a repository group False otherwise
 

	
 
    :param repo_name:
 
    :param base_path:
 
    """
 
    full_path = os.path.join(safe_str(base_path), safe_str(repo_group_name))
 

	
 
    # check if it's not a repo
 
    if is_valid_repo(repo_group_name, base_path):
 
        return False
 

	
 
    try:
 
        # we need to check bare git repos at higher level
 
        # since we might match branches/hooks/info/objects or possible
 
        # other things inside bare git repo
 
        get_scm(os.path.dirname(full_path))
 
        return False
 
    except VCSError:
 
        pass
 

	
 
    # check if it's a valid path
 
    if skip_path_check or os.path.isdir(full_path):
 
        return True
 

	
 
    return False
 

	
 

	
 
def ask_ok(prompt, retries=4, complaint='Yes or no please!'):
 
    while True:
 
        ok = raw_input(prompt)
 
        if ok in ('y', 'ye', 'yes'):
 
            return True
 
        if ok in ('n', 'no', 'nop', 'nope'):
 
            return False
 
        retries = retries - 1
 
        if retries < 0:
 
            raise IOError
 
        print complaint
 

	
 
#propagated from mercurial documentation
 
ui_sections = ['alias', 'auth',
 
                'decode/encode', 'defaults',
 
                'diff', 'email',
 
                'extensions', 'format',
 
                'merge-patterns', 'merge-tools',
 
                'hooks', 'http_proxy',
 
                'smtp', 'patch',
 
                'paths', 'profiling',
 
                'server', 'trusted',
 
                'ui', 'web', ]
 

	
 

	
 
def make_ui(read_from='file', path=None, checkpaths=True, clear_session=True):
 
    """
 
    A function that will read python rc files or database
 
    and make an mercurial ui object from read options
 

	
 
    :param path: path to mercurial config file
 
    :param checkpaths: check the path
 
    :param read_from: read from 'file' or 'db'
 
    """
 

	
 
    baseui = ui.ui()
 

	
 
    # clean the baseui object
 
    baseui._ocfg = config.config()
 
    baseui._ucfg = config.config()
 
    baseui._tcfg = config.config()
 

	
 
    if read_from == 'file':
 
        if not os.path.isfile(path):
 
            log.debug('hgrc file is not present at %s, skipping...' % path)
 
            return False
 
        log.debug('reading hgrc from %s' % path)
 
        cfg = config.config()
 
        cfg.read(path)
 
        for section in ui_sections:
 
            for k, v in cfg.items(section):
 
                log.debug('settings ui from file: [%s] %s=%s' % (section, k, v))
 
                baseui.setconfig(safe_str(section), safe_str(k), safe_str(v))
 

	
 
    elif read_from == 'db':
 
        sa = meta.Session()
 
        ret = sa.query(Ui).all()
 

	
 
        hg_ui = ret
 
        for ui_ in hg_ui:
 
            if ui_.ui_active:
 
                ui_val = safe_str(ui_.ui_value)
 
                if ui_.ui_section == 'hooks' and BRAND != 'kallithea' and ui_val.startswith('python:' + BRAND + '.lib.hooks.'):
 
                    ui_val = ui_val.replace('python:' + BRAND + '.lib.hooks.', 'python:kallithea.lib.hooks.')
 
                log.debug('settings ui from db: [%s] %s=%s', ui_.ui_section,
 
                          ui_.ui_key, ui_val)
 
                baseui.setconfig(safe_str(ui_.ui_section), safe_str(ui_.ui_key),
 
                                 ui_val)
 
            if ui_.ui_key == 'push_ssl':
 
                # force set push_ssl requirement to False, kallithea
 
                # handles that
 
                baseui.setconfig(safe_str(ui_.ui_section), safe_str(ui_.ui_key),
 
                                 False)
 
        if clear_session:
 
            meta.Session.remove()
 

	
 
        # prevent interactive questions for ssh password / passphrase
 
        ssh = baseui.config('ui', 'ssh', default='ssh')
 
        baseui.setconfig('ui', 'ssh', '%s -oBatchMode=yes -oIdentitiesOnly=yes' % ssh)
 

	
 
    return baseui
 

	
 

	
 
def set_app_settings(config):
 
    """
 
    Updates pylons config with new settings from database
 

	
 
    :param config:
 
    """
 
    hgsettings = Setting.get_app_settings()
 

	
 
    for k, v in hgsettings.items():
 
        config[k] = v
 

	
 

	
 
def set_vcs_config(config):
 
    """
 
    Patch VCS config with some Kallithea specific stuff
 

	
 
    :param config: kallithea.CONFIG
 
    """
 
    import kallithea
 
    from kallithea.lib.vcs import conf
 
    from kallithea.lib.utils2 import aslist
 
    conf.settings.BACKENDS = {
 
        'hg': 'kallithea.lib.vcs.backends.hg.MercurialRepository',
 
        'git': 'kallithea.lib.vcs.backends.git.GitRepository',
 
    }
 

	
 
    conf.settings.GIT_EXECUTABLE_PATH = config.get('git_path', 'git')
 
    conf.settings.GIT_REV_FILTER = config.get('git_rev_filter', '--all').strip()
 
    conf.settings.DEFAULT_ENCODINGS = aslist(config.get('default_encoding',
 
                                                        'utf8'), sep=',')
 

	
 

	
 
def map_groups(path):
 
    """
 
    Given a full path to a repository, create all nested groups that this
 
    repo is inside. This function creates parent-child relationships between
 
    groups and creates default perms for all new groups.
 

	
 
    :param paths: full path to repository
 
    """
 
    sa = meta.Session()
 
    groups = path.split(Repository.url_sep())
 
    parent = None
 
    group = None
 

	
 
    # last element is repo in nested groups structure
 
    groups = groups[:-1]
 
    rgm = RepoGroupModel(sa)
 
    owner = User.get_first_admin()
 
    for lvl, group_name in enumerate(groups):
 
        group_name = '/'.join(groups[:lvl] + [group_name])
 
        group = RepoGroup.get_by_group_name(group_name)
 
        desc = '%s group' % group_name
 

	
 
        # skip folders that are now removed repos
 
        if REMOVED_REPO_PAT.match(group_name):
 
            break
 

	
 
        if group is None:
 
            log.debug('creating group level: %s group_name: %s'
 
                      % (lvl, group_name))
 
            group = RepoGroup(group_name, parent)
 
            group.group_description = desc
 
            group.user = owner
 
            sa.add(group)
 
            perm_obj = rgm._create_default_perms(group)
 
            sa.add(perm_obj)
 
            sa.flush()
 

	
 
        parent = group
 
    return group
 

	
 

	
 
def repo2db_mapper(initial_repo_list, remove_obsolete=False,
 
                   install_git_hook=False, user=None):
 
    """
 
    maps all repos given in initial_repo_list, non existing repositories
 
    are created, if remove_obsolete is True it also check for db entries
 
    that are not in initial_repo_list and removes them.
 

	
 
    :param initial_repo_list: list of repositories found by scanning methods
 
    :param remove_obsolete: check for obsolete entries in database
 
    :param install_git_hook: if this is True, also check and install githook
 
        for a repo if missing
 
    """
 
    from kallithea.model.repo import RepoModel
 
    from kallithea.model.scm import ScmModel
 
    sa = meta.Session()
 
    repo_model = RepoModel()
 
    if user is None:
 
        user = User.get_first_admin()
 
    added = []
 

	
 
    ##creation defaults
 
    defs = Setting.get_default_repo_settings(strip_prefix=True)
 
    enable_statistics = defs.get('repo_enable_statistics')
 
    enable_locking = defs.get('repo_enable_locking')
 
    enable_downloads = defs.get('repo_enable_downloads')
 
    private = defs.get('repo_private')
 

	
 
    for name, repo in initial_repo_list.items():
 
        group = map_groups(name)
 
        unicode_name = safe_unicode(name)
 
        db_repo = repo_model.get_by_repo_name(unicode_name)
 
        # found repo that is on filesystem not in Kallithea database
 
        if not db_repo:
 
            log.info('repository %s not found, creating now' % name)
 
            added.append(name)
 
            desc = (repo.description
 
                    if repo.description != 'unknown'
 
                    else '%s repository' % name)
 

	
 
            new_repo = repo_model._create_repo(
 
                repo_name=name,
 
                repo_type=repo.alias,
 
                description=desc,
 
                repo_group=getattr(group, 'group_id', None),
 
                owner=user,
 
                enable_locking=enable_locking,
 
                enable_downloads=enable_downloads,
 
                enable_statistics=enable_statistics,
 
                private=private,
 
                state=Repository.STATE_CREATED
 
            )
 
            sa.commit()
 
            # we added that repo just now, and make sure it has githook
 
            # installed, and updated server info
 
            if new_repo.repo_type == 'git':
 
                git_repo = new_repo.scm_instance
 
                ScmModel().install_git_hook(git_repo)
 
                # update repository server-info
 
                log.debug('Running update server info')
 
                git_repo._update_server_info()
 
            new_repo.update_changeset_cache()
 
        elif install_git_hook:
 
            if db_repo.repo_type == 'git':
 
                ScmModel().install_git_hook(db_repo.scm_instance)
 

	
 
    removed = []
 
    # remove from database those repositories that are not in the filesystem
 
    for repo in sa.query(Repository).all():
 
        if repo.repo_name not in initial_repo_list.keys():
 
            if remove_obsolete:
 
                log.debug("Removing non-existing repository found in db `%s`" %
 
                          repo.repo_name)
 
                try:
 
                    RepoModel(sa).delete(repo, forks='detach', fs_remove=False)
 
                    sa.commit()
 
                except Exception:
 
                    #don't hold further removals on error
 
                    log.error(traceback.format_exc())
 
                    sa.rollback()
 
            removed.append(repo.repo_name)
 
    return added, removed
 

	
 

	
 
# set cache regions for beaker so celery can utilise it
 
def add_cache(settings):
 
    cache_settings = {'regions': None}
 
    for key in settings.keys():
 
        for prefix in ['beaker.cache.', 'cache.']:
 
            if key.startswith(prefix):
 
                name = key.split(prefix)[1].strip()
 
                cache_settings[name] = settings[key].strip()
 
    if cache_settings['regions']:
 
        for region in cache_settings['regions'].split(','):
 
            region = region.strip()
 
            region_settings = {}
 
            for key, value in cache_settings.items():
 
                if key.startswith(region):
 
                    region_settings[key.split('.')[1]] = value
 
            region_settings['expire'] = int(region_settings.get('expire',
 
                                                                60))
 
            region_settings.setdefault('lock_dir',
 
                                       cache_settings.get('lock_dir'))
 
            region_settings.setdefault('data_dir',
 
                                       cache_settings.get('data_dir'))
 

	
 
            if 'type' not in region_settings:
 
                region_settings['type'] = cache_settings.get('type',
 
                                                             'memory')
 
            beaker.cache.cache_regions[region] = region_settings
 

	
 

	
 
def load_rcextensions(root_path):
 
    import kallithea
 
    from kallithea.config import conf
 

	
 
    path = os.path.join(root_path, 'rcextensions', '__init__.py')
 
    if os.path.isfile(path):
 
        rcext = create_module('rc', path)
 
        EXT = kallithea.EXTENSIONS = rcext
 
        log.debug('Found rcextensions now loading %s...' % rcext)
 

	
 
        # Additional mappings that are not present in the pygments lexers
 
        conf.LANGUAGES_EXTENSIONS_MAP.update(getattr(EXT, 'EXTRA_MAPPINGS', {}))
 

	
 
        #OVERRIDE OUR EXTENSIONS FROM RC-EXTENSIONS (if present)
 

	
 
        if getattr(EXT, 'INDEX_EXTENSIONS', []):
 
            log.debug('settings custom INDEX_EXTENSIONS')
 
            conf.INDEX_EXTENSIONS = getattr(EXT, 'INDEX_EXTENSIONS', [])
 

	
 
        #ADDITIONAL MAPPINGS
 
        log.debug('adding extra into INDEX_EXTENSIONS')
 
        conf.INDEX_EXTENSIONS.extend(getattr(EXT, 'EXTRA_INDEX_EXTENSIONS', []))
 

	
 
        # auto check if the module is not missing any data, set to default if is
 
        # this will help autoupdate new feature of rcext module
 
        #from kallithea.config import rcextensions
 
        #for k in dir(rcextensions):
 
        #    if not k.startswith('_') and not hasattr(EXT, k):
 
        #        setattr(EXT, k, getattr(rcextensions, k))
 

	
 

	
 
def get_custom_lexer(extension):
 
    """
 
    returns a custom lexer if it's defined in rcextensions module, or None
 
    if there's no custom lexer defined
 
    """
 
    import kallithea
 
    from pygments import lexers
 
    #check if we didn't define this extension as other lexer
 
    if kallithea.EXTENSIONS and extension in kallithea.EXTENSIONS.EXTRA_LEXERS:
 
        _lexer_name = kallithea.EXTENSIONS.EXTRA_LEXERS[extension]
 
        return lexers.get_lexer_by_name(_lexer_name)
 

	
 

	
 
#==============================================================================
 
# TEST FUNCTIONS AND CREATORS
 
#==============================================================================
 
def create_test_index(repo_location, config, full_index):
 
    """
 
    Makes default test index
 

	
 
    :param config: test config
 
    :param full_index:
 
    """
 

	
 
    from kallithea.lib.indexers.daemon import WhooshIndexingDaemon
 
    from kallithea.lib.pidlock import DaemonLock, LockHeld
 

	
 
    repo_location = repo_location
 

	
 
    index_location = os.path.join(config['app_conf']['index_dir'])
 
    if not os.path.exists(index_location):
 
        os.makedirs(index_location)
 

	
 
    try:
 
        l = DaemonLock(file_=jn(dn(index_location), 'make_index.lock'))
 
        WhooshIndexingDaemon(index_location=index_location,
 
                             repo_location=repo_location)\
 
            .run(full_index=full_index)
 
        l.release()
 
    except LockHeld:
 
        pass
 

	
 

	
 
def create_test_env(repos_test_path, config):
 
    """
 
    Makes a fresh database and
 
    install test repository into tmp dir
 
    """
 
    from kallithea.lib.db_manage import DbManage
 
    from kallithea.tests import HG_REPO, GIT_REPO, TESTS_TMP_PATH
 

	
 
    # PART ONE create db
 
    dbconf = config['sqlalchemy.db1.url']
 
    log.debug('making test db %s' % dbconf)
 

	
 
    # create test dir if it doesn't exist
 
    if not os.path.isdir(repos_test_path):
 
        log.debug('Creating testdir %s' % repos_test_path)
 
        os.makedirs(repos_test_path)
 

	
 
    dbmanage = DbManage(log_sql=True, dbconf=dbconf, root=config['here'],
 
                        tests=True)
 
    dbmanage.create_tables(override=True)
 
    # for tests dynamically set new root paths based on generated content
 
    dbmanage.create_settings(dbmanage.config_prompt(repos_test_path))
 
    dbmanage.create_default_user()
 
    dbmanage.admin_prompt()
 
    dbmanage.create_permissions()
 
    dbmanage.populate_default_permissions()
 
    Session().commit()
 
    # PART TWO make test repo
 
    log.debug('making test vcs repositories')
 

	
 
    idx_path = config['app_conf']['index_dir']
 
    data_path = config['app_conf']['cache_dir']
 

	
 
    #clean index and data
 
    if idx_path and os.path.exists(idx_path):
 
        log.debug('remove %s' % idx_path)
 
        shutil.rmtree(idx_path)
 

	
 
    if data_path and os.path.exists(data_path):
 
        log.debug('remove %s' % data_path)
 
        shutil.rmtree(data_path)
 

	
 
    #CREATE DEFAULT TEST REPOS
 
    cur_dir = dn(dn(abspath(__file__)))
 
    tar = tarfile.open(jn(cur_dir, 'tests', 'fixtures', "vcs_test_hg.tar.gz"))
 
    tar.extractall(jn(TESTS_TMP_PATH, HG_REPO))
 
    tar.close()
 

	
 
    cur_dir = dn(dn(abspath(__file__)))
 
    tar = tarfile.open(jn(cur_dir, 'tests', 'fixtures', "vcs_test_git.tar.gz"))
 
    tar.extractall(jn(TESTS_TMP_PATH, GIT_REPO))
 
    tar.close()
 

	
 
    #LOAD VCS test stuff
 
    from kallithea.tests.vcs import setup_package
 
    setup_package()
 

	
 

	
 
#==============================================================================
 
# PASTER COMMANDS
 
#==============================================================================
 
class BasePasterCommand(Command):
 
    """
 
    Abstract Base Class for paster commands.
 

	
 
    The celery commands are somewhat aggressive about loading
 
    celery.conf, and since our module sets the `CELERY_LOADER`
 
    environment variable to our loader, we have to bootstrap a bit and
 
    make sure we've had a chance to load the pylons config off of the
 
    command line, otherwise everything fails.
 
    """
 
    min_args = 1
 
    min_args_error = "Please provide a paster config file as an argument."
 
    takes_config_file = 1
 
    requires_config_file = True
 

	
 
    def notify_msg(self, msg, log=False):
 
        """Make a notification to user, additionally if logger is passed
 
        it logs this action using given logger
 

	
 
        :param msg: message that will be printed to user
 
        :param log: logging instance, to use to additionally log this message
 

	
 
        """
 
        if log and isinstance(log, logging):
 
            log(msg)
 

	
 
    def run(self, args):
 
        """
 
        Overrides Command.run
 

	
 
        Checks for a config file argument and loads it.
 
        """
 
        if len(args) < self.min_args:
 
            raise BadCommand(
 
                self.min_args_error % {'min_args': self.min_args,
 
                                       'actual_args': len(args)})
 

	
 
        # Decrement because we're going to lob off the first argument.
 
        # @@ This is hacky
 
        self.min_args -= 1
 
        self.bootstrap_config(args[0])
 
        self.update_parser()
 
        return super(BasePasterCommand, self).run(args[1:])
 

	
 
    def update_parser(self):
 
        """
 
        Abstract method.  Allows for the class's parser to be updated
 
        before the superclass's `run` method is called.  Necessary to
 
        allow options/arguments to be passed through to the underlying
 
        celery command.
 
        """
 
        raise NotImplementedError("Abstract Method.")
 

	
 
    def bootstrap_config(self, conf):
 
        """
 
        Loads the pylons configuration.
 
        """
 
        from pylons import config as pylonsconfig
 

	
 
        self.path_to_ini_file = os.path.realpath(conf)
 
        conf = paste.deploy.appconfig('config:' + self.path_to_ini_file)
 
        pylonsconfig.init_app(conf.global_conf, conf.local_conf)
 

	
 
    def _init_session(self):
 
        """
 
        Inits SqlAlchemy Session
 
        """
 
        logging.config.fileConfig(self.path_to_ini_file)
 
        from pylons import config
 
        from kallithea.model import init_model
 
        from kallithea.lib.utils2 import engine_from_config
 

	
 
        #get to remove repos !!
 
        add_cache(config)
 
        engine = engine_from_config(config, 'sqlalchemy.db1.')
 
        init_model(engine)
 

	
 

	
 
def check_git_version():
 
    """
 
    Checks what version of git is installed in system, and issues a warning
 
    if it's too old for Kallithea to work properly.
 
    """
 
    from kallithea import BACKENDS
 
    from kallithea.lib.vcs.backends.git.repository import GitRepository
 
    from kallithea.lib.vcs.conf import settings
 
    from distutils.version import StrictVersion
 

	
 
    if 'git' not in BACKENDS:
 
        return None
 

	
 
    stdout, stderr = GitRepository._run_git_command('--version', _bare=True,
 
    stdout, stderr = GitRepository._run_git_command(['--version'], _bare=True,
 
                                                    _safe=True)
 

	
 
    m = re.search("\d+.\d+.\d+", stdout)
 
    if m:
 
        ver = StrictVersion(m.group(0))
 
    else:
 
        ver = StrictVersion('0.0.0')
 

	
 
    req_ver = StrictVersion('1.7.4')
 

	
 
    log.debug('Git executable: "%s" version %s detected: %s'
 
              % (settings.GIT_EXECUTABLE_PATH, ver, stdout))
 
    if stderr:
 
        log.warning('Error detecting git version: %r' % stderr)
 
    elif ver < req_ver:
 
        log.warning('Kallithea detected git version %s, which is too old '
 
                    'for the system to function properly. '
 
                    'Please upgrade to version %s or later.' % (ver, req_ver))
 
    return ver
 

	
 

	
 
@decorator.decorator
 
def jsonify(func, *args, **kwargs):
 
    """Action decorator that formats output for JSON
 

	
 
    Given a function that will return content, this decorator will turn
 
    the result into JSON, with a content-type of 'application/json' and
 
    output it.
 

	
 
    """
 
    from pylons.decorators.util import get_pylons
 
    from kallithea.lib.compat import json
 
    pylons = get_pylons(args)
 
    pylons.response.headers['Content-Type'] = 'application/json; charset=utf-8'
 
    data = func(*args, **kwargs)
 
    if isinstance(data, (list, tuple)):
 
        msg = "JSON responses with Array envelopes are susceptible to " \
 
              "cross-site data leak attacks, see " \
 
              "http://wiki.pylonshq.com/display/pylonsfaq/Warnings"
 
        warnings.warn(msg, Warning, 2)
 
        log.warning(msg)
 
    log.debug("Returning JSON wrapped action output")
 
    return json.dumps(data, encoding='utf-8')
 

	
 

	
 
def conditional_cache(region, prefix, condition, func):
 
    """
 

	
 
    Conditional caching function use like::
 
        def _c(arg):
 
            #heavy computation function
 
            return data
 

	
 
        # denpending from condition the compute is wrapped in cache or not
 
        compute = conditional_cache('short_term', 'cache_desc', codnition=True, func=func)
 
        return compute(arg)
 

	
 
    :param region: name of cache region
 
    :param prefix: cache region prefix
 
    :param condition: condition for cache to be triggered, and return data cached
 
    :param func: wrapped heavy function to compute
 

	
 
    """
 
    wrapped = func
 
    if condition:
 
        log.debug('conditional_cache: True, wrapping call of '
 
                  'func: %s into %s region cache' % (region, func))
 
        wrapped = _cache_decorate((prefix,), None, None, region)(func)
 

	
 
    return wrapped
kallithea/lib/vcs/backends/git/changeset.py
Show inline comments
 
import re
 
from itertools import chain
 
from dulwich import objects
 
from subprocess import Popen, PIPE
 

	
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 
from kallithea.lib.vcs.exceptions import (
 
    RepositoryError, ChangesetError, NodeDoesNotExistError, VCSError,
 
    ChangesetDoesNotExistError, ImproperArchiveTypeError
 
)
 
from kallithea.lib.vcs.nodes import (
 
    FileNode, DirNode, NodeKind, RootNode, RemovedFileNode, SubModuleNode,
 
    ChangedFileNodesGenerator, AddedFileNodesGenerator, RemovedFileNodesGenerator
 
)
 
from kallithea.lib.vcs.utils import (
 
    safe_unicode, safe_str, safe_int, date_fromtimestamp
 
)
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 

	
 

	
 
class GitChangeset(BaseChangeset):
 
    """
 
    Represents state of the repository at single revision.
 
    """
 

	
 
    def __init__(self, repository, revision):
 
        self._stat_modes = {}
 
        self.repository = repository
 
        revision = safe_str(revision)
 
        try:
 
            commit = self.repository._repo[revision]
 
            if isinstance(commit, objects.Tag):
 
                revision = safe_str(commit.object[1])
 
                commit = self.repository._repo.get_object(commit.object[1])
 
        except KeyError:
 
            raise RepositoryError("Cannot get object with id %s" % revision)
 
        self.raw_id = revision
 
        self.id = self.raw_id
 
        self.short_id = self.raw_id[:12]
 
        self._commit = commit
 
        self._tree_id = commit.tree
 
        self._committer_property = 'committer'
 
        self._author_property = 'author'
 
        self._date_property = 'commit_time'
 
        self._date_tz_property = 'commit_timezone'
 
        self.revision = repository.revisions.index(revision)
 

	
 
        self.nodes = {}
 
        self._paths = {}
 

	
 
    @LazyProperty
 
    def message(self):
 
        return safe_unicode(self._commit.message)
 

	
 
    @LazyProperty
 
    def committer(self):
 
        return safe_unicode(getattr(self._commit, self._committer_property))
 

	
 
    @LazyProperty
 
    def author(self):
 
        return safe_unicode(getattr(self._commit, self._author_property))
 

	
 
    @LazyProperty
 
    def date(self):
 
        return date_fromtimestamp(getattr(self._commit, self._date_property),
 
                                  getattr(self._commit, self._date_tz_property))
 

	
 
    @LazyProperty
 
    def _timestamp(self):
 
        return getattr(self._commit, self._date_property)
 

	
 
    @LazyProperty
 
    def status(self):
 
        """
 
        Returns modified, added, removed, deleted files for current changeset
 
        """
 
        return self.changed, self.added, self.removed
 

	
 
    @LazyProperty
 
    def tags(self):
 
        _tags = []
 
        for tname, tsha in self.repository.tags.iteritems():
 
            if tsha == self.raw_id:
 
                _tags.append(tname)
 
        return _tags
 

	
 
    @LazyProperty
 
    def branch(self):
 

	
 
        heads = self.repository._heads(reverse=False)
 

	
 
        ref = heads.get(self.raw_id)
 
        if ref:
 
            return safe_unicode(ref)
 

	
 
    def _fix_path(self, path):
 
        """
 
        Paths are stored without trailing slash so we need to get rid off it if
 
        needed.
 
        """
 
        if path.endswith('/'):
 
            path = path.rstrip('/')
 
        return path
 

	
 
    def _get_id_for_path(self, path):
 
        path = safe_str(path)
 
        # FIXME: Please, spare a couple of minutes and make those codes cleaner;
 
        if not path in self._paths:
 
            path = path.strip('/')
 
            # set root tree
 
            tree = self.repository._repo[self._tree_id]
 
            if path == '':
 
                self._paths[''] = tree.id
 
                return tree.id
 
            splitted = path.split('/')
 
            dirs, name = splitted[:-1], splitted[-1]
 
            curdir = ''
 

	
 
            # initially extract things from root dir
 
            for item, stat, id in tree.iteritems():
 
                if curdir:
 
                    name = '/'.join((curdir, item))
 
                else:
 
                    name = item
 
                self._paths[name] = id
 
                self._stat_modes[name] = stat
 

	
 
            for dir in dirs:
 
                if curdir:
 
                    curdir = '/'.join((curdir, dir))
 
                else:
 
                    curdir = dir
 
                dir_id = None
 
                for item, stat, id in tree.iteritems():
 
                    if dir == item:
 
                        dir_id = id
 
                if dir_id:
 
                    # Update tree
 
                    tree = self.repository._repo[dir_id]
 
                    if not isinstance(tree, objects.Tree):
 
                        raise ChangesetError('%s is not a directory' % curdir)
 
                else:
 
                    raise ChangesetError('%s have not been found' % curdir)
 

	
 
                # cache all items from the given traversed tree
 
                for item, stat, id in tree.iteritems():
 
                    if curdir:
 
                        name = '/'.join((curdir, item))
 
                    else:
 
                        name = item
 
                    self._paths[name] = id
 
                    self._stat_modes[name] = stat
 
            if not path in self._paths:
 
                raise NodeDoesNotExistError("There is no file nor directory "
 
                    "at the given path '%s' at revision %s"
 
                    % (path, safe_str(self.short_id)))
 
        return self._paths[path]
 

	
 
    def _get_kind(self, path):
 
        obj = self.repository._repo[self._get_id_for_path(path)]
 
        if isinstance(obj, objects.Blob):
 
            return NodeKind.FILE
 
        elif isinstance(obj, objects.Tree):
 
            return NodeKind.DIR
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %s at "
 
                " '%s'" % (self.raw_id, path))
 
        return path
 

	
 
    def _get_file_nodes(self):
 
        return chain(*(t[2] for t in self.walk()))
 

	
 
    @LazyProperty
 
    def parents(self):
 
        """
 
        Returns list of parents changesets.
 
        """
 
        return [self.repository.get_changeset(parent)
 
                for parent in self._commit.parents]
 

	
 
    @LazyProperty
 
    def children(self):
 
        """
 
        Returns list of children changesets.
 
        """
 
        rev_filter = settings.GIT_REV_FILTER
 
        so, se = self.repository.run_git_command(
 
            "rev-list %s --children" % (rev_filter)
 
            ['rev-list', rev_filter, '--children']
 
        )
 

	
 
        children = []
 
        pat = re.compile(r'^%s' % self.raw_id)
 
        for l in so.splitlines():
 
            if pat.match(l):
 
                childs = l.split(' ')[1:]
 
                children.extend(childs)
 
        return [self.repository.get_changeset(cs) for cs in children]
 

	
 
    def next(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        cs = self
 
        while True:
 
            try:
 
                next_ = cs.revision + 1
 
                next_rev = cs.repository.revisions[next_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = cs.repository.get_changeset(next_rev)
 

	
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def prev(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        cs = self
 
        while True:
 
            try:
 
                prev_ = cs.revision - 1
 
                if prev_ < 0:
 
                    raise IndexError
 
                prev_rev = cs.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = cs.repository.get_changeset(prev_rev)
 

	
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
 
        rev2 = self
 
        return ''.join(self.repository.get_diff(rev1, rev2,
 
                                    ignore_whitespace=ignore_whitespace,
 
                                    context=context))
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        # ensure path is traversed
 
        path = safe_str(path)
 
        self._get_id_for_path(path)
 
        return self._stat_modes[path]
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.as_pretty_string()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.raw_length()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        TODO: This function now uses os underlying 'git' and 'grep' commands
 
        which is generally not good. Should be replaced with algorithm
 
        iterating commits.
 
        """
 
        self._get_filectx(path)
 
        cs_id = safe_str(self.id)
 
        f_path = safe_str(path)
 

	
 
        if limit:
 
            cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % (
 
                      safe_int(limit, 0), cs_id, f_path)
 
            cmd = ['log', '-n', str(safe_int(limit, 0)),
 
                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 

	
 
        else:
 
            cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % (
 
                      cs_id, f_path)
 
            cmd = ['log',
 
                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 
        so, se = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(sha) for sha in ids]
 

	
 
    def get_file_history_2(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        """
 
        self._get_filectx(path)
 
        from dulwich.walk import Walker
 
        include = [self.id]
 
        walker = Walker(self.repository._repo.object_store, include,
 
                        paths=[path], max_entries=1)
 
        return [self.repository.get_changeset(sha)
 
                for sha in (x.commit.id for x in walker)]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 

	
 
        TODO: This function now uses os underlying 'git' command which is
 
        generally not good. Should be replaced with algorithm iterating
 
        commits.
 
        """
 
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
 
        cmd = ['blame', '-l', '--root', '-r', self.id, '--', path]
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for boundaries
 
        # -r sha ==> blames for the given revision
 
        so, se = self.repository.run_git_command(cmd)
 

	
 
        for i, blame_line in enumerate(so.split('\n')[:-1]):
 
            ln_no = i + 1
 
            sha, line = re.split(r' ', blame_line, 1)
 
            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), line)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
 
            (``repo-tip.<KIND>``).
 
        :param subrepos: include subrepos in this archive.
 

	
 
        :raise ImproperArchiveTypeError: If given kind is wrong.
 
        :raise VcsError: If given stream is None
 

	
 
        """
 
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
 
        if kind not in allowed_kinds:
 
            raise ImproperArchiveTypeError('Archive kind not supported use one'
 
                'of %s', allowed_kinds)
 

	
 
        if prefix is None:
 
            prefix = '%s-%s' % (self.repository.name, self.short_id)
 
        elif prefix.startswith('/'):
 
            raise VCSError("Prefix cannot start with leading slash")
 
        elif prefix.strip() == '':
 
            raise VCSError("Prefix cannot be empty")
 

	
 
        if kind == 'zip':
 
            frmt = 'zip'
 
        else:
 
            frmt = 'tar'
 
        _git_path = settings.GIT_EXECUTABLE_PATH
 
        cmd = '%s archive --format=%s --prefix=%s/ %s' % (_git_path,
 
                                                frmt, prefix, self.raw_id)
 
        if kind == 'tgz':
 
            cmd += ' | gzip -9'
 
        elif kind == 'tbz2':
 
            cmd += ' | bzip2 -9'
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
 
                      cwd=self.repository.path)
 

	
 
        buffer_size = 1024 * 8
 
        chunk = popen.stdout.read(buffer_size)
 
        while chunk:
 
            stream.write(chunk)
 
            chunk = popen.stdout.read(buffer_size)
 
        # Make sure all descriptors would be read
 
        popen.communicate()
 

	
 
    def get_nodes(self, path):
 
        if self._get_kind(path) != NodeKind.DIR:
 
            raise ChangesetError("Directory does not exist for revision %s at "
 
                " '%s'" % (self.revision, path))
 
        path = self._fix_path(path)
 
        id = self._get_id_for_path(path)
 
        tree = self.repository._repo[id]
 
        dirnodes = []
 
        filenodes = []
 
        als = self.repository.alias
 
        for name, stat, id in tree.iteritems():
 
            if objects.S_ISGITLINK(stat):
 
                dirnodes.append(SubModuleNode(name, url=None, changeset=id,
 
                                              alias=als))
 
                continue
 

	
 
            obj = self.repository._repo.get_object(id)
 
            if path != '':
 
                obj_path = '/'.join((path, name))
 
            else:
 
                obj_path = name
 
            if obj_path not in self._stat_modes:
 
                self._stat_modes[obj_path] = stat
 
            if isinstance(obj, objects.Tree):
 
                dirnodes.append(DirNode(obj_path, changeset=self))
 
            elif isinstance(obj, objects.Blob):
 
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
 
            else:
 
                raise ChangesetError("Requested object should be Tree "
 
                                     "or Blob, is %r" % type(obj))
 
        nodes = dirnodes + filenodes
 
        for node in nodes:
 
            if not node.path in self.nodes:
 
                self.nodes[node.path] = node
 
        nodes.sort()
 
        return nodes
 

	
 
    def get_node(self, path):
 
        if isinstance(path, unicode):
 
            path = path.encode('utf-8')
 
        path = self._fix_path(path)
 
        if not path in self.nodes:
 
            try:
 
                id_ = self._get_id_for_path(path)
 
            except ChangesetError:
 
                raise NodeDoesNotExistError("Cannot find one of parents' "
 
                    "directories for a given path: %s" % path)
 

	
 
            _GL = lambda m: m and objects.S_ISGITLINK(m)
 
            if _GL(self._stat_modes.get(path)):
 
                node = SubModuleNode(path, url=None, changeset=id_,
 
                                     alias=self.repository.alias)
 
            else:
 
                obj = self.repository._repo.get_object(id_)
 

	
 
                if isinstance(obj, objects.Tree):
 
                    if path == '':
 
                        node = RootNode(changeset=self)
 
                    else:
 
                        node = DirNode(path, changeset=self)
 
                    node._tree = obj
 
                elif isinstance(obj, objects.Blob):
 
                    node = FileNode(path, changeset=self)
 
                    node._blob = obj
 
                else:
 
                    raise NodeDoesNotExistError("There is no file nor directory "
 
                        "at the given path '%s' at revision %s"
 
                        % (path, self.short_id))
 
            # cache node
 
            self.nodes[path] = node
 
        return self.nodes[path]
 

	
 
    @LazyProperty
 
    def affected_files(self):
 
        """
 
        Gets a fast accessible file changes for given changeset
 
        """
 
        added, modified, deleted = self._changes_cache
 
        return list(added.union(modified).union(deleted))
 

	
 
    @LazyProperty
 
    def _diff_name_status(self):
 
        output = []
 
        for parent in self.parents:
 
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
 
                                                                self.raw_id)
 
            cmd = ['diff', '--name-status', parent.raw_id, self.raw_id,
 
                   '--encoding=utf8']
 
            so, se = self.repository.run_git_command(cmd)
 
            output.append(so.strip())
 
        return '\n'.join(output)
 

	
 
    @LazyProperty
 
    def _changes_cache(self):
 
        added = set()
 
        modified = set()
 
        deleted = set()
 
        _r = self.repository._repo
 

	
 
        parents = self.parents
 
        if not self.parents:
 
            parents = [EmptyChangeset()]
 
        for parent in parents:
 
            if isinstance(parent, EmptyChangeset):
 
                oid = None
 
            else:
 
                oid = _r[parent.raw_id].tree
 
            changes = _r.object_store.tree_changes(oid, _r[self.raw_id].tree)
 
            for (oldpath, newpath), (_, _), (_, _) in changes:
 
                if newpath and oldpath:
 
                    modified.add(newpath)
 
                elif newpath and not oldpath:
 
                    added.add(newpath)
 
                elif not newpath and oldpath:
 
                    deleted.add(oldpath)
 
        return added, modified, deleted
 

	
 
    def _get_paths_for_status(self, status):
 
        """
 
        Returns sorted list of paths for given ``status``.
 

	
 
        :param status: one of: *added*, *modified* or *deleted*
 
        """
 
        added, modified, deleted = self._changes_cache
 
        return sorted({
 
            'added': list(added),
 
            'modified': list(modified),
 
            'deleted': list(deleted)}[status]
 
        )
 

	
 
    @LazyProperty
 
    def added(self):
 
        """
 
        Returns list of added ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return list(self._get_file_nodes())
 
        return AddedFileNodesGenerator([n for n in
 
                                self._get_paths_for_status('added')], self)
 

	
 
    @LazyProperty
 
    def changed(self):
 
        """
 
        Returns list of modified ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return []
 
        return ChangedFileNodesGenerator([n for n in
 
                                self._get_paths_for_status('modified')], self)
 

	
 
    @LazyProperty
 
    def removed(self):
 
        """
 
        Returns list of removed ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return []
 
        return RemovedFileNodesGenerator([n for n in
 
                                self._get_paths_for_status('deleted')], self)
 

	
 
    extra = {}
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git.repository
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    Git repository implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import os
 
import re
 
import time
 
import errno
 
import urllib
 
import urllib2
 
import logging
 
import posixpath
 
import string
 
import sys
 
if sys.platform == "win32":
 
    from subprocess import list2cmdline
 
    def quote(s):
 
        return list2cmdline([s])
 
else:
 
    try:
 
        # Python <=2.7
 
        from pipes import quote
 
    except ImportError:
 
        # Python 3.3+
 
        from shlex import quote
 

	
 
from dulwich.objects import Tag
 
from dulwich.repo import Repo, NotGitRepository
 
from dulwich.config import ConfigFile
 

	
 
from kallithea.lib.vcs import subprocessio
 
from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
 
from kallithea.lib.vcs.conf import settings
 

	
 
from kallithea.lib.vcs.exceptions import (
 
    BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError,
 
    RepositoryError, TagAlreadyExistError, TagDoesNotExistError
 
)
 
from kallithea.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.ordered_dict import OrderedDict
 
from kallithea.lib.vcs.utils.paths import abspath, get_user_home
 

	
 
from kallithea.lib.vcs.utils.hgcompat import (
 
    hg_url, httpbasicauthhandler, httpdigestauthhandler
 
)
 

	
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .workdir import GitWorkdir
 

	
 
SHA_PATTERN = re.compile(r'^[[0-9a-fA-F]{12}|[0-9a-fA-F]{40}]$')
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
 
                 update_after_clone=False, bare=False):
 

	
 
        self.path = abspath(repo_path)
 
        repo = self._get_repo(create, src_url, update_after_clone, bare)
 
        self.bare = repo.bare
 

	
 
    @property
 
    def _config_files(self):
 
        return [
 
            self.bare and abspath(self.path, 'config')
 
                      or abspath(self.path, '.git', 'config'),
 
             abspath(get_user_home(), '.gitconfig'),
 
         ]
 

	
 
    @property
 
    def _repo(self):
 
        return Repo(self.path)
 

	
 
    @property
 
    def head(self):
 
        try:
 
            return self._repo.head()
 
        except KeyError:
 
            return None
 

	
 
    @property
 
    def _empty(self):
 
        """
 
        Checks if repository is empty ie. without any changesets
 
        """
 

	
 
        try:
 
            self.revisions[0]
 
        except (KeyError, IndexError):
 
            return True
 
        return False
 

	
 
    @LazyProperty
 
    def revisions(self):
 
        """
 
        Returns list of revisions' ids, in ascending order.  Being lazy
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    @classmethod
 
    def _run_git_command(cls, cmd, **opts):
 
        """
 
        Runs given ``cmd`` as git command and returns tuple
 
        (stdout, stderr).
 

	
 
        :param cmd: git command to be executed
 
        :param opts: env options to pass into Subprocess command
 
        """
 

	
 
        if '_bare' in opts:
 
            _copts = []
 
            del opts['_bare']
 
        else:
 
            _copts = ['-c', 'core.quotepath=false', ]
 
        safe_call = False
 
        if '_safe' in opts:
 
            #no exc on failure
 
            del opts['_safe']
 
            safe_call = True
 

	
 
        _str_cmd = False
 
        if isinstance(cmd, basestring):
 
            cmd = [cmd]
 
            _str_cmd = True
 
        assert isinstance(cmd, list), cmd
 

	
 
        gitenv = os.environ
 
        # need to clean fix GIT_DIR !
 
        if 'GIT_DIR' in gitenv:
 
            del gitenv['GIT_DIR']
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 

	
 
        _git_path = settings.GIT_EXECUTABLE_PATH
 
        cmd = [_git_path] + _copts + cmd
 
        if _str_cmd:
 
            cmd = ' '.join(cmd)
 

	
 
        try:
 
            _opts = dict(
 
                env=gitenv,
 
                shell=True,
 
                shell=False,
 
            )
 
            _opts.update(opts)
 
            p = subprocessio.SubprocessIOChunker(cmd, **_opts)
 
        except (EnvironmentError, OSError), err:
 
            tb_err = ("Couldn't run git command (%s).\n"
 
                      "Original error was:%s\n" % (cmd, err))
 
            log.error(tb_err)
 
            if safe_call:
 
                return '', err
 
            else:
 
                raise RepositoryError(tb_err)
 

	
 
        return ''.join(p.output), ''.join(p.error)
 

	
 
    def run_git_command(self, cmd):
 
        opts = {}
 
        if os.path.isdir(self.path):
 
            opts['cwd'] = self.path
 
        return self._run_git_command(cmd, **opts)
 

	
 
    @classmethod
 
    def _check_url(cls, url):
 
        """
 
        Function will check given url and try to verify if it's a valid
 
        link. Sometimes it may happened that git will issue basic
 
        auth request that can cause whole API to hang when used from python
 
        or other external calls.
 

	
 
        On failures it'll raise urllib2.HTTPError, exception is also thrown
 
        when the return code is non 200
 
        """
 

	
 
        # check first if it's not an local url
 
        if os.path.isdir(url) or url.startswith('file:'):
 
            return True
 

	
 
        if '+' in url[:url.find('://')]:
 
            url = url[url.find('+') + 1:]
 

	
 
        handlers = []
 
        url_obj = hg_url(url)
 
        test_uri, authinfo = url_obj.authinfo()
 
        url_obj.passwd = '*****'
 
        cleaned_uri = str(url_obj)
 

	
 
        if not test_uri.endswith('info/refs'):
 
            test_uri = test_uri.rstrip('/') + '/info/refs'
 

	
 
        if authinfo:
 
            #create a password manager
 
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 
            passmgr.add_password(*authinfo)
 

	
 
            handlers.extend((httpbasicauthhandler(passmgr),
 
                             httpdigestauthhandler(passmgr)))
 

	
 
        o = urllib2.build_opener(*handlers)
 
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
 

	
 
        q = {"service": 'git-upload-pack'}
 
        qs = '?%s' % urllib.urlencode(q)
 
        cu = "%s%s" % (test_uri, qs)
 
        req = urllib2.Request(cu, None, {})
 

	
 
        try:
 
            resp = o.open(req)
 
            if resp.code != 200:
 
                raise Exception('Return Code is not 200')
 
        except Exception, e:
 
            # means it cannot be cloned
 
            raise urllib2.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
 

	
 
        # now detect if it's proper git repo
 
        gitdata = resp.read()
 
        if not 'service=git-upload-pack' in gitdata:
 
            raise urllib2.URLError(
 
                "url [%s] does not look like an git" % (cleaned_uri))
 

	
 
        return True
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False,
 
                  bare=False):
 
        if create and os.path.exists(self.path):
 
            raise RepositoryError("Location already exist")
 
        if src_url and not create:
 
            raise RepositoryError("Create should be set to True if src_url is "
 
                                  "given (clone operation creates repository)")
 
        try:
 
            if create and src_url:
 
                GitRepository._check_url(src_url)
 
                self.clone(src_url, update_after_clone, bare)
 
                return Repo(self.path)
 
            elif create:
 
                os.makedirs(self.path)
 
                if bare:
 
                    return Repo.init_bare(self.path)
 
                else:
 
                    return Repo.init(self.path)
 
            else:
 
                return self._repo
 
        except (NotGitRepository, OSError), err:
 
            raise RepositoryError(err)
 

	
 
    def _get_all_revisions(self):
 
        # we must check if this repo is not empty, since later command
 
        # fails if it is. And it's cheaper to ask than throw the subprocess
 
        # errors
 
        try:
 
            self._repo.head()
 
        except KeyError:
 
            return []
 

	
 
        rev_filter = settings.GIT_REV_FILTER
 
        cmd = 'rev-list %s --reverse --date-order' % (rev_filter)
 
        cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
 
        try:
 
            so, se = self.run_git_command(cmd)
 
        except RepositoryError:
 
            # Can be raised for empty repositories
 
            return []
 
        return so.splitlines()
 

	
 
    def _get_all_revisions2(self):
 
        #alternate implementation using dulwich
 
        includes = [x[1][0] for x in self._parsed_refs.iteritems()
 
                    if x[1][1] != 'T']
 
        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        For git backend we always return integer here. This way we ensure
 
        that changeset's revision attribute would become integer.
 
        """
 

	
 
        is_null = lambda o: len(o) == revision.count('0')
 

	
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
 
            return self.revisions[-1]
 

	
 
        is_bstr = isinstance(revision, (str, unicode))
 
        if ((is_bstr and revision.isdigit() and len(revision) < 12)
 
            or isinstance(revision, int) or is_null(revision)):
 
            try:
 
                revision = self.revisions[int(revision)]
 
            except IndexError:
 
                msg = ("Revision %s does not exist for %s" % (revision, self))
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        elif is_bstr:
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(revision)
 
            if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
 
                return _ref_revision[0]
 

	
 
            _tags_shas = self.tags.values()
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            if revision in _tags_shas:
 
                return _tags_shas[_tags_shas.index(revision)]
 

	
 
            elif not SHA_PATTERN.match(revision) or revision not in self.revisions:
 
                msg = ("Revision %s does not exist for %s" % (revision, self))
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        # Ensure we return full id
 
        if not SHA_PATTERN.match(str(revision)):
 
            raise ChangesetDoesNotExistError("Given revision %s not recognized"
 
                % revision)
 
        return revision
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns ``MercurialChangeset`` object representing repository's
 
        changeset at the given ``revision``.
 
        """
 
        return self._get_revision(ref_name)
 

	
 
    def _get_archives(self, archive_name='tip'):
 

	
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
                yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall to
 
        filesystem (``file:///``) schema.
 
        """
 
        url = str(url)
 
        if url != 'default' and not '://' in url:
 
            url = ':///'.join(('file', url))
 
        return url
 

	
 
    def get_hook_location(self):
 
        """
 
        returns absolute path to location where hooks are stored
 
        """
 
        loc = os.path.join(self.path, 'hooks')
 
        if not self.bare:
 
            loc = os.path.join(self.path, '.git', 'hooks')
 
        return loc
 

	
 
    @LazyProperty
 
    def name(self):
 
        return os.path.basename(self.path)
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            idx_loc = '' if self.bare else '.git'
 
            # fallback to filesystem
 
            in_path = os.path.join(self.path, idx_loc, "index")
 
            he_path = os.path.join(self.path, idx_loc, "HEAD")
 
            if os.path.exists(in_path):
 
                return os.stat(in_path).st_mtime
 
            else:
 
                return os.stat(he_path).st_mtime
 

	
 
    @LazyProperty
 
    def description(self):
 
        undefined_description = u'unknown'
 
        _desc = self._repo.get_description()
 
        return safe_unicode(_desc or undefined_description)
 

	
 
    @LazyProperty
 
    def contact(self):
 
        undefined_contact = u'Unknown'
 
        return undefined_contact
 

	
 
    @property
 
    def branches(self):
 
        if not self.revisions:
 
            return {}
 
        sortkey = lambda ctx: ctx[0]
 
        _branches = [(x[0], x[1][0])
 
                     for x in self._parsed_refs.iteritems() if x[1][1] == 'H']
 
        return OrderedDict(sorted(_branches, key=sortkey, reverse=False))
 

	
 
    @LazyProperty
 
    def closed_branches(self):
 
        return {}
 

	
 
    @LazyProperty
 
    def tags(self):
 
        return self._get_tags()
 

	
 
    def _get_tags(self):
 
        if not self.revisions:
 
            return {}
 

	
 
        sortkey = lambda ctx: ctx[0]
 
        _tags = [(x[0], x[1][0])
 
                 for x in self._parsed_refs.iteritems() if x[1][1] == 'T']
 
        return OrderedDict(sorted(_tags, key=sortkey, reverse=True))
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None,
 
            **kwargs):
 
        """
 
        Creates and returns a tag for the given ``revision``.
 

	
 
        :param name: name for new tag
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param revision: changeset id for which new tag would be created
 
        :param message: message of the tag's commit
 
        :param date: date of tag's commit
 

	
 
        :raises TagAlreadyExistError: if tag with same name already exists
 
        """
 
        if name in self.tags:
 
            raise TagAlreadyExistError("Tag %s already exists" % name)
 
        changeset = self.get_changeset(revision)
 
        message = message or "Added tag %s for commit %s" % (name,
 
            changeset.raw_id)
 
        self._repo.refs["refs/tags/%s" % name] = changeset._commit.id
 

	
 
        self._parsed_refs = self._get_parsed_refs()
 
        self.tags = self._get_tags()
 
        return changeset
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
 
        """
 
        Removes tag with the given ``name``.
 

	
 
        :param name: name of the tag to be removed
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param message: message of the tag's removal commit
 
        :param date: date of tag's removal commit
 

	
 
        :raises TagDoesNotExistError: if tag with given name does not exists
 
        """
 
        if name not in self.tags:
 
            raise TagDoesNotExistError("Tag %s does not exist" % name)
 
        tagpath = posixpath.join(self._repo.refs.path, 'refs', 'tags', name)
 
        try:
 
            os.remove(tagpath)
 
            self._parsed_refs = self._get_parsed_refs()
 
            self.tags = self._get_tags()
 
        except OSError, e:
 
            raise RepositoryError(e.strerror)
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        """
 
        Gets bookmarks for this repository
 
        """
 
        return {}
 

	
 
    @LazyProperty
 
    def _parsed_refs(self):
 
        return self._get_parsed_refs()
 

	
 
    def _get_parsed_refs(self):
 
        # cache the property
 
        _repo = self._repo
 
        refs = _repo.get_refs()
 
        keys = [('refs/heads/', 'H'),
 
                ('refs/remotes/origin/', 'RH'),
 
                ('refs/tags/', 'T')]
 
        _refs = {}
 
        for ref, sha in refs.iteritems():
 
            for k, type_ in keys:
 
                if ref.startswith(k):
 
                    _key = ref[len(k):]
 
                    if type_ == 'T':
 
                        obj = _repo.get_object(sha)
 
                        if isinstance(obj, Tag):
 
                            sha = _repo.get_object(sha).object[1]
 
                    _refs[_key] = [sha, type_]
 
                    break
 
        return _refs
 

	
 
    def _heads(self, reverse=False):
 
        refs = self._repo.get_refs()
 
        heads = {}
 

	
 
        for key, val in refs.items():
 
            for ref_key in ['refs/heads/', 'refs/remotes/origin/']:
 
                if key.startswith(ref_key):
 
                    n = key[len(ref_key):]
 
                    if n not in ['HEAD']:
 
                        heads[n] = val
 

	
 
        return heads if reverse else dict((y, x) for x, y in heads.iteritems())
 

	
 
    def get_changeset(self, revision=None):
 
        """
 
        Returns ``GitChangeset`` object representing commit from git repository
 
        at the given revision or head (most recent commit) if None given.
 
        """
 
        if isinstance(revision, GitChangeset):
 
            return revision
 
        revision = self._get_revision(revision)
 
        changeset = GitChangeset(repository=self, revision=revision)
 
        return changeset
 

	
 
    def get_changesets(self, start=None, end=None, start_date=None,
 
           end_date=None, branch_name=None, reverse=False):
 
        """
 
        Returns iterator of ``GitChangeset`` objects from start to end (both
 
        are inclusive), in ascending date order (unless ``reverse`` is set).
 

	
 
        :param start: changeset ID, as str; first returned changeset
 
        :param end: changeset ID, as str; last returned changeset
 
        :param start_date: if specified, changesets with commit date less than
 
          ``start_date`` would be filtered out from returned set
 
        :param end_date: if specified, changesets with commit date greater than
 
          ``end_date`` would be filtered out from returned set
 
        :param branch_name: if specified, changesets not reachable from given
 
          branch would be filtered out from returned set
 
        :param reverse: if ``True``, returned generator would be reversed
 
          (meaning that returned changesets would have descending date order)
 

	
 
        :raise BranchDoesNotExistError: If given ``branch_name`` does not
 
            exist.
 
        :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
 
          ``end`` could not be found.
 

	
 
        """
 
        if branch_name and branch_name not in self.branches:
 
            raise BranchDoesNotExistError("Branch '%s' not found" \
 
                                          % branch_name)
 
        # actually we should check now if it's not an empty repo to not spaw
 
        # subprocess commands
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        # %H at format means (full) commit hash, initial hashes are retrieved
 
        # in ascending date order
 
        cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
 
        cmd_params = {}
 
        cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
 
        if start_date:
 
            cmd_template += ' --since "$since"'
 
            cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
 
            cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
 
        if end_date:
 
            cmd_template += ' --until "$until"'
 
            cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
 
            cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
 
        if branch_name:
 
            cmd_template += ' $branch_name'
 
            cmd_params['branch_name'] = branch_name
 
            cmd.append(branch_name)
 
        else:
 
            rev_filter = settings.GIT_REV_FILTER
 
            cmd_template += ' %s' % (rev_filter)
 
            cmd.append(settings.GIT_REV_FILTER)
 

	
 
        cmd = string.Template(cmd_template).safe_substitute(**cmd_params)
 
        revs = self.run_git_command(cmd)[0].splitlines()
 
        start_pos = 0
 
        end_pos = len(revs)
 
        if start:
 
            _start = self._get_revision(start)
 
            try:
 
                start_pos = revs.index(_start)
 
            except ValueError:
 
                pass
 

	
 
        if end is not None:
 
            _end = self._get_revision(end)
 
            try:
 
                end_pos = revs.index(_end)
 
            except ValueError:
 
                pass
 

	
 
        if None not in [start, end] and start_pos > end_pos:
 
            raise RepositoryError('start cannot be after end')
 

	
 
        if end_pos is not None:
 
            end_pos += 1
 

	
 
        revs = revs[start_pos:end_pos]
 
        if reverse:
 
            revs = reversed(revs)
 
        return CollectionGenerator(self, revs)
 

	
 
    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
                 context=3):
 
        """
 
        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
        ``rev2`` since ``rev1``.
 

	
 
        :param rev1: Entry point from which diff is shown. Can be
 
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
          the changes since empty state of the repository until ``rev2``
 
        :param rev2: Until which revision changes should be shown.
 
        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
          changes. Defaults to ``False``.
 
        :param context: How many lines before/after changed lines should be
 
          shown. Defaults to ``3``.
 
        """
 
        flags = ['-U%s' % context, '--full-index', '--binary', '-p', '-M', '--abbrev=40']
 
        if ignore_whitespace:
 
            flags.append('-w')
 

	
 
        if hasattr(rev1, 'raw_id'):
 
            rev1 = getattr(rev1, 'raw_id')
 

	
 
        if hasattr(rev2, 'raw_id'):
 
            rev2 = getattr(rev2, 'raw_id')
 

	
 
        if rev1 == self.EMPTY_CHANGESET:
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['show'] + flags + [rev2])
 
            cmd = ['show'] + flags + [rev2]
 
        else:
 
            rev1 = self.get_changeset(rev1).raw_id
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
 
            cmd = ['diff'] + flags + [rev1, rev2]
 

	
 
        if path:
 
            cmd += ' -- "%s"' % path
 
            cmd += ['--', path]
 

	
 
        stdout, stderr = self.run_git_command(cmd)
 
        # TODO: don't ignore stderr
 
        # If we used 'show' command, strip first few lines (until actual diff
 
        # starts)
 
        if rev1 == self.EMPTY_CHANGESET:
 
            parts = stdout.split('\ndiff ', 1)
 
            if len(parts) > 1:
 
                stdout = 'diff ' + parts[1]
 
        return stdout
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        """
 
        Returns ``GitInMemoryChangeset`` object for this repository.
 
        """
 
        return GitInMemoryChangeset(self)
 

	
 
    def clone(self, url, update_after_clone=True, bare=False):
 
        """
 
        Tries to clone changes from external location.
 

	
 
        :param update_after_clone: If set to ``False``, git won't checkout
 
          working directory
 
        :param bare: If set to ``True``, repository would be cloned into
 
          *bare* git repository (no working directory at all).
 
        """
 
        url = self._get_url(url)
 
        cmd = ['clone', '-q']
 
        if bare:
 
            cmd.append('--bare')
 
        elif not update_after_clone:
 
            cmd.append('--no-checkout')
 
        cmd += ['--', quote(url), quote(self.path)]
 
        cmd = ' '.join(cmd)
 
        cmd += ['--', url, self.path]
 
        # If error occurs run_git_command raises RepositoryError already
 
        self.run_git_command(cmd)
 

	
 
    def pull(self, url):
 
        """
 
        Tries to pull changes from external location.
 
        """
 
        url = self._get_url(url)
 
        cmd = ['pull', "--ff-only", quote(url)]
 
        cmd = ' '.join(cmd)
 
        cmd = ['pull', '--ff-only', url]
 
        # If error occurs run_git_command raises RepositoryError already
 
        self.run_git_command(cmd)
 

	
 
    def fetch(self, url):
 
        """
 
        Tries to pull changes from external location.
 
        """
 
        url = self._get_url(url)
 
        so, se = self.run_git_command('ls-remote -h %s' % quote(url))
 
        refs = []
 
        so, se = self.run_git_command(['ls-remote', '-h', url])
 
        cmd = ['fetch', url, '--']
 
        for line in (x for x in so.splitlines()):
 
            sha, ref = line.split('\t')
 
            refs.append(ref)
 
        refs = ' '.join(('+%s:%s' % (r, r) for r in refs))
 
        cmd = '''fetch %s -- %s''' % (quote(url), refs)
 
            cmd.append('+%s:%s' % (ref, ref))
 
        self.run_git_command(cmd)
 

	
 
    def _update_server_info(self):
 
        """
 
        runs gits update-server-info command in this repo instance
 
        """
 
        from dulwich.server import update_server_info
 
        try:
 
            update_server_info(self._repo)
 
        except OSError, e:
 
            if e.errno != errno.ENOENT:
 
                raise
 
            # Workaround for dulwich crashing on for example its own dulwich/tests/data/repos/simple_merge.git/info/refs.lock
 
            log.error('Ignoring error running update-server-info: %s', e)
 

	
 
    @LazyProperty
 
    def workdir(self):
 
        """
 
        Returns ``Workdir`` instance for this repository.
 
        """
 
        return GitWorkdir(self)
 

	
 
    def get_config_value(self, section, name, config_file=None):
 
        """
 
        Returns configuration value for a given [``section``] and ``name``.
 

	
 
        :param section: Section we want to retrieve value from
 
        :param name: Name of configuration we want to retrieve
 
        :param config_file: A path to file which should be used to retrieve
 
          configuration from (might also be a list of file paths)
 
        """
 
        if config_file is None:
 
            config_file = []
 
        elif isinstance(config_file, basestring):
 
            config_file = [config_file]
 

	
 
        def gen_configs():
 
            for path in config_file + self._config_files:
 
                try:
 
                    yield ConfigFile.from_path(path)
 
                except (IOError, OSError, ValueError):
 
                    continue
 

	
 
        for config in gen_configs():
 
            try:
 
                return config.get(section, name)
 
            except KeyError:
 
                continue
 
        return None
 

	
 
    def get_user_name(self, config_file=None):
 
        """
 
        Returns user's name from global configuration file.
 

	
 
        :param config_file: A path to file which should be used to retrieve
 
          configuration from (might also be a list of file paths)
 
        """
 
        return self.get_config_value('user', 'name', config_file)
 

	
 
    def get_user_email(self, config_file=None):
 
        """
 
        Returns user's email from global configuration file.
 

	
 
        :param config_file: A path to file which should be used to retrieve
 
          configuration from (might also be a list of file paths)
 
        """
 
        return self.get_config_value('user', 'email', config_file)
kallithea/lib/vcs/subprocessio.py
Show inline comments
 
"""
 
Module provides a class allowing to wrap communication over subprocess.Popen
 
input, output, error streams into a meaningfull, non-blocking, concurrent
 
stream processor exposing the output data as an iterator fitting to be a
 
return value passed by a WSGI applicaiton to a WSGI server per PEP 3333.
 

	
 
Copyright (c) 2011  Daniel Dotsenko <dotsa[at]hotmail.com>
 

	
 
This file is part of git_http_backend.py Project.
 

	
 
git_http_backend.py Project is free software: you can redistribute it and/or
 
modify it under the terms of the GNU Lesser General Public License as
 
published by the Free Software Foundation, either version 2.1 of the License,
 
or (at your option) any later version.
 

	
 
git_http_backend.py Project is distributed in the hope that it will be useful,
 
but WITHOUT ANY WARRANTY; without even the implied warranty of
 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 
GNU Lesser General Public License for more details.
 

	
 
You should have received a copy of the GNU Lesser General Public License
 
along with git_http_backend.py Project.
 
If not, see <http://www.gnu.org/licenses/>.
 
"""
 
import os
 
import subprocess
 
from kallithea.lib.vcs.utils.compat import deque, Event, Thread, _bytes, _bytearray
 

	
 

	
 
class StreamFeeder(Thread):
 
    """
 
    Normal writing into pipe-like is blocking once the buffer is filled.
 
    This thread allows a thread to seep data from a file-like into a pipe
 
    without blocking the main thread.
 
    We close inpipe once the end of the source stream is reached.
 
    """
 

	
 
    def __init__(self, source):
 
        super(StreamFeeder, self).__init__()
 
        self.daemon = True
 
        filelike = False
 
        self.bytes = _bytes()
 
        if type(source) in (type(''), _bytes, _bytearray):  # string-like
 
            self.bytes = _bytes(source)
 
        else:  # can be either file pointer or file-like
 
            if type(source) in (int, long):  # file pointer it is
 
                ## converting file descriptor (int) stdin into file-like
 
                source = os.fdopen(source, 'rb', 16384)
 
            # let's see if source is file-like by now
 
            filelike = hasattr(source, 'read')
 
        if not filelike and not self.bytes:
 
            raise TypeError("StreamFeeder's source object must be a readable "
 
                            "file-like, a file descriptor, or a string-like.")
 
        self.source = source
 
        self.readiface, self.writeiface = os.pipe()
 

	
 
    def run(self):
 
        t = self.writeiface
 
        if self.bytes:
 
            os.write(t, self.bytes)
 
        else:
 
            s = self.source
 
            b = s.read(4096)
 
            while b:
 
                os.write(t, b)
 
                b = s.read(4096)
 
        os.close(t)
 

	
 
    @property
 
    def output(self):
 
        return self.readiface
 

	
 

	
 
class InputStreamChunker(Thread):
 
    def __init__(self, source, target, buffer_size, chunk_size):
 

	
 
        super(InputStreamChunker, self).__init__()
 

	
 
        self.daemon = True  # die die die.
 

	
 
        self.source = source
 
        self.target = target
 
        self.chunk_count_max = int(buffer_size / chunk_size) + 1
 
        self.chunk_size = chunk_size
 

	
 
        self.data_added = Event()
 
        self.data_added.clear()
 

	
 
        self.keep_reading = Event()
 
        self.keep_reading.set()
 

	
 
        self.EOF = Event()
 
        self.EOF.clear()
 

	
 
        self.go = Event()
 
        self.go.set()
 

	
 
    def stop(self):
 
        self.go.clear()
 
        self.EOF.set()
 
        try:
 
            # this is not proper, but is done to force the reader thread let
 
            # go of the input because, if successful, .close() will send EOF
 
            # down the pipe.
 
            self.source.close()
 
        except:
 
            pass
 

	
 
    def run(self):
 
        s = self.source
 
        t = self.target
 
        cs = self.chunk_size
 
        ccm = self.chunk_count_max
 
        kr = self.keep_reading
 
        da = self.data_added
 
        go = self.go
 

	
 
        try:
 
            b = s.read(cs)
 
        except ValueError:
 
            b = ''
 

	
 
        while b and go.is_set():
 
            if len(t) > ccm:
 
                kr.clear()
 
                kr.wait(2)
 
                # # this only works on 2.7.x and up
 
                # if not kr.wait(10):
 
                #     raise Exception("Timed out while waiting for input to be read.")
 
                # instead we'll use this
 
                if len(t) > ccm + 3:
 
                    raise IOError(
 
                        "Timed out while waiting for input from subprocess.")
 
            t.append(b)
 
            da.set()
 
            try:
 
                b = s.read(cs)
 
            except ValueError: # probably "I/O operation on closed file"
 
                b = ''
 

	
 
        self.EOF.set()
 
        da.set()  # for cases when done but there was no input.
 

	
 

	
 
class BufferedGenerator(object):
 
    """
 
    Class behaves as a non-blocking, buffered pipe reader.
 
    Reads chunks of data (through a thread)
 
    from a blocking pipe, and attaches these to an array (Deque) of chunks.
 
    Reading is halted in the thread when max chunks is internally buffered.
 
    The .next() may operate in blocking or non-blocking fashion by yielding
 
    '' if no data is ready
 
    to be sent or by not returning until there is some data to send
 
    When we get EOF from underlying source pipe we raise the marker to raise
 
    StopIteration after the last chunk of data is yielded.
 
    """
 

	
 
    def __init__(self, source, buffer_size=65536, chunk_size=4096,
 
                 starting_values=[], bottomless=False):
 

	
 
        if bottomless:
 
            maxlen = int(buffer_size / chunk_size)
 
        else:
 
            maxlen = None
 

	
 
        self.data = deque(starting_values, maxlen)
 
        self.worker = InputStreamChunker(source, self.data, buffer_size,
 
                                         chunk_size)
 
        if starting_values:
 
            self.worker.data_added.set()
 
        self.worker.start()
 

	
 
    ####################
 
    # Generator's methods
 
    ####################
 

	
 
    def __iter__(self):
 
        return self
 

	
 
    def next(self):
 
        while not len(self.data) and not self.worker.EOF.is_set():
 
            self.worker.data_added.clear()
 
            self.worker.data_added.wait(0.2)
 
        if len(self.data):
 
            self.worker.keep_reading.set()
 
            return _bytes(self.data.popleft())
 
        elif self.worker.EOF.is_set():
 
            raise StopIteration
 

	
 
    def throw(self, type, value=None, traceback=None):
 
        if not self.worker.EOF.is_set():
 
            raise type(value)
 

	
 
    def start(self):
 
        self.worker.start()
 

	
 
    def stop(self):
 
        self.worker.stop()
 

	
 
    def close(self):
 
        try:
 
            self.worker.stop()
 
            self.throw(GeneratorExit)
 
        except (GeneratorExit, StopIteration):
 
            pass
 

	
 
    def __del__(self):
 
        self.close()
 

	
 
    ####################
 
    # Threaded reader's infrastructure.
 
    ####################
 
    @property
 
    def input(self):
 
        return self.worker.w
 

	
 
    @property
 
    def data_added_event(self):
 
        return self.worker.data_added
 

	
 
    @property
 
    def data_added(self):
 
        return self.worker.data_added.is_set()
 

	
 
    @property
 
    def reading_paused(self):
 
        return not self.worker.keep_reading.is_set()
 

	
 
    @property
 
    def done_reading_event(self):
 
        """
 
        Done_reading does not mean that the iterator's buffer is empty.
 
        Iterator might have done reading from underlying source, but the read
 
        chunks might still be available for serving through .next() method.
 

	
 
        :returns: An Event class instance.
 
        """
 
        return self.worker.EOF
 

	
 
    @property
 
    def done_reading(self):
 
        """
 
        Done_reading does not mean that the iterator's buffer is empty.
 
        Iterator might have done reading from underlying source, but the read
 
        chunks might still be available for serving through .next() method.
 

	
 
        :returns: An Bool value.
 
        """
 
        return self.worker.EOF.is_set()
 

	
 
    @property
 
    def length(self):
 
        """
 
        returns int.
 

	
 
        This is the length of the queue of chunks, not the length of
 
        the combined contents in those chunks.
 

	
 
        __len__() cannot be meaningfully implemented because this
 
        reader is just flying through a bottomless pit content and
 
        can only know the length of what it already saw.
 

	
 
        If __len__() on WSGI server per PEP 3333 returns a value,
 
        the response's length will be set to that. In order not to
 
        confuse WSGI PEP3333 servers, we will not implement __len__
 
        at all.
 
        """
 
        return len(self.data)
 

	
 
    def prepend(self, x):
 
        self.data.appendleft(x)
 

	
 
    def append(self, x):
 
        self.data.append(x)
 

	
 
    def extend(self, o):
 
        self.data.extend(o)
 

	
 
    def __getitem__(self, i):
 
        return self.data[i]
 

	
 

	
 
class SubprocessIOChunker(object):
 
    """
 
    Processor class wrapping handling of subprocess IO.
 

	
 
    In a way, this is a "communicate()" replacement with a twist.
 

	
 
    - We are multithreaded. Writing in and reading out, err are all sep threads.
 
    - We support concurrent (in and out) stream processing.
 
    - The output is not a stream. It's a queue of read string (bytes, not unicode)
 
      chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
 
    - We are non-blocking in more respects than communicate()
 
      (reading from subprocess out pauses when internal buffer is full, but
 
       does not block the parent calling code. On the flip side, reading from
 
       slow-yielding subprocess may block the iteration until data shows up. This
 
       does not block the parallel inpipe reading occurring parallel thread.)
 

	
 
    The purpose of the object is to allow us to wrap subprocess interactions into
 
    an iterable that can be passed to a WSGI server as the application's return
 
    value. Because of stream-processing-ability, WSGI does not have to read ALL
 
    of the subprocess's output and buffer it, before handing it to WSGI server for
 
    HTTP response. Instead, the class initializer reads just a bit of the stream
 
    to figure out if error occurred or likely to occur and if not, just hands the
 
    further iteration over subprocess output to the server for completion of HTTP
 
    response.
 

	
 
    The real or perceived subprocess error is trapped and raised as one of
 
    EnvironmentError family of exceptions
 

	
 
    Example usage:
 
    #    try:
 
    #        answer = SubprocessIOChunker(
 
    #            cmd,
 
    #            input,
 
    #            buffer_size = 65536,
 
    #            chunk_size = 4096
 
    #            )
 
    #    except (EnvironmentError) as e:
 
    #        print str(e)
 
    #        raise e
 
    #
 
    #    return answer
 

	
 

	
 
    """
 

	
 
    def __init__(self, cmd, inputstream=None, buffer_size=65536,
 
                 chunk_size=4096, starting_values=[], **kwargs):
 
        """
 
        Initializes SubprocessIOChunker
 

	
 
        :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
 
        :param inputstream: (Default: None) A file-like, string, or file pointer.
 
        :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
 
        :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
 
        :param starting_values: (Default: []) An array of strings to put in front of output que.
 
        """
 

	
 
        if inputstream:
 
            input_streamer = StreamFeeder(inputstream)
 
            input_streamer.start()
 
            inputstream = input_streamer.output
 

	
 
        _shell = kwargs.get('shell', True)
 
        if isinstance(cmd, (list, tuple)):
 
            cmd = ' '.join(cmd)
 
        # Note: fragile cmd mangling has been removed for use in Kallithea
 
        assert isinstance(cmd, list), cmd
 

	
 
        kwargs['shell'] = _shell
 
        _p = subprocess.Popen(cmd, bufsize=-1,
 
                              stdin=inputstream,
 
                              stdout=subprocess.PIPE,
 
                              stderr=subprocess.PIPE,
 
                              **kwargs)
 

	
 
        bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size,
 
                                   starting_values)
 
        bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
 

	
 
        while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
 
            # doing this until we reach either end of file, or end of buffer.
 
            bg_out.data_added_event.wait(1)
 
            bg_out.data_added_event.clear()
 

	
 
        # at this point it's still ambiguous if we are done reading or just full buffer.
 
        # Either way, if error (returned by ended process, or implied based on
 
        # presence of stuff in stderr output) we error out.
 
        # Else, we are happy.
 
        _returncode = _p.poll()
 
        if _returncode or (_returncode is None and bg_err.length):
 
            try:
 
                _p.terminate()
 
            except Exception:
 
                pass
 
            bg_out.stop()
 
            out = ''.join(bg_out)
 
            bg_err.stop()
 
            err = ''.join(bg_err)
 
            if (err.strip() == 'fatal: The remote end hung up unexpectedly' and
 
                out.startswith('0034shallow ')):
 
                # hack inspired by https://github.com/schacon/grack/pull/7
 
                bg_out = iter([out])
 
                _p = None
 
            elif err:
 
                raise EnvironmentError(
 
                    "Subprocess exited due to an error:\n" + err)
 
            else:
 
                raise EnvironmentError(
 
                    "Subprocess exited with non 0 ret code:%s" % _returncode)
 
        self.process = _p
 
        self.output = bg_out
 
        self.error = bg_err
 
        self.inputstream = inputstream
 

	
 
    def __iter__(self):
 
        return self
 

	
 
    def next(self):
 
        if self.process and self.process.poll():
 
            err = '%s' % ''.join(self.error)
 
            raise EnvironmentError("Subprocess exited due to an error:\n" + err)
 
        return self.output.next()
 

	
 
    def throw(self, type, value=None, traceback=None):
 
        if self.output.length or not self.output.done_reading:
 
            raise type(value)
 

	
 
    def close(self):
 
        try:
 
            self.process.terminate()
 
        except:
 
            pass
 
        try:
 
            self.output.close()
 
        except:
 
            pass
 
        try:
 
            self.error.close()
 
        except:
 
            pass
 
        try:
 
            os.close(self.inputstream)
 
        except:
 
            pass
 

	
 
    def __del__(self):
 
        self.close()
kallithea/tests/vcs/test_git.py
Show inline comments
 
from __future__ import with_statement
 

	
 
import os
 
import sys
 
import mock
 
import datetime
 
import urllib2
 
from kallithea.lib.vcs.backends.git import GitRepository, GitChangeset
 
from kallithea.lib.vcs.exceptions import RepositoryError, VCSError, NodeDoesNotExistError
 
from kallithea.lib.vcs.nodes import NodeKind, FileNode, DirNode, NodeState
 
from kallithea.lib.vcs.utils.compat import unittest
 
from kallithea.tests.vcs.base import _BackendTestMixin
 
from kallithea.tests.vcs.conf import TEST_GIT_REPO, TEST_GIT_REPO_CLONE, get_new_dir
 

	
 

	
 
class GitRepositoryTest(unittest.TestCase):
 

	
 
    def __check_for_existing_repo(self):
 
        if os.path.exists(TEST_GIT_REPO_CLONE):
 
            self.fail('Cannot test git clone repo as location %s already '
 
                      'exists. You should manually remove it first.'
 
                      % TEST_GIT_REPO_CLONE)
 

	
 
    def setUp(self):
 
        self.repo = GitRepository(TEST_GIT_REPO)
 

	
 
    def test_wrong_repo_path(self):
 
        wrong_repo_path = '/tmp/errorrepo'
 
        self.assertRaises(RepositoryError, GitRepository, wrong_repo_path)
 

	
 
    def test_git_cmd_injection(self):
 
        repo_inject_path = TEST_GIT_REPO + '; echo "Cake";'
 
        with self.assertRaises(urllib2.URLError):
 
            # Should fail because URL will contain the parts after ; too
 
            urlerror_fail_repo = GitRepository(get_new_dir('injection-repo'), src_url=repo_inject_path, update_after_clone=True, create=True)
 

	
 
        with self.assertRaises(RepositoryError):
 
            # Should fail on direct clone call, which as of this writing does not happen outside of class
 
            clone_fail_repo = GitRepository(get_new_dir('injection-repo'), create=True)
 
            clone_fail_repo.clone(repo_inject_path, update_after_clone=True,)
 

	
 
        # Verify correct quoting of evil characters that should work on posix file systems
 
        if sys.platform == 'win32':
 
            # windows does not allow '"' in dir names
 
            tricky_path = get_new_dir("tricky-path-repo-$'`")
 
        else:
 
            tricky_path = get_new_dir("tricky-path-repo-$'\"`")
 
        successfully_cloned = GitRepository(tricky_path, src_url=TEST_GIT_REPO, update_after_clone=True, create=True)
 
        # Repo should have been created
 
        self.assertFalse(successfully_cloned._repo.bare)
 

	
 
        if sys.platform == 'win32':
 
            # windows does not allow '"' in dir names
 
            tricky_path_2 = get_new_dir("tricky-path-2-repo-$'`")
 
        else:
 
            tricky_path_2 = get_new_dir("tricky-path-2-repo-$'\"`")
 
        successfully_cloned2 = GitRepository(tricky_path_2, src_url=tricky_path, bare=True, create=True)
 
        # Repo should have been created and thus used correct quoting for clone
 
        self.assertTrue(successfully_cloned2._repo.bare)
 

	
 
        # Should pass because URL has been properly quoted
 
        successfully_cloned.pull(tricky_path_2)
 
        successfully_cloned2.fetch(tricky_path)
 

	
 
    def test_repo_create_with_spaces_in_path(self):
 
        repo_path = get_new_dir("path with spaces")
 
        repo = GitRepository(repo_path, src_url=None, bare=True, create=True)
 
        # Repo should have been created
 
        self.assertTrue(repo._repo.bare)
 

	
 
    def test_repo_clone(self):
 
        self.__check_for_existing_repo()
 
        repo = GitRepository(TEST_GIT_REPO)
 
        repo_clone = GitRepository(TEST_GIT_REPO_CLONE,
 
            src_url=TEST_GIT_REPO, create=True, update_after_clone=True)
 
        self.assertEqual(len(repo.revisions), len(repo_clone.revisions))
 
        # Checking hashes of changesets should be enough
 
        for changeset in repo.get_changesets():
 
            raw_id = changeset.raw_id
 
            self.assertEqual(raw_id, repo_clone.get_changeset(raw_id).raw_id)
 

	
 
    def test_repo_clone_with_spaces_in_path(self):
 
        repo_path = get_new_dir("path with spaces")
 
        successfully_cloned = GitRepository(repo_path, src_url=TEST_GIT_REPO, update_after_clone=True, create=True)
 
        # Repo should have been created
 
        self.assertFalse(successfully_cloned._repo.bare)
 

	
 
        successfully_cloned.pull(TEST_GIT_REPO)
 
        self.repo.fetch(repo_path)
 

	
 
    def test_repo_clone_without_create(self):
 
        self.assertRaises(RepositoryError, GitRepository,
 
            TEST_GIT_REPO_CLONE + '_wo_create', src_url=TEST_GIT_REPO)
 

	
 
    def test_repo_clone_with_update(self):
 
        repo = GitRepository(TEST_GIT_REPO)
 
        clone_path = TEST_GIT_REPO_CLONE + '_with_update'
 
        repo_clone = GitRepository(clone_path,
 
            create=True, src_url=TEST_GIT_REPO, update_after_clone=True)
 
        self.assertEqual(len(repo.revisions), len(repo_clone.revisions))
 

	
 
        #check if current workdir was updated
 
        fpath = os.path.join(clone_path, 'MANIFEST.in')
 
        self.assertEqual(True, os.path.isfile(fpath),
 
            'Repo was cloned and updated but file %s could not be found'
 
            % fpath)
 

	
 
    def test_repo_clone_without_update(self):
 
        repo = GitRepository(TEST_GIT_REPO)
 
        clone_path = TEST_GIT_REPO_CLONE + '_without_update'
 
        repo_clone = GitRepository(clone_path,
 
            create=True, src_url=TEST_GIT_REPO, update_after_clone=False)
 
        self.assertEqual(len(repo.revisions), len(repo_clone.revisions))
 
        #check if current workdir was *NOT* updated
 
        fpath = os.path.join(clone_path, 'MANIFEST.in')
 
        # Make sure it's not bare repo
 
        self.assertFalse(repo_clone._repo.bare)
 
        self.assertEqual(False, os.path.isfile(fpath),
 
            'Repo was cloned and updated but file %s was found'
 
            % fpath)
 

	
 
    def test_repo_clone_into_bare_repo(self):
 
        repo = GitRepository(TEST_GIT_REPO)
 
        clone_path = TEST_GIT_REPO_CLONE + '_bare.git'
 
        repo_clone = GitRepository(clone_path, create=True,
 
            src_url=repo.path, bare=True)
 
        self.assertTrue(repo_clone._repo.bare)
 

	
 
    def test_create_repo_is_not_bare_by_default(self):
 
        repo = GitRepository(get_new_dir('not-bare-by-default'), create=True)
 
        self.assertFalse(repo._repo.bare)
 

	
 
    def test_create_bare_repo(self):
 
        repo = GitRepository(get_new_dir('bare-repo'), create=True, bare=True)
 
        self.assertTrue(repo._repo.bare)
 

	
 
    def test_revisions(self):
 
        # there are 112 revisions (by now)
 
        # so we can assume they would be available from now on
 
        subset = set([
 
            'c1214f7e79e02fc37156ff215cd71275450cffc3',
 
            '38b5fe81f109cb111f549bfe9bb6b267e10bc557',
 
            'fa6600f6848800641328adbf7811fd2372c02ab2',
 
            '102607b09cdd60e2793929c4f90478be29f85a17',
 
            '49d3fd156b6f7db46313fac355dca1a0b94a0017',
 
            '2d1028c054665b962fa3d307adfc923ddd528038',
 
            'd7e0d30fbcae12c90680eb095a4f5f02505ce501',
 
            'ff7ca51e58c505fec0dd2491de52c622bb7a806b',
 
            'dd80b0f6cf5052f17cc738c2951c4f2070200d7f',
 
            '8430a588b43b5d6da365400117c89400326e7992',
 
            'd955cd312c17b02143c04fa1099a352b04368118',
 
            'f67b87e5c629c2ee0ba58f85197e423ff28d735b',
 
            'add63e382e4aabc9e1afdc4bdc24506c269b7618',
 
            'f298fe1189f1b69779a4423f40b48edf92a703fc',
 
            'bd9b619eb41994cac43d67cf4ccc8399c1125808',
 
            '6e125e7c890379446e98980d8ed60fba87d0f6d1',
 
            'd4a54db9f745dfeba6933bf5b1e79e15d0af20bd',
 
            '0b05e4ed56c802098dfc813cbe779b2f49e92500',
 
            '191caa5b2c81ed17c0794bf7bb9958f4dcb0b87e',
 
            '45223f8f114c64bf4d6f853e3c35a369a6305520',
 
            'ca1eb7957a54bce53b12d1a51b13452f95bc7c7e',
 
            'f5ea29fc42ef67a2a5a7aecff10e1566699acd68',
 
            '27d48942240f5b91dfda77accd2caac94708cc7d',
 
            '622f0eb0bafd619d2560c26f80f09e3b0b0d78af',
 
            'e686b958768ee96af8029fe19c6050b1a8dd3b2b'])
 
        self.assertTrue(subset.issubset(set(self.repo.revisions)))
 

	
 

	
 

	
 
    def test_slicing(self):
 
        #4 1 5 10 95
 
        for sfrom, sto, size in [(0, 4, 4), (1, 2, 1), (10, 15, 5),
 
                                 (10, 20, 10), (5, 100, 95)]:
 
            revs = list(self.repo[sfrom:sto])
 
            self.assertEqual(len(revs), size)
 
            self.assertEqual(revs[0], self.repo.get_changeset(sfrom))
 
            self.assertEqual(revs[-1], self.repo.get_changeset(sto - 1))
 

	
 

	
 
    def test_branches(self):
 
        # TODO: Need more tests here
 
        # Removed (those are 'remotes' branches for cloned repo)
 
        #self.assertTrue('master' in self.repo.branches)
 
        #self.assertTrue('gittree' in self.repo.branches)
 
        #self.assertTrue('web-branch' in self.repo.branches)
 
        for name, id in self.repo.branches.items():
 
            self.assertTrue(isinstance(
 
                self.repo.get_changeset(id), GitChangeset))
 

	
 
    def test_tags(self):
 
        # TODO: Need more tests here
 
        self.assertTrue('v0.1.1' in self.repo.tags)
 
        self.assertTrue('v0.1.2' in self.repo.tags)
 
        for name, id in self.repo.tags.items():
 
            self.assertTrue(isinstance(
 
                self.repo.get_changeset(id), GitChangeset))
 

	
 
    def _test_single_changeset_cache(self, revision):
 
        chset = self.repo.get_changeset(revision)
 
        self.assertTrue(revision in self.repo.changesets)
 
        self.assertTrue(chset is self.repo.changesets[revision])
 

	
 
    def test_initial_changeset(self):
 
        id = self.repo.revisions[0]
 
        init_chset = self.repo.get_changeset(id)
 
        self.assertEqual(init_chset.message, 'initial import\n')
 
        self.assertEqual(init_chset.author,
 
            'Marcin Kuzminski <marcin@python-blog.com>')
 
        for path in ('vcs/__init__.py',
 
                     'vcs/backends/BaseRepository.py',
 
                     'vcs/backends/__init__.py'):
 
            self.assertTrue(isinstance(init_chset.get_node(path), FileNode))
 
        for path in ('', 'vcs', 'vcs/backends'):
 
            self.assertTrue(isinstance(init_chset.get_node(path), DirNode))
 

	
 
        self.assertRaises(NodeDoesNotExistError, init_chset.get_node, path='foobar')
 

	
 
        node = init_chset.get_node('vcs/')
 
        self.assertTrue(hasattr(node, 'kind'))
 
        self.assertEqual(node.kind, NodeKind.DIR)
 

	
 
        node = init_chset.get_node('vcs')
 
        self.assertTrue(hasattr(node, 'kind'))
 
        self.assertEqual(node.kind, NodeKind.DIR)
 

	
 
        node = init_chset.get_node('vcs/__init__.py')
 
        self.assertTrue(hasattr(node, 'kind'))
 
        self.assertEqual(node.kind, NodeKind.FILE)
 

	
 
    def test_not_existing_changeset(self):
 
        self.assertRaises(RepositoryError, self.repo.get_changeset,
 
            'f' * 40)
 

	
 
    def test_changeset10(self):
 

	
 
        chset10 = self.repo.get_changeset(self.repo.revisions[9])
 
        README = """===
 
VCS
 
===
 

	
 
Various Version Control System management abstraction layer for Python.
 

	
 
Introduction
 
------------
 

	
 
TODO: To be written...
 

	
 
"""
 
        node = chset10.get_node('README.rst')
 
        self.assertEqual(node.kind, NodeKind.FILE)
 
        self.assertEqual(node.content, README)
 

	
 

	
 
class GitChangesetTest(unittest.TestCase):
 

	
 
    def setUp(self):
 
        self.repo = GitRepository(TEST_GIT_REPO)
 

	
 
    def test_default_changeset(self):
 
        tip = self.repo.get_changeset()
 
        self.assertEqual(tip, self.repo.get_changeset(None))
 
        self.assertEqual(tip, self.repo.get_changeset('tip'))
 

	
 
    def test_root_node(self):
 
        tip = self.repo.get_changeset()
 
        self.assertTrue(tip.root is tip.get_node(''))
 

	
 
    def test_lazy_fetch(self):
 
        """
 
        Test if changeset's nodes expands and are cached as we walk through
 
        the revision. This test is somewhat hard to write as order of tests
 
        is a key here. Written by running command after command in a shell.
 
        """
 
        hex = '2a13f185e4525f9d4b59882791a2d397b90d5ddc'
 
        self.assertTrue(hex in self.repo.revisions)
 
        chset = self.repo.get_changeset(hex)
 
        self.assertTrue(len(chset.nodes) == 0)
 
        root = chset.root
 
        self.assertTrue(len(chset.nodes) == 1)
 
        self.assertTrue(len(root.nodes) == 8)
 
        # accessing root.nodes updates chset.nodes
 
        self.assertTrue(len(chset.nodes) == 9)
 

	
 
        docs = root.get_node('docs')
 
        # we haven't yet accessed anything new as docs dir was already cached
 
        self.assertTrue(len(chset.nodes) == 9)
 
        self.assertTrue(len(docs.nodes) == 8)
 
        # accessing docs.nodes updates chset.nodes
 
        self.assertTrue(len(chset.nodes) == 17)
 

	
 
        self.assertTrue(docs is chset.get_node('docs'))
 
        self.assertTrue(docs is root.nodes[0])
 
        self.assertTrue(docs is root.dirs[0])
 
        self.assertTrue(docs is chset.get_node('docs'))
 

	
 
    def test_nodes_with_changeset(self):
 
        hex = '2a13f185e4525f9d4b59882791a2d397b90d5ddc'
 
        chset = self.repo.get_changeset(hex)
 
        root = chset.root
 
        docs = root.get_node('docs')
 
        self.assertTrue(docs is chset.get_node('docs'))
 
        api = docs.get_node('api')
 
        self.assertTrue(api is chset.get_node('docs/api'))
 
        index = api.get_node('index.rst')
 
        self.assertTrue(index is chset.get_node('docs/api/index.rst'))
 
        self.assertTrue(index is chset.get_node('docs')\
 
            .get_node('api')\
 
            .get_node('index.rst'))
 

	
 
    def test_branch_and_tags(self):
 
        """
 
        rev0 = self.repo.revisions[0]
 
        chset0 = self.repo.get_changeset(rev0)
 
        self.assertEqual(chset0.branch, 'master')
 
        self.assertEqual(chset0.tags, [])
 

	
 
        rev10 = self.repo.revisions[10]
 
        chset10 = self.repo.get_changeset(rev10)
 
        self.assertEqual(chset10.branch, 'master')
 
        self.assertEqual(chset10.tags, [])
 

	
 
        rev44 = self.repo.revisions[44]
 
        chset44 = self.repo.get_changeset(rev44)
 
        self.assertEqual(chset44.branch, 'web-branch')
 

	
 
        tip = self.repo.get_changeset('tip')
 
        self.assertTrue('tip' in tip.tags)
 
        """
 
        # Those tests would fail - branches are now going
 
        # to be changed at main API in order to support git backend
 
        pass
 

	
 
    def _test_slices(self, limit, offset):
 
        count = self.repo.count()
 
        changesets = self.repo.get_changesets(limit=limit, offset=offset)
 
        idx = 0
 
        for changeset in changesets:
 
            rev = offset + idx
 
            idx += 1
 
            rev_id = self.repo.revisions[rev]
 
            if idx > limit:
 
                self.fail("Exceeded limit already (getting revision %s, "
 
                    "there are %s total revisions, offset=%s, limit=%s)"
 
                    % (rev_id, count, offset, limit))
 
            self.assertEqual(changeset, self.repo.get_changeset(rev_id))
 
        result = list(self.repo.get_changesets(limit=limit, offset=offset))
 
        start = offset
 
        end = limit and offset + limit or None
 
        sliced = list(self.repo[start:end])
 
        self.failUnlessEqual(result, sliced,
 
            msg="Comparison failed for limit=%s, offset=%s"
 
            "(get_changeset returned: %s and sliced: %s"
 
            % (limit, offset, result, sliced))
 

	
 
    def _test_file_size(self, revision, path, size):
 
        node = self.repo.get_changeset(revision).get_node(path)
 
        self.assertTrue(node.is_file())
 
        self.assertEqual(node.size, size)
 

	
 
    def test_file_size(self):
 
        to_check = (
 
            ('c1214f7e79e02fc37156ff215cd71275450cffc3',
 
                'vcs/backends/BaseRepository.py', 502),
 
            ('d7e0d30fbcae12c90680eb095a4f5f02505ce501',
 
                'vcs/backends/hg.py', 854),
 
            ('6e125e7c890379446e98980d8ed60fba87d0f6d1',
 
                'setup.py', 1068),
 

	
 
            ('d955cd312c17b02143c04fa1099a352b04368118',
 
                'vcs/backends/base.py', 2921),
 
            ('ca1eb7957a54bce53b12d1a51b13452f95bc7c7e',
 
                'vcs/backends/base.py', 3936),
 
            ('f50f42baeed5af6518ef4b0cb2f1423f3851a941',
 
                'vcs/backends/base.py', 6189),
 
        )
 
        for revision, path, size in to_check:
 
            self._test_file_size(revision, path, size)
 

	
 
    def test_file_history(self):
 
        # we can only check if those revisions are present in the history
 
        # as we cannot update this test every time file is changed
 
        files = {
 
            'setup.py': [
 
                '54386793436c938cff89326944d4c2702340037d',
 
                '51d254f0ecf5df2ce50c0b115741f4cf13985dab',
 
                '998ed409c795fec2012b1c0ca054d99888b22090',
 
                '5e0eb4c47f56564395f76333f319d26c79e2fb09',
 
                '0115510b70c7229dbc5dc49036b32e7d91d23acd',
 
                '7cb3fd1b6d8c20ba89e2264f1c8baebc8a52d36e',
 
                '2a13f185e4525f9d4b59882791a2d397b90d5ddc',
 
                '191caa5b2c81ed17c0794bf7bb9958f4dcb0b87e',
 
                'ff7ca51e58c505fec0dd2491de52c622bb7a806b',
 
            ],
 
            'vcs/nodes.py': [
 
                '33fa3223355104431402a888fa77a4e9956feb3e',
 
                'fa014c12c26d10ba682fadb78f2a11c24c8118e1',
 
                'e686b958768ee96af8029fe19c6050b1a8dd3b2b',
 
                'ab5721ca0a081f26bf43d9051e615af2cc99952f',
 
                'c877b68d18e792a66b7f4c529ea02c8f80801542',
 
                '4313566d2e417cb382948f8d9d7c765330356054',
 
                '6c2303a793671e807d1cfc70134c9ca0767d98c2',
 
                '54386793436c938cff89326944d4c2702340037d',
 
                '54000345d2e78b03a99d561399e8e548de3f3203',
 
                '1c6b3677b37ea064cb4b51714d8f7498f93f4b2b',
 
                '2d03ca750a44440fb5ea8b751176d1f36f8e8f46',
 
                '2a08b128c206db48c2f0b8f70df060e6db0ae4f8',
 
                '30c26513ff1eb8e5ce0e1c6b477ee5dc50e2f34b',
 
                'ac71e9503c2ca95542839af0ce7b64011b72ea7c',
 
                '12669288fd13adba2a9b7dd5b870cc23ffab92d2',
 
                '5a0c84f3e6fe3473e4c8427199d5a6fc71a9b382',
 
                '12f2f5e2b38e6ff3fbdb5d722efed9aa72ecb0d5',
 
                '5eab1222a7cd4bfcbabc218ca6d04276d4e27378',
 
                'f50f42baeed5af6518ef4b0cb2f1423f3851a941',
 
                'd7e390a45f6aa96f04f5e7f583ad4f867431aa25',
 
                'f15c21f97864b4f071cddfbf2750ec2e23859414',
 
                'e906ef056cf539a4e4e5fc8003eaf7cf14dd8ade',
 
                'ea2b108b48aa8f8c9c4a941f66c1a03315ca1c3b',
 
                '84dec09632a4458f79f50ddbbd155506c460b4f9',
 
                '0115510b70c7229dbc5dc49036b32e7d91d23acd',
 
                '2a13f185e4525f9d4b59882791a2d397b90d5ddc',
 
                '3bf1c5868e570e39569d094f922d33ced2fa3b2b',
 
                'b8d04012574729d2c29886e53b1a43ef16dd00a1',
 
                '6970b057cffe4aab0a792aa634c89f4bebf01441',
 
                'dd80b0f6cf5052f17cc738c2951c4f2070200d7f',
 
                'ff7ca51e58c505fec0dd2491de52c622bb7a806b',
 
            ],
 
            'vcs/backends/git.py': [
 
                '4cf116ad5a457530381135e2f4c453e68a1b0105',
 
                '9a751d84d8e9408e736329767387f41b36935153',
 
                'cb681fb539c3faaedbcdf5ca71ca413425c18f01',
 
                '428f81bb652bcba8d631bce926e8834ff49bdcc6',
 
                '180ab15aebf26f98f714d8c68715e0f05fa6e1c7',
 
                '2b8e07312a2e89e92b90426ab97f349f4bce2a3a',
 
                '50e08c506174d8645a4bb517dd122ac946a0f3bf',
 
                '54000345d2e78b03a99d561399e8e548de3f3203',
 
            ],
 
        }
 
        for path, revs in files.items():
 
            node = self.repo.get_changeset(revs[0]).get_node(path)
 
            node_revs = [chset.raw_id for chset in node.history]
 
            self.assertTrue(set(revs).issubset(set(node_revs)),
 
                "We assumed that %s is subset of revisions for which file %s "
 
                "has been changed, and history of that node returned: %s"
 
                % (revs, path, node_revs))
 

	
 
    def test_file_annotate(self):
 
        files = {
 
            'vcs/backends/__init__.py': {
 
                'c1214f7e79e02fc37156ff215cd71275450cffc3': {
 
                    'lines_no': 1,
 
                    'changesets': [
 
                        'c1214f7e79e02fc37156ff215cd71275450cffc3',
 
                    ],
 
                },
 
                '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647': {
 
                    'lines_no': 21,
 
                    'changesets': [
 
                        '49d3fd156b6f7db46313fac355dca1a0b94a0017',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                    ],
 
                },
 
                'e29b67bd158580fc90fc5e9111240b90e6e86064': {
 
                    'lines_no': 32,
 
                    'changesets': [
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '5eab1222a7cd4bfcbabc218ca6d04276d4e27378',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '54000345d2e78b03a99d561399e8e548de3f3203',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '78c3f0c23b7ee935ec276acb8b8212444c33c396',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '2a13f185e4525f9d4b59882791a2d397b90d5ddc',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '78c3f0c23b7ee935ec276acb8b8212444c33c396',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '992f38217b979d0b0987d0bae3cc26dac85d9b19',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                        '16fba1ae9334d79b66d7afed2c2dfbfa2ae53647',
 
                    ],
 
                },
 
            },
 
        }
 

	
 
        for fname, revision_dict in files.items():
 
            for rev, data in revision_dict.items():
 
                cs = self.repo.get_changeset(rev)
 

	
 
                l1_1 = [x[1] for x in cs.get_file_annotate(fname)]
 
                l1_2 = [x[2]().raw_id for x in cs.get_file_annotate(fname)]
 
                self.assertEqual(l1_1, l1_2)
 
                l1 = l1_1
 
                l2 = files[fname][rev]['changesets']
 
                self.assertTrue(l1 == l2 , "The lists of revision for %s@rev %s"
 
                                "from annotation list should match each other, "
 
                                "got \n%s \nvs \n%s " % (fname, rev, l1, l2))
 

	
 
    def test_files_state(self):
 
        """
 
        Tests state of FileNodes.
 
        """
 
        node = self.repo\
 
            .get_changeset('e6ea6d16e2f26250124a1f4b4fe37a912f9d86a0')\
 
            .get_node('vcs/utils/diffs.py')
 
        self.assertTrue(node.state, NodeState.ADDED)
 
        self.assertTrue(node.added)
 
        self.assertFalse(node.changed)
 
        self.assertFalse(node.not_changed)
 
        self.assertFalse(node.removed)
 

	
 
        node = self.repo\
 
            .get_changeset('33fa3223355104431402a888fa77a4e9956feb3e')\
 
            .get_node('.hgignore')
 
        self.assertTrue(node.state, NodeState.CHANGED)
 
        self.assertFalse(node.added)
 
        self.assertTrue(node.changed)
 
        self.assertFalse(node.not_changed)
 
        self.assertFalse(node.removed)
 

	
 
        node = self.repo\
 
            .get_changeset('e29b67bd158580fc90fc5e9111240b90e6e86064')\
 
            .get_node('setup.py')
 
        self.assertTrue(node.state, NodeState.NOT_CHANGED)
 
        self.assertFalse(node.added)
 
        self.assertFalse(node.changed)
 
        self.assertTrue(node.not_changed)
 
        self.assertFalse(node.removed)
 

	
 
        # If node has REMOVED state then trying to fetch it would raise
 
        # ChangesetError exception
 
        chset = self.repo.get_changeset(
 
            'fa6600f6848800641328adbf7811fd2372c02ab2')
 
        path = 'vcs/backends/BaseRepository.py'
 
        self.assertRaises(NodeDoesNotExistError, chset.get_node, path)
 
        # but it would be one of ``removed`` (changeset's attribute)
 
        self.assertTrue(path in [rf.path for rf in chset.removed])
 

	
 
        chset = self.repo.get_changeset(
 
            '54386793436c938cff89326944d4c2702340037d')
 
        changed = ['setup.py', 'tests/test_nodes.py', 'vcs/backends/hg.py',
 
            'vcs/nodes.py']
 
        self.assertEqual(set(changed), set([f.path for f in chset.changed]))
 

	
 
    def test_commit_message_is_unicode(self):
 
        for cs in self.repo:
 
            self.assertEqual(type(cs.message), unicode)
 

	
 
    def test_changeset_author_is_unicode(self):
 
        for cs in self.repo:
 
            self.assertEqual(type(cs.author), unicode)
 

	
 
    def test_repo_files_content_is_unicode(self):
 
        changeset = self.repo.get_changeset()
 
        for node in changeset.get_node('/'):
 
            if node.is_file():
 
                self.assertEqual(type(node.content), unicode)
 

	
 
    def test_wrong_path(self):
 
        # There is 'setup.py' in the root dir but not there:
 
        path = 'foo/bar/setup.py'
 
        tip = self.repo.get_changeset()
 
        self.assertRaises(VCSError, tip.get_node, path)
 

	
 
    def test_author_email(self):
 
        self.assertEqual('marcin@python-blog.com',
 
          self.repo.get_changeset('c1214f7e79e02fc37156ff215cd71275450cffc3')\
 
          .author_email)
 
        self.assertEqual('lukasz.balcerzak@python-center.pl',
 
          self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b')\
 
          .author_email)
 
        self.assertEqual('none@none',
 
          self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992')\
 
          .author_email)
 

	
 
    def test_author_username(self):
 
        self.assertEqual('Marcin Kuzminski',
 
          self.repo.get_changeset('c1214f7e79e02fc37156ff215cd71275450cffc3')\
 
          .author_name)
 
        self.assertEqual('Lukasz Balcerzak',
 
          self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b')\
 
          .author_name)
 
        self.assertEqual('marcink',
 
          self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992')\
 
          .author_name)
 

	
 

	
 
class GitSpecificTest(unittest.TestCase):
 

	
 
    def test_error_is_raised_for_added_if_diff_name_status_is_wrong(self):
 
        repo = mock.MagicMock()
 
        changeset = GitChangeset(repo, 'foobar')
 
        changeset._diff_name_status = 'foobar'
 
        with self.assertRaises(VCSError):
 
            changeset.added
 

	
 
    def test_error_is_raised_for_changed_if_diff_name_status_is_wrong(self):
 
        repo = mock.MagicMock()
 
        changeset = GitChangeset(repo, 'foobar')
 
        changeset._diff_name_status = 'foobar'
 
        with self.assertRaises(VCSError):
 
            changeset.added
 

	
 
    def test_error_is_raised_for_removed_if_diff_name_status_is_wrong(self):
 
        repo = mock.MagicMock()
 
        changeset = GitChangeset(repo, 'foobar')
 
        changeset._diff_name_status = 'foobar'
 
        with self.assertRaises(VCSError):
 
            changeset.added
 

	
 

	
 
class GitSpecificWithRepoTest(_BackendTestMixin, unittest.TestCase):
 
    backend_alias = 'git'
 

	
 
    @classmethod
 
    def _get_commits(cls):
 
        return [
 
            {
 
                'message': 'Initial',
 
                'author': 'Joe Doe <joe.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 20),
 
                'added': [
 
                    FileNode('foobar/static/js/admin/base.js', content='base'),
 
                    FileNode('foobar/static/admin', content='admin',
 
                        mode=0120000), # this is a link
 
                    FileNode('foo', content='foo'),
 
                ],
 
            },
 
            {
 
                'message': 'Second',
 
                'author': 'Joe Doe <joe.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 22),
 
                'added': [
 
                    FileNode('foo2', content='foo2'),
 
                ],
 
            },
 
        ]
 

	
 
    def test_paths_slow_traversing(self):
 
        cs = self.repo.get_changeset()
 
        self.assertEqual(cs.get_node('foobar').get_node('static').get_node('js')
 
            .get_node('admin').get_node('base.js').content, 'base')
 

	
 
    def test_paths_fast_traversing(self):
 
        cs = self.repo.get_changeset()
 
        self.assertEqual(cs.get_node('foobar/static/js/admin/base.js').content,
 
            'base')
 

	
 
    def test_workdir_get_branch(self):
 
        self.repo.run_git_command('checkout -b production')
 
        self.repo.run_git_command(['checkout', '-b', 'production'])
 
        # Regression test: one of following would fail if we don't check
 
        # .git/HEAD file
 
        self.repo.run_git_command('checkout production')
 
        self.repo.run_git_command(['checkout', 'production'])
 
        self.assertEqual(self.repo.workdir.get_branch(), 'production')
 
        self.repo.run_git_command('checkout master')
 
        self.repo.run_git_command(['checkout', 'master'])
 
        self.assertEqual(self.repo.workdir.get_branch(), 'master')
 

	
 
    def test_get_diff_runs_git_command_with_hashes(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(0, 1)
 
        self.repo.run_git_command.assert_called_once_with(
 
          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s' %
 
            (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(0), self.repo._get_revision(1)])
 

	
 
    def test_get_diff_runs_git_command_with_str_hashes(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1)
 
        self.repo.run_git_command.assert_called_once_with(
 
            'show -U%s --full-index --binary -p -M --abbrev=40 %s' %
 
            (3, self.repo._get_revision(1)))
 
            ['show', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(1)])
 

	
 
    def test_get_diff_runs_git_command_with_path_if_its_given(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(0, 1, 'foo')
 
        self.repo.run_git_command.assert_called_once_with(
 
          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s -- "foo"'
 
            % (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(0), self.repo._get_revision(1), '--', 'foo'])
 

	
 

	
 
class GitRegressionTest(_BackendTestMixin, unittest.TestCase):
 
    backend_alias = 'git'
 

	
 
    @classmethod
 
    def _get_commits(cls):
 
        return [
 
            {
 
                'message': 'Initial',
 
                'author': 'Joe Doe <joe.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 20),
 
                'added': [
 
                    FileNode('bot/__init__.py', content='base'),
 
                    FileNode('bot/templates/404.html', content='base'),
 
                    FileNode('bot/templates/500.html', content='base'),
 
                ],
 
            },
 
            {
 
                'message': 'Second',
 
                'author': 'Joe Doe <joe.doe@example.com>',
 
                'date': datetime.datetime(2010, 1, 1, 22),
 
                'added': [
 
                    FileNode('bot/build/migrations/1.py', content='foo2'),
 
                    FileNode('bot/build/migrations/2.py', content='foo2'),
 
                    FileNode('bot/build/static/templates/f.html', content='foo2'),
 
                    FileNode('bot/build/static/templates/f1.html', content='foo2'),
 
                    FileNode('bot/build/templates/err.html', content='foo2'),
 
                    FileNode('bot/build/templates/err2.html', content='foo2'),
 
                ],
 
            },
 
        ]
 

	
 
    def test_similar_paths(self):
 
        cs = self.repo.get_changeset()
 
        paths = lambda *n:[x.path for x in n]
 
        self.assertEqual(paths(*cs.get_nodes('bot')), ['bot/build', 'bot/templates', 'bot/__init__.py'])
 
        self.assertEqual(paths(*cs.get_nodes('bot/build')), ['bot/build/migrations', 'bot/build/static', 'bot/build/templates'])
 
        self.assertEqual(paths(*cs.get_nodes('bot/build/static')), ['bot/build/static/templates'])
 
        # this get_nodes below causes troubles !
 
        self.assertEqual(paths(*cs.get_nodes('bot/build/static/templates')), ['bot/build/static/templates/f.html', 'bot/build/static/templates/f1.html'])
 
        self.assertEqual(paths(*cs.get_nodes('bot/build/templates')), ['bot/build/templates/err.html', 'bot/build/templates/err2.html'])
 
        self.assertEqual(paths(*cs.get_nodes('bot/templates/')), ['bot/templates/404.html', 'bot/templates/500.html'])
 

	
 
if __name__ == '__main__':
 
    unittest.main()
0 comments (0 inline, 0 general)