Changeset - e51ad2cd400e
kallithea/bin/kallithea_cli_repo.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
This file was forked by the Kallithea project in July 2014 and later moved.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Feb 9, 2013
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 
import datetime
 
import os
 
import re
 
import shutil
 

	
 
import click
 

	
 
import kallithea.bin.kallithea_cli_base as cli_base
 
from kallithea.lib.utils import REMOVED_REPO_PAT, repo2db_mapper
 
from kallithea.lib.utils2 import ask_ok, safe_str
 
from kallithea.lib.utils2 import ask_ok
 
from kallithea.model.db import Repository, Ui
 
from kallithea.model.meta import Session
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
@cli_base.register_command(config_file_initialize_app=True)
 
@click.option('--remove-missing', is_flag=True,
 
        help='Remove missing repositories from the Kallithea database.')
 
def repo_scan(remove_missing):
 
    """Scan filesystem for repositories.
 

	
 
    Search the configured repository root for new repositories and add them
 
    into Kallithea.
 
    Additionally, report repositories that were previously known to Kallithea
 
    but are no longer present on the filesystem. If option --remove-missing is
 
    given, remove the missing repositories from the Kallithea database.
 
    """
 
    click.echo('Now scanning root location for new repos ...')
 
    added, removed = repo2db_mapper(ScmModel().repo_scan(),
 
                                    remove_obsolete=remove_missing)
 
    click.echo('Scan completed.')
 
    if added:
 
        click.echo('Added: %s' % ', '.join(added))
 
    if removed:
 
        click.echo('%s: %s' % ('Removed' if remove_missing else 'Missing',
 
                          ', '.join(removed)))
 

	
 
@cli_base.register_command(config_file_initialize_app=True)
 
@click.argument('repositories', nargs=-1)
 
def repo_update_metadata(repositories):
 
    """
 
    Update repository metadata in database from repository content.
 

	
 
    In normal operation, Kallithea will keep caches up-to-date
 
    automatically. However, if repositories are externally modified, e.g. by
 
    a direct push via the filesystem rather than via a Kallithea URL,
 
    Kallithea is not aware of it. In this case, you should manually run this
 
    command to update the repository cache.
 

	
 
    If no repositories are specified, the caches of all repositories are
 
    updated.
 
    """
 
    if not repositories:
 
        repo_list = Repository.query().all()
 
    else:
 
        repo_names = [n.strip() for n in repositories]
 
        repo_list = list(Repository.query()
 
                        .filter(Repository.repo_name.in_(repo_names)))
 

	
 
    for repo in repo_list:
 
        # update latest revision metadata in database
 
        repo.update_changeset_cache()
 
        # invalidate in-memory VCS object cache... will be repopulated on
 
        # first access
 
        repo.set_invalidate()
 

	
 
    Session().commit()
 

	
 
    click.echo('Updated database with information about latest change in the following %s repositories:' % (len(repo_list)))
 
    click.echo('\n'.join(repo.repo_name for repo in repo_list))
 

	
 
@cli_base.register_command(config_file_initialize_app=True)
 
@click.option('--ask/--no-ask', default=True, help='Ask for confirmation or not. Default is --ask.')
 
@click.option('--older-than',
 
        help="""Only purge repositories that have been removed at least the given time ago.
 
        For example, '--older-than=30d' purges repositories deleted 30 days ago or longer.
 
        Possible suffixes: d (days), h (hours), m (minutes), s (seconds).""")
 
def repo_purge_deleted(ask, older_than):
 
    """Purge backups of deleted repositories.
 

	
 
    When a repository is deleted via the Kallithea web interface, the actual
 
    data is still present on the filesystem but set aside using a special name.
 
    This command allows to delete these files permanently.
 
    """
 
    def _parse_older_than(val):
 
        regex = re.compile(r'((?P<days>\d+?)d)?((?P<hours>\d+?)h)?((?P<minutes>\d+?)m)?((?P<seconds>\d+?)s)?')
 
        parts = regex.match(val)
 
        if not parts:
 
            return
 
        parts = parts.groupdict()
 
        time_params = {}
 
        for name, param in parts.items():
 
            if param:
 
                time_params[name] = int(param)
 
        return datetime.timedelta(**time_params)
 

	
 
    def _extract_date(name):
 
        """
 
        Extract the date part from rm__<date> pattern of removed repos,
 
        and convert it to datetime object
 

	
 
        :param name:
 
        """
 
        date_part = name[4:19]  # 4:19 since we don't parse milliseconds
 
        return datetime.datetime.strptime(date_part, '%Y%m%d_%H%M%S')
 

	
 
    repos_location = Ui.get_repos_location()
 
    to_remove = []
 
    for dn_, dirs, f in os.walk(safe_str(repos_location)):
 
    for dn_, dirs, f in os.walk(repos_location):
 
        alldirs = list(dirs)
 
        del dirs[:]
 
        if ('.hg' in alldirs or
 
            '.git' in alldirs or
 
            '.svn' in alldirs or
 
            'objects' in alldirs and ('refs' in alldirs or 'packed-refs' in f)
 
        ):
 
            continue
 
        for loc in alldirs:
 
            if REMOVED_REPO_PAT.match(loc):
 
                to_remove.append([os.path.join(dn_, loc),
 
                                  _extract_date(loc)])
 
            else:
 
                dirs.append(loc)
 
        if dirs:
 
            click.echo('Scanning: %s' % dn_)
 

	
 
    if not to_remove:
 
        click.echo('There are no deleted repositories.')
 
        return
 

	
 
    # filter older than (if present)!
 
    if older_than:
 
        now = datetime.datetime.now()
 
        to_remove_filtered = []
 
        older_than_date = _parse_older_than(older_than)
 
        for name, date_ in to_remove:
 
            repo_age = now - date_
 
            if repo_age > older_than_date:
 
                to_remove_filtered.append([name, date_])
 

	
 
        to_remove = to_remove_filtered
 

	
 
        if not to_remove:
 
            click.echo('There are no deleted repositories older than %s (%s)'
 
                    % (older_than, older_than_date))
 
            return
 

	
 
        click.echo('Considering %s deleted repositories older than %s (%s).'
 
            % (len(to_remove), older_than, older_than_date))
 
    else:
 
        click.echo('Considering %s deleted repositories.' % len(to_remove))
 

	
 
    if not ask:
 
        remove = True
 
    else:
 
        remove = ask_ok('The following repositories will be removed completely:\n%s\n'
 
                'Do you want to proceed? [y/n] '
 
                % '\n'.join(['%s deleted on %s' % (safe_str(x[0]), safe_str(x[1]))
 
                                     for x in to_remove]))
 
            'Do you want to proceed? [y/n] ' %
 
            '\n'.join('%s deleted on %s' % (path, date_) for path, date_ in to_remove))
 

	
 
    if remove:
 
        for path, date_ in to_remove:
 
            click.echo('Purging repository %s' % path)
 
            shutil.rmtree(path)
 
    else:
 
        click.echo('Nothing done, exiting...')
kallithea/controllers/api/__init__.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.api
 
~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
JSON RPC controller
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Aug 20, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import inspect
 
import itertools
 
import logging
 
import time
 
import traceback
 
import types
 

	
 
from tg import Response, TGController, request, response
 
from webob.exc import HTTPError, HTTPException
 

	
 
from kallithea.lib import ext_json
 
from kallithea.lib.auth import AuthUser
 
from kallithea.lib.base import _get_ip_addr as _get_ip
 
from kallithea.lib.base import get_path_info
 
from kallithea.lib.utils2 import ascii_bytes, safe_str
 
from kallithea.lib.utils2 import ascii_bytes
 
from kallithea.model.db import User
 

	
 

	
 
log = logging.getLogger('JSONRPC')
 

	
 

	
 
class JSONRPCError(BaseException):
 

	
 
    def __init__(self, message):
 
        self.message = message
 
        super(JSONRPCError, self).__init__()
 

	
 
    def __str__(self):
 
        return safe_str(self.message)
 
        return self.message
 

	
 

	
 
class JSONRPCErrorResponse(Response, HTTPException):
 
    """
 
    Generate a Response object with a JSON-RPC error body
 
    """
 

	
 
    def __init__(self, message=None, retid=None, code=None):
 
        HTTPException.__init__(self, message, self)
 
        Response.__init__(self,
 
                          json_body=dict(id=retid, result=None, error=message),
 
                          status=code,
 
                          content_type='application/json')
 

	
 

	
 
class JSONRPCController(TGController):
 
    """
 
     A WSGI-speaking JSON-RPC controller class
 

	
 
     See the specification:
 
     <http://json-rpc.org/wiki/specification>`.
 

	
 
     Valid controller return values should be json-serializable objects.
 

	
 
     Sub-classes should catch their exceptions and raise JSONRPCError
 
     if they want to pass meaningful errors to the client.
 

	
 
     """
 

	
 
    def _get_ip_addr(self, environ):
 
        return _get_ip(environ)
 

	
 
    def _get_method_args(self):
 
        """
 
        Return `self._rpc_args` to dispatched controller method
 
        chosen by __call__
 
        """
 
        return self._rpc_args
 

	
 
    def _dispatch(self, state, remainder=None):
 
        """
 
        Parse the request body as JSON, look up the method on the
 
        controller and if it exists, dispatch to it.
 
        """
 
        # Since we are here we should respond as JSON
 
        response.content_type = 'application/json'
 

	
 
        environ = state.request.environ
 
        start = time.time()
 
        ip_addr = self._get_ip_addr(environ)
 
        self._req_id = None
 
        if 'CONTENT_LENGTH' not in environ:
 
            log.debug("No Content-Length")
 
            raise JSONRPCErrorResponse(retid=self._req_id,
 
                                       message="No Content-Length in request")
 
        else:
 
            length = environ['CONTENT_LENGTH'] or 0
 
            length = int(environ['CONTENT_LENGTH'])
 
            log.debug('Content-Length: %s', length)
 

	
 
        if length == 0:
 
            raise JSONRPCErrorResponse(retid=self._req_id,
 
                                       message="Content-Length is 0")
 

	
 
        raw_body = environ['wsgi.input'].read(length)
 

	
 
        try:
 
            json_body = ext_json.loads(raw_body)
 
        except ValueError as e:
 
            # catch JSON errors Here
 
            raise JSONRPCErrorResponse(retid=self._req_id,
 
                                       message="JSON parse error ERR:%s RAW:%r"
 
                                                % (e, raw_body))
 

	
 
        # check AUTH based on API key
 
        try:
 
            self._req_api_key = json_body['api_key']
 
            self._req_id = json_body['id']
 
            self._req_method = json_body['method']
 
            self._request_params = json_body['args']
 
            if not isinstance(self._request_params, dict):
 
                self._request_params = {}
 

	
 
            log.debug('method: %s, params: %s',
 
                      self._req_method, self._request_params)
 
        except KeyError as e:
 
            raise JSONRPCErrorResponse(retid=self._req_id,
 
                                       message='Incorrect JSON query missing %s' % e)
 

	
 
        # check if we can find this session using api_key
 
        try:
 
            u = User.get_by_api_key(self._req_api_key)
 
            auth_user = AuthUser.make(dbuser=u, ip_addr=ip_addr)
 
            if auth_user is None:
 
                raise JSONRPCErrorResponse(retid=self._req_id,
 
                                           message='Invalid API key')
kallithea/controllers/compare.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.compare
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
compare controller showing differences between two
 
repos, branches, bookmarks or tips
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: May 6, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 
import re
 

	
 
import mercurial.unionrepo
 
from tg import request
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from webob.exc import HTTPBadRequest, HTTPFound, HTTPNotFound
 

	
 
from kallithea.config.routing import url
 
from kallithea.controllers.changeset import _context_url, _ignorews_url
 
from kallithea.lib import diffs
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.base import BaseRepoController, render
 
from kallithea.lib.graphmod import graph_data
 
from kallithea.lib.utils2 import ascii_bytes, ascii_str, safe_bytes, safe_int, safe_str
 
from kallithea.lib.utils2 import ascii_bytes, ascii_str, safe_bytes, safe_int
 
from kallithea.model.db import Repository
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class CompareController(BaseRepoController):
 

	
 
    def _before(self, *args, **kwargs):
 
        super(CompareController, self)._before(*args, **kwargs)
 

	
 
        # The base repository has already been retrieved.
 
        c.a_repo = c.db_repo
 

	
 
        # Retrieve the "changeset" repository (default: same as base).
 
        other_repo = request.GET.get('other_repo', None)
 
        if other_repo is None:
 
            c.cs_repo = c.a_repo
 
        else:
 
            c.cs_repo = Repository.get_by_repo_name(other_repo)
 
            if c.cs_repo is None:
 
                msg = _('Could not find other repository %s') % other_repo
 
                h.flash(msg, category='error')
 
                raise HTTPFound(location=url('compare_home', repo_name=c.a_repo.repo_name))
 

	
 
        # Verify that it's even possible to compare these two repositories.
 
        if c.a_repo.scm_instance.alias != c.cs_repo.scm_instance.alias:
 
            msg = _('Cannot compare repositories of different types')
 
            h.flash(msg, category='error')
 
            raise HTTPFound(location=url('compare_home', repo_name=c.a_repo.repo_name))
 

	
 
    @staticmethod
 
    def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev):
 
        """
 
        Returns lists of changesets that can be merged from org_repo@org_rev
 
        to other_repo@other_rev
 
        ... and the other way
 
        ... and the ancestors that would be used for merge
 

	
 
        :param org_repo: repo object, that is most likely the original repo we forked from
 
        :param org_rev: the revision we want our compare to be made
 
        :param other_repo: repo object, most likely the fork of org_repo. It has
 
            all changesets that we need to obtain
 
        :param other_rev: revision we want out compare to be made on other_repo
 
        """
 
        ancestors = None
 
        if org_rev == other_rev:
 
            org_changesets = []
 
            other_changesets = []
 

	
 
        elif alias == 'hg':
 
            # case two independent repos
 
            if org_repo != other_repo:
 
                hgrepo = mercurial.unionrepo.makeunionrepository(other_repo.baseui,
 
                                                       safe_bytes(other_repo.path),
 
                                                       safe_bytes(org_repo.path))
 
                # all ancestors of other_rev will be in other_repo and
 
                # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot
 

	
 
            # no remote compare do it on the same repository
 
            else:
 
                hgrepo = other_repo._repo
 

	
 
            ancestors = [ascii_str(hgrepo[ancestor].hex()) for ancestor in
 
                         hgrepo.revs(b"id(%s) & ::id(%s)", ascii_bytes(other_rev), ascii_bytes(org_rev))]
 
            if ancestors:
 
                log.debug("shortcut found: %s is already an ancestor of %s", other_rev, org_rev)
 
            else:
 
                log.debug("no shortcut found: %s is not an ancestor of %s", other_rev, org_rev)
 
                ancestors = [ascii_str(hgrepo[ancestor].hex()) for ancestor in
 
                             hgrepo.revs(b"heads(::id(%s) & ::id(%s))", ascii_bytes(org_rev), ascii_bytes(other_rev))] # FIXME: expensive!
 

	
 
            other_changesets = [
 
                other_repo.get_changeset(rev)
 
                for rev in hgrepo.revs(
 
                    b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
 
                    ascii_bytes(other_rev), ascii_bytes(org_rev), ascii_bytes(org_rev))
 
            ]
 
            org_changesets = [
 
                org_repo.get_changeset(ascii_str(hgrepo[rev].hex()))
 
                for rev in hgrepo.revs(
 
                    b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
 
                    ascii_bytes(org_rev), ascii_bytes(other_rev), ascii_bytes(other_rev))
 
            ]
 

	
 
        elif alias == 'git':
 
            if org_repo != other_repo:
 
                from dulwich.repo import Repo
 
                from dulwich.client import SubprocessGitClient
 

	
 
                gitrepo = Repo(org_repo.path)
 
                SubprocessGitClient(thin_packs=False).fetch(safe_str(other_repo.path), gitrepo)
 
                SubprocessGitClient(thin_packs=False).fetch(other_repo.path, gitrepo)
 

	
 
                gitrepo_remote = Repo(other_repo.path)
 
                SubprocessGitClient(thin_packs=False).fetch(safe_str(org_repo.path), gitrepo_remote)
 
                SubprocessGitClient(thin_packs=False).fetch(org_repo.path, gitrepo_remote)
 

	
 
                revs = [
 
                    ascii_str(x.commit.id)
 
                    for x in gitrepo_remote.get_walker(include=[ascii_bytes(other_rev)],
 
                                                       exclude=[ascii_bytes(org_rev)])
 
                ]
 
                other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
 
                if other_changesets:
 
                    ancestors = [other_changesets[0].parents[0].raw_id]
 
                else:
 
                    # no changesets from other repo, ancestor is the other_rev
 
                    ancestors = [other_rev]
 

	
 
                gitrepo.close()
 
                gitrepo_remote.close()
 

	
 
            else:
 
                so = org_repo.run_git_command(
 
                    ['log', '--reverse', '--pretty=format:%H',
 
                     '-s', '%s..%s' % (org_rev, other_rev)]
 
                )
 
                other_changesets = [org_repo.get_changeset(cs)
 
                              for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
 
                so = org_repo.run_git_command(
 
                    ['merge-base', org_rev, other_rev]
 
                )
 
                ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]]
 
            org_changesets = []
 

	
 
        else:
 
            raise Exception('Bad alias only git and hg is allowed')
 

	
 
        return other_changesets, org_changesets, ancestors
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def index(self, repo_name):
 
        c.compare_home = True
 
        c.a_ref_name = c.cs_ref_name = None
 
        return render('compare/compare_diff.html')
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def compare(self, repo_name, org_ref_type, org_ref_name, other_ref_type, other_ref_name):
 
        org_ref_name = org_ref_name.strip()
 
        other_ref_name = other_ref_name.strip()
 

	
 
        # If merge is True:
 
        #   Show what org would get if merged with other:
 
        #   List changesets that are ancestors of other but not of org.
 
        #   New changesets in org is thus ignored.
 
        #   Diff will be from common ancestor, and merges of org to other will thus be ignored.
 
        # If merge is False:
 
        #   Make a raw diff from org to other, no matter if related or not.
 
        #   Changesets in one and not in the other will be ignored
 
        merge = bool(request.GET.get('merge'))
 
        # fulldiff disables cut_off_limit
 
        fulldiff = request.GET.get('fulldiff')
 
        # partial uses compare_cs.html template directly
 
        partial = request.environ.get('HTTP_X_PARTIAL_XHR')
 
        # is_ajax_preview puts hidden input field with changeset revisions
 
        c.is_ajax_preview = partial and request.GET.get('is_ajax_preview')
 
        # swap url for compare_diff page - never partial and never is_ajax_preview
 
        c.swap_url = h.url('compare_url',
 
            repo_name=c.cs_repo.repo_name,
 
            org_ref_type=other_ref_type, org_ref_name=other_ref_name,
 
            other_repo=c.a_repo.repo_name,
 
            other_ref_type=org_ref_type, other_ref_name=org_ref_name,
 
            merge=merge or '')
 

	
 
        # set callbacks for generating markup for icons
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 

	
 
        c.a_rev = self._get_ref_rev(c.a_repo, org_ref_type, org_ref_name,
 
            returnempty=True)
 
        c.cs_rev = self._get_ref_rev(c.cs_repo, other_ref_type, other_ref_name)
 

	
 
        c.compare_home = False
 
        c.a_ref_name = org_ref_name
 
        c.a_ref_type = org_ref_type
 
        c.cs_ref_name = other_ref_name
 
        c.cs_ref_type = other_ref_type
 

	
 
        c.cs_ranges, c.cs_ranges_org, c.ancestors = self._get_changesets(
 
            c.a_repo.scm_instance.alias, c.a_repo.scm_instance, c.a_rev,
 
            c.cs_repo.scm_instance, c.cs_rev)
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
 
        c.cs_statuses = c.cs_repo.statuses(raw_ids)
 

	
 
        revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
 
        c.jsdata = graph_data(c.cs_repo.scm_instance, revs)
 

	
kallithea/controllers/files.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.files
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Files controller for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 21, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 
import os
 
import posixpath
 
import shutil
 
import tempfile
 
import traceback
 
from collections import OrderedDict
 

	
 
from tg import request, response
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from webob.exc import HTTPFound, HTTPNotFound
 

	
 
from kallithea.config.routing import url
 
from kallithea.controllers.changeset import _context_url, _ignorews_url, anchor_url, get_ignore_ws, get_line_ctx
 
from kallithea.lib import diffs
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.base import BaseRepoController, jsonify, render
 
from kallithea.lib.exceptions import NonRelativePathError
 
from kallithea.lib.utils import action_logger
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, safe_unicode, str2bool
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_unicode, str2bool
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (
 
    ChangesetDoesNotExistError, ChangesetError, EmptyRepositoryError, ImproperArchiveTypeError, NodeAlreadyExistsError, NodeDoesNotExistError, NodeError, RepositoryError, VCSError)
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.model.db import Repository
 
from kallithea.model.repo import RepoModel
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class FilesController(BaseRepoController):
 

	
 
    def _before(self, *args, **kwargs):
 
        super(FilesController, self)._before(*args, **kwargs)
 

	
 
    def __get_cs(self, rev, silent_empty=False):
 
        """
 
        Safe way to get changeset if error occur it redirects to tip with
 
        proper message
 

	
 
        :param rev: revision to fetch
 
        :silent_empty: return None if repository is empty
 
        """
 

	
 
        try:
 
            return c.db_repo_scm_instance.get_changeset(rev)
 
        except EmptyRepositoryError as e:
 
            if silent_empty:
 
                return None
 
            url_ = url('files_add_home',
 
                       repo_name=c.repo_name,
 
                       revision=0, f_path='', anchor='edit')
 
            add_new = h.link_to(_('Click here to add new file'), url_, class_="alert-link")
 
            h.flash(_('There are no files yet.') + ' ' + add_new, category='warning')
 
            raise HTTPNotFound()
 
        except (ChangesetDoesNotExistError, LookupError):
 
            msg = _('Such revision does not exist for this repository')
 
            h.flash(msg, category='error')
 
            raise HTTPNotFound()
 
        except RepositoryError as e:
 
            h.flash(e, category='error')
 
            raise HTTPNotFound()
 

	
 
    def __get_filenode(self, cs, path):
 
        """
 
        Returns file_node or raise HTTP error.
 

	
 
        :param cs: given changeset
 
        :param path: path to lookup
 
        """
 

	
 
        try:
 
            file_node = cs.get_node(path)
 
            if file_node.is_dir():
 
                raise RepositoryError('given path is a directory')
 
        except ChangesetDoesNotExistError:
 
            msg = _('Such revision does not exist for this repository')
 
            h.flash(msg, category='error')
 
            raise HTTPNotFound()
 
        except RepositoryError as e:
 
            h.flash(e, category='error')
 
            raise HTTPNotFound()
 

	
 
        return file_node
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def index(self, repo_name, revision, f_path, annotate=False):
 
        # redirect to given revision from form if given
 
        post_revision = request.POST.get('at_rev', None)
 
        if post_revision:
 
            cs = self.__get_cs(post_revision) # FIXME - unused!
 

	
 
        c.revision = revision
 
        c.changeset = self.__get_cs(revision)
 
        c.branch = request.GET.get('branch', None)
 
        c.f_path = f_path
 
        c.annotate = annotate
 
        cur_rev = c.changeset.revision
 
        # used in files_source.html:
 
        c.cut_off_limit = self.cut_off_limit
 
        c.fulldiff = request.GET.get('fulldiff')
 

	
 
        # prev link
 
        try:
 
            prev_rev = c.db_repo_scm_instance.get_changeset(cur_rev).prev(c.branch)
 
            c.url_prev = url('files_home', repo_name=c.repo_name,
 
                         revision=prev_rev.raw_id, f_path=f_path)
 
            if c.branch:
 
                c.url_prev += '?branch=%s' % c.branch
 
        except (ChangesetDoesNotExistError, VCSError):
 
            c.url_prev = '#'
 

	
 
        # next link
 
        try:
 
            next_rev = c.db_repo_scm_instance.get_changeset(cur_rev).next(c.branch)
 
            c.url_next = url('files_home', repo_name=c.repo_name,
 
                     revision=next_rev.raw_id, f_path=f_path)
 
            if c.branch:
 
                c.url_next += '?branch=%s' % c.branch
 
        except (ChangesetDoesNotExistError, VCSError):
 
            c.url_next = '#'
 

	
 
        # files or dirs
 
        try:
 
            c.file = c.changeset.get_node(f_path)
 

	
 
            if c.file.is_submodule():
 
                raise HTTPFound(location=c.file.url)
 
            elif c.file.is_file():
 
                c.load_full_history = False
 
                # determine if we're on branch head
 
                _branches = c.db_repo_scm_instance.branches
 
                c.on_branch_head = revision in _branches or revision in _branches.values()
 
                _hist = []
 
                c.file_history = []
 
                if c.load_full_history:
 
                    c.file_history, _hist = self._get_node_history(c.changeset, f_path)
 

	
 
                c.authors = []
 
                for a in set([x.author for x in _hist]):
 
                    c.authors.append((h.email(a), h.person(a)))
 
            else:
 
                c.authors = c.file_history = []
 
        except RepositoryError as e:
 
            h.flash(e, category='error')
 
            raise HTTPNotFound()
 

	
 
        if request.environ.get('HTTP_X_PARTIAL_XHR'):
 
            return render('files/files_ypjax.html')
 

	
 
        # TODO: tags and bookmarks?
 
        c.revision_options = [(c.changeset.raw_id,
 
                              _('%s at %s') % (b, h.short_id(c.changeset.raw_id))) for b in c.changeset.branches] + \
 
            [(n, b) for b, n in c.db_repo_scm_instance.branches.items()]
 
        if c.db_repo_scm_instance.closed_branches:
 
            prefix = _('(closed)') + ' '
 
            c.revision_options += [('-', '-')] + \
 
                [(n, prefix + b) for b, n in c.db_repo_scm_instance.closed_branches.items()]
 

	
 
        return render('files/files.html')
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    @jsonify
 
    def history(self, repo_name, revision, f_path):
 
        changeset = self.__get_cs(revision)
 
        _file = changeset.get_node(f_path)
 
        if _file.is_file():
 
            file_history, _hist = self._get_node_history(changeset, f_path)
 

	
 
            res = []
 
            for obj in file_history:
 
                res.append({
 
                    'text': obj[1],
 
                    'children': [{'id': o[0], 'text': o[1]} for o in obj[0]]
 
                })
 

	
 
            data = {
 
                'more': False,
 
                'results': res
 
            }
 
            return data
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def authors(self, repo_name, revision, f_path):
 
        changeset = self.__get_cs(revision)
 
        _file = changeset.get_node(f_path)
 
        if _file.is_file():
 
            file_history, _hist = self._get_node_history(changeset, f_path)
 
            c.authors = []
 
            for a in set([x.author for x in _hist]):
 
                c.authors.append((h.email(a), h.person(a)))
 
            return render('files/files_history_box.html')
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def rawfile(self, repo_name, revision, f_path):
 
        cs = self.__get_cs(revision)
 
        file_node = self.__get_filenode(cs, f_path)
 

	
 
        response.content_disposition = 'attachment; filename=%s' % \
 
            safe_str(f_path.split(Repository.url_sep())[-1])
 
        response.content_disposition = \
 
            'attachment; filename=%s' % f_path.split(Repository.url_sep())[-1]
 

	
 
        response.content_type = file_node.mimetype
 
        return file_node.content
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def raw(self, repo_name, revision, f_path):
 
        cs = self.__get_cs(revision)
 
        file_node = self.__get_filenode(cs, f_path)
 

	
 
        raw_mimetype_mapping = {
 
            # map original mimetype to a mimetype used for "show as raw"
 
            # you can also provide a content-disposition to override the
 
            # default "attachment" disposition.
 
            # orig_type: (new_type, new_dispo)
 

	
 
            # show images inline:
 
            'image/x-icon': ('image/x-icon', 'inline'),
 
            'image/png': ('image/png', 'inline'),
 
            'image/gif': ('image/gif', 'inline'),
 
            'image/jpeg': ('image/jpeg', 'inline'),
 
            'image/svg+xml': ('image/svg+xml', 'inline'),
 
        }
 

	
 
        mimetype = file_node.mimetype
 
        try:
 
            mimetype, dispo = raw_mimetype_mapping[mimetype]
 
        except KeyError:
 
            # we don't know anything special about this, handle it safely
 
            if file_node.is_binary:
 
                # do same as download raw for binary files
 
                mimetype, dispo = 'application/octet-stream', 'attachment'
 
            else:
 
                # do not just use the original mimetype, but force text/plain,
 
                # otherwise it would serve text/html and that might be unsafe.
 
                # Note: underlying vcs library fakes text/plain mimetype if the
 
                # mimetype can not be determined and it thinks it is not
 
                # binary.This might lead to erroneous text display in some
 
                # cases, but helps in other cases, like with text files
 
                # without extension.
 
                mimetype, dispo = 'text/plain', 'inline'
 

	
 
        if dispo == 'attachment':
 
            dispo = 'attachment; filename=%s' % \
 
                        safe_str(f_path.split(os.sep)[-1])
 
            dispo = 'attachment; filename=%s' % f_path.split(os.sep)[-1]
 

	
 
        response.content_disposition = dispo
 
        response.content_type = mimetype
 
        return file_node.content
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('write')
 
    def delete(self, repo_name, revision, f_path):
 
        repo = c.db_repo
 
        # check if revision is a branch identifier- basically we cannot
 
        # create multiple heads via file editing
 
        _branches = repo.scm_instance.branches
 
        # check if revision is a branch name or branch hash
 
        if revision not in _branches and revision not in _branches.values():
 
            h.flash(_('You can only delete files with revision '
 
                      'being a valid branch'), category='warning')
 
            raise HTTPFound(location=h.url('files_home',
 
                                  repo_name=repo_name, revision='tip',
 
                                  f_path=f_path))
 

	
 
        r_post = request.POST
 

	
 
        c.cs = self.__get_cs(revision)
 
        c.file = self.__get_filenode(c.cs, f_path)
 

	
 
        c.default_message = _('Deleted file %s via Kallithea') % (f_path)
 
        c.f_path = f_path
 
        node_path = f_path
 
        author = request.authuser.full_contact
 

	
 
        if r_post:
 
            message = r_post.get('message') or c.default_message
 

	
 
            try:
 
                nodes = {
 
                    node_path: {
 
                        'content': ''
 
                    }
 
                }
 
                self.scm_model.delete_nodes(
 
                    user=request.authuser.user_id,
 
                    ip_addr=request.ip_addr,
 
                    repo=c.db_repo,
 
                    message=message,
 
                    nodes=nodes,
 
                    parent_cs=c.cs,
 
                    author=author,
 
                )
 

	
 
                h.flash(_('Successfully deleted file %s') % f_path,
 
                        category='success')
 
            except Exception:
 
                log.error(traceback.format_exc())
 
                h.flash(_('Error occurred during commit'), category='error')
 
            raise HTTPFound(location=url('changeset_home',
 
                                repo_name=c.repo_name, revision='tip'))
 

	
 
        return render('files/files_delete.html')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('write')
 
    def edit(self, repo_name, revision, f_path):
 
        repo = c.db_repo
 
        # check if revision is a branch identifier- basically we cannot
 
        # create multiple heads via file editing
 
        _branches = repo.scm_instance.branches
 
        # check if revision is a branch name or branch hash
 
        if revision not in _branches and revision not in _branches.values():
 
            h.flash(_('You can only edit files with revision '
 
                      'being a valid branch'), category='warning')
 
            raise HTTPFound(location=h.url('files_home',
 
                                  repo_name=repo_name, revision='tip',
 
                                  f_path=f_path))
 

	
 
        r_post = request.POST
 

	
 
        c.cs = self.__get_cs(revision)
 
        c.file = self.__get_filenode(c.cs, f_path)
 

	
 
        if c.file.is_binary:
 
            raise HTTPFound(location=url('files_home', repo_name=c.repo_name,
 
                            revision=c.cs.raw_id, f_path=f_path))
 
        c.default_message = _('Edited file %s via Kallithea') % (f_path)
 
        c.f_path = f_path
 

	
 
        if r_post:
 
            old_content = safe_unicode(c.file.content)
 
            sl = old_content.splitlines(1)
 
            first_line = sl[0] if sl else ''
 
            # modes:  0 - Unix, 1 - Mac, 2 - DOS
 
            mode = detect_mode(first_line, 0)
 
            content = convert_line_endings(r_post.get('content', ''), mode)
 

	
 
            message = r_post.get('message') or c.default_message
 
            author = request.authuser.full_contact
 

	
 
@@ -415,194 +414,193 @@ class FilesController(BaseRepoController
 
            message = r_post.get('message') or c.default_message
 
            filename = r_post.get('filename')
 
            location = r_post.get('location', '')
 
            file_obj = r_post.get('upload_file', None)
 

	
 
            if file_obj is not None and hasattr(file_obj, 'filename'):
 
                filename = file_obj.filename
 
                content = file_obj.file
 

	
 
                if hasattr(content, 'file'):
 
                    # non posix systems store real file under file attr
 
                    content = content.file
 

	
 
            if not content:
 
                h.flash(_('No content'), category='warning')
 
                raise HTTPFound(location=url('changeset_home', repo_name=c.repo_name,
 
                                    revision='tip'))
 
            if not filename:
 
                h.flash(_('No filename'), category='warning')
 
                raise HTTPFound(location=url('changeset_home', repo_name=c.repo_name,
 
                                    revision='tip'))
 
            # strip all crap out of file, just leave the basename
 
            filename = os.path.basename(filename)
 
            node_path = posixpath.join(location, filename)
 
            author = request.authuser.full_contact
 

	
 
            try:
 
                nodes = {
 
                    node_path: {
 
                        'content': content
 
                    }
 
                }
 
                self.scm_model.create_nodes(
 
                    user=request.authuser.user_id,
 
                    ip_addr=request.ip_addr,
 
                    repo=c.db_repo,
 
                    message=message,
 
                    nodes=nodes,
 
                    parent_cs=c.cs,
 
                    author=author,
 
                )
 

	
 
                h.flash(_('Successfully committed to %s') % node_path,
 
                        category='success')
 
            except NonRelativePathError as e:
 
                h.flash(_('Location must be relative path and must not '
 
                          'contain .. in path'), category='warning')
 
                raise HTTPFound(location=url('changeset_home', repo_name=c.repo_name,
 
                                    revision='tip'))
 
            except (NodeError, NodeAlreadyExistsError) as e:
 
                h.flash(_(e), category='error')
 
            except Exception:
 
                log.error(traceback.format_exc())
 
                h.flash(_('Error occurred during commit'), category='error')
 
            raise HTTPFound(location=url('changeset_home',
 
                                repo_name=c.repo_name, revision='tip'))
 

	
 
        return render('files/files_add.html')
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def archivefile(self, repo_name, fname):
 
        fileformat = None
 
        revision = None
 
        ext = None
 
        subrepos = request.GET.get('subrepos') == 'true'
 

	
 
        for a_type, ext_data in settings.ARCHIVE_SPECS.items():
 
            archive_spec = fname.split(ext_data[1])
 
            if len(archive_spec) == 2 and archive_spec[1] == '':
 
                fileformat = a_type or ext_data[1]
 
                revision = archive_spec[0]
 
                ext = ext_data[1]
 

	
 
        try:
 
            dbrepo = RepoModel().get_by_repo_name(repo_name)
 
            if not dbrepo.enable_downloads:
 
                return _('Downloads disabled') # TODO: do something else?
 

	
 
            if c.db_repo_scm_instance.alias == 'hg':
 
                # patch and reset hooks section of UI config to not run any
 
                # hooks on fetching archives with subrepos
 
                for k, v in c.db_repo_scm_instance._repo.ui.configitems('hooks'):
 
                    c.db_repo_scm_instance._repo.ui.setconfig('hooks', k, None)
 

	
 
            cs = c.db_repo_scm_instance.get_changeset(revision)
 
            content_type = settings.ARCHIVE_SPECS[fileformat][0]
 
        except ChangesetDoesNotExistError:
 
            return _('Unknown revision %s') % revision
 
        except EmptyRepositoryError:
 
            return _('Empty repository')
 
        except (ImproperArchiveTypeError, KeyError):
 
            return _('Unknown archive type')
 

	
 
        from kallithea import CONFIG
 
        rev_name = cs.raw_id[:12]
 
        archive_name = '%s-%s%s' % (safe_str(repo_name.replace('/', '_')),
 
                                    safe_str(rev_name), ext)
 
        archive_name = '%s-%s%s' % (repo_name.replace('/', '_'), rev_name, ext)
 

	
 
        archive_path = None
 
        cached_archive_path = None
 
        archive_cache_dir = CONFIG.get('archive_cache_dir')
 
        if archive_cache_dir and not subrepos: # TODO: subrepo caching?
 
            if not os.path.isdir(archive_cache_dir):
 
                os.makedirs(archive_cache_dir)
 
            cached_archive_path = os.path.join(archive_cache_dir, archive_name)
 
            if os.path.isfile(cached_archive_path):
 
                log.debug('Found cached archive in %s', cached_archive_path)
 
                archive_path = cached_archive_path
 
            else:
 
                log.debug('Archive %s is not yet cached', archive_name)
 

	
 
        if archive_path is None:
 
            # generate new archive
 
            fd, archive_path = tempfile.mkstemp()
 
            log.debug('Creating new temp archive in %s', archive_path)
 
            with os.fdopen(fd, 'wb') as stream:
 
                cs.fill_archive(stream=stream, kind=fileformat, subrepos=subrepos)
 
                # stream (and thus fd) has been closed by cs.fill_archive
 
            if cached_archive_path is not None:
 
                # we generated the archive - move it to cache
 
                log.debug('Storing new archive in %s', cached_archive_path)
 
                shutil.move(archive_path, cached_archive_path)
 
                archive_path = cached_archive_path
 

	
 
        def get_chunked_archive(archive_path):
 
            stream = open(archive_path, 'rb')
 
            while True:
 
                data = stream.read(16 * 1024)
 
                if not data:
 
                    break
 
                yield data
 
            stream.close()
 
            if archive_path != cached_archive_path:
 
                log.debug('Destroying temp archive %s', archive_path)
 
                os.remove(archive_path)
 

	
 
        action_logger(user=request.authuser,
 
                      action='user_downloaded_archive:%s' % (archive_name),
 
                      repo=repo_name, ipaddr=request.ip_addr, commit=True)
 

	
 
        response.content_disposition = str('attachment; filename=%s' % (archive_name))
 
        response.content_type = str(content_type)
 
        return get_chunked_archive(archive_path)
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def diff(self, repo_name, f_path):
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 
        diff2 = request.GET.get('diff2', '')
 
        diff1 = request.GET.get('diff1', '') or diff2
 
        c.action = request.GET.get('diff')
 
        c.no_changes = diff1 == diff2
 
        c.f_path = f_path
 
        c.big_diff = False
 
        fulldiff = request.GET.get('fulldiff')
 
        c.anchor_url = anchor_url
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        c.changes = OrderedDict()
 
        c.changes[diff2] = []
 

	
 
        # special case if we want a show rev only, it's impl here
 
        # to reduce JS and callbacks
 

	
 
        if request.GET.get('show_rev'):
 
            if str2bool(request.GET.get('annotate', 'False')):
 
                _url = url('files_annotate_home', repo_name=c.repo_name,
 
                           revision=diff1, f_path=c.f_path)
 
            else:
 
                _url = url('files_home', repo_name=c.repo_name,
 
                           revision=diff1, f_path=c.f_path)
 

	
 
            raise HTTPFound(location=_url)
 
        try:
 
            if diff1 not in ['', None, 'None', '0' * 12, '0' * 40]:
 
                c.changeset_1 = c.db_repo_scm_instance.get_changeset(diff1)
 
                try:
 
                    node1 = c.changeset_1.get_node(f_path)
 
                    if node1.is_dir():
 
                        raise NodeError('%s path is a %s not a file'
 
                                        % (node1, type(node1)))
 
                except NodeDoesNotExistError:
 
                    c.changeset_1 = EmptyChangeset(cs=diff1,
 
                                                   revision=c.changeset_1.revision,
 
                                                   repo=c.db_repo_scm_instance)
 
                    node1 = FileNode(f_path, '', changeset=c.changeset_1)
 
            else:
 
                c.changeset_1 = EmptyChangeset(repo=c.db_repo_scm_instance)
 
                node1 = FileNode(f_path, '', changeset=c.changeset_1)
 

	
 
            if diff2 not in ['', None, 'None', '0' * 12, '0' * 40]:
 
                c.changeset_2 = c.db_repo_scm_instance.get_changeset(diff2)
kallithea/controllers/login.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.login
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Login controller for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 22, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 
import re
 

	
 
import formencode
 
from formencode import htmlfill
 
from tg import request, session
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from webob.exc import HTTPBadRequest, HTTPFound
 

	
 
import kallithea.lib.helpers as h
 
from kallithea.config.routing import url
 
from kallithea.lib.auth import AuthUser, HasPermissionAnyDecorator
 
from kallithea.lib.base import BaseController, log_in_user, render
 
from kallithea.lib.exceptions import UserCreationError
 
from kallithea.lib.utils2 import safe_str
 
from kallithea.model.db import Setting, User
 
from kallithea.model.forms import LoginForm, PasswordResetConfirmationForm, PasswordResetRequestForm, RegisterForm
 
from kallithea.model.meta import Session
 
from kallithea.model.user import UserModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class LoginController(BaseController):
 

	
 
    def _validate_came_from(self, came_from,
 
            _re=re.compile(r"/(?!/)[-!#$%&'()*+,./:;=?@_~0-9A-Za-z]*$")):
 
        """Return True if came_from is valid and can and should be used.
 

	
 
        Determines if a URI reference is valid and relative to the origin;
 
        or in RFC 3986 terms, whether it matches this production:
 

	
 
          origin-relative-ref = path-absolute [ "?" query ] [ "#" fragment ]
 

	
 
        with the exception that '%' escapes are not validated and '#' is
 
        allowed inside the fragment part.
 
        """
 
        return _re.match(came_from) is not None
 

	
 
    def index(self):
 
        c.came_from = safe_str(request.GET.get('came_from', ''))
 
        c.came_from = request.GET.get('came_from', '')
 
        if c.came_from:
 
            if not self._validate_came_from(c.came_from):
 
                log.error('Invalid came_from (not server-relative): %r', c.came_from)
 
                raise HTTPBadRequest()
 
        else:
 
            c.came_from = url('home')
 

	
 
        if request.POST:
 
            # import Login Form validator class
 
            login_form = LoginForm()()
 
            try:
 
                c.form_result = login_form.to_python(dict(request.POST))
 
                # form checks for username/password, now we're authenticated
 
                username = c.form_result['username']
 
                user = User.get_by_username_or_email(username, case_insensitive=True)
 
            except formencode.Invalid as errors:
 
                defaults = errors.value
 
                # remove password from filling in form again
 
                defaults.pop('password', None)
 
                return htmlfill.render(
 
                    render('/login.html'),
 
                    defaults=errors.value,
 
                    errors=errors.error_dict or {},
 
                    prefix_error=False,
 
                    encoding="UTF-8",
 
                    force_defaults=False)
 
            except UserCreationError as e:
 
                # container auth or other auth functions that create users on
 
                # the fly can throw this exception signaling that there's issue
 
                # with user creation, explanation should be provided in
 
                # Exception itself
 
                h.flash(e, 'error')
 
            else:
 
                auth_user = log_in_user(user, c.form_result['remember'], is_external_auth=False, ip_addr=request.ip_addr)
 
                # TODO: handle auth_user is None as failed authentication?
 
                raise HTTPFound(location=c.came_from)
 
        else:
 
            # redirect if already logged in
 
            if not request.authuser.is_anonymous:
 
                raise HTTPFound(location=c.came_from)
 
            # continue to show login to default user
 

	
 
        return render('/login.html')
 

	
 
    @HasPermissionAnyDecorator('hg.admin', 'hg.register.auto_activate',
 
                               'hg.register.manual_activate')
 
    def register(self):
 
        def_user_perms = AuthUser(dbuser=User.get_default_user()).permissions['global']
 
        c.auto_active = 'hg.register.auto_activate' in def_user_perms
 

	
 
        settings = Setting.get_app_settings()
 
        captcha_private_key = settings.get('captcha_private_key')
 
        c.captcha_active = bool(captcha_private_key)
 
        c.captcha_public_key = settings.get('captcha_public_key')
 

	
 
        if request.POST:
 
            register_form = RegisterForm()()
 
            try:
 
                form_result = register_form.to_python(dict(request.POST))
 
                form_result['active'] = c.auto_active
 

	
 
                if c.captcha_active:
 
                    from kallithea.lib.recaptcha import submit
 
                    response = submit(request.POST.get('g-recaptcha-response'),
 
                                      private_key=captcha_private_key,
 
                                      remoteip=request.ip_addr)
 
                    if not response.is_valid:
 
                        _value = form_result
 
                        _msg = _('Bad captcha')
 
                        error_dict = {'recaptcha_field': _msg}
 
                        raise formencode.Invalid(_msg, _value, None,
 
                                                 error_dict=error_dict)
 

	
 
                UserModel().create_registration(form_result)
 
                h.flash(_('You have successfully registered with %s') % (c.site_name or 'Kallithea'),
 
                        category='success')
 
                Session().commit()
 
                raise HTTPFound(location=url('login_home'))
 

	
 
            except formencode.Invalid as errors:
 
                return htmlfill.render(
 
                    render('/register.html'),
 
                    defaults=errors.value,
 
                    errors=errors.error_dict or {},
 
                    prefix_error=False,
 
                    encoding="UTF-8",
 
                    force_defaults=False)
 
            except UserCreationError as e:
 
                # container auth or other auth functions that create users on
 
                # the fly can throw this exception signaling that there's issue
 
                # with user creation, explanation should be provided in
 
                # Exception itself
 
                h.flash(e, 'error')
 

	
 
        return render('/register.html')
 

	
kallithea/controllers/pullrequests.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.pullrequests
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
pull requests controller for Kallithea for initializing pull requests
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: May 7, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 
import traceback
 

	
 
import formencode
 
import mercurial.unionrepo
 
from tg import request
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from webob.exc import HTTPBadRequest, HTTPForbidden, HTTPFound, HTTPNotFound
 

	
 
from kallithea.config.routing import url
 
from kallithea.controllers.changeset import _context_url, _ignorews_url, create_cs_pr_comment, delete_cs_pr_comment
 
from kallithea.lib import diffs
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.base import BaseRepoController, jsonify, render
 
from kallithea.lib.graphmod import graph_data
 
from kallithea.lib.page import Page
 
from kallithea.lib.utils2 import ascii_bytes, safe_bytes, safe_int, safe_str
 
from kallithea.lib.utils2 import ascii_bytes, safe_bytes, safe_int
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, EmptyRepositoryError
 
from kallithea.model.changeset_status import ChangesetStatusModel
 
from kallithea.model.comment import ChangesetCommentsModel
 
from kallithea.model.db import ChangesetStatus, PullRequest, PullRequestReviewer, Repository, User
 
from kallithea.model.forms import PullRequestForm, PullRequestPostForm
 
from kallithea.model.meta import Session
 
from kallithea.model.pull_request import CreatePullRequestAction, CreatePullRequestIterationAction, PullRequestModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def _get_reviewer(user_id):
 
    """Look up user by ID and validate it as a potential reviewer."""
 
    try:
 
        user = User.get(int(user_id))
 
    except ValueError:
 
        user = None
 

	
 
    if user is None or user.is_default_user:
 
        h.flash(_('Invalid reviewer "%s" specified') % user_id, category='error')
 
        raise HTTPBadRequest()
 

	
 
    return user
 

	
 

	
 
class PullrequestsController(BaseRepoController):
 

	
 
    def _get_repo_refs(self, repo, rev=None, branch=None, branch_rev=None):
 
        """return a structure with repo's interesting changesets, suitable for
 
        the selectors in pullrequest.html
 

	
 
        rev: a revision that must be in the list somehow and selected by default
 
        branch: a branch that must be in the list and selected by default - even if closed
 
        branch_rev: a revision of which peers should be preferred and available."""
 
        # list named branches that has been merged to this named branch - it should probably merge back
 
        peers = []
 

	
 
        if rev:
 
            rev = safe_str(rev)
 

	
 
        if branch:
 
            branch = safe_str(branch)
 

	
 
        if branch_rev:
 
            # a revset not restricting to merge() would be better
 
            # (especially because it would get the branch point)
 
            # ... but is currently too expensive
 
            # including branches of children could be nice too
 
            peerbranches = set()
 
            for i in repo._repo.revs(
 
                b"sort(parents(branch(id(%s)) and merge()) - branch(id(%s)), -rev)",
 
                ascii_bytes(branch_rev), ascii_bytes(branch_rev),
 
            ):
 
                for abranch in repo.get_changeset(i).branches:
 
                    if abranch not in peerbranches:
 
                        n = 'branch:%s:%s' % (abranch, repo.get_changeset(abranch).raw_id)
 
                        peers.append((n, abranch))
 
                        peerbranches.add(abranch)
 

	
 
        selected = None
 
        tiprev = repo.tags.get('tip')
 
        tipbranch = None
 

	
 
        branches = []
 
        for abranch, branchrev in repo.branches.items():
 
            n = 'branch:%s:%s' % (abranch, branchrev)
 
            desc = abranch
 
            if branchrev == tiprev:
 
                tipbranch = abranch
 
                desc = '%s (current tip)' % desc
 
            branches.append((n, desc))
 
            if rev == branchrev:
 
                selected = n
 
            if branch == abranch:
 
                if not rev:
 
                    selected = n
 
                branch = None
 
        if branch:  # branch not in list - it is probably closed
 
            branchrev = repo.closed_branches.get(branch)
 
            if branchrev:
 
                n = 'branch:%s:%s' % (branch, branchrev)
 
                branches.append((n, _('%s (closed)') % branch))
 
                selected = n
 
                branch = None
 
            if branch:
 
                log.debug('branch %r not found in %s', branch, repo)
 

	
 
        bookmarks = []
 
        for bookmark, bookmarkrev in repo.bookmarks.items():
 
            n = 'book:%s:%s' % (bookmark, bookmarkrev)
 
            bookmarks.append((n, bookmark))
 
            if rev == bookmarkrev:
 
                selected = n
 

	
 
        tags = []
 
        for tag, tagrev in repo.tags.items():
 
            if tag == 'tip':
 
                continue
 
            n = 'tag:%s:%s' % (tag, tagrev)
 
            tags.append((n, tag))
 
            # note: even if rev == tagrev, don't select the static tag - it must be chosen explicitly
 

	
 
        # prio 1: rev was selected as existing entry above
 

	
 
        # prio 2: create special entry for rev; rev _must_ be used
 
        specials = []
 
        if rev and selected is None:
 
            selected = 'rev:%s:%s' % (rev, rev)
 
            specials = [(selected, '%s: %s' % (_("Changeset"), rev[:12]))]
 

	
 
        # prio 3: most recent peer branch
 
        if peers and not selected:
 
            selected = peers[0][0]
 

	
 
        # prio 4: tip revision
 
        if not selected:
 
            if h.is_hg(repo):
 
                if tipbranch:
 
                    selected = 'branch:%s:%s' % (tipbranch, tiprev)
 
                else:
 
                    selected = 'tag:null:' + repo.EMPTY_CHANGESET
 
                    tags.append((selected, 'null'))
 
            else:
 
                if 'master' in repo.branches:
 
                    selected = 'branch:master:%s' % repo.branches['master']
 
                else:
 
                    k, v = list(repo.branches.items())[0]
 
                    selected = 'branch:%s:%s' % (k, v)
 

	
 
        groups = [(specials, _("Special")),
 
                  (peers, _("Peer branches")),
 
                  (bookmarks, _("Bookmarks")),
 
                  (branches, _("Branches")),
 
                  (tags, _("Tags")),
 
                  ]
 
        return [g for g in groups if g[0]], selected
 

	
 
    def _is_allowed_to_change_status(self, pull_request):
 
        if pull_request.is_closed():
 
@@ -493,152 +487,152 @@ class PullrequestsController(BaseRepoCon
 
                root_parents = c.cs_ranges[0].parents
 
                c.is_range = cs_a in root_parents
 
                #c.merge_root = len(root_parents) > 1 # a range starting with a merge might deserve a warning
 
        except ChangesetDoesNotExistError: # probably because c.a_rev not found
 
            pass
 
        except IndexError: # probably because c.cs_ranges is empty, probably because revisions are missing
 
            pass
 

	
 
        avail_revs = set()
 
        avail_show = []
 
        c.cs_branch_name = c.cs_ref_name
 
        c.a_branch_name = None
 
        other_scm_instance = c.a_repo.scm_instance
 
        c.update_msg = ""
 
        c.update_msg_other = ""
 
        try:
 
            if not c.cs_ranges:
 
                c.update_msg = _('Error: changesets not found when displaying pull request from %s.') % c.cs_rev
 
            elif org_scm_instance.alias == 'hg' and c.a_ref_name != 'ancestor':
 
                if c.cs_ref_type != 'branch':
 
                    c.cs_branch_name = org_scm_instance.get_changeset(c.cs_ref_name).branch # use ref_type ?
 
                c.a_branch_name = c.a_ref_name
 
                if c.a_ref_type != 'branch':
 
                    try:
 
                        c.a_branch_name = other_scm_instance.get_changeset(c.a_ref_name).branch # use ref_type ?
 
                    except EmptyRepositoryError:
 
                        c.a_branch_name = 'null' # not a branch name ... but close enough
 
                # candidates: descendants of old head that are on the right branch
 
                #             and not are the old head itself ...
 
                #             and nothing at all if old head is a descendant of target ref name
 
                if not c.is_range and other_scm_instance._repo.revs('present(%s)::&%s', c.cs_ranges[-1].raw_id, c.a_branch_name):
 
                    c.update_msg = _('This pull request has already been merged to %s.') % c.a_branch_name
 
                elif c.pull_request.is_closed():
 
                    c.update_msg = _('This pull request has been closed and can not be updated.')
 
                else: # look for descendants of PR head on source branch in org repo
 
                    avail_revs = org_scm_instance._repo.revs('%s:: & branch(%s)',
 
                                                             revs[0], c.cs_branch_name)
 
                    if len(avail_revs) > 1: # more than just revs[0]
 
                        # also show changesets that not are descendants but would be merged in
 
                        targethead = other_scm_instance.get_changeset(c.a_branch_name).raw_id
 
                        if org_scm_instance.path != other_scm_instance.path:
 
                            # Note: org_scm_instance.path must come first so all
 
                            # valid revision numbers are 100% org_scm compatible
 
                            # - both for avail_revs and for revset results
 
                            hgrepo = mercurial.unionrepo.makeunionrepository(org_scm_instance.baseui,
 
                                                                   safe_bytes(org_scm_instance.path),
 
                                                                   safe_bytes(other_scm_instance.path))
 
                        else:
 
                            hgrepo = org_scm_instance._repo
 
                        show = set(hgrepo.revs('::%ld & !::parents(%s) & !::%s',
 
                                               avail_revs, revs[0], targethead))
 
                        if show:
 
                            c.update_msg = _('The following additional changes are available on %s:') % c.cs_branch_name
 
                        else:
 
                            c.update_msg = _('No additional changesets found for iterating on this pull request.')
 
                    else:
 
                        show = set()
 
                        avail_revs = set() # drop revs[0]
 
                        c.update_msg = _('No additional changesets found for iterating on this pull request.')
 

	
 
                    # TODO: handle branch heads that not are tip-most
 
                    brevs = org_scm_instance._repo.revs('%s - %ld - %s', c.cs_branch_name, avail_revs, revs[0])
 
                    if brevs:
 
                        # also show changesets that are on branch but neither ancestors nor descendants
 
                        show.update(org_scm_instance._repo.revs('::%ld - ::%ld - ::%s', brevs, avail_revs, c.a_branch_name))
 
                        show.add(revs[0]) # make sure graph shows this so we can see how they relate
 
                        c.update_msg_other = _('Note: Branch %s has another head: %s.') % (c.cs_branch_name,
 
                            h.short_id(org_scm_instance.get_changeset((max(brevs))).raw_id))
 

	
 
                    avail_show = sorted(show, reverse=True)
 

	
 
            elif org_scm_instance.alias == 'git':
 
                c.cs_repo.scm_instance.get_changeset(c.cs_rev) # check it exists - raise ChangesetDoesNotExistError if not
 
                c.update_msg = _("Git pull requests don't support iterating yet.")
 
        except ChangesetDoesNotExistError:
 
            c.update_msg = _('Error: some changesets not found when displaying pull request from %s.') % c.cs_rev
 

	
 
        c.avail_revs = avail_revs
 
        c.avail_cs = [org_scm_instance.get_changeset(r) for r in avail_show]
 
        c.avail_jsdata = graph_data(org_scm_instance, avail_show)
 

	
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
 
        c.cs_statuses = c.cs_repo.statuses(raw_ids)
 

	
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        fulldiff = request.GET.get('fulldiff')
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        # we swap org/other ref since we run a simple diff on one repo
 
        log.debug('running diff between %s and %s in %s',
 
                  c.a_rev, c.cs_rev, org_scm_instance.path)
 
        try:
 
            raw_diff = diffs.get_diff(org_scm_instance, rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
            raw_diff = diffs.get_diff(org_scm_instance, rev1=c.a_rev, rev2=c.cs_rev,
 
                                      ignore_whitespace=ignore_whitespace, context=line_context)
 
        except ChangesetDoesNotExistError:
 
            raw_diff = safe_bytes(_("The diff can't be shown - the PR revisions could not be found."))
 
        diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
 
        c.limited_diff = diff_processor.limited_diff
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 

	
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            html_diff = diffs.as_html(enable_comments=True, parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, html_diff, st))
 

	
 
        # inline comments
 
        c.inline_cnt = 0
 
        c.inline_comments = cc_model.get_inline_comments(
 
                                c.db_repo.repo_id,
 
                                pull_request=pull_request_id)
 
        # count inline comments
 
        for __, lines in c.inline_comments:
 
            for comments in lines.values():
 
                c.inline_cnt += len(comments)
 
        # comments
 
        c.comments = cc_model.get_comments(c.db_repo.repo_id, pull_request=pull_request_id)
 

	
 
        # (badly named) pull-request status calculation based on reviewer votes
 
        (c.pull_request_reviewers,
 
         c.pull_request_pending_reviewers,
 
         c.current_voting_result,
 
         ) = cs_model.calculate_pull_request_result(c.pull_request)
 
        c.changeset_statuses = ChangesetStatus.STATUSES
 

	
 
        c.is_ajax_preview = False
 
        c.ancestors = None # [c.a_rev] ... but that is shown in an other way
 
        return render('/pullrequests/pullrequest_show.html')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    @jsonify
 
    def comment(self, repo_name, pull_request_id):
 
        pull_request = PullRequest.get_or_404(pull_request_id)
 
        allowed_to_change_status = self._is_allowed_to_change_status(pull_request)
 
        return create_cs_pr_comment(repo_name, pull_request=pull_request,
 
                allowed_to_change_status=allowed_to_change_status)
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    @jsonify
 
    def delete_comment(self, repo_name, comment_id):
 
        return delete_cs_pr_comment(repo_name, comment_id)
kallithea/controllers/search.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.search
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Search controller for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Aug 7, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 
import traceback
 

	
 
from tg import config, request
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from whoosh.index import EmptyIndexError, exists_in, open_dir
 
from whoosh.qparser import QueryParser, QueryParserError
 
from whoosh.query import Phrase, Prefix
 

	
 
from kallithea.lib.auth import LoginRequired
 
from kallithea.lib.base import BaseRepoController, render
 
from kallithea.lib.indexers import CHGSET_IDX_NAME, CHGSETS_SCHEMA, IDX_NAME, SCHEMA, WhooshResultWrapper
 
from kallithea.lib.page import Page
 
from kallithea.lib.utils2 import safe_int, safe_str
 
from kallithea.lib.utils2 import safe_int
 
from kallithea.model.repo import RepoModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class SearchController(BaseRepoController):
 

	
 
    @LoginRequired(allow_default_user=True)
 
    def index(self, repo_name=None):
 
        c.repo_name = repo_name
 
        c.formated_results = []
 
        c.runtime = ''
 
        c.cur_query = request.GET.get('q', None)
 
        c.cur_type = request.GET.get('type', 'content')
 
        c.cur_search = search_type = {'content': 'content',
 
                                      'commit': 'message',
 
                                      'path': 'path',
 
                                      'repository': 'repository'
 
                                      }.get(c.cur_type, 'content')
 

	
 
        index_name = {
 
            'content': IDX_NAME,
 
            'commit': CHGSET_IDX_NAME,
 
            'path': IDX_NAME
 
        }.get(c.cur_type, IDX_NAME)
 

	
 
        schema_defn = {
 
            'content': SCHEMA,
 
            'commit': CHGSETS_SCHEMA,
 
            'path': SCHEMA
 
        }.get(c.cur_type, SCHEMA)
 

	
 
        log.debug('IDX: %s', index_name)
 
        log.debug('SCHEMA: %s', schema_defn)
 

	
 
        if c.cur_query:
 
            cur_query = c.cur_query.lower()
 
            log.debug(cur_query)
 

	
 
        if c.cur_query:
 
            p = safe_int(request.GET.get('page'), 1)
 
            highlight_items = set()
 
            index_dir = config['index_dir']
 
            try:
 
                if not exists_in(index_dir, index_name):
 
                    raise EmptyIndexError
 
                idx = open_dir(index_dir, indexname=index_name)
 
                searcher = idx.searcher()
 

	
 
                qp = QueryParser(search_type, schema=schema_defn)
 
                if c.repo_name:
 
                    # use "repository_rawname:" instead of "repository:"
 
                    # for case-sensitive matching
 
                    cur_query = u'repository_rawname:%s %s' % (c.repo_name, cur_query)
 
                try:
 
                    query = qp.parse(unicode(cur_query))
 
                    # extract words for highlight
 
                    if isinstance(query, Phrase):
 
                        highlight_items.update(query.words)
 
                    elif isinstance(query, Prefix):
 
                        highlight_items.add(query.text)
 
                    else:
 
                        for i in query.all_terms():
 
                            if i[0] in ['content', 'message']:
 
                                highlight_items.add(i[1])
 

	
 
                    matcher = query.matcher(searcher)
 

	
 
                    log.debug('query: %s', query)
 
                    log.debug('hl terms: %s', highlight_items)
 
                    results = searcher.search(query)
 
                    res_ln = len(results)
 
                    c.runtime = '%s results (%.3f seconds)' % (
 
                        res_ln, results.runtime
 
                    )
 

	
 
                    repo_location = RepoModel().repos_path
 
                    c.formated_results = Page(
 
                        WhooshResultWrapper(search_type, searcher, matcher,
 
                                            highlight_items, repo_location),
 
                        page=p,
 
                        item_count=res_ln,
 
                        items_per_page=10,
 
                        type=safe_str(c.cur_type),
 
                        q=safe_str(c.cur_query),
 
                        type=c.cur_type,
 
                        q=c.cur_query,
 
                    )
 

	
 
                except QueryParserError:
 
                    c.runtime = _('Invalid search query. Try quoting it.')
 
                searcher.close()
 
            except EmptyIndexError:
 
                log.error("Empty search index - run 'kallithea-cli index-create' regularly")
 
                c.runtime = _('The server has no search index.')
 
            except Exception:
 
                log.error(traceback.format_exc())
 
                c.runtime = _('An error occurred during search operation.')
 

	
 
        # Return a rendered template
 
        return render('/search/search.html')
kallithea/lib/auth_modules/auth_container.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.auth_modules.auth_container
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Kallithea container based authentication plugin
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Created on Nov 17, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 

	
 
from kallithea.lib import auth_modules
 
from kallithea.lib.compat import hybrid_property
 
from kallithea.lib.utils2 import safe_str, str2bool
 
from kallithea.lib.utils2 import str2bool
 
from kallithea.model.db import Setting
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class KallitheaAuthPlugin(auth_modules.KallitheaExternalAuthPlugin):
 
    def __init__(self):
 
        pass
 

	
 
    @hybrid_property
 
    def name(self):
 
        return "container"
 

	
 
    @hybrid_property
 
    def is_container_auth(self):
 
        return True
 

	
 
    def settings(self):
 

	
 
        settings = [
 
            {
 
                "name": "header",
 
                "validator": self.validators.UnicodeString(strip=True, not_empty=True),
 
                "type": "string",
 
                "description": "Request header to extract the username from",
 
                "default": "REMOTE_USER",
 
                "formname": "Username header"
 
            },
 
            {
 
                "name": "email_header",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Optional request header to extract the email from",
 
                "default": "",
 
                "formname": "Email header"
 
            },
 
            {
 
                "name": "firstname_header",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Optional request header to extract the first name from",
 
                "default": "",
 
                "formname": "Firstname header"
 
            },
 
            {
 
                "name": "lastname_header",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Optional request header to extract the last name from",
 
                "default": "",
 
                "formname": "Lastname header"
 
            },
 
            {
 
                "name": "fallback_header",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Request header to extract the user from when main one fails",
 
                "default": "HTTP_X_FORWARDED_USER",
 
                "formname": "Fallback header"
 
            },
 
            {
 
                "name": "clean_username",
 
                "validator": self.validators.StringBoolean(if_missing=False),
 
                "type": "bool",
 
                "description": "Perform cleaning of user, if passed user has @ in username "
 
                               "then first part before @ is taken. "
 
                               "If there's \\ in the username only the part after \\ is taken",
 
                "default": "True",
 
                "formname": "Clean username"
 
            },
 
        ]
 
        return settings
 

	
 
    def use_fake_password(self):
 
        return True
 

	
 
    def _clean_username(self, username):
 
        # Removing realm and domain from username
 
        username = username.partition('@')[0]
 
        username = username.rpartition('\\')[2]
 
        return username
 

	
 
    def _get_username(self, environ, settings):
 
        username = None
 
        environ = environ or {}
 
        if not environ:
 
            log.debug('got empty environ: %s', environ)
 

	
 
        settings = settings or {}
 
        if settings.get('header'):
 
            header = settings.get('header')
 
            username = environ.get(header)
 
            log.debug('extracted %s:%s', header, username)
 

	
 
        # fallback mode
 
        if not username and settings.get('fallback_header'):
 
            header = settings.get('fallback_header')
 
            username = environ.get(header)
 
            log.debug('extracted %s:%s', header, username)
 

	
 
        if username and str2bool(settings.get('clean_username')):
 
            log.debug('Received username %s from container', username)
 
            username = self._clean_username(username)
 
            log.debug('New cleanup user is: %s', username)
 
        return username
 

	
 
    def get_user(self, username=None, **kwargs):
 
        """
 
        Helper method for user fetching in plugins, by default it's using
 
        simple fetch by username, but this method can be customized in plugins
 
        eg. container auth plugin to fetch user by environ params
 
        :param username: username if given to fetch
 
        :param kwargs: extra arguments needed for user fetching.
 
        """
 
        environ = kwargs.get('environ') or {}
 
        settings = kwargs.get('settings') or {}
 
        username = self._get_username(environ, settings)
 
        # we got the username, so use default method now
 
        return super(KallitheaAuthPlugin, self).get_user(username)
 

	
 
    def auth(self, userobj, username, password, settings, **kwargs):
 
        """
 
        Gets the container_auth username (or email). It tries to get username
 
        from REMOTE_USER if this plugin is enabled, if that fails
 
        it tries to get username from HTTP_X_FORWARDED_USER if fallback header
 
        is set. clean_username extracts the username from this data if it's
 
        having @ in it.
 
        Return None on failure. On success, return a dictionary of the form:
 

	
 
            see: KallitheaAuthPluginBase.auth_func_attrs
 

	
 
        :param userobj:
 
        :param username:
 
        :param password:
 
        :param settings:
 
        :param kwargs:
 
        """
 
        environ = kwargs.get('environ')
 
        if not environ:
 
            log.debug('Empty environ data skipping...')
 
            return None
 

	
 
        if not userobj:
 
            userobj = self.get_user('', environ=environ, settings=settings)
 

	
 
        # we don't care passed username/password for container auth plugins.
 
        # only way to log in is using environ
 
        username = None
 
        if userobj:
 
            username = safe_str(getattr(userobj, 'username'))
 
            username = getattr(userobj, 'username')
 

	
 
        if not username:
 
            # we don't have any objects in DB, user doesn't exist, extract
 
            # username from environ based on the settings
 
            username = self._get_username(environ, settings)
 

	
 
        # if cannot fetch username, it's a no-go for this plugin to proceed
 
        if not username:
 
            return None
 

	
 
        # old attrs fetched from Kallithea database
 
        admin = getattr(userobj, 'admin', False)
 
        email = environ.get(settings.get('email_header'), getattr(userobj, 'email', ''))
 
        firstname = environ.get(settings.get('firstname_header'), getattr(userobj, 'firstname', ''))
 
        lastname = environ.get(settings.get('lastname_header'), getattr(userobj, 'lastname', ''))
 

	
 
        user_data = {
 
            'username': username,
 
            'firstname': firstname or username,
 
            'lastname': lastname or '',
 
            'groups': [],
 
            'email': email or '',
 
            'admin': admin or False,
 
            'extern_name': username,
 
        }
 

	
 
        log.info('user `%s` authenticated correctly', user_data['username'])
 
        return user_data
 

	
 
    def get_managed_fields(self):
 
        fields = ['username', 'password']
 
        if(Setting.get_by_name('auth_container_email_header').app_settings_value):
 
            fields.append('email')
 
        if(Setting.get_by_name('auth_container_firstname_header').app_settings_value):
 
            fields.append('firstname')
 
        if(Setting.get_by_name('auth_container_lastname_header').app_settings_value):
 
            fields.append('lastname')
 
        return fields
kallithea/lib/auth_modules/auth_ldap.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.auth_modules.auth_ldap
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Kallithea authentication plugin for LDAP
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Created on Nov 17, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 

	
 
from kallithea.lib import auth_modules
 
from kallithea.lib.compat import hybrid_property
 
from kallithea.lib.exceptions import LdapConnectionError, LdapImportError, LdapPasswordError, LdapUsernameError
 
from kallithea.lib.utils2 import safe_str
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 
try:
 
    import ldap
 
    import ldap.filter
 
except ImportError:
 
    # means that python-ldap is not installed
 
    ldap = None
 

	
 

	
 
class AuthLdap(object):
 

	
 
    def __init__(self, server, base_dn, port=None, bind_dn='', bind_pass='',
 
                 tls_kind='LDAPS', tls_reqcert='DEMAND', cacertdir=None, ldap_version=3,
 
                 ldap_filter='(&(objectClass=user)(!(objectClass=computer)))',
 
                 search_scope='SUBTREE', attr_login='uid'):
 
        if ldap is None:
 
            raise LdapImportError
 

	
 
        self.ldap_version = ldap_version
 

	
 
        self.TLS_KIND = tls_kind
 
        OPT_X_TLS_DEMAND = 2
 
        self.TLS_REQCERT = getattr(ldap, 'OPT_X_TLS_%s' % tls_reqcert,
 
                                   OPT_X_TLS_DEMAND)
 
        self.cacertdir = cacertdir
 

	
 
        protocol = 'ldaps' if self.TLS_KIND == 'LDAPS' else 'ldap'
 
        if not port:
 
            port = 636 if self.TLS_KIND == 'LDAPS' else 389
 
        self.LDAP_SERVER = str(', '.join(
 
            "%s://%s:%s" % (protocol,
 
                            host.strip(),
 
                            port)
 
            for host in server.split(',')))
 

	
 
        self.LDAP_BIND_DN = safe_str(bind_dn)
 
        self.LDAP_BIND_PASS = safe_str(bind_pass)
 
        self.LDAP_BIND_DN = bind_dn
 
        self.LDAP_BIND_PASS = bind_pass
 

	
 
        self.BASE_DN = safe_str(base_dn)
 
        self.LDAP_FILTER = safe_str(ldap_filter)
 
        self.BASE_DN = base_dn
 
        self.LDAP_FILTER = ldap_filter
 
        self.SEARCH_SCOPE = getattr(ldap, 'SCOPE_%s' % search_scope)
 
        self.attr_login = attr_login
 

	
 
    def authenticate_ldap(self, username, password):
 
        """
 
        Authenticate a user via LDAP and return his/her LDAP properties.
 

	
 
        Raises AuthenticationError if the credentials are rejected, or
 
        EnvironmentError if the LDAP server can't be reached.
 

	
 
        :param username: username
 
        :param password: password
 
        """
 

	
 
        if not password:
 
            log.debug("Attempt to authenticate LDAP user "
 
                      "with blank password rejected.")
 
            raise LdapPasswordError()
 
        if "," in username:
 
            raise LdapUsernameError("invalid character in username: ,")
 
        try:
 
            if self.cacertdir:
 
                if hasattr(ldap, 'OPT_X_TLS_CACERTDIR'):
 
                    ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, self.cacertdir)
 
                else:
 
                    log.debug("OPT_X_TLS_CACERTDIR is not available - can't set %s", self.cacertdir)
 
            ldap.set_option(ldap.OPT_REFERRALS, ldap.OPT_OFF)
 
            ldap.set_option(ldap.OPT_RESTART, ldap.OPT_ON)
 
            ldap.set_option(ldap.OPT_TIMEOUT, 20)
 
            ldap.set_option(ldap.OPT_NETWORK_TIMEOUT, 10)
 
            ldap.set_option(ldap.OPT_TIMELIMIT, 15)
 
            if self.TLS_KIND != 'PLAIN':
 
                ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, self.TLS_REQCERT)
 
            server = ldap.initialize(self.LDAP_SERVER)
 
            if self.ldap_version == 2:
 
                server.protocol = ldap.VERSION2
 
            else:
 
                server.protocol = ldap.VERSION3
 

	
 
            if self.TLS_KIND == 'START_TLS':
 
                server.start_tls_s()
 

	
 
            if self.LDAP_BIND_DN and self.LDAP_BIND_PASS:
 
                log.debug('Trying simple_bind with password and given DN: %s',
 
                          self.LDAP_BIND_DN)
 
                server.simple_bind_s(self.LDAP_BIND_DN, self.LDAP_BIND_PASS)
 

	
 
            filter_ = '(&%s(%s=%s))' % (self.LDAP_FILTER,
 
                                        ldap.filter.escape_filter_chars(self.attr_login),
 
                                        ldap.filter.escape_filter_chars(username))
 
            log.debug("Authenticating %r filter %s at %s", self.BASE_DN,
 
                      filter_, self.LDAP_SERVER)
 
            lobjects = server.search_ext_s(self.BASE_DN, self.SEARCH_SCOPE,
 
                                           filter_)
 

	
 
            if not lobjects:
 
                raise ldap.NO_SUCH_OBJECT()
 

	
 
            for (dn, _attrs) in lobjects:
 
                if dn is None:
 
                    continue
 

	
 
                try:
 
                    log.debug('Trying simple bind with %s', dn)
 
                    server.simple_bind_s(dn, safe_str(password))
 
                    server.simple_bind_s(dn, password)
 
                    results = server.search_ext_s(dn, ldap.SCOPE_BASE,
 
                                                  '(objectClass=*)')
 
                    if len(results) == 1:
 
                        dn_, attrs = results[0]
 
                        assert dn_ == dn
 
                        return dn, attrs
 

	
 
                except ldap.INVALID_CREDENTIALS:
 
                    log.debug("LDAP rejected password for user '%s': %s",
 
                              username, dn)
 
                    continue # accept authentication as another ldap user with same username
 

	
 
            log.debug("No matching LDAP objects for authentication "
 
                      "of '%s'", username)
 
            raise LdapPasswordError()
 

	
 
        except ldap.NO_SUCH_OBJECT:
 
            log.debug("LDAP says no such user '%s'", username)
 
            raise LdapUsernameError()
 
        except ldap.SERVER_DOWN:
 
            # [0] might be {'info': "TLS error -8179:Peer's Certificate issuer is not recognized.", 'desc': "Can't contact LDAP server"}
 
            raise LdapConnectionError("LDAP can't connect to authentication server")
 

	
 

	
 
class KallitheaAuthPlugin(auth_modules.KallitheaExternalAuthPlugin):
 
    def __init__(self):
 
        self._logger = logging.getLogger(__name__)
 
        self._tls_kind_values = ["PLAIN", "LDAPS", "START_TLS"]
 
        self._tls_reqcert_values = ["NEVER", "ALLOW", "TRY", "DEMAND", "HARD"]
 
        self._search_scopes = ["BASE", "ONELEVEL", "SUBTREE"]
 

	
 
    @hybrid_property
 
    def name(self):
 
        return "ldap"
 

	
 
    def settings(self):
 
        settings = [
 
            {
 
                "name": "host",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Host of the LDAP Server",
 
                "formname": "LDAP Host"
 
            },
 
            {
 
                "name": "port",
 
                "validator": self.validators.Number(strip=True),
 
                "type": "string",
 
                "description": "Port that the LDAP server is listening on. Defaults to 389 for PLAIN/START_TLS and 636 for LDAPS.",
 
                "default": "",
 
                "formname": "Custom LDAP Port"
 
            },
 
            {
 
                "name": "dn_user",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "User to connect to LDAP",
 
                "formname": "Account"
 
            },
 
            {
 
                "name": "dn_pass",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "password",
 
                "description": "Password to connect to LDAP",
 
                "formname": "Password"
 
            },
 
            {
 
                "name": "tls_kind",
 
                "validator": self.validators.OneOf(self._tls_kind_values),
 
                "type": "select",
 
                "values": self._tls_kind_values,
 
                "description": "TLS Type",
 
                "default": 'LDAPS',
 
                "formname": "Connection Security"
 
            },
 
            {
 
                "name": "tls_reqcert",
 
                "validator": self.validators.OneOf(self._tls_reqcert_values),
 
                "type": "select",
 
                "values": self._tls_reqcert_values,
 
                "description": "Require Cert over TLS?",
 
                "formname": "Certificate Checks"
 
            },
 
            {
 
                "name": "cacertdir",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Optional: Custom CA certificate directory for validating LDAPS",
 
                "formname": "Custom CA Certificates"
 
            },
 
            {
 
                "name": "base_dn",
 
                "validator": self.validators.UnicodeString(strip=True),
 
                "type": "string",
 
                "description": "Base DN to search (e.g., dc=mydomain,dc=com)",
 
                "formname": "Base DN"
kallithea/lib/base.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
"""
 
kallithea.lib.base
 
~~~~~~~~~~~~~~~~~~
 

	
 
The base Controller API
 
Provides the BaseController class for subclassing. And usage in different
 
controllers
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Oct 06, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import base64
 
import datetime
 
import logging
 
import traceback
 
import warnings
 

	
 
import decorator
 
import paste.auth.basic
 
import paste.httpexceptions
 
import paste.httpheaders
 
import webob.exc
 
from tg import TGController, config, render_template, request, response, session
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 

	
 
from kallithea import BACKENDS, __version__
 
from kallithea.config.routing import url
 
from kallithea.lib import auth_modules, ext_json
 
from kallithea.lib.auth import AuthUser, HasPermissionAnyMiddleware
 
from kallithea.lib.exceptions import UserCreationError
 
from kallithea.lib.utils import get_repo_slug, is_valid_repo
 
from kallithea.lib.utils2 import AttributeDict, ascii_bytes, safe_int, safe_str, safe_unicode, set_hook_environment, str2bool
 
from kallithea.lib.utils2 import AttributeDict, ascii_bytes, safe_int, safe_unicode, set_hook_environment, str2bool
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError
 
from kallithea.model import meta
 
from kallithea.model.db import PullRequest, Repository, Setting, User
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def render(template_path):
 
    return render_template({'url': url}, 'mako', template_path)
 

	
 

	
 
def _filter_proxy(ip):
 
    """
 
    HEADERS can have multiple ips inside the left-most being the original
 
    client, and each successive proxy that passed the request adding the IP
 
    address where it received the request from.
 

	
 
    :param ip:
 
    """
 
    if ',' in ip:
 
        _ips = ip.split(',')
 
        _first_ip = _ips[0].strip()
 
        log.debug('Got multiple IPs %s, using %s', ','.join(_ips), _first_ip)
 
        return _first_ip
 
    return ip
 

	
 

	
 
def _get_ip_addr(environ):
 
    proxy_key = 'HTTP_X_REAL_IP'
 
    proxy_key2 = 'HTTP_X_FORWARDED_FOR'
 
    def_key = 'REMOTE_ADDR'
 

	
 
    ip = environ.get(proxy_key)
 
    if ip:
 
        return _filter_proxy(ip)
 

	
 
    ip = environ.get(proxy_key2)
 
    if ip:
 
        return _filter_proxy(ip)
 

	
 
    ip = environ.get(def_key, '0.0.0.0')
 
    return _filter_proxy(ip)
 

	
 

	
 
def get_path_info(environ):
 
    """Return unicode PATH_INFO from environ ... using tg.original_request if available.
 
    """
 
    org_req = environ.get('tg.original_request')
 
    if org_req is not None:
 
        environ = org_req.environ
 
    return safe_unicode(environ['PATH_INFO'])
 

	
 

	
 
def log_in_user(user, remember, is_external_auth, ip_addr):
 
    """
 
    Log a `User` in and update session and cookies. If `remember` is True,
 
    the session cookie is set to expire in a year; otherwise, it expires at
 
    the end of the browser session.
 

	
 
    Returns populated `AuthUser` object.
 
    """
 
    # It should not be possible to explicitly log in as the default user.
 
    assert not user.is_default_user, user
 

	
 
    auth_user = AuthUser.make(dbuser=user, is_external_auth=is_external_auth, ip_addr=ip_addr)
 
    if auth_user is None:
 
        return None
 

	
 
    user.update_lastlogin()
 
    meta.Session().commit()
 

	
 
    # Start new session to prevent session fixation attacks.
 
    session.invalidate()
 
    session['authuser'] = cookie = auth_user.to_cookie()
 

	
 
    # If they want to be remembered, update the cookie.
 
    # NOTE: Assumes that beaker defaults to browser session cookie.
 
    if remember:
 
        t = datetime.datetime.now() + datetime.timedelta(days=365)
 
        session._set_cookie_expires(t)
 

	
 
    session.save()
 

	
 
    log.info('user %s is now authenticated and stored in '
 
             'session, session attrs %s', user.username, cookie)
 

	
 
    # dumps session attrs back to cookie
 
    session._update_cookie_out()
 

	
 
    return auth_user
 

	
 

	
 
class BasicAuth(paste.auth.basic.AuthBasicAuthenticator):
 

	
 
    def __init__(self, realm, authfunc, auth_http_code=None):
 
        self.realm = realm
 
        self.authfunc = authfunc
 
        self._rc_auth_http_code = auth_http_code
 

	
 
    def build_authentication(self, environ):
 
        head = paste.httpheaders.WWW_AUTHENTICATE.tuples('Basic realm="%s"' % self.realm)
 
        # Consume the whole body before sending a response
 
        try:
 
            request_body_size = int(environ.get('CONTENT_LENGTH', 0))
 
        except (ValueError):
 
            request_body_size = 0
 
        environ['wsgi.input'].read(request_body_size)
 
        if self._rc_auth_http_code and self._rc_auth_http_code == '403':
 
            # return 403 if alternative http return code is specified in
 
            # Kallithea config
 
            return paste.httpexceptions.HTTPForbidden(headers=head)
 
        return paste.httpexceptions.HTTPUnauthorized(headers=head)
 

	
 
    def authenticate(self, environ):
 
        authorization = paste.httpheaders.AUTHORIZATION(environ)
 
        if not authorization:
 
            return self.build_authentication(environ)
 
        (authmeth, auth) = authorization.split(' ', 1)
 
        if 'basic' != authmeth.lower():
 
            return self.build_authentication(environ)
 
        auth = base64.b64decode(auth.strip())
 
        _parts = auth.split(':', 1)
 
        if len(_parts) == 2:
 
            username, password = _parts
 
            if self.authfunc(username, password, environ) is not None:
 
                return username
 
        return self.build_authentication(environ)
 

	
 
    __call__ = authenticate
 

	
 

	
 
class BaseVCSController(object):
 
    """Base controller for handling Mercurial/Git protocol requests
 
    (coming from a VCS client, and not a browser).
 
    """
 

	
 
    scm_alias = None # 'hg' / 'git'
 

	
 
    def __init__(self, application, config):
 
        self.application = application
 
        self.config = config
 
        # base path of repo locations
 
        self.basepath = self.config['base_path']
 
        # authenticate this VCS request using the authentication modules
 
        self.authenticate = BasicAuth('', auth_modules.authenticate,
 
                                      config.get('auth_ret_code'))
 

	
 
    @classmethod
 
    def parse_request(cls, environ):
 
        """If request is parsed as a request for this VCS, return a namespace with the parsed request.
 
        If the request is unknown, return None.
 
        """
 
        raise NotImplementedError()
 

	
 
    def _authorize(self, environ, action, repo_name, ip_addr):
 
        """Authenticate and authorize user.
 

	
 
        Since we're dealing with a VCS client and not a browser, we only
 
        support HTTP basic authentication, either directly via raw header
 
        inspection, or by using container authentication to delegate the
 
        authentication to the web server.
 

	
 
        Returns (user, None) on successful authentication and authorization.
 
        Returns (None, wsgi_app) to send the wsgi_app response to the client.
 
        """
 
        # Use anonymous access if allowed for action on repo.
 
        default_user = User.get_default_user(cache=True)
 
        default_authuser = AuthUser.make(dbuser=default_user, ip_addr=ip_addr)
 
        if default_authuser is None:
 
            log.debug('No anonymous access at all') # move on to proper user auth
 
        else:
 
            if self._check_permission(action, default_authuser, repo_name):
 
                return default_authuser, None
 
            log.debug('Not authorized to access this repository as anonymous user')
 

	
 
        username = None
 
        #==============================================================
 
        # DEFAULT PERM FAILED OR ANONYMOUS ACCESS IS DISABLED SO WE
 
        # NEED TO AUTHENTICATE AND ASK FOR AUTH USER PERMISSIONS
 
        #==============================================================
 

	
 
        # try to auth based on environ, container auth methods
 
        log.debug('Running PRE-AUTH for container based authentication')
 
        pre_auth = auth_modules.authenticate('', '', environ)
 
        if pre_auth is not None and pre_auth.get('username'):
 
            username = pre_auth['username']
 
        log.debug('PRE-AUTH got %s as username', username)
 

	
 
        # If not authenticated by the container, running basic auth
 
        if not username:
 
            self.authenticate.realm = safe_str(self.config['realm'])
 
            self.authenticate.realm = self.config['realm']
 
            result = self.authenticate(environ)
 
            if isinstance(result, str):
 
                paste.httpheaders.AUTH_TYPE.update(environ, 'basic')
 
                paste.httpheaders.REMOTE_USER.update(environ, result)
 
                username = result
 
            else:
 
                return None, result.wsgi_application
 

	
 
        #==============================================================
 
        # CHECK PERMISSIONS FOR THIS REQUEST USING GIVEN USERNAME
 
        #==============================================================
 
        try:
 
            user = User.get_by_username_or_email(username)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            return None, webob.exc.HTTPInternalServerError()
 

	
 
        authuser = AuthUser.make(dbuser=user, ip_addr=ip_addr)
 
        if authuser is None:
 
            return None, webob.exc.HTTPForbidden()
 
        if not self._check_permission(action, authuser, repo_name):
 
            return None, webob.exc.HTTPForbidden()
 

	
 
        return user, None
 

	
 
    def _handle_request(self, environ, start_response):
 
        raise NotImplementedError()
 

	
 
    def _check_permission(self, action, authuser, repo_name):
 
        """
 
        Checks permissions using action (push/pull) user and repository
 
        name
 

	
 
        :param action: 'push' or 'pull' action
 
        :param user: `User` instance
 
        :param repo_name: repository name
 
        """
 
        if action == 'push':
 
            if not HasPermissionAnyMiddleware('repository.write',
 
                                              'repository.admin')(authuser,
 
                                                                  repo_name):
 
                return False
 

	
 
        else:
 
            #any other action need at least read permission
 
            if not HasPermissionAnyMiddleware('repository.read',
 
                                              'repository.write',
 
                                              'repository.admin')(authuser,
 
                                                                  repo_name):
 
                return False
 

	
 
        return True
 

	
 
    def _get_ip_addr(self, environ):
 
        return _get_ip_addr(environ)
 

	
 
    def __call__(self, environ, start_response):
 
        try:
 
            # try parsing a request for this VCS - if it fails, call the wrapped app
 
            parsed_request = self.parse_request(environ)
 
            if parsed_request is None:
 
                return self.application(environ, start_response)
 

	
 
            # skip passing error to error controller
 
            environ['pylons.status_code_redirect'] = True
 

	
 
            # quick check if repo exists...
 
            if not is_valid_repo(parsed_request.repo_name, self.basepath, self.scm_alias):
 
                raise webob.exc.HTTPNotFound()
 

	
 
            if parsed_request.action is None:
 
                # Note: the client doesn't get the helpful error message
 
                raise webob.exc.HTTPBadRequest('Unable to detect pull/push action for %r! Are you using a nonstandard command or client?' % parsed_request.repo_name)
 

	
 
            #======================================================================
 
            # CHECK PERMISSIONS
 
            #======================================================================
 
            ip_addr = self._get_ip_addr(environ)
 
            user, response_app = self._authorize(environ, parsed_request.action, parsed_request.repo_name, ip_addr)
 
            if response_app is not None:
 
                return response_app(environ, start_response)
 

	
 
            #======================================================================
 
            # REQUEST HANDLING
 
            #======================================================================
 
            set_hook_environment(user.username, ip_addr,
 
                parsed_request.repo_name, self.scm_alias, parsed_request.action)
 

	
 
            try:
 
                log.info('%s action on %s repo "%s" by "%s" from %s',
 
                         parsed_request.action, self.scm_alias, parsed_request.repo_name, safe_str(user.username), ip_addr)
 
                         parsed_request.action, self.scm_alias, parsed_request.repo_name, user.username, ip_addr)
 
                app = self._make_app(parsed_request)
 
                return app(environ, start_response)
 
            except Exception:
 
                log.error(traceback.format_exc())
 
                raise webob.exc.HTTPInternalServerError()
 

	
 
        except webob.exc.HTTPException as e:
 
            return e(environ, start_response)
 

	
 

	
 
class BaseController(TGController):
 

	
 
    def _before(self, *args, **kwargs):
 
        """
 
        _before is called before controller methods and after __call__
 
        """
 
        if request.needs_csrf_check:
 
            # CSRF protection: Whenever a request has ambient authority (whether
 
            # through a session cookie or its origin IP address), it must include
 
            # the correct token, unless the HTTP method is GET or HEAD (and thus
 
            # guaranteed to be side effect free. In practice, the only situation
 
            # where we allow side effects without ambient authority is when the
 
            # authority comes from an API key; and that is handled above.
 
            from kallithea.lib import helpers as h
 
            token = request.POST.get(h.session_csrf_secret_name)
 
            if not token or token != h.session_csrf_secret_token():
 
                log.error('CSRF check failed')
 
                raise webob.exc.HTTPForbidden()
 

	
 
        c.kallithea_version = __version__
 
        rc_config = Setting.get_app_settings()
 

	
 
        # Visual options
 
        c.visual = AttributeDict({})
 

	
 
        ## DB stored
 
        c.visual.show_public_icon = str2bool(rc_config.get('show_public_icon'))
 
        c.visual.show_private_icon = str2bool(rc_config.get('show_private_icon'))
 
        c.visual.stylify_metalabels = str2bool(rc_config.get('stylify_metalabels'))
 
        c.visual.page_size = safe_int(rc_config.get('dashboard_items', 100))
 
        c.visual.admin_grid_items = safe_int(rc_config.get('admin_grid_items', 100))
 
        c.visual.repository_fields = str2bool(rc_config.get('repository_fields'))
 
        c.visual.show_version = str2bool(rc_config.get('show_version'))
 
        c.visual.use_gravatar = str2bool(rc_config.get('use_gravatar'))
 
        c.visual.gravatar_url = rc_config.get('gravatar_url')
 

	
 
        c.ga_code = rc_config.get('ga_code')
 
        # TODO: replace undocumented backwards compatibility hack with db upgrade and rename ga_code
 
        if c.ga_code and '<' not in c.ga_code:
 
            c.ga_code = '''<script type="text/javascript">
 
                var _gaq = _gaq || [];
 
                _gaq.push(['_setAccount', '%s']);
 
                _gaq.push(['_trackPageview']);
 

	
 
                (function() {
 
                    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
 
                    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
 
                    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
 
                    })();
 
            </script>''' % c.ga_code
 
        c.site_name = rc_config.get('title')
 
        c.clone_uri_tmpl = rc_config.get('clone_uri_tmpl') or Repository.DEFAULT_CLONE_URI
 
        c.clone_ssh_tmpl = rc_config.get('clone_ssh_tmpl') or Repository.DEFAULT_CLONE_SSH
 

	
 
        ## INI stored
 
        c.visual.allow_repo_location_change = str2bool(config.get('allow_repo_location_change', True))
 
        c.visual.allow_custom_hooks_settings = str2bool(config.get('allow_custom_hooks_settings', True))
 
        c.ssh_enabled = str2bool(config.get('ssh_enabled', False))
 

	
 
        c.instance_id = config.get('instance_id')
 
        c.issues_url = config.get('bugtracker', url('issues_url'))
 
        # END CONFIG VARS
 

	
 
        c.repo_name = get_repo_slug(request)  # can be empty
 
        c.backends = list(BACKENDS)
 

	
 
        self.cut_off_limit = safe_int(config.get('cut_off_limit'))
 

	
 
        c.my_pr_count = PullRequest.query(reviewer_id=request.authuser.user_id, include_closed=False).count()
 

	
 
        self.scm_model = ScmModel()
 

	
 
    @staticmethod
 
    def _determine_auth_user(session_authuser, ip_addr):
 
        """
 
        Create an `AuthUser` object given the API key/bearer token
 
        (if any) and the value of the authuser session cookie.
 
        Returns None if no valid user is found (like not active or no access for IP).
 
        """
 

	
 
        # Authenticate by session cookie
 
        # In ancient login sessions, 'authuser' may not be a dict.
 
        # In that case, the user will have to log in again.
 
        # v0.3 and earlier included an 'is_authenticated' key; if present,
 
        # this must be True.
 
        if isinstance(session_authuser, dict) and session_authuser.get('is_authenticated', True):
kallithea/lib/caching_query.py
Show inline comments
 
# apparently based on https://github.com/sqlalchemy/sqlalchemy/blob/rel_0_7/examples/beaker_caching/caching_query.py
 

	
 
"""caching_query.py
 

	
 
Represent persistence structures which allow the usage of
 
Beaker caching with SQLAlchemy.
 

	
 
The three new concepts introduced here are:
 

	
 
 * CachingQuery - a Query subclass that caches and
 
   retrieves results in/from Beaker.
 
 * FromCache - a query option that establishes caching
 
   parameters on a Query
 
 * _params_from_query - extracts value parameters from
 
   a Query.
 

	
 
The rest of what's here are standard SQLAlchemy and
 
Beaker constructs.
 

	
 
"""
 
import beaker
 
from beaker.exceptions import BeakerException
 
from sqlalchemy.orm.interfaces import MapperOption
 
from sqlalchemy.orm.query import Query
 
from sqlalchemy.sql import visitors
 

	
 
from kallithea.lib.utils2 import safe_str
 

	
 

	
 
class CachingQuery(Query):
 
    """A Query subclass which optionally loads full results from a Beaker
 
    cache region.
 

	
 
    The CachingQuery stores additional state that allows it to consult
 
    a Beaker cache before accessing the database:
 

	
 
    * A "region", which is a cache region argument passed to a
 
      Beaker CacheManager, specifies a particular cache configuration
 
      (including backend implementation, expiration times, etc.)
 
    * A "namespace", which is a qualifying name that identifies a
 
      group of keys within the cache.  A query that filters on a name
 
      might use the name "by_name", a query that filters on a date range
 
      to a joined table might use the name "related_date_range".
 

	
 
    When the above state is present, a Beaker cache is retrieved.
 

	
 
    The "namespace" name is first concatenated with
 
    a string composed of the individual entities and columns the Query
 
    requests, i.e. such as ``Query(User.id, User.name)``.
 

	
 
    The Beaker cache is then loaded from the cache manager based
 
    on the region and composed namespace.  The key within the cache
 
    itself is then constructed against the bind parameters specified
 
    by this query, which are usually literals defined in the
 
    WHERE clause.
 

	
 
    The FromCache mapper option below represent
 
    the "public" method of configuring this state upon the CachingQuery.
 

	
 
    """
 

	
 
    def __init__(self, manager, *args, **kw):
 
        self.cache_manager = manager
 
        Query.__init__(self, *args, **kw)
 

	
 
    def __iter__(self):
 
        """override __iter__ to pull results from Beaker
 
           if particular attributes have been configured.
 

	
 
           Note that this approach does *not* detach the loaded objects from
 
           the current session. If the cache backend is an in-process cache
 
           (like "memory") and lives beyond the scope of the current session's
 
           transaction, those objects may be expired. The method here can be
 
           modified to first expunge() each loaded item from the current
 
           session before returning the list of items, so that the items
 
           in the cache are not the same ones in the current Session.
 

	
 
        """
 
        if hasattr(self, '_cache_parameters'):
 
            return self.get_value(createfunc=lambda:
 
                                  list(Query.__iter__(self)))
 
        else:
 
            return Query.__iter__(self)
 

	
 
    def invalidate(self):
 
        """Invalidate the value represented by this Query."""
 

	
 
        cache, cache_key = _get_cache_parameters(self)
 
        cache.remove(cache_key)
 

	
 
    def get_value(self, merge=True, createfunc=None):
 
        """Return the value from the cache for this query.
 

	
 
        Raise KeyError if no value present and no
 
        createfunc specified.
 

	
 
        """
 
        cache, cache_key = _get_cache_parameters(self)
 
        ret = cache.get_value(cache_key, createfunc=createfunc)
 
        if merge:
 
            ret = self.merge_result(ret, load=False)
 
        return ret
 

	
 
    def set_value(self, value):
 
        """Set the value in the cache for this query."""
 

	
 
        cache, cache_key = _get_cache_parameters(self)
 
        cache.put(cache_key, value)
 

	
 

	
 
def query_callable(manager, query_cls=CachingQuery):
 
    def query(*arg, **kw):
 
        return query_cls(manager, *arg, **kw)
 
    return query
 

	
 

	
 
def get_cache_region(name, region):
 
    if region not in beaker.cache.cache_regions:
 
        raise BeakerException('Cache region `%s` not configured '
 
            'Check if proper cache settings are in the .ini files' % region)
 
    kw = beaker.cache.cache_regions[region]
 
    return beaker.cache.Cache._get_cache(name, kw)
 

	
 

	
 
def _get_cache_parameters(query):
 
    """For a query with cache_region and cache_namespace configured,
 
    return the corresponding Cache instance and cache key, based
 
    on this query's current criterion and parameter values.
 

	
 
    """
 
    if not hasattr(query, '_cache_parameters'):
 
        raise ValueError("This Query does not have caching "
 
                         "parameters configured.")
 

	
 
    region, namespace, cache_key = query._cache_parameters
 

	
 
    namespace = _namespace_from_query(namespace, query)
 

	
 
    if cache_key is None:
 
        # cache key - the value arguments from this query's parameters.
 
        args = _params_from_query(query)
 
        args.append(query._limit)
 
        args.append(query._offset)
 
        cache_key = " ".join(str(x) for x in args)
 

	
 
    if cache_key is None:
 
        raise Exception('Cache key cannot be None')
 

	
 
    # get cache
 
    #cache = query.cache_manager.get_cache_region(namespace, region)
 
    cache = get_cache_region(namespace, region)
 
    # optional - hash the cache_key too for consistent length
 
    # import uuid
 
    # cache_key= str(uuid.uuid5(uuid.NAMESPACE_DNS, cache_key))
 

	
 
    return cache, cache_key
 

	
 

	
 
def _namespace_from_query(namespace, query):
 
    # cache namespace - the token handed in by the
 
    # option + class we're querying against
 
    namespace = " ".join([namespace] + [str(x) for x in query._entities])
 

	
 
    # memcached wants this
 
    namespace = namespace.replace(' ', '_')
 

	
 
    return namespace
 

	
 

	
 
def _set_cache_parameters(query, region, namespace, cache_key):
 

	
 
    if hasattr(query, '_cache_parameters'):
 
        region, namespace, cache_key = query._cache_parameters
 
        raise ValueError("This query is already configured "
 
                        "for region %r namespace %r" %
 
                        (region, namespace)
 
                    )
 
    query._cache_parameters = region, safe_str(namespace), cache_key
 
    query._cache_parameters = region, namespace, cache_key
 

	
 

	
 
class FromCache(MapperOption):
 
    """Specifies that a Query should load results from a cache."""
 

	
 
    propagate_to_loaders = False
 

	
 
    def __init__(self, region, namespace, cache_key=None):
 
        """Construct a new FromCache.
 

	
 
        :param region: the cache region.  Should be a
 
        region configured in the Beaker CacheManager.
 

	
 
        :param namespace: the cache namespace.  Should
 
        be a name uniquely describing the target Query's
 
        lexical structure.
 

	
 
        :param cache_key: optional.  A string cache key
 
        that will serve as the key to the query.   Use this
 
        if your query has a huge amount of parameters (such
 
        as when using in_()) which correspond more simply to
 
        some other identifier.
 

	
 
        """
 
        self.region = region
 
        self.namespace = namespace
 
        self.cache_key = cache_key
 

	
 
    def process_query(self, query):
 
        """Process a Query during normal loading operation."""
 

	
 
        _set_cache_parameters(query, self.region, self.namespace,
 
                              self.cache_key)
 

	
 

	
 
def _params_from_query(query):
 
    """Pull the bind parameter values from a query.
 

	
 
    This takes into account any scalar attribute bindparam set up.
 

	
 
    E.g. params_from_query(query.filter(Cls.foo==5).filter(Cls.bar==7)))
 
    would return [5, 7].
 

	
 
    """
 
    v = []
 

	
 
    def visit_bindparam(bind):
 
        if bind.key in query._params:
 
            value = query._params[bind.key]
 
        elif bind.callable:
 
            # lazyloader may dig a callable in here, intended
 
            # to late-evaluate params after autoflush is called.
 
            # convert to a scalar value.
 
            value = bind.callable()
 
        else:
 
            value = bind.value
 

	
 
        v.append(value)
 
    if query._criterion is not None:
 
        visitors.traverse(query._criterion, {}, {'bindparam': visit_bindparam})
 
    for f in query._from_obj:
 
        visitors.traverse(f, {}, {'bindparam': visit_bindparam})
 
    return v
kallithea/lib/hooks.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.hooks
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Hooks run by Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Aug 6, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import os
 
import sys
 
import time
 

	
 
import mercurial.scmutil
 

	
 
from kallithea.lib import helpers as h
 
from kallithea.lib.exceptions import UserCreationError
 
from kallithea.lib.utils import action_logger, make_ui
 
from kallithea.lib.utils2 import HookEnvironmentError, ascii_str, get_hook_environment, safe_bytes, safe_str
 
from kallithea.lib.utils2 import HookEnvironmentError, ascii_str, get_hook_environment, safe_bytes
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.model.db import Repository, User
 

	
 

	
 
def _get_scm_size(alias, root_path):
 
    if not alias.startswith('.'):
 
        alias += '.'
 

	
 
    size_scm, size_root = 0, 0
 
    for path, dirs, files in os.walk(safe_str(root_path)):
 
    for path, dirs, files in os.walk(root_path):
 
        if path.find(alias) != -1:
 
            for f in files:
 
                try:
 
                    size_scm += os.path.getsize(os.path.join(path, f))
 
                except OSError:
 
                    pass
 
        else:
 
            for f in files:
 
                try:
 
                    size_root += os.path.getsize(os.path.join(path, f))
 
                except OSError:
 
                    pass
 

	
 
    size_scm_f = h.format_byte_size(size_scm)
 
    size_root_f = h.format_byte_size(size_root)
 
    size_total_f = h.format_byte_size(size_root + size_scm)
 

	
 
    return size_scm_f, size_root_f, size_total_f
 

	
 

	
 
def repo_size(ui, repo, hooktype=None, **kwargs):
 
    """Show size of Mercurial repository, to be called after push."""
 
    size_hg_f, size_root_f, size_total_f = _get_scm_size('.hg', repo.root)
 

	
 
    last_cs = repo[len(repo) - 1]
 

	
 
    msg = ('Repository size .hg: %s Checkout: %s Total: %s\n'
 
           'Last revision is now r%s:%s\n') % (
 
        size_hg_f, size_root_f, size_total_f, last_cs.rev(), ascii_str(last_cs.hex())[:12]
 
    )
 
    ui.status(safe_bytes(msg))
 

	
 

	
 
def log_pull_action(ui, repo, **kwargs):
 
    """Logs user last pull action
 

	
 
    Called as Mercurial hook outgoing.pull_logger or from Kallithea before invoking Git.
 

	
 
    Does *not* use the action from the hook environment but is always 'pull'.
 
    """
 
    ex = get_hook_environment()
 

	
 
    user = User.get_by_username(ex.username)
 
    action = 'pull'
 
    action_logger(user, action, ex.repository, ex.ip, commit=True)
 
    # extension hook call
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'PULL_HOOK', None)
 
    if callable(callback):
 
        kw = {}
 
        kw.update(ex)
 
        callback(**kw)
 

	
 
    return 0
 

	
 

	
 
def log_push_action(ui, repo, node, node_last, **kwargs):
 
    """
 
    Entry point for Mercurial hook changegroup.push_logger.
 

	
 
    The pushed changesets is given by the revset 'node:node_last'.
 

	
 
    Note: This hook is not only logging, but also the side effect invalidating
 
    cahes! The function should perhaps be renamed.
 
    """
 
    revs = [ascii_str(repo[r].hex()) for r in mercurial.scmutil.revrange(repo, [b'%s:%s' % (node, node_last)])]
 
    process_pushed_raw_ids(revs)
 
    return 0
 

	
 

	
 
def process_pushed_raw_ids(revs):
 
    """
 
    Register that changes have been added to the repo - log the action *and* invalidate caches.
 

	
 
    Called from  Mercurial changegroup.push_logger calling hook log_push_action,
 
    or from the Git post-receive hook calling handle_git_post_receive ...
 
    or from scm _handle_push.
 
    """
 
    ex = get_hook_environment()
 

	
 
    action = '%s:%s' % (ex.action, ','.join(revs))
 
    action_logger(ex.username, action, ex.repository, ex.ip, commit=True)
 

	
 
    from kallithea.model.scm import ScmModel
 
    ScmModel().mark_for_invalidation(ex.repository)
 

	
 
    # extension hook call
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'PUSH_HOOK', None)
 
    if callable(callback):
 
        kw = {'pushed_revs': revs}
 
        kw.update(ex)
 
        callback(**kw)
 

	
 

	
 
def log_create_repository(repository_dict, created_by, **kwargs):
 
@@ -225,177 +225,176 @@ def log_delete_repository(repository_dic
 
    """
 
    Post delete repository Hook.
 

	
 
    :param repository: dict dump of repository object
 
    :param deleted_by: username who deleted the repository
 

	
 
    available keys of repository_dict:
 

	
 
     'repo_type',
 
     'description',
 
     'private',
 
     'created_on',
 
     'enable_downloads',
 
     'repo_id',
 
     'owner_id',
 
     'enable_statistics',
 
     'clone_uri',
 
     'fork_id',
 
     'group_id',
 
     'repo_name'
 

	
 
    """
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'DELETE_REPO_HOOK', None)
 
    if callable(callback):
 
        kw = {}
 
        kw.update(repository_dict)
 
        kw.update({'deleted_by': deleted_by,
 
                   'deleted_on': time.time()})
 
        kw.update(kwargs)
 
        return callback(**kw)
 

	
 
    return 0
 

	
 

	
 
def log_delete_user(user_dict, deleted_by, **kwargs):
 
    """
 
    Post delete user Hook.
 

	
 
    :param user_dict: dict dump of user object
 

	
 
    available keys for user_dict:
 

	
 
     'username',
 
     'full_name_or_username',
 
     'full_contact',
 
     'user_id',
 
     'name',
 
     'firstname',
 
     'short_contact',
 
     'admin',
 
     'lastname',
 
     'ip_addresses',
 
     'ldap_dn',
 
     'email',
 
     'api_key',
 
     'last_login',
 
     'full_name',
 
     'active',
 
     'password',
 
     'emails',
 

	
 
    """
 
    from kallithea import EXTENSIONS
 
    callback = getattr(EXTENSIONS, 'DELETE_USER_HOOK', None)
 
    if callable(callback):
 
        return callback(deleted_by=deleted_by, **user_dict)
 

	
 
    return 0
 

	
 

	
 
def _hook_environment(repo_path):
 
    """
 
    Create a light-weight environment for stand-alone scripts and return an UI and the
 
    db repository.
 

	
 
    Git hooks are executed as subprocess of Git while Kallithea is waiting, and
 
    they thus need enough info to be able to create an app environment and
 
    connect to the database.
 
    """
 
    import paste.deploy
 
    import kallithea.config.middleware
 

	
 
    extras = get_hook_environment()
 

	
 
    path_to_ini_file = extras['config']
 
    kallithea.CONFIG = paste.deploy.appconfig('config:' + path_to_ini_file)
 
    #logging.config.fileConfig(ini_file_path) # Note: we are in a different process - don't use configured logging
 
    kallithea.config.middleware.make_app(kallithea.CONFIG.global_conf, **kallithea.CONFIG.local_conf)
 

	
 
    # fix if it's not a bare repo
 
    if repo_path.endswith(os.sep + '.git'):
 
        repo_path = repo_path[:-5]
 

	
 
    repo = Repository.get_by_full_path(repo_path)
 
    if not repo:
 
        raise OSError('Repository %s not found in database'
 
                      % (safe_str(repo_path)))
 
        raise OSError('Repository %s not found in database' % repo_path)
 

	
 
    baseui = make_ui()
 
    return baseui, repo
 

	
 

	
 
def handle_git_pre_receive(repo_path, git_stdin_lines):
 
    """Called from Git pre-receive hook"""
 
    # Currently unused. TODO: remove?
 
    return 0
 

	
 

	
 
def handle_git_post_receive(repo_path, git_stdin_lines):
 
    """Called from Git post-receive hook"""
 
    try:
 
        baseui, repo = _hook_environment(repo_path)
 
    except HookEnvironmentError as e:
 
        sys.stderr.write("Skipping Kallithea Git post-recieve hook %r.\nGit was apparently not invoked by Kallithea: %s\n" % (sys.argv[0], e))
 
        return 0
 

	
 
    # the post push hook should never use the cached instance
 
    scm_repo = repo.scm_instance_no_cache()
 

	
 
    rev_data = []
 
    for l in git_stdin_lines:
 
        old_rev, new_rev, ref = l.strip().split(' ')
 
        _ref_data = ref.split('/')
 
        if _ref_data[1] in ['tags', 'heads']:
 
            rev_data.append({'old_rev': old_rev,
 
                             'new_rev': new_rev,
 
                             'ref': ref,
 
                             'type': _ref_data[1],
 
                             'name': '/'.join(_ref_data[2:])})
 

	
 
    git_revs = []
 
    for push_ref in rev_data:
 
        _type = push_ref['type']
 
        if _type == 'heads':
 
            if push_ref['old_rev'] == EmptyChangeset().raw_id:
 
                # update the symbolic ref if we push new repo
 
                if scm_repo.is_empty():
 
                    scm_repo._repo.refs.set_symbolic_ref(
 
                        b'HEAD',
 
                        b'refs/heads/%s' % safe_bytes(push_ref['name']))
 

	
 
                # build exclude list without the ref
 
                cmd = ['for-each-ref', '--format=%(refname)', 'refs/heads/*']
 
                stdout = scm_repo.run_git_command(cmd)
 
                ref = push_ref['ref']
 
                heads = [head for head in stdout.splitlines() if head != ref]
 
                # now list the git revs while excluding from the list
 
                cmd = ['log', push_ref['new_rev'], '--reverse', '--pretty=format:%H']
 
                cmd.append('--not')
 
                cmd.extend(heads) # empty list is ok
 
                stdout = scm_repo.run_git_command(cmd)
 
                git_revs += stdout.splitlines()
 

	
 
            elif push_ref['new_rev'] == EmptyChangeset().raw_id:
 
                # delete branch case
 
                git_revs += ['delete_branch=>%s' % push_ref['name']]
 
            else:
 
                cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref,
 
                       '--reverse', '--pretty=format:%H']
 
                stdout = scm_repo.run_git_command(cmd)
 
                git_revs += stdout.splitlines()
 

	
 
        elif _type == 'tags':
 
            git_revs += ['tag=>%s' % push_ref['name']]
 

	
 
    process_pushed_raw_ids(git_revs)
 

	
 
    return 0
 

	
 

	
 
# Almost exactly like Mercurial contrib/hg-ssh:
 
def rejectpush(ui, **kwargs):
 
    """Mercurial hook to be installed as pretxnopen and prepushkey for read-only repos"""
 
    ex = get_hook_environment()
 
    ui.warn(safe_bytes("Push access to %r denied\n" % safe_str(ex.repository)))
 
    ui.warn(safe_bytes("Push access to %r denied\n" % ex.repository))
 
    return 1
kallithea/lib/indexers/daemon.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.indexers.daemon
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
A daemon will read from task table and run tasks
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Jan 26, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 
import os
 
import sys
 
import traceback
 
from os.path import dirname
 
from shutil import rmtree
 
from time import mktime
 

	
 
from whoosh.index import create_in, exists_in, open_dir
 
from whoosh.qparser import QueryParser
 

	
 
from kallithea.config.conf import INDEX_EXTENSIONS, INDEX_FILENAMES
 
from kallithea.lib.indexers import CHGSET_IDX_NAME, CHGSETS_SCHEMA, IDX_NAME, SCHEMA
 
from kallithea.lib.utils2 import safe_str, safe_unicode
 
from kallithea.lib.utils2 import safe_unicode
 
from kallithea.lib.vcs.exceptions import ChangesetError, NodeDoesNotExistError, RepositoryError
 
from kallithea.model.db import Repository
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
# Add location of top level folder to sys.path
 
project_path = dirname(dirname(dirname(dirname(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 

	
 

	
 

	
 
log = logging.getLogger('whoosh_indexer')
 

	
 

	
 
class WhooshIndexingDaemon(object):
 
    """
 
    Daemon for atomic indexing jobs
 
    """
 

	
 
    def __init__(self, indexname=IDX_NAME, index_location=None,
 
                 repo_location=None, repo_list=None,
 
                 repo_update_list=None):
 
        self.indexname = indexname
 

	
 
        self.index_location = index_location
 
        if not index_location:
 
            raise Exception('You have to provide index location')
 

	
 
        self.repo_location = repo_location
 
        if not repo_location:
 
            raise Exception('You have to provide repositories location')
 

	
 
        self.repo_paths = ScmModel().repo_scan(self.repo_location)
 

	
 
        # filter repo list
 
        if repo_list:
 
            repo_list = set(repo_list)
 
            self.filtered_repo_paths = {}
 
            for repo_name, repo in self.repo_paths.items():
 
                if repo_name in repo_list:
 
                    self.filtered_repo_paths[repo_name] = repo
 

	
 
            self.repo_paths = self.filtered_repo_paths
 

	
 
        # filter update repo list
 
        self.filtered_repo_update_paths = {}
 
        if repo_update_list:
 
            self.filtered_repo_update_paths = {}
 
            for repo_name, repo in self.repo_paths.items():
 
                if repo_name in repo_update_list:
 
                    self.filtered_repo_update_paths[repo_name] = repo
 
            self.repo_paths = self.filtered_repo_update_paths
 

	
 
        self.initial = True
 
        if not os.path.isdir(self.index_location):
 
            os.makedirs(self.index_location)
 
            log.info('Cannot run incremental index since it does not '
 
                     'yet exist - running full build')
 
        elif not exists_in(self.index_location, IDX_NAME):
 
            log.info('Running full index build, as the file content '
 
                     'index does not exist')
 
        elif not exists_in(self.index_location, CHGSET_IDX_NAME):
 
            log.info('Running full index build, as the changeset '
 
                     'index does not exist')
 
        else:
 
            self.initial = False
 

	
 
    def _get_index_revision(self, repo):
 
        db_repo = Repository.get_by_repo_name(repo.name)
 
        landing_rev = 'tip'
 
        if db_repo:
 
            _rev_type, _rev = db_repo.landing_rev
 
            landing_rev = _rev
 
        return landing_rev
 

	
 
    def _get_index_changeset(self, repo, index_rev=None):
 
        if not index_rev:
 
            index_rev = self._get_index_revision(repo)
 
        cs = repo.get_changeset(index_rev)
 
        return cs
 

	
 
    def get_paths(self, repo):
 
        """
 
        recursive walk in root dir and return a set of all path in that dir
 
        based on repository walk function
 
        """
 
        index_paths_ = set()
 
        try:
 
            cs = self._get_index_changeset(repo)
 
            for _topnode, _dirs, files in cs.walk('/'):
 
                for f in files:
 
                    index_paths_.add(os.path.join(safe_str(repo.path), safe_str(f.path)))
 
                    index_paths_.add(os.path.join(repo.path, f.path))
 

	
 
        except RepositoryError:
 
            log.debug(traceback.format_exc())
 
            pass
 
        return index_paths_
 

	
 
    def get_node(self, repo, path, index_rev=None):
 
        """
 
        gets a filenode based on given full path. It operates on string for
 
        hg git compatibility.
 
        gets a filenode based on given full path.
 

	
 
        :param repo: scm repo instance
 
        :param path: full path including root location
 
        :return: FileNode
 
        """
 
        # FIXME: paths should be normalized ... or even better: don't include repo.path
 
        path = safe_str(path)
 
        repo_path = safe_str(repo.path)
 
        assert path.startswith(repo_path)
 
        assert path[len(repo_path)] in (os.path.sep, os.path.altsep)
 
        node_path = path[len(repo_path) + 1:]
 
        assert path.startswith(repo.path)
 
        assert path[len(repo.path)] in (os.path.sep, os.path.altsep)
 
        node_path = path[len(repo.path) + 1:]
 
        cs = self._get_index_changeset(repo, index_rev=index_rev)
 
        node = cs.get_node(node_path)
 
        return node
 

	
 
    def is_indexable_node(self, node):
 
        """
 
        Just index the content of chosen files, skipping binary files
 
        """
 
        return (node.extension in INDEX_EXTENSIONS or node.name in INDEX_FILENAMES) and \
 
               not node.is_binary
 

	
 
    def get_node_mtime(self, node):
 
        return mktime(node.last_changeset.date.timetuple())
 

	
 
    def add_doc(self, writer, path, repo, repo_name, index_rev=None):
 
        """
 
        Adding doc to writer this function itself fetches data from
 
        the instance of vcs backend
 
        """
 
        try:
 
            node = self.get_node(repo, path, index_rev)
 
        except (ChangesetError, NodeDoesNotExistError):
 
            log.debug("    >> %s - not found in %s %s", path, repo, index_rev)
 
            return 0, 0
 

	
 
        indexed = indexed_w_content = 0
 
        if self.is_indexable_node(node):
 
            bytes_content = node.content
 
            if b'\0' in bytes_content:
 
                log.warning('    >> %s - no text content', path)
 
                u_content = u''
 
            else:
 
                log.debug('    >> %s', path)
 
                u_content = safe_unicode(bytes_content)
 
                indexed_w_content += 1
 

	
 
        else:
 
            log.debug('    >> %s - not indexable', path)
 
            # just index file name without it's content
 
            u_content = u''
 
            indexed += 1
 

	
 
        writer.add_document(
 
            fileid=path,
 
            owner=repo.contact,
 
            repository_rawname=repo_name,
 
            repository=repo_name,
 
            path=path,
 
            content=u_content,
 
            modtime=self.get_node_mtime(node),
 
            extension=node.extension
 
        )
 
        return indexed, indexed_w_content
 

	
 
    def index_changesets(self, writer, repo_name, repo, start_rev=None):
 
        """
 
        Add all changeset in the vcs repo starting at start_rev
 
        to the index writer
 

	
 
        :param writer: the whoosh index writer to add to
 
        :param repo_name: name of the repository from whence the
 
          changeset originates including the repository group
 
        :param repo: the vcs repository instance to index changesets for,
 
          the presumption is the repo has changesets to index
 
        :param start_rev=None: the full sha id to start indexing from
 
          if start_rev is None then index from the first changeset in
 
          the repo
 
        """
 

	
 
        if start_rev is None:
 
            start_rev = repo[0].raw_id
 

	
 
        log.debug('Indexing changesets in %s, starting at rev %s',
 
                  repo_name, start_rev)
 

	
 
        indexed = 0
 
        cs_iter = repo.get_changesets(start=start_rev)
 
        total = len(cs_iter)
 
        for cs in cs_iter:
 
            indexed += 1
 
            log.debug('    >> %s %s/%s', cs, indexed, total)
 
            writer.add_document(
 
                raw_id=cs.raw_id,
 
                owner=repo.contact,
 
                date=cs._timestamp,
 
                repository_rawname=repo_name,
 
                repository=repo_name,
 
                author=cs.author,
 
                message=cs.message,
 
                last=cs.last,
 
                added=u' '.join(node.path for node in cs.added).lower(),
 
                removed=u' '.join(node.path for node in cs.removed).lower(),
 
                changed=u' '.join(node.path for node in cs.changed).lower(),
 
                parents=u' '.join(cs.raw_id for cs in cs.parents),
 
            )
 

	
kallithea/lib/middleware/simplehg.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.middleware.simplehg
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
SimpleHg middleware for handling Mercurial protocol requests (push/clone etc.).
 
It's implemented with basic auth function
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 28, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 

	
 
"""
 

	
 

	
 
import logging
 
import os
 
import urllib.parse
 

	
 
import mercurial.hgweb
 

	
 
from kallithea.lib.base import BaseVCSController, get_path_info
 
from kallithea.lib.utils import make_ui
 
from kallithea.lib.utils2 import safe_bytes, safe_str
 
from kallithea.lib.utils2 import safe_bytes
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def get_header_hgarg(environ):
 
    """Decode the special Mercurial encoding of big requests over multiple headers.
 
    >>> get_header_hgarg({})
 
    ''
 
    >>> get_header_hgarg({'HTTP_X_HGARG_0': ' ', 'HTTP_X_HGARG_1': 'a','HTTP_X_HGARG_2': '','HTTP_X_HGARG_3': 'b+c %20'})
 
    'ab+c %20'
 
    """
 
    chunks = []
 
    i = 1
 
    while True:
 
        v = environ.get('HTTP_X_HGARG_%d' % i)
 
        if v is None:
 
            break
 
        chunks.append(v)
 
        i += 1
 
    return ''.join(chunks)
 

	
 

	
 
cmd_mapping = {
 
    # 'batch' is not in this list - it is handled explicitly
 
    'between': 'pull',
 
    'branches': 'pull',
 
    'branchmap': 'pull',
 
    'capabilities': 'pull',
 
    'changegroup': 'pull',
 
    'changegroupsubset': 'pull',
 
    'changesetdata': 'pull',
 
    'clonebundles': 'pull',
 
    'debugwireargs': 'pull',
 
    'filedata': 'pull',
 
    'getbundle': 'pull',
 
    'getlfile': 'pull',
 
    'heads': 'pull',
 
    'hello': 'pull',
 
    'known': 'pull',
 
    'lheads': 'pull',
 
    'listkeys': 'pull',
 
    'lookup': 'pull',
 
    'manifestdata': 'pull',
 
    'narrow_widen': 'pull',
 
    'protocaps': 'pull',
 
    'statlfile': 'pull',
 
    'stream_out': 'pull',
 
    'pushkey': 'push',
 
    'putlfile': 'push',
 
    'unbundle': 'push',
 
    }
 

	
 

	
 
class SimpleHg(BaseVCSController):
 

	
 
    scm_alias = 'hg'
 

	
 
    @classmethod
 
    def parse_request(cls, environ):
 
        http_accept = environ.get('HTTP_ACCEPT', '')
 
        if not http_accept.startswith('application/mercurial'):
 
            return None
 
        path_info = get_path_info(environ)
 
        if not path_info.startswith('/'): # it must!
 
            return None
 

	
 
        class parsed_request(object):
 
            repo_name = path_info[1:].rstrip('/')
 

	
 
            query_string = environ['QUERY_STRING']
 

	
 
            action = None
 
            for qry in query_string.split('&'):
 
                parts = qry.split('=', 1)
 
                if len(parts) == 2 and parts[0] == 'cmd':
 
                    cmd = parts[1]
 
                    if cmd == 'batch':
 
                        hgarg = get_header_hgarg(environ)
 
                        if not hgarg.startswith('cmds='):
 
                            action = 'push' # paranoid and safe
 
                            break
 
                        action = 'pull'
 
                        for cmd_arg in hgarg[5:].split(';'):
 
                            cmd, _args = urllib.parse.unquote_plus(cmd_arg).split(' ', 1)
 
                            op = cmd_mapping.get(cmd, 'push')
 
                            if op != 'pull':
 
                                assert op == 'push'
 
                                action = 'push'
 
                                break
 
                    else:
 
                        action = cmd_mapping.get(cmd, 'push')
 
                    break # only process one cmd
 

	
 
        return parsed_request
 

	
 
    def _make_app(self, parsed_request):
 
        """
 
        Make an hgweb wsgi application.
 
        """
 
        str_repo_name = safe_str(parsed_request.repo_name)
 
        repo_path = os.path.join(safe_str(self.basepath), str_repo_name)
 
        repo_name = parsed_request.repo_name
 
        repo_path = os.path.join(self.basepath, repo_name)
 
        baseui = make_ui(repo_path=repo_path)
 
        hgweb_app = mercurial.hgweb.hgweb(safe_bytes(repo_path), name=str_repo_name, baseui=baseui)
 
        hgweb_app = mercurial.hgweb.hgweb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
 

	
 
        def wrapper_app(environ, start_response):
 
            environ['REPO_NAME'] = str_repo_name # used by mercurial.hgweb.hgweb
 
            environ['REPO_NAME'] = repo_name # used by mercurial.hgweb.hgweb
 
            return hgweb_app(environ, start_response)
 

	
 
        return wrapper_app
kallithea/lib/utils.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.utils
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Utilities library for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 18, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import datetime
 
import logging
 
import os
 
import re
 
import sys
 
import traceback
 
from distutils.version import StrictVersion
 

	
 
import beaker.cache
 
import mercurial.config
 
import mercurial.ui
 
from tg.i18n import ugettext as _
 

	
 
import kallithea.config.conf
 
from kallithea.lib.exceptions import HgsubversionImportError
 
from kallithea.lib.utils2 import ascii_bytes, aslist, get_current_authuser, safe_bytes, safe_str
 
from kallithea.lib.utils2 import ascii_bytes, aslist, get_current_authuser, safe_bytes
 
from kallithea.lib.vcs.backends.git.repository import GitRepository
 
from kallithea.lib.vcs.backends.hg.repository import MercurialRepository
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import RepositoryError, VCSError
 
from kallithea.lib.vcs.utils.fakemod import create_module
 
from kallithea.lib.vcs.utils.helpers import get_scm
 
from kallithea.model import meta
 
from kallithea.model.db import RepoGroup, Repository, Setting, Ui, User, UserGroup, UserLog
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 
REMOVED_REPO_PAT = re.compile(r'rm__\d{8}_\d{6}_\d{6}_.*')
 

	
 

	
 
#==============================================================================
 
# PERM DECORATOR HELPERS FOR EXTRACTING NAMES FOR PERM CHECKS
 
#==============================================================================
 
def get_repo_slug(request):
 
    _repo = request.environ['pylons.routes_dict'].get('repo_name')
 
    if _repo:
 
        _repo = _repo.rstrip('/')
 
    return _repo
 

	
 

	
 
def get_repo_group_slug(request):
 
    _group = request.environ['pylons.routes_dict'].get('group_name')
 
    if _group:
 
        _group = _group.rstrip('/')
 
    return _group
 

	
 

	
 
def get_user_group_slug(request):
 
    _group = request.environ['pylons.routes_dict'].get('id')
 
    _group = UserGroup.get(_group)
 
    if _group:
 
        return _group.users_group_name
 
    return None
 

	
 

	
 
def _get_permanent_id(s):
 
    """Helper for decoding stable URLs with repo ID. For a string like '_123'
 
    return 123.
 
    """
 
    by_id_match = re.match(r'^_(\d+)$', s)
 
    if by_id_match is None:
 
        return None
 
    return int(by_id_match.group(1))
 

	
 

	
 
def fix_repo_id_name(path):
 
    """
 
    Rewrite repo_name for _<ID> permanent URLs.
 

	
 
    Given a path, if the first path element is like _<ID>, return the path with
 
    this part expanded to the corresponding full repo name, else return the
 
    provided path.
 
    """
 
    first, rest = path, ''
 
    if '/' in path:
 
        first, rest_ = path.split('/', 1)
 
        rest = '/' + rest_
 
    repo_id = _get_permanent_id(first)
 
    if repo_id is not None:
 
        repo = Repository.get(repo_id)
 
        if repo is not None:
 
            return repo.repo_name + rest
 
    return path
 

	
 

	
 
def action_logger(user, action, repo, ipaddr='', commit=False):
 
    """
 
    Action logger for various actions made by users
 

	
 
    :param user: user that made this action, can be a unique username string or
 
        object containing user_id attribute
 
    :param action: action to log, should be on of predefined unique actions for
 
        easy translations
 
    :param repo: string name of repository or object containing repo_id,
 
        that action was made on
 
    :param ipaddr: optional IP address from what the action was made
 

	
 
    """
 

	
 
    # if we don't get explicit IP address try to get one from registered user
 
    # in tmpl context var
 
    if not ipaddr:
 
        ipaddr = getattr(get_current_authuser(), 'ip_addr', '')
 

	
 
    if getattr(user, 'user_id', None):
 
        user_obj = User.get(user.user_id)
 
    elif isinstance(user, str):
 
        user_obj = User.get_by_username(user)
 
    else:
 
        raise Exception('You have to provide a user object or a username')
 

	
 
    if getattr(repo, 'repo_id', None):
 
        repo_obj = Repository.get(repo.repo_id)
 
        repo_name = repo_obj.repo_name
 
    elif isinstance(repo, str):
 
        repo_name = repo.lstrip('/')
 
        repo_obj = Repository.get_by_repo_name(repo_name)
 
    else:
 
        repo_obj = None
 
        repo_name = u''
 

	
 
    user_log = UserLog()
 
    user_log.user_id = user_obj.user_id
 
    user_log.username = user_obj.username
 
    user_log.action = action
 

	
 
    user_log.repository = repo_obj
 
    user_log.repository_name = repo_name
 

	
 
    user_log.action_date = datetime.datetime.now()
 
    user_log.user_ip = ipaddr
 
    meta.Session().add(user_log)
 

	
 
    log.info('Logging action:%s on %s by user:%s ip:%s',
 
             action, repo, user_obj, ipaddr)
 
    if commit:
 
        meta.Session().commit()
 

	
 

	
 
def get_filesystem_repos(path):
 
    """
 
    Scans given path for repos and return (name,(type,path)) tuple
 

	
 
    :param path: path to scan for repositories
 
    :param recursive: recursive search and return names with subdirs in front
 
    """
 

	
 
    # remove ending slash for better results
 
    path = safe_str(path.rstrip(os.sep))
 
    path = path.rstrip(os.sep)
 
    log.debug('now scanning in %s', path)
 

	
 
    def isdir(*n):
 
        return os.path.isdir(os.path.join(*n))
 

	
 
    for root, dirs, _files in os.walk(path):
 
        recurse_dirs = []
 
        for subdir in dirs:
 
            # skip removed repos
 
            if REMOVED_REPO_PAT.match(subdir):
 
                continue
 

	
 
            # skip .<something> dirs TODO: rly? then we should prevent creating them ...
 
            if subdir.startswith('.'):
 
                continue
 

	
 
            cur_path = os.path.join(root, subdir)
 
            if isdir(cur_path, '.git'):
 
                log.warning('ignoring non-bare Git repo: %s', cur_path)
 
                continue
 

	
 
            if (isdir(cur_path, '.hg') or
 
                isdir(cur_path, '.svn') or
 
                isdir(cur_path, 'objects') and (isdir(cur_path, 'refs') or
 
                                                os.path.isfile(os.path.join(cur_path, 'packed-refs')))):
 

	
 
                if not os.access(cur_path, os.R_OK) or not os.access(cur_path, os.X_OK):
 
                    log.warning('ignoring repo path without access: %s', cur_path)
 
                    continue
 

	
 
                if not os.access(cur_path, os.W_OK):
 
                    log.warning('repo path without write access: %s', cur_path)
 

	
 
                try:
 
                    scm_info = get_scm(cur_path)
 
                    assert cur_path.startswith(path)
 
                    repo_path = cur_path[len(path) + 1:]
 
                    yield repo_path, scm_info
 
                    continue # no recursion
 
                except VCSError:
 
                    # We should perhaps ignore such broken repos, but especially
 
                    # the bare git detection is unreliable so we dive into it
 
                    pass
 

	
 
            recurse_dirs.append(subdir)
 

	
 
        dirs[:] = recurse_dirs
 

	
 

	
 
def is_valid_repo_uri(repo_type, url, ui):
 
    """Check if the url seems like a valid remote repo location - raise an Exception if any problems"""
 
    if repo_type == 'hg':
 
        if url.startswith('http') or url.startswith('ssh'):
 
            # initially check if it's at least the proper URL
 
            # or does it pass basic auth
 
            MercurialRepository._check_url(url, ui)
 
        elif url.startswith('svn+http'):
 
            try:
 
                from hgsubversion.svnrepo import svnremoterepo
 
            except ImportError:
 
                raise HgsubversionImportError(_('Unable to activate hgsubversion support. '
 
                                                'The "hgsubversion" library is missing'))
 
            svnremoterepo(ui, url).svn.uuid
 
        elif url.startswith('git+http'):
 
            raise NotImplementedError()
 
        else:
 
            raise Exception('URI %s not allowed' % (url,))
 

	
 
    elif repo_type == 'git':
 
        if url.startswith('http') or url.startswith('git'):
 
            # initially check if it's at least the proper URL
 
            # or does it pass basic auth
 
            GitRepository._check_url(url)
 
        elif url.startswith('svn+http'):
 
            raise NotImplementedError()
 
        elif url.startswith('hg+http'):
 
            raise NotImplementedError()
 
        else:
 
            raise Exception('URI %s not allowed' % (url))
 

	
 

	
 
def is_valid_repo(repo_name, base_path, scm=None):
 
    """
 
    Returns True if given path is a valid repository False otherwise.
 
    If scm param is given also compare if given scm is the same as expected
 
    from scm parameter
 

	
 
    :param repo_name:
 
    :param base_path:
 
    :param scm:
 

	
 
    :return True: if given path is a valid repository
 
    """
 
    # TODO: paranoid security checks?
 
    full_path = os.path.join(safe_str(base_path), safe_str(repo_name))
 
    full_path = os.path.join(base_path, repo_name)
 

	
 
    try:
 
        scm_ = get_scm(full_path)
 
        if scm:
 
            return scm_[0] == scm
 
        return True
 
    except VCSError:
 
        return False
 

	
 

	
 
def is_valid_repo_group(repo_group_name, base_path, skip_path_check=False):
 
    """
 
    Returns True if given path is a repository group False otherwise
 

	
 
    :param repo_name:
 
    :param base_path:
 
    """
 
    full_path = os.path.join(safe_str(base_path), safe_str(repo_group_name))
 
    full_path = os.path.join(base_path, repo_group_name)
 

	
 
    # check if it's not a repo
 
    if is_valid_repo(repo_group_name, base_path):
 
        return False
 

	
 
    try:
 
        # we need to check bare git repos at higher level
 
        # since we might match branches/hooks/info/objects or possible
 
        # other things inside bare git repo
 
        get_scm(os.path.dirname(full_path))
 
        return False
 
    except VCSError:
 
        pass
 

	
 
    # check if it's a valid path
 
    if skip_path_check or os.path.isdir(full_path):
 
        return True
 

	
 
    return False
 

	
 

	
 
# propagated from mercurial documentation
 
ui_sections = ['alias', 'auth',
 
                'decode/encode', 'defaults',
 
                'diff', 'email',
 
                'extensions', 'format',
 
                'merge-patterns', 'merge-tools',
 
                'hooks', 'http_proxy',
 
                'smtp', 'patch',
 
                'paths', 'profiling',
 
                'server', 'trusted',
 
                'ui', 'web', ]
 

	
 

	
 
def make_ui(repo_path=None):
 
    """
 
    Create an Mercurial 'ui' object based on database Ui settings, possibly
 
    augmenting with content from a hgrc file.
 
    """
 
    baseui = mercurial.ui.ui()
 

	
 
    # clean the baseui object
 
    baseui._ocfg = mercurial.config.config()
 
    baseui._ucfg = mercurial.config.config()
 
    baseui._tcfg = mercurial.config.config()
 

	
 
    sa = meta.Session()
 
    for ui_ in sa.query(Ui).all():
 
        if ui_.ui_active:
 
            log.debug('config from db: [%s] %s=%r', ui_.ui_section,
 
                      ui_.ui_key, ui_.ui_value)
 
            baseui.setconfig(ascii_bytes(ui_.ui_section), ascii_bytes(ui_.ui_key),
 
                             b'' if ui_.ui_value is None else safe_bytes(ui_.ui_value))
 

	
 
    # force set push_ssl requirement to False, Kallithea handles that
 
    baseui.setconfig(b'web', b'push_ssl', False)
 
    baseui.setconfig(b'web', b'allow_push', b'*')
 
    # prevent interactive questions for ssh password / passphrase
 
    ssh = baseui.config(b'ui', b'ssh', default=b'ssh')
 
    baseui.setconfig(b'ui', b'ssh', b'%s -oBatchMode=yes -oIdentitiesOnly=yes' % ssh)
 
    # push / pull hooks
 
    baseui.setconfig(b'hooks', b'changegroup.kallithea_log_push_action', b'python:kallithea.lib.hooks.log_push_action')
 
    baseui.setconfig(b'hooks', b'outgoing.kallithea_log_pull_action', b'python:kallithea.lib.hooks.log_pull_action')
 

	
 
    if repo_path is not None:
 
        hgrc_path = os.path.join(repo_path, '.hg', 'hgrc')
 
        if os.path.isfile(hgrc_path):
 
            log.debug('reading hgrc from %s', hgrc_path)
 
            cfg = mercurial.config.config()
 
            cfg.read(safe_bytes(hgrc_path))
 
            for section in ui_sections:
 
                for k, v in cfg.items(section):
 
                    log.debug('config from file: [%s] %s=%s', section, k, v)
 
                    baseui.setconfig(ascii_bytes(section), ascii_bytes(k), safe_bytes(v))
 
        else:
 
            log.debug('hgrc file is not present at %s, skipping...', hgrc_path)
 

	
 
    return baseui
 

	
 

	
 
def set_app_settings(config):
 
    """
 
    Updates app config with new settings from database
 

	
 
    :param config:
 
    """
 
    hgsettings = Setting.get_app_settings()
 
    for k, v in hgsettings.items():
 
        config[k] = v
 

	
 

	
 
def set_vcs_config(config):
 
    """
 
    Patch VCS config with some Kallithea specific stuff
 

	
 
    :param config: kallithea.CONFIG
kallithea/lib/utils2.py
Show inline comments
 
@@ -236,193 +236,193 @@ def age(prevdate, show_short_version=Fal
 
            if deltas['month'] > 6:
 
                deltas['year'] += 1
 
                deltas['month'] = 0
 

	
 
    # Format the result
 
    fmt_funcs = {
 
        'year': lambda d: ungettext(u'%d year', '%d years', d) % d,
 
        'month': lambda d: ungettext(u'%d month', '%d months', d) % d,
 
        'day': lambda d: ungettext(u'%d day', '%d days', d) % d,
 
        'hour': lambda d: ungettext(u'%d hour', '%d hours', d) % d,
 
        'minute': lambda d: ungettext(u'%d minute', '%d minutes', d) % d,
 
        'second': lambda d: ungettext(u'%d second', '%d seconds', d) % d,
 
    }
 

	
 
    for i, part in enumerate(order):
 
        value = deltas[part]
 
        if value == 0:
 
            continue
 

	
 
        if i < 5:
 
            sub_part = order[i + 1]
 
            sub_value = deltas[sub_part]
 
        else:
 
            sub_value = 0
 

	
 
        if sub_value == 0 or show_short_version:
 
            if future:
 
                return _('in %s') % fmt_funcs[part](value)
 
            else:
 
                return _('%s ago') % fmt_funcs[part](value)
 
        if future:
 
            return _('in %s and %s') % (fmt_funcs[part](value),
 
                fmt_funcs[sub_part](sub_value))
 
        else:
 
            return _('%s and %s ago') % (fmt_funcs[part](value),
 
                fmt_funcs[sub_part](sub_value))
 

	
 
    return _('just now')
 

	
 

	
 
def uri_filter(uri):
 
    """
 
    Removes user:password from given url string
 

	
 
    :param uri:
 
    :rtype: unicode
 
    :returns: filtered list of strings
 
    """
 
    if not uri:
 
        return []
 

	
 
    proto = ''
 

	
 
    for pat in ('https://', 'http://', 'git://'):
 
        if uri.startswith(pat):
 
            uri = uri[len(pat):]
 
            proto = pat
 
            break
 

	
 
    # remove passwords and username
 
    uri = uri[uri.find('@') + 1:]
 

	
 
    # get the port
 
    cred_pos = uri.find(':')
 
    if cred_pos == -1:
 
        host, port = uri, None
 
    else:
 
        host, port = uri[:cred_pos], uri[cred_pos + 1:]
 

	
 
    return [_f for _f in [proto, host, port] if _f]
 

	
 

	
 
def credentials_filter(uri):
 
    """
 
    Returns a url with removed credentials
 

	
 
    :param uri:
 
    """
 

	
 
    uri = uri_filter(uri)
 
    # check if we have port
 
    if len(uri) > 2 and uri[2]:
 
        uri[2] = ':' + uri[2]
 

	
 
    return ''.join(uri)
 

	
 

	
 
def get_clone_url(clone_uri_tmpl, prefix_url, repo_name, repo_id, username=None):
 
    parsed_url = urlobject.URLObject(prefix_url)
 
    prefix = urllib.parse.unquote(parsed_url.path.rstrip('/'))
 
    try:
 
        system_user = pwd.getpwuid(os.getuid()).pw_name
 
    except Exception: # TODO: support all systems - especially Windows
 
        system_user = 'kallithea' # hardcoded default value ...
 
    args = {
 
        'scheme': parsed_url.scheme,
 
        'user': urllib.parse.quote(safe_str(username or '')),
 
        'user': urllib.parse.quote(username or ''),
 
        'netloc': parsed_url.netloc + prefix,  # like "hostname:port/prefix" (with optional ":port" and "/prefix")
 
        'prefix': prefix, # undocumented, empty or starting with /
 
        'repo': repo_name,
 
        'repoid': str(repo_id),
 
        'system_user': system_user,
 
        'hostname': parsed_url.hostname,
 
    }
 
    url = re.sub('{([^{}]+)}', lambda m: args.get(m.group(1), m.group(0)), clone_uri_tmpl)
 

	
 
    # remove leading @ sign if it's present. Case of empty user
 
    url_obj = urlobject.URLObject(url)
 
    if not url_obj.username:
 
        url_obj = url_obj.with_username(None)
 

	
 
    return str(url_obj)
 

	
 

	
 
def get_changeset_safe(repo, rev):
 
    """
 
    Safe version of get_changeset if this changeset doesn't exists for a
 
    repo it returns a Dummy one instead
 

	
 
    :param repo:
 
    :param rev:
 
    """
 
    from kallithea.lib.vcs.backends.base import BaseRepository
 
    from kallithea.lib.vcs.exceptions import RepositoryError
 
    from kallithea.lib.vcs.backends.base import EmptyChangeset
 
    if not isinstance(repo, BaseRepository):
 
        raise Exception('You must pass an Repository '
 
                        'object as first argument got %s' % type(repo))
 

	
 
    try:
 
        cs = repo.get_changeset(rev)
 
    except (RepositoryError, LookupError):
 
        cs = EmptyChangeset(requested_revision=rev)
 
    return cs
 

	
 

	
 
def datetime_to_time(dt):
 
    if dt:
 
        return time.mktime(dt.timetuple())
 

	
 

	
 
def time_to_datetime(tm):
 
    if tm:
 
        if isinstance(tm, str):
 
            try:
 
                tm = float(tm)
 
            except ValueError:
 
                return
 
        return datetime.datetime.fromtimestamp(tm)
 

	
 

	
 
# Must match regexp in kallithea/public/js/base.js MentionsAutoComplete()
 
# Check char before @ - it must not look like we are in an email addresses.
 
# Matching is greedy so we don't have to look beyond the end.
 
MENTIONS_REGEX = re.compile(r'(?:^|(?<=[^a-zA-Z0-9]))@([a-zA-Z0-9][-_.a-zA-Z0-9]*[a-zA-Z0-9])')
 

	
 

	
 
def extract_mentioned_usernames(text):
 
    r"""
 
    Returns list of (possible) usernames @mentioned in given text.
 

	
 
    >>> extract_mentioned_usernames('@1-2.a_X,@1234 not@not @ddd@not @n @ee @ff @gg, @gg;@hh @n\n@zz,')
 
    ['1-2.a_X', '1234', 'ddd', 'ee', 'ff', 'gg', 'gg', 'hh', 'zz']
 
    """
 
    return MENTIONS_REGEX.findall(text)
 

	
 

	
 
def extract_mentioned_users(text):
 
    """ Returns set of actual database Users @mentioned in given text. """
 
    from kallithea.model.db import User
 
    result = set()
 
    for name in extract_mentioned_usernames(text):
 
        user = User.get_by_username(name, case_insensitive=True)
 
        if user is not None and not user.is_default_user:
 
            result.add(user)
 
    return result
 

	
 

	
 
class AttributeDict(dict):
 
    def __getattr__(self, attr):
 
        return self.get(attr, None)
 
    __setattr__ = dict.__setitem__
 
    __delattr__ = dict.__delitem__
 

	
 

	
 
def obfuscate_url_pw(engine):
 
    from sqlalchemy.engine import url as sa_url
 
    from sqlalchemy.exc import ArgumentError
 
    try:
 
        _url = sa_url.make_url(engine or '')
 
    except ArgumentError:
 
        return engine
 
    if _url.password:
 
@@ -468,146 +468,146 @@ def set_hook_environment(username, ip_ad
 
    Must always be called before anything with hooks are invoked.
 
    """
 
    from kallithea import CONFIG
 
    extras = {
 
        'ip': ip_addr, # used in log_push/pull_action action_logger
 
        'username': username,
 
        'action': action or 'push_local', # used in log_push_action_raw_ids action_logger
 
        'repository': repo_name,
 
        'scm': repo_alias, # used to pick hack in log_push_action_raw_ids
 
        'config': CONFIG['__file__'], # used by git hook to read config
 
    }
 
    os.environ['KALLITHEA_EXTRAS'] = json.dumps(extras)
 

	
 

	
 
def get_current_authuser():
 
    """
 
    Gets kallithea user from threadlocal tmpl_context variable if it's
 
    defined, else returns None.
 
    """
 
    from tg import tmpl_context
 
    try:
 
        return getattr(tmpl_context, 'authuser', None)
 
    except TypeError:  # No object (name: context) has been registered for this thread
 
        return None
 

	
 

	
 
class OptionalAttr(object):
 
    """
 
    Special Optional Option that defines other attribute. Example::
 

	
 
        def test(apiuser, userid=Optional(OAttr('apiuser')):
 
            user = Optional.extract(userid)
 
            # calls
 

	
 
    """
 

	
 
    def __init__(self, attr_name):
 
        self.attr_name = attr_name
 

	
 
    def __repr__(self):
 
        return '<OptionalAttr:%s>' % self.attr_name
 

	
 
    def __call__(self):
 
        return self
 

	
 

	
 
# alias
 
OAttr = OptionalAttr
 

	
 

	
 
class Optional(object):
 
    """
 
    Defines an optional parameter::
 

	
 
        param = param.getval() if isinstance(param, Optional) else param
 
        param = param() if isinstance(param, Optional) else param
 

	
 
    is equivalent of::
 

	
 
        param = Optional.extract(param)
 

	
 
    """
 

	
 
    def __init__(self, type_):
 
        self.type_ = type_
 

	
 
    def __repr__(self):
 
        return '<Optional:%s>' % self.type_.__repr__()
 

	
 
    def __call__(self):
 
        return self.getval()
 

	
 
    def getval(self):
 
        """
 
        returns value from this Optional instance
 
        """
 
        if isinstance(self.type_, OAttr):
 
            # use params name
 
            return self.type_.attr_name
 
        return self.type_
 

	
 
    @classmethod
 
    def extract(cls, val):
 
        """
 
        Extracts value from Optional() instance
 

	
 
        :param val:
 
        :return: original value if it's not Optional instance else
 
            value of instance
 
        """
 
        if isinstance(val, cls):
 
            return val.getval()
 
        return val
 

	
 

	
 
def urlreadable(s, _cleanstringsub=re.compile('[^-a-zA-Z0-9./]+').sub):
 
    return _cleanstringsub('_', safe_str(s)).rstrip('_')
 
    return _cleanstringsub('_', s).rstrip('_')
 

	
 

	
 
def recursive_replace(str_, replace=' '):
 
    """
 
    Recursive replace of given sign to just one instance
 

	
 
    :param str_: given string
 
    :param replace: char to find and replace multiple instances
 

	
 
    Examples::
 
    >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
 
    'Mighty-Mighty-Bo-sstones'
 
    """
 

	
 
    if str_.find(replace * 2) == -1:
 
        return str_
 
    else:
 
        str_ = str_.replace(replace * 2, replace)
 
        return recursive_replace(str_, replace)
 

	
 

	
 
def repo_name_slug(value):
 
    """
 
    Return slug of name of repository
 
    This function is called on each creation/modification
 
    of repository to prevent bad names in repo
 
    """
 

	
 
    slug = remove_formatting(value)
 
    slug = strip_tags(slug)
 

	
 
    for c in r"""`?=[]\;'"<>,/~!@#$%^&*()+{}|: """:
 
        slug = slug.replace(c, '-')
 
    slug = recursive_replace(slug, '-')
 
    slug = collapse(slug, '-')
 
    return slug
 

	
 

	
 
def ask_ok(prompt, retries=4, complaint='Yes or no please!'):
 
    while True:
 
        ok = input(prompt)
 
        if ok in ('y', 'ye', 'yes'):
 
            return True
 
        if ok in ('n', 'no', 'nop', 'nope'):
 
            return False
 
        retries = retries - 1
 
        if retries < 0:
 
            raise IOError
 
        print(complaint)
kallithea/lib/vcs/backends/git/changeset.py
Show inline comments
 
import re
 
from io import BytesIO
 
from itertools import chain
 
from subprocess import PIPE, Popen
 

	
 
from dulwich import objects
 
from dulwich.config import ConfigFile
 

	
 
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, ChangesetError, ImproperArchiveTypeError, NodeDoesNotExistError, RepositoryError, VCSError
 
from kallithea.lib.vcs.nodes import (
 
    AddedFileNodesGenerator, ChangedFileNodesGenerator, DirNode, FileNode, NodeKind, RemovedFileNodesGenerator, RootNode, SubModuleNode)
 
from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, safe_int, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, safe_int, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 

	
 

	
 
class GitChangeset(BaseChangeset):
 
    """
 
    Represents state of the repository at a revision.
 
    """
 

	
 
    def __init__(self, repository, revision):
 
        self._stat_modes = {}
 
        self.repository = repository
 
        revision = safe_str(revision)
 
        try:
 
            commit = self.repository._repo[ascii_bytes(revision)]
 
            if isinstance(commit, objects.Tag):
 
                revision = safe_str(commit.object[1])
 
                commit = self.repository._repo.get_object(commit.object[1])
 
        except KeyError:
 
            raise RepositoryError("Cannot get object with id %s" % revision)
 
        self.raw_id = ascii_str(commit.id)
 
        self.short_id = self.raw_id[:12]
 
        self._commit = commit  # a Dulwich Commmit with .id
 
        self._tree_id = commit.tree
 
        self._committer_property = 'committer'
 
        self._author_property = 'author'
 
        self._date_property = 'commit_time'
 
        self._date_tz_property = 'commit_timezone'
 
        self.revision = repository.revisions.index(self.raw_id)
 

	
 
        self.nodes = {}
 
        self._paths = {}
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        return ()
 

	
 
    @LazyProperty
 
    def message(self):
 
        return safe_unicode(self._commit.message)
 

	
 
    @LazyProperty
 
    def committer(self):
 
        return safe_unicode(getattr(self._commit, self._committer_property))
 

	
 
    @LazyProperty
 
    def author(self):
 
        return safe_unicode(getattr(self._commit, self._author_property))
 

	
 
    @LazyProperty
 
    def date(self):
 
        return date_fromtimestamp(getattr(self._commit, self._date_property),
 
                                  getattr(self._commit, self._date_tz_property))
 

	
 
    @LazyProperty
 
    def _timestamp(self):
 
        return getattr(self._commit, self._date_property)
 

	
 
    @LazyProperty
 
    def status(self):
 
        """
 
        Returns modified, added, removed, deleted files for current changeset
 
        """
 
        return self.changed, self.added, self.removed
 

	
 
    @LazyProperty
 
    def tags(self):
 
        _tags = []
 
        for tname, tsha in self.repository.tags.items():
 
            if tsha == self.raw_id:
 
                _tags.append(tname)
 
        return _tags
 

	
 
    @LazyProperty
 
    def branch(self):
 
        # Note: This function will return one branch name for the changeset -
 
        # that might not make sense in Git where branches() is a better match
 
        # for the basic model
 
        heads = self.repository._heads(reverse=False)
 
        ref = heads.get(self._commit.id)
 
        if ref:
 
            return safe_unicode(ref)
 

	
 
    @LazyProperty
 
    def branches(self):
 
        heads = self.repository._heads(reverse=True)
 
        return [b for b in heads if heads[b] == self._commit.id] # FIXME: Inefficient ... and returning None!
 

	
 
    def _fix_path(self, path):
 
        """
 
        Paths are stored without trailing slash so we need to get rid off it if
 
        needed.
 
        """
 
        if path.endswith('/'):
 
            path = path.rstrip('/')
 
        return path
 

	
 
    def _get_id_for_path(self, path):
 
        path = safe_str(path)
 
        # FIXME: Please, spare a couple of minutes and make those codes cleaner;
 
        if path not in self._paths:
 
            path = path.strip('/')
 
            # set root tree
 
            tree = self.repository._repo[self._tree_id]
 
            if path == '':
 
                self._paths[''] = tree.id
 
                return tree.id
 
            splitted = path.split('/')
 
            dirs, name = splitted[:-1], splitted[-1]
 
            curdir = ''
 

	
 
            # initially extract things from root dir
 
            for item, stat, id in tree.items():
 
                if curdir:
 
                    name = '/'.join((curdir, item))
 
                else:
 
                    name = item
 
                self._paths[name] = id
 
                self._stat_modes[name] = stat
 

	
 
            for dir in dirs:
 
                if curdir:
 
                    curdir = '/'.join((curdir, dir))
 
                else:
 
                    curdir = dir
 
                dir_id = None
 
                for item, stat, id in tree.items():
 
                    if dir == item:
 
                        dir_id = id
 
                if dir_id:
 
                    # Update tree
 
                    tree = self.repository._repo[dir_id]
 
                    if not isinstance(tree, objects.Tree):
 
                        raise ChangesetError('%s is not a directory' % curdir)
 
                else:
 
                    raise ChangesetError('%s have not been found' % curdir)
 

	
 
                # cache all items from the given traversed tree
 
                for item, stat, id in tree.items():
 
                    if curdir:
 
                        name = '/'.join((curdir, item))
 
                    else:
 
                        name = item
 
                    self._paths[name] = id
 
                    self._stat_modes[name] = stat
 
            if path not in self._paths:
 
                raise NodeDoesNotExistError("There is no file nor directory "
 
                    "at the given path '%s' at revision %s"
 
                    % (path, safe_str(self.short_id)))
 
                    % (path, self.short_id))
 
        return self._paths[path]
 

	
 
    def _get_kind(self, path):
 
        obj = self.repository._repo[self._get_id_for_path(path)]
 
        if isinstance(obj, objects.Blob):
 
            return NodeKind.FILE
 
        elif isinstance(obj, objects.Tree):
 
            return NodeKind.DIR
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %s at "
 
                " '%s'" % (self.raw_id, path))
 
        return path
 

	
 
    def _get_file_nodes(self):
 
        return chain(*(t[2] for t in self.walk()))
 

	
 
    @LazyProperty
 
    def parents(self):
 
        """
 
        Returns list of parents changesets.
 
        """
 
        return [self.repository.get_changeset(ascii_str(parent_id))
 
                for parent_id in self._commit.parents]
 

	
 
    @LazyProperty
 
    def children(self):
 
        """
 
        Returns list of children changesets.
 
        """
 
        rev_filter = settings.GIT_REV_FILTER
 
        so = self.repository.run_git_command(
 
            ['rev-list', rev_filter, '--children']
 
        )
 
        return [
 
            self.repository.get_changeset(cs)
 
            for parts in (l.split(' ') for l in so.splitlines())
 
            if parts[0] == self.raw_id
 
            for cs in parts[1:]
 
        ]
 

	
 
    def next(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        cs = self
 
        while True:
 
            try:
 
                next_ = cs.revision + 1
 
                next_rev = cs.repository.revisions[next_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = cs.repository.get_changeset(next_rev)
 

	
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def prev(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        cs = self
 
        while True:
 
            try:
 
                prev_ = cs.revision - 1
 
                if prev_ < 0:
 
                    raise IndexError
 
                prev_rev = cs.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = cs.repository.get_changeset(prev_rev)
 

	
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        # Only used to feed diffstat
 
        rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
 
        rev2 = self
 
        return b''.join(self.repository.get_diff(rev1, rev2,
 
                                    ignore_whitespace=ignore_whitespace,
 
                                    context=context))
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        # ensure path is traversed
 
        path = safe_str(path)
 
        self._get_id_for_path(path)
 
        return self._stat_modes[path]
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.as_pretty_string()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.raw_length()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        TODO: This function now uses os underlying 'git' and 'grep' commands
 
        which is generally not good. Should be replaced with algorithm
 
        iterating commits.
 
        """
 
        self._get_filectx(path)
 
        f_path = safe_str(path)
 

	
 
        if limit is not None:
 
            cmd = ['log', '-n', str(safe_int(limit, 0)),
 
                   '--pretty=format:%H', '-s', self.raw_id, '--', f_path]
 
                   '--pretty=format:%H', '-s', self.raw_id, '--', path]
 

	
 
        else:
 
            cmd = ['log',
 
                   '--pretty=format:%H', '-s', self.raw_id, '--', f_path]
 
                   '--pretty=format:%H', '-s', self.raw_id, '--', path]
 
        so = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(sha) for sha in ids]
 

	
 
    def get_file_history_2(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        """
 
        self._get_filectx(path)
 
        from dulwich.walk import Walker
 
        include = [self.raw_id]
 
        walker = Walker(self.repository._repo.object_store, include,
 
                        paths=[path], max_entries=1)
 
        return [self.repository.get_changeset(ascii_str(x.commit.id.decode))
 
                for x in walker]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 
        """
 
        # TODO: This function now uses os underlying 'git' command which is
 
        # generally not good. Should be replaced with algorithm iterating
 
        # commits.
 
        cmd = ['blame', '-l', '--root', '-r', self.raw_id, '--', path]
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for boundaries
 
        # -r sha ==> blames for the given revision
 
        so = self.repository.run_git_command(cmd)
 

	
 
        for i, blame_line in enumerate(so.split('\n')[:-1]):
 
            sha, line = re.split(r' ', blame_line, 1)
 
            yield (i + 1, sha, lambda sha=sha: self.repository.get_changeset(sha), line)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
 
            (``repo-tip.<KIND>``).
 
        :param subrepos: include subrepos in this archive.
 

	
 
        :raise ImproperArchiveTypeError: If given kind is wrong.
 
        :raise VcsError: If given stream is None
 
        """
 
        allowed_kinds = settings.ARCHIVE_SPECS
 
        if kind not in allowed_kinds:
 
            raise ImproperArchiveTypeError('Archive kind not supported use one'
 
                'of %s' % ' '.join(allowed_kinds))
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 

	
 
        if prefix is None:
 
            prefix = '%s-%s' % (self.repository.name, self.short_id)
 
        elif prefix.startswith('/'):
 
            raise VCSError("Prefix cannot start with leading slash")
 
        elif prefix.strip() == '':
 
            raise VCSError("Prefix cannot be empty")
 

	
 
        if kind == 'zip':
 
            frmt = 'zip'
 
        else:
 
            frmt = 'tar'
 
        _git_path = settings.GIT_EXECUTABLE_PATH
 
        cmd = '%s archive --format=%s --prefix=%s/ %s' % (_git_path,
 
                                                frmt, prefix, self.raw_id)
 
        if kind == 'tgz':
 
            cmd += ' | gzip -9'
 
        elif kind == 'tbz2':
 
            cmd += ' | bzip2 -9'
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
 
                      cwd=self.repository.path)
 

	
 
        buffer_size = 1024 * 8
 
        chunk = popen.stdout.read(buffer_size)
 
        while chunk:
 
            stream.write(chunk)
 
            chunk = popen.stdout.read(buffer_size)
 
        # Make sure all descriptors would be read
 
        popen.communicate()
 

	
 
    def get_nodes(self, path):
 
        """
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git.repository
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    Git repository implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import errno
 
import logging
 
import os
 
import re
 
import time
 
import urllib.error
 
import urllib.parse
 
import urllib.request
 
from collections import OrderedDict
 

	
 
import mercurial.url  # import httpbasicauthhandler, httpdigestauthhandler
 
import mercurial.util  # import url as hg_url
 
from dulwich.config import ConfigFile
 
from dulwich.objects import Tag
 
from dulwich.repo import NotGitRepository, Repo
 

	
 
from kallithea.lib.vcs import subprocessio
 
from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (
 
    BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError, TagDoesNotExistError)
 
from kallithea.lib.vcs.utils import ascii_str, date_fromtimestamp, makedate, safe_bytes, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import ascii_str, date_fromtimestamp, makedate, safe_bytes, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.paths import abspath, get_user_home
 

	
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .workdir import GitWorkdir
 

	
 

	
 
SHA_PATTERN = re.compile(r'^([0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
 
                 update_after_clone=False, bare=False):
 

	
 
        self.path = abspath(repo_path)
 
        self.repo = self._get_repo(create, src_url, update_after_clone, bare)
 
        self.bare = self.repo.bare
 

	
 
    @property
 
    def _config_files(self):
 
        return [
 
            self.bare and abspath(self.path, 'config')
 
                      or abspath(self.path, '.git', 'config'),
 
             abspath(get_user_home(), '.gitconfig'),
 
         ]
 

	
 
    @property
 
    def _repo(self):
 
        return self.repo
 

	
 
    @property
 
    def head(self):
 
        try:
 
            return self._repo.head()
 
        except KeyError:
 
            return None
 

	
 
    @property
 
    def _empty(self):
 
        """
 
        Checks if repository is empty ie. without any changesets
 
        """
 

	
 
        try:
 
            self.revisions[0]
 
        except (KeyError, IndexError):
 
            return True
 
        return False
 

	
 
    @LazyProperty
 
    def revisions(self):
 
        """
 
        Returns list of revisions' ids, in ascending order.  Being lazy
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    @classmethod
 
    def _run_git_command(cls, cmd, cwd=None):
 
        """
 
        Runs given ``cmd`` as git command and returns output bytes in a tuple
 
        (stdout, stderr) ... or raise RepositoryError.
 

	
 
        :param cmd: git command to be executed
 
        :param cwd: passed directly to subprocess
 
        """
 
        # need to clean fix GIT_DIR !
 
        gitenv = dict(os.environ)
 
        gitenv.pop('GIT_DIR', None)
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 

	
 
        assert isinstance(cmd, list), cmd
 
        cmd = [settings.GIT_EXECUTABLE_PATH, '-c', 'core.quotepath=false'] + cmd
 
        try:
 
            p = subprocessio.SubprocessIOChunker(cmd, cwd=cwd, env=gitenv, shell=False)
 
        except (EnvironmentError, OSError) as err:
 
            # output from the failing process is in str(EnvironmentError)
 
            msg = ("Couldn't run git command %s.\n"
 
                   "Subprocess failed with '%s': %s\n" %
 
                   (cmd, type(err).__name__, err)
 
            ).strip()
 
            log.error(msg)
 
            raise RepositoryError(msg)
 

	
 
        try:
 
            stdout = b''.join(p.output)
 
            stderr = b''.join(p.error)
 
@@ -224,193 +224,192 @@ class GitRepository(BaseRepository):
 
                return Repo(self.path)
 
            elif create:
 
                os.makedirs(self.path)
 
                if bare:
 
                    return Repo.init_bare(self.path)
 
                else:
 
                    return Repo.init(self.path)
 
            else:
 
                return Repo(self.path)
 
        except (NotGitRepository, OSError) as err:
 
            raise RepositoryError(err)
 

	
 
    def _get_all_revisions(self):
 
        # we must check if this repo is not empty, since later command
 
        # fails if it is. And it's cheaper to ask than throw the subprocess
 
        # errors
 
        try:
 
            self._repo.head()
 
        except KeyError:
 
            return []
 

	
 
        rev_filter = settings.GIT_REV_FILTER
 
        cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
 
        try:
 
            so = self.run_git_command(cmd)
 
        except RepositoryError:
 
            # Can be raised for empty repositories
 
            return []
 
        return so.splitlines()
 

	
 
    def _get_all_revisions2(self):
 
        # alternate implementation using dulwich
 
        includes = [ascii_str(sha) for key, (sha, type_) in self._parsed_refs.items()
 
                    if type_ != b'T']
 
        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Given any revision identifier, returns a 40 char string with revision hash.
 
        """
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
 
            revision = -1
 

	
 
        if isinstance(revision, int):
 
            try:
 
                return self.revisions[revision]
 
            except IndexError:
 
                msg = "Revision %r does not exist for %s" % (revision, self.name)
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        if isinstance(revision, (str, unicode)):
 
            if revision.isdigit() and (len(revision) < 12 or len(revision) == revision.count('0')):
 
                try:
 
                    return self.revisions[int(revision)]
 
                except IndexError:
 
                    msg = "Revision %r does not exist for %s" % (revision, self)
 
                    raise ChangesetDoesNotExistError(msg)
 

	
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(safe_bytes(revision))
 
            if _ref_revision:  # and _ref_revision[1] in [b'H', b'RH', b'T']:
 
                return ascii_str(_ref_revision[0])
 

	
 
            if revision in self.revisions:
 
                return revision
 

	
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            if revision in self.tags.values():
 
                return revision
 

	
 
            if SHA_PATTERN.match(revision):
 
                msg = "Revision %r does not exist for %s" % (revision, self.name)
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        raise ChangesetDoesNotExistError("Given revision %r not recognized" % revision)
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns ``GitChangeset`` object representing repository's
 
        changeset at the given ``revision``.
 
        """
 
        return self._get_revision(ref_name)
 

	
 
    def _get_archives(self, archive_name='tip'):
 

	
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
            yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall to
 
        filesystem (``file:///``) schema.
 
        """
 
        url = safe_str(url)
 
        if url != 'default' and '://' not in url:
 
            url = ':///'.join(('file', url))
 
        return url
 

	
 
    @LazyProperty
 
    def name(self):
 
        return os.path.basename(self.path)
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            idx_loc = '' if self.bare else '.git'
 
            # fallback to filesystem
 
            in_path = os.path.join(self.path, idx_loc, "index")
 
            he_path = os.path.join(self.path, idx_loc, "HEAD")
 
            if os.path.exists(in_path):
 
                return os.stat(in_path).st_mtime
 
            else:
 
                return os.stat(he_path).st_mtime
 

	
 
    @LazyProperty
 
    def description(self):
 
        return safe_unicode(self._repo.get_description() or b'unknown')
 

	
 
    @LazyProperty
 
    def contact(self):
 
        undefined_contact = u'Unknown'
 
        return undefined_contact
 

	
 
    @property
 
    def branches(self):
 
        if not self.revisions:
 
            return {}
 
        sortkey = lambda ctx: ctx[0]
 
        _branches = [(key, ascii_str(sha))
 
                     for key, (sha, type_) in self._parsed_refs.items() if type_ == b'H']
 
        return OrderedDict(sorted(_branches, key=sortkey, reverse=False))
 

	
 
    @LazyProperty
 
    def closed_branches(self):
 
        return {}
 

	
 
    @LazyProperty
 
    def tags(self):
 
        return self._get_tags()
 

	
 
    def _get_tags(self):
 
        if not self.revisions:
 
            return {}
 

	
 
        sortkey = lambda ctx: ctx[0]
 
        _tags = [(key, ascii_str(sha))
 
                 for key, (sha, type_) in self._parsed_refs.items() if type_ == b'T']
 
        return OrderedDict(sorted(_tags, key=sortkey, reverse=True))
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None,
 
            **kwargs):
 
        """
 
        Creates and returns a tag for the given ``revision``.
 

	
 
        :param name: name for new tag
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param revision: changeset id for which new tag would be created
 
        :param message: message of the tag's commit
 
        :param date: date of tag's commit
 

	
 
        :raises TagAlreadyExistError: if tag with same name already exists
 
        """
 
        if name in self.tags:
 
            raise TagAlreadyExistError("Tag %s already exists" % name)
 
        changeset = self.get_changeset(revision)
 
        message = message or "Added tag %s for commit %s" % (name,
 
            changeset.raw_id)
 
        self._repo.refs[b"refs/tags/%s" % safe_bytes(name)] = changeset._commit.id
 

	
 
        self._parsed_refs = self._get_parsed_refs()
 
        self.tags = self._get_tags()
 
        return changeset
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
 
        """
 
        Removes tag with the given ``name``.
 

	
 
        :param name: name of the tag to be removed
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param message: message of the tag's removal commit
 
        :param date: date of tag's removal commit
 

	
kallithea/lib/vcs/backends/git/ssh.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
import logging
 
import os
 

	
 
from kallithea.lib.hooks import log_pull_action
 
from kallithea.lib.utils import make_ui
 
from kallithea.lib.vcs.backends.ssh import BaseSshHandler
 
from kallithea.lib.vcs.utils import safe_str
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class GitSshHandler(BaseSshHandler):
 
    vcs_type = 'git'
 

	
 
    @classmethod
 
    def make(cls, ssh_command_parts):
 
        r"""
 
        >>> import shlex
 

	
 
        >>> GitSshHandler.make(shlex.split("git-upload-pack '/foo bar'")).repo_name
 
        u'foo bar'
 
        >>> GitSshHandler.make(shlex.split("git-upload-pack '/foo bar'")).verb
 
        'git-upload-pack'
 
        >>> GitSshHandler.make(shlex.split(" git-upload-pack /blåbærgrød ")).repo_name # might not be necessary to support no quoting ... but we can
 
        u'bl\xe5b\xe6rgr\xf8d'
 
        >>> GitSshHandler.make(shlex.split('''git-upload-pack "/foo'bar"''')).repo_name
 
        u"foo'bar"
 
        >>> GitSshHandler.make(shlex.split("git-receive-pack '/foo'")).repo_name
 
        u'foo'
 
        >>> GitSshHandler.make(shlex.split("git-receive-pack '/foo'")).verb
 
        'git-receive-pack'
 

	
 
        >>> GitSshHandler.make(shlex.split("/bin/git-upload-pack '/foo'")) # ssh-serve will report 'SSH command %r is not supported'
 
        >>> GitSshHandler.make(shlex.split('''git-upload-pack /foo bar''')) # ssh-serve will report 'SSH command %r is not supported'
 
        >>> shlex.split("git-upload-pack '/foo'bar' x") # ssh-serve will report: Error parsing SSH command "...": No closing quotation
 
        Traceback (most recent call last):
 
        ValueError: No closing quotation
 
        >>> GitSshHandler.make(shlex.split('hg -R foo serve --stdio')) # not handled here
 
        """
 
        if (len(ssh_command_parts) == 2 and
 
            ssh_command_parts[0] in ['git-upload-pack', 'git-receive-pack'] and
 
            ssh_command_parts[1].startswith('/')
 
        ):
 
            return cls(ssh_command_parts[1][1:], ssh_command_parts[0])
 

	
 
        return None
 

	
 
    def __init__(self, repo_name, verb):
 
        BaseSshHandler.__init__(self, repo_name)
 
        self.verb = verb
 

	
 
    def _serve(self):
 
        if self.verb == 'git-upload-pack': # action 'pull'
 
            # base class called set_hook_environment - action is hardcoded to 'pull'
 
            log_pull_action(ui=make_ui(), repo=self.db_repo.scm_instance._repo)
 
        else: # probably verb 'git-receive-pack', action 'push'
 
            if not self.allow_push:
 
                self.exit('Push access to %r denied' % safe_str(self.repo_name))
 
                self.exit('Push access to %r denied' % self.repo_name)
 
            # Note: push logging is handled by Git post-receive hook
 

	
 
        # git shell is not a real shell but use shell inspired quoting *inside* the argument.
 
        # Per https://github.com/git/git/blob/v2.22.0/quote.c#L12 :
 
        # The path must be "'" quoted, but "'" and "!" must exit the quoting and be "\" escaped
 
        quoted_abspath = "'%s'" % self.db_repo.repo_full_path.replace("'", r"'\''").replace("!", r"'\!'")
 
        newcmd = ['git', 'shell', '-c', "%s %s" % (self.verb, quoted_abspath)]
 
        log.debug('Serving: %s', newcmd)
 
        os.execvp(newcmd[0], newcmd)
 
        self.exit("Failed to exec 'git' as %s" % newcmd)
kallithea/lib/vcs/backends/hg/changeset.py
Show inline comments
 
import os
 
import posixpath
 

	
 
import mercurial.archival
 
import mercurial.node
 
import mercurial.obsutil
 

	
 
from kallithea.lib.vcs.backends.base import BaseChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, ChangesetError, ImproperArchiveTypeError, NodeDoesNotExistError, VCSError
 
from kallithea.lib.vcs.nodes import (
 
    AddedFileNodesGenerator, ChangedFileNodesGenerator, DirNode, FileNode, NodeKind, RemovedFileNodesGenerator, RootNode, SubModuleNode)
 
from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, safe_bytes, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, safe_bytes, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.paths import get_dirs_for_path
 

	
 

	
 
class MercurialChangeset(BaseChangeset):
 
    """
 
    Represents state of the repository at a revision.
 
    """
 

	
 
    def __init__(self, repository, revision):
 
        self.repository = repository
 
        assert isinstance(revision, str), repr(revision)
 
        self._ctx = repository._repo[ascii_bytes(revision)]
 
        self.raw_id = ascii_str(self._ctx.hex())
 
        self.revision = self._ctx._rev
 
        self.nodes = {}
 

	
 
    @LazyProperty
 
    def tags(self):
 
        return [safe_unicode(tag) for tag in self._ctx.tags()]
 

	
 
    @LazyProperty
 
    def branch(self):
 
        return safe_unicode(self._ctx.branch())
 

	
 
    @LazyProperty
 
    def branches(self):
 
        return [safe_unicode(self._ctx.branch())]
 

	
 
    @LazyProperty
 
    def closesbranch(self):
 
        return self._ctx.closesbranch()
 

	
 
    @LazyProperty
 
    def obsolete(self):
 
        return self._ctx.obsolete()
 

	
 
    @LazyProperty
 
    def bumped(self):
 
        return self._ctx.phasedivergent()
 

	
 
    @LazyProperty
 
    def divergent(self):
 
        return self._ctx.contentdivergent()
 

	
 
    @LazyProperty
 
    def extinct(self):
 
        return self._ctx.extinct()
 

	
 
    @LazyProperty
 
    def unstable(self):
 
        return self._ctx.orphan()
 

	
 
    @LazyProperty
 
    def phase(self):
 
        if(self._ctx.phase() == 1):
 
            return 'Draft'
 
        elif(self._ctx.phase() == 2):
 
            return 'Secret'
 
        else:
 
            return ''
 

	
 
    @LazyProperty
 
    def successors(self):
 
        successors = mercurial.obsutil.successorssets(self._ctx._repo, self._ctx.node(), closest=True)
 
        if successors:
 
            # flatten the list here handles both divergent (len > 1)
 
            # and the usual case (len = 1)
 
            successors = [mercurial.node.hex(n)[:12] for sub in successors for n in sub if n != self._ctx.node()]
 

	
 
        return successors
 

	
 
    @LazyProperty
 
    def predecessors(self):
 
        return [mercurial.node.hex(n)[:12] for n in mercurial.obsutil.closestpredecessors(self._ctx._repo, self._ctx.node())]
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        return [safe_unicode(bookmark) for bookmark in self._ctx.bookmarks()]
 

	
 
    @LazyProperty
 
    def message(self):
 
        return safe_unicode(self._ctx.description())
 

	
 
    @LazyProperty
 
    def committer(self):
 
        return safe_unicode(self.author)
 

	
 
    @LazyProperty
 
    def author(self):
 
        return safe_unicode(self._ctx.user())
 

	
 
    @LazyProperty
 
    def date(self):
 
        return date_fromtimestamp(*self._ctx.date())
 

	
 
    @LazyProperty
 
    def _timestamp(self):
 
        return self._ctx.date()[0]
 

	
 
    @LazyProperty
 
    def status(self):
 
        """
 
        Returns modified, added, removed, deleted files for current changeset
 
        """
 
        return self.repository._repo.status(self._ctx.p1().node(),
 
                                            self._ctx.node())
 

	
 
    @LazyProperty
 
    def _file_paths(self):
 
        return list(self._ctx)
 

	
 
    @LazyProperty
 
    def _dir_paths(self):
 
        p = list(set(get_dirs_for_path(*self._file_paths)))
 
        p.insert(0, '')
 
        return p
 

	
 
    @LazyProperty
 
    def _paths(self):
 
        return self._dir_paths + self._file_paths
 

	
 
    @LazyProperty
 
    def short_id(self):
 
        return self.raw_id[:12]
 

	
 
    @LazyProperty
 
    def parents(self):
 
        """
 
        Returns list of parents changesets.
 
        """
 
        return [self.repository.get_changeset(parent.rev())
 
                for parent in self._ctx.parents() if parent.rev() >= 0]
 

	
 
    @LazyProperty
 
    def children(self):
 
        """
 
        Returns list of children changesets.
 
        """
 
        return [self.repository.get_changeset(child.rev())
 
                for child in self._ctx.children() if child.rev() >= 0]
 

	
 
    def next(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        cs = self
 
        while True:
 
            try:
 
                next_ = cs.repository.revisions.index(cs.raw_id) + 1
 
                next_rev = cs.repository.revisions[next_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = cs.repository.get_changeset(next_rev)
 

	
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def prev(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        cs = self
 
        while True:
 
            try:
 
                prev_ = cs.repository.revisions.index(cs.raw_id) - 1
 
                if prev_ < 0:
 
                    raise IndexError
 
                prev_rev = cs.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = cs.repository.get_changeset(prev_rev)
 

	
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def diff(self):
 
        # Only used to feed diffstat
 
        return b''.join(self._ctx.diff())
 

	
 
    def _fix_path(self, path):
 
        """
 
        Paths are stored without trailing slash so we need to get rid off it if
 
        needed. Also mercurial keeps filenodes as str so we need to decode
 
        from unicode to str
 
        """
 
        if path.endswith('/'):
 
            path = path.rstrip('/')
 

	
 
        return safe_str(path)
 
        return path
 

	
 
    def _get_kind(self, path):
 
        path = self._fix_path(path)
 
        if path in self._file_paths:
 
            return NodeKind.FILE
 
        elif path in self._dir_paths:
 
            return NodeKind.DIR
 
        else:
 
            raise ChangesetError("Node does not exist at the given path '%s'"
 
                % (path))
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %s at "
 
                " '%s'" % (self.raw_id, path))
 
        return self._ctx.filectx(safe_bytes(path))
 

	
 
    def _extract_submodules(self):
 
        """
 
        returns a dictionary with submodule information from substate file
 
        of hg repository
 
        """
 
        return self._ctx.substate
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        if b'x' in fctx.flags():
 
            return 0o100755
 
        else:
 
            return 0o100644
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.data()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.size()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 
        """
 
        fctx = self._get_filectx(path)
 
        hist = []
 
        cnt = 0
 
        for cs in reversed([x for x in fctx.filelog()]):
 
            cnt += 1
 
            hist.append(mercurial.node.hex(fctx.filectx(cs).node()))
 
            if limit is not None and cnt == limit:
 
                break
 

	
 
        return [self.repository.get_changeset(node) for node in hist]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 
        """
 
        annotations = self._get_filectx(path).annotate()
 
        annotation_lines = [(annotateline.fctx, annotateline.text) for annotateline in annotations]
 
        for i, (fctx, line) in enumerate(annotation_lines):
 
            sha = ascii_str(fctx.hex())
 
            yield (i + 1, sha, lambda sha=sha: self.repository.get_changeset(sha), line)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
 
            (``repo-tip.<KIND>``).
 
        :param subrepos: include subrepos in this archive.
 

	
 
        :raise ImproperArchiveTypeError: If given kind is wrong.
kallithea/lib/vcs/backends/hg/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.hg.repository
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    Mercurial repository implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import datetime
 
import logging
 
import os
 
import time
 
import urllib.error
 
import urllib.parse
 
import urllib.request
 
from collections import OrderedDict
 

	
 
import mercurial.commands
 
import mercurial.error
 
import mercurial.exchange
 
import mercurial.hg
 
import mercurial.hgweb
 
import mercurial.httppeer
 
import mercurial.localrepo
 
import mercurial.match
 
import mercurial.mdiff
 
import mercurial.node
 
import mercurial.patch
 
import mercurial.scmutil
 
import mercurial.sshpeer
 
import mercurial.tags
 
import mercurial.ui
 
import mercurial.url
 
import mercurial.util
 

	
 
from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
 
from kallithea.lib.vcs.exceptions import (
 
    BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError, TagDoesNotExistError, VCSError)
 
from kallithea.lib.vcs.utils import ascii_str, author_email, author_name, date_fromtimestamp, makedate, safe_bytes, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import ascii_str, author_email, author_name, date_fromtimestamp, makedate, safe_bytes, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.paths import abspath
 

	
 
from .changeset import MercurialChangeset
 
from .inmemory import MercurialInMemoryChangeset
 
from .workdir import MercurialWorkdir
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class MercurialRepository(BaseRepository):
 
    """
 
    Mercurial repository backend
 
    """
 
    DEFAULT_BRANCH_NAME = 'default'
 
    scm = 'hg'
 

	
 
    def __init__(self, repo_path, create=False, baseui=None, src_url=None,
 
                 update_after_clone=False):
 
        """
 
        Raises RepositoryError if repository could not be find at the given
 
        ``repo_path``.
 

	
 
        :param repo_path: local path of the repository
 
        :param create=False: if set to True, would try to create repository if
 
           it does not exist rather than raising exception
 
        :param baseui=None: user data
 
        :param src_url=None: would try to clone repository from given location
 
        :param update_after_clone=False: sets update of working copy after
 
          making a clone
 
        """
 

	
 
        if not isinstance(repo_path, str):
 
            raise VCSError('Mercurial backend requires repository path to '
 
                           'be instance of <str> got %s instead' %
 
                           type(repo_path))
 
        self.path = abspath(repo_path)
 
        self.baseui = baseui or mercurial.ui.ui()
 
        # We've set path and ui, now we can set _repo itself
 
        self._repo = self._get_repo(create, src_url, update_after_clone)
 

	
 
    @property
 
    def _empty(self):
 
        """
 
        Checks if repository is empty ie. without any changesets
 
        """
 
        # TODO: Following raises errors when using InMemoryChangeset...
 
        # return len(self._repo.changelog) == 0
 
        return len(self.revisions) == 0
 

	
 
    @LazyProperty
 
    def revisions(self):
 
        """
 
        Returns list of revisions' ids, in ascending order.  Being lazy
 
        attribute allows external tools to inject shas from cache.
 
        """
 
        return self._get_all_revisions()
 

	
 
    @LazyProperty
 
    def name(self):
 
        return os.path.basename(self.path)
 

	
 
    @LazyProperty
 
    def branches(self):
 
        return self._get_branches()
 

	
 
    @LazyProperty
 
    def closed_branches(self):
 
        return self._get_branches(normal=False, closed=True)
 

	
 
    @LazyProperty
 
    def allbranches(self):
 
        """
 
        List all branches, including closed branches.
 
        """
 
        return self._get_branches(closed=True)
 

	
 
    def _get_branches(self, normal=True, closed=False):
 
        """
 
        Gets branches for this repository
 
        Returns only not closed branches by default
 

	
 
        :param closed: return also closed branches for mercurial
 
        :param normal: return also normal branches
 
        """
 

	
 
        if self._empty:
 
            return {}
 

	
 
        bt = OrderedDict()
 
        for bn, _heads, node, isclosed in sorted(self._repo.branchmap().iterbranches()):
 
            if isclosed:
 
                if closed:
 
                    bt[safe_unicode(bn)] = ascii_str(mercurial.node.hex(node))
 
            else:
 
@@ -353,236 +353,233 @@ class MercurialRepository(BaseRepository
 

	
 
        return True
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
 
        """
 
        Function will check for mercurial repository in given path and return
 
        a localrepo object. If there is no repository in that path it will
 
        raise an exception unless ``create`` parameter is set to True - in
 
        that case repository would be created and returned.
 
        If ``src_url`` is given, would try to clone repository from the
 
        location at given clone_point. Additionally it'll make update to
 
        working copy accordingly to ``update_after_clone`` flag
 
        """
 
        try:
 
            if src_url:
 
                url = safe_bytes(self._get_url(src_url))
 
                opts = {}
 
                if not update_after_clone:
 
                    opts.update({'noupdate': True})
 
                MercurialRepository._check_url(url, self.baseui)
 
                mercurial.commands.clone(self.baseui, url, safe_bytes(self.path), **opts)
 

	
 
                # Don't try to create if we've already cloned repo
 
                create = False
 
            return mercurial.localrepo.instance(self.baseui, safe_bytes(self.path), create=create)
 
        except (mercurial.error.Abort, mercurial.error.RepoError) as err:
 
            if create:
 
                msg = "Cannot create repository at %s. Original error was %s" \
 
                    % (self.name, err)
 
            else:
 
                msg = "Not valid repository at %s. Original error was %s" \
 
                    % (self.name, err)
 
            raise RepositoryError(msg)
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        return MercurialInMemoryChangeset(self)
 

	
 
    @LazyProperty
 
    def description(self):
 
        _desc = self._repo.ui.config(b'web', b'description', None, untrusted=True)
 
        return safe_unicode(_desc or b'unknown')
 

	
 
    @LazyProperty
 
    def contact(self):
 
        return safe_unicode(mercurial.hgweb.common.get_contact(self._repo.ui.config)
 
                            or b'Unknown')
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            # fallback to filesystem
 
            cl_path = os.path.join(self.path, '.hg', "00changelog.i")
 
            st_path = os.path.join(self.path, '.hg', "store")
 
            if os.path.exists(cl_path):
 
                return os.stat(cl_path).st_mtime
 
            else:
 
                return os.stat(st_path).st_mtime
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Given any revision identifier, returns a 40 char string with revision hash.
 

	
 
        :param revision: str or int or None
 
        """
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in [-1, None]:
 
            revision = b'tip'
 
        elif isinstance(revision, unicode):
 
            revision = safe_bytes(revision)
 

	
 
        try:
 
            if isinstance(revision, int):
 
                return ascii_str(self._repo[revision].hex())
 
            return ascii_str(mercurial.scmutil.revsymbol(self._repo, revision).hex())
 
        except (IndexError, ValueError, mercurial.error.RepoLookupError, TypeError):
 
            msg = "Revision %r does not exist for %s" % (safe_unicode(revision), self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        except (LookupError, ):
 
            msg = "Ambiguous identifier `%s` for %s" % (safe_unicode(revision), self.name)
 
            raise ChangesetDoesNotExistError(msg)
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns revision number for the given reference.
 
        """
 
        ref_name = safe_str(ref_name)
 
        if ref_type == 'rev' and not ref_name.strip('0'):
 
            return self.EMPTY_CHANGESET
 
        # lookup up the exact node id
 
        _revset_predicates = {
 
                'branch': 'branch',
 
                'book': 'bookmark',
 
                'tag': 'tag',
 
                'rev': 'id',
 
            }
 
        # avoid expensive branch(x) iteration over whole repo
 
        rev_spec = "%%s & %s(%%s)" % _revset_predicates[ref_type]
 
        try:
 
            revs = self._repo.revs(rev_spec, ref_name, ref_name)
 
        except LookupError:
 
            msg = "Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        except mercurial.error.RepoLookupError:
 
            msg = "Revision %s:%s does not exist for %s" % (ref_type, ref_name, self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        if revs:
 
            revision = revs.last()
 
        else:
 
            # TODO: just report 'not found'?
 
            revision = ref_name
 

	
 
        return self._get_revision(revision)
 

	
 
    def _get_archives(self, archive_name='tip'):
 
        allowed = self.baseui.configlist(b"web", b"allow_archive",
 
                                         untrusted=True)
 
        for name, ext in [(b'zip', '.zip'), (b'gz', '.tar.gz'), (b'bz2', '.tar.bz2')]:
 
            if name in allowed or self._repo.ui.configbool(b"web",
 
                                                           b"allow" + name,
 
                                                           untrusted=True):
 
                yield {"type": name, "extension": ext, "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall
 
        to filesystem
 
        (``file:///``) schema.
 
        Returns normalized url. If schema is not given, fall back to
 
        filesystem (``file:///``) schema.
 
        """
 
        url = safe_str(url)
 
        if url != 'default' and '://' not in url:
 
            url = "file:" + urllib.request.pathname2url(url)
 
        return url
 

	
 
    def get_changeset(self, revision=None):
 
        """
 
        Returns ``MercurialChangeset`` object representing repository's
 
        changeset at the given ``revision``.
 
        """
 
        return MercurialChangeset(repository=self, revision=self._get_revision(revision))
 

	
 
    def get_changesets(self, start=None, end=None, start_date=None,
 
                       end_date=None, branch_name=None, reverse=False, max_revisions=None):
 
        """
 
        Returns iterator of ``MercurialChangeset`` objects from start to end
 
        (both are inclusive)
 

	
 
        :param start: None, str, int or mercurial lookup format
 
        :param end:  None, str, int or mercurial lookup format
 
        :param start_date:
 
        :param end_date:
 
        :param branch_name:
 
        :param reversed: return changesets in reversed order
 
        """
 
        start_raw_id = self._get_revision(start)
 
        start_pos = None if start is None else self.revisions.index(start_raw_id)
 
        end_raw_id = self._get_revision(end)
 
        end_pos = None if end is None else self.revisions.index(end_raw_id)
 

	
 
        if start_pos is not None and end_pos is not None and start_pos > end_pos:
 
            raise RepositoryError("Start revision '%s' cannot be "
 
                                  "after end revision '%s'" % (start, end))
 

	
 
        if branch_name and branch_name not in self.allbranches:
 
            msg = "Branch %r not found in %s" % (branch_name, self.name)
 
            raise BranchDoesNotExistError(msg)
 
        if end_pos is not None:
 
            end_pos += 1
 
        # filter branches
 
        filter_ = []
 
        if branch_name:
 
            filter_.append(b'branch("%s")' % safe_bytes(branch_name))
 
        if start_date:
 
            filter_.append(b'date(">%s")' % safe_bytes(str(start_date)))
 
        if end_date:
 
            filter_.append(b'date("<%s")' % safe_bytes(str(end_date)))
 
        if filter_ or max_revisions:
 
            if filter_:
 
                revspec = b' and '.join(filter_)
 
            else:
 
                revspec = b'all()'
 
            if max_revisions:
 
                revspec = b'limit(%s, %d)' % (revspec, max_revisions)
 
            revisions = mercurial.scmutil.revrange(self._repo, [revspec])
 
        else:
 
            revisions = self.revisions
 

	
 
        # this is very much a hack to turn this into a list; a better solution
 
        # would be to get rid of this function entirely and use revsets
 
        revs = list(revisions)[start_pos:end_pos]
 
        if reverse:
 
            revs.reverse()
 

	
 
        return CollectionGenerator(self, revs)
 

	
 
    def pull(self, url):
 
        """
 
        Tries to pull changes from external location.
 
        """
 
        other = mercurial.hg.peer(self._repo, {}, safe_bytes(self._get_url(url)))
 
        try:
 
            mercurial.exchange.pull(self._repo, other, heads=None, force=None)
 
        except mercurial.error.Abort as err:
 
            # Propagate error but with vcs's type
 
            raise RepositoryError(str(err))
 

	
 
    @LazyProperty
 
    def workdir(self):
 
        """
 
        Returns ``Workdir`` instance for this repository.
 
        """
 
        return MercurialWorkdir(self)
 

	
 
    def get_config_value(self, section, name=None, config_file=None):
 
        """
 
        Returns configuration value for a given [``section``] and ``name``.
 

	
 
        :param section: Section we want to retrieve value from
 
        :param name: Name of configuration we want to retrieve
 
        :param config_file: A path to file which should be used to retrieve
 
          configuration from (might also be a list of file paths)
 
        """
 
        if config_file is None:
 
            config_file = []
 
        elif isinstance(config_file, str):
 
            config_file = [config_file]
kallithea/lib/vcs/backends/ssh.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
"""
 
vcs.backends.ssh
 
~~~~~~~~~~~~~~~~~
 

	
 
SSH backend for all available SCMs
 
"""
 

	
 
import datetime
 
import logging
 
import sys
 

	
 
from kallithea.lib.auth import AuthUser, HasPermissionAnyMiddleware
 
from kallithea.lib.utils2 import set_hook_environment
 
from kallithea.lib.vcs.utils import safe_str
 
from kallithea.model.db import Repository, User, UserSshKeys
 
from kallithea.model.meta import Session
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class BaseSshHandler(object):
 
    # Protocol for setting properties:
 
    # Set by sub class:
 
    #   vcs_type: 'hg' or 'git'
 
    # Set by make() / __init__():
 
    #   repo_name: requested repo name - only validated by serve()
 
    # Set by serve() - must not be accessed before:
 
    #   db_repo: repository db object
 
    #   authuser: user that has been authenticated - like request.authuser ... which isn't used here
 
    #   allow_push: false for read-only access to the repo
 

	
 
    # Set defaults, in case .exit should be called early
 
    vcs_type = None
 
    repo_name = None
 

	
 
    @staticmethod
 
    def make(ssh_command):
 
        """Factory function. Given a command as invoked over SSH (and preserved
 
        in SSH_ORIGINAL_COMMAND when run as authorized_keys command), return a
 
        handler if the command looks ok, else return None.
 
        """
 
        raise NotImplementedError
 

	
 
    def __init__(self, repo_name):
 
        self.repo_name = repo_name.rstrip('/')
 

	
 
    def serve(self, user_id, key_id, client_ip):
 
        """Verify basic sanity of the repository, and that the user is
 
        valid and has access - then serve the native VCS protocol for
 
        repository access."""
 
        dbuser = User.get(user_id)
 
        if dbuser is None:
 
            self.exit('User %r not found' % user_id)
 
        self.authuser = AuthUser.make(dbuser=dbuser, ip_addr=client_ip)
 
        log.info('Authorized user %s from SSH %s trusting user id %s and key id %s for %r', dbuser, client_ip, user_id, key_id, self.repo_name)
 
        if self.authuser is None: # not ok ... but already kind of authenticated by SSH ... but not really not authorized ...
 
            self.exit('User %s from %s cannot be authorized' % (dbuser.username, client_ip))
 

	
 
        ssh_key = UserSshKeys.get(key_id)
 
        if ssh_key is None:
 
            self.exit('SSH key %r not found' % key_id)
 
        ssh_key.last_seen = datetime.datetime.now()
 
        Session().commit()
 

	
 
        if HasPermissionAnyMiddleware('repository.write',
 
                                      'repository.admin')(self.authuser, self.repo_name):
 
            self.allow_push = True
 
        elif HasPermissionAnyMiddleware('repository.read')(self.authuser, self.repo_name):
 
            self.allow_push = False
 
        else:
 
            self.exit('Access to %r denied' % safe_str(self.repo_name))
 
            self.exit('Access to %r denied' % self.repo_name)
 

	
 
        self.db_repo = Repository.get_by_repo_name(self.repo_name)
 
        if self.db_repo is None:
 
            self.exit("Repository '%s' not found" % self.repo_name)
 
        assert self.db_repo.repo_name == self.repo_name
 

	
 
        # Set global hook environment up for 'push' actions.
 
        # If pull actions should be served, the actual hook invocation will be
 
        # hardcoded to 'pull' when log_pull_action is invoked (directly on Git,
 
        # or through the Mercurial 'outgoing' hook).
 
        # For push actions, the action in global hook environment is used (in
 
        # handle_git_post_receive when it is called as Git post-receive hook,
 
        # or in log_push_action through the Mercurial 'changegroup' hook).
 
        set_hook_environment(self.authuser.username, client_ip, self.repo_name, self.vcs_type, 'push')
 
        return self._serve()
 

	
 
    def _serve(self):
 
        """Serve the native protocol for repository access."""
 
        raise NotImplementedError
 

	
 
    def exit(self, error):
 
        log.info('abort serving %s %s: %s', self.vcs_type, self.repo_name, error)
 
        sys.stderr.write('abort: %s\n' % error)
 
        sys.exit(1)
kallithea/lib/vcs/nodes.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.nodes
 
    ~~~~~~~~~
 

	
 
    Module holding everything related to vcs nodes.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import functools
 
import mimetypes
 
import posixpath
 
import stat
 

	
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.exceptions import NodeError, RemovedFileNodeError
 
from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import safe_bytes, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 

	
 

	
 
class NodeKind:
 
    SUBMODULE = -1
 
    DIR = 1
 
    FILE = 2
 

	
 

	
 
class NodeState:
 
    ADDED = u'added'
 
    CHANGED = u'changed'
 
    NOT_CHANGED = u'not changed'
 
    REMOVED = u'removed'
 

	
 

	
 
class NodeGeneratorBase(object):
 
    """
 
    Base class for removed added and changed filenodes, it's a lazy generator
 
    class that will create filenodes only on iteration or call
 

	
 
    The len method doesn't need to create filenodes at all
 
    """
 

	
 
    def __init__(self, current_paths, cs):
 
        self.cs = cs
 
        self.current_paths = current_paths
 

	
 
    def __getitem__(self, key):
 
        assert isinstance(key, slice), key
 
        for p in self.current_paths[key]:
 
            yield self.cs.get_node(p)
 

	
 
    def __len__(self):
 
        return len(self.current_paths)
 

	
 
    def __iter__(self):
 
        for p in self.current_paths:
 
            yield self.cs.get_node(p)
 

	
 

	
 
class AddedFileNodesGenerator(NodeGeneratorBase):
 
    """
 
    Class holding Added files for current changeset
 
    """
 
    pass
 

	
 

	
 
class ChangedFileNodesGenerator(NodeGeneratorBase):
 
    """
 
    Class holding Changed files for current changeset
 
    """
 
    pass
 

	
 

	
 
class RemovedFileNodesGenerator(NodeGeneratorBase):
 
    """
 
    Class holding removed files for current changeset
 
    """
 
    def __iter__(self):
 
        for p in self.current_paths:
 
            yield RemovedFileNode(path=p)
 

	
 
    def __getitem__(self, key):
 
        assert isinstance(key, slice), key
 
        for p in self.current_paths[key]:
 
            yield RemovedFileNode(path=p)
 

	
 

	
 
@functools.total_ordering
 
class Node(object):
 
    """
 
    Simplest class representing file or directory on repository.  SCM backends
 
    should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
 
    directly.
 

	
 
    Node's ``path`` cannot start with slash as we operate on *relative* paths
 
    only. Moreover, every single node is identified by the ``path`` attribute,
 
    so it cannot end with slash, too. Otherwise, path could lead to mistakes.
 
    """
 

	
 
    def __init__(self, path, kind):
 
        if path.startswith('/'):
 
            raise NodeError("Cannot initialize Node objects with slash at "
 
                            "the beginning as only relative paths are supported")
 
        self.path = safe_str(path.rstrip('/'))  # we store paths as str
 
        self.path = path.rstrip('/')
 
        if path == '' and kind != NodeKind.DIR:
 
            raise NodeError("Only DirNode and its subclasses may be "
 
                            "initialized with empty path")
 
        self.kind = kind
 
        #self.dirs, self.files = [], []
 
        if self.is_root() and not self.is_dir():
 
            raise NodeError("Root node cannot be FILE kind")
 

	
 
    @LazyProperty
 
    def parent(self):
 
        parent_path = self.get_parent_path()
 
        if parent_path:
 
            if self.changeset:
 
                return self.changeset.get_node(parent_path)
 
            return DirNode(parent_path)
 
        return None
 

	
 
    @LazyProperty
 
    def name(self):
 
        """
 
        Returns name of the node so if its path
 
        then only last part is returned.
 
        """
 
        return self.path.rstrip('/').split('/')[-1]
 

	
 
    def _get_kind(self):
 
        return self._kind
 

	
 
    def _set_kind(self, kind):
 
        if hasattr(self, '_kind'):
 
            raise NodeError("Cannot change node's kind")
 
        else:
 
            self._kind = kind
 
            # Post setter check (path's trailing slash)
 
            if self.path.endswith('/'):
 
                raise NodeError("Node's path cannot end with slash")
 

	
 
    kind = property(_get_kind, _set_kind)
 

	
 
    def __eq__(self, other):
 
        if type(self) is not type(other):
 
            return False
 
        if self._kind != other._kind:
 
            return False
 
        if self.path != other.path:
 
            return False
 
        if self.is_file():
 
            return self.content == other.content
 
        else:
 
            # For DirNode's check without entering each dir
 
            self_nodes_paths = list(sorted(n.path for n in self.nodes))
 
            other_nodes_paths = list(sorted(n.path for n in self.nodes))
 
            return self_nodes_paths == other_nodes_paths
 

	
 
    def __lt__(self, other):
 
        if self._kind < other._kind:
 
            return True
 
        if self._kind > other._kind:
 
            return False
 
        if self.path < other.path:
 
            return True
 
        if self.path > other.path:
 
            return False
 
        if self.is_file():
 
            return self.content < other.content
 
        else:
 
            # For DirNode's check without entering each dir
 
            self_nodes_paths = list(sorted(n.path for n in self.nodes))
 
            other_nodes_paths = list(sorted(n.path for n in self.nodes))
 
            return self_nodes_paths < other_nodes_paths
 

	
 
    def __repr__(self):
 
        return '<%s %r>' % (self.__class__.__name__, self.path)
 

	
 
    def get_parent_path(self):
 
        """
 
        Returns node's parent path or empty string if node is root.
 
        """
 
        if self.is_root():
 
            return ''
 
        return posixpath.dirname(self.path.rstrip('/')) + '/'
 

	
 
    def is_file(self):
 
        """
 
        Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
 
        otherwise.
 
        """
 
        return self.kind == NodeKind.FILE
 

	
 
    def is_dir(self):
 
        """
 
        Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
 
        otherwise.
 
        """
 
        return self.kind == NodeKind.DIR
 

	
 
@@ -499,111 +499,111 @@ class DirNode(Node):
 

	
 
    @LazyProperty
 
    def dirs(self):
 
        return sorted((node for node in self.nodes if node.is_dir()))
 

	
 
    def __iter__(self):
 
        for node in self.nodes:
 
            yield node
 

	
 
    def get_node(self, path):
 
        """
 
        Returns node from within this particular ``DirNode``, so it is now
 
        allowed to fetch, i.e. node located at 'docs/api/index.rst' from node
 
        'docs'. In order to access deeper nodes one must fetch nodes between
 
        them first - this would work::
 

	
 
           docs = root.get_node('docs')
 
           docs.get_node('api').get_node('index.rst')
 

	
 
        :param: path - relative to the current node
 

	
 
        .. note::
 
           To access lazily (as in example above) node have to be initialized
 
           with related changeset object - without it node is out of
 
           context and may know nothing about anything else than nearest
 
           (located at same level) nodes.
 
        """
 
        try:
 
            path = path.rstrip('/')
 
            if path == '':
 
                raise NodeError("Cannot retrieve node without path")
 
            self.nodes  # access nodes first in order to set _nodes_dict
 
            paths = path.split('/')
 
            if len(paths) == 1:
 
                if not self.is_root():
 
                    path = '/'.join((self.path, paths[0]))
 
                else:
 
                    path = paths[0]
 
                return self._nodes_dict[path]
 
            elif len(paths) > 1:
 
                if self.changeset is None:
 
                    raise NodeError("Cannot access deeper "
 
                                    "nodes without changeset")
 
                else:
 
                    path1, path2 = paths[0], '/'.join(paths[1:])
 
                    return self.get_node(path1).get_node(path2)
 
            else:
 
                raise KeyError
 
        except KeyError:
 
            raise NodeError("Node does not exist at %s" % path)
 

	
 
    @LazyProperty
 
    def state(self):
 
        raise NodeError("Cannot access state of DirNode")
 

	
 
    @LazyProperty
 
    def size(self):
 
        size = 0
 
        for root, dirs, files in self.changeset.walk(self.path):
 
            for f in files:
 
                size += f.size
 

	
 
        return size
 

	
 
    def __repr__(self):
 
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
 
                                 getattr(self.changeset, 'short_id', ''))
 

	
 

	
 
class RootNode(DirNode):
 
    """
 
    DirNode being the root node of the repository.
 
    """
 

	
 
    def __init__(self, nodes=(), changeset=None):
 
        super(RootNode, self).__init__(path='', nodes=nodes,
 
            changeset=changeset)
 

	
 
    def __repr__(self):
 
        return '<%s>' % self.__class__.__name__
 

	
 

	
 
class SubModuleNode(Node):
 
    """
 
    represents a SubModule of Git or SubRepo of Mercurial
 
    """
 
    is_binary = False
 
    size = 0
 

	
 
    def __init__(self, name, url, changeset=None, alias=None):
 
        # Note: Doesn't call Node.__init__!
 
        self.path = name
 
        self.kind = NodeKind.SUBMODULE
 
        self.alias = alias
 
        # we have to use emptyChangeset here since this can point to svn/git/hg
 
        # submodules we cannot get from repository
 
        self.changeset = EmptyChangeset(str(changeset), alias=alias)
 
        self.changeset = EmptyChangeset(changeset, alias=alias)
 
        self.url = url
 

	
 
    def __repr__(self):
 
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
 
                                 getattr(self.changeset, 'short_id', ''))
 

	
 
    @LazyProperty
 
    def name(self):
 
        """
 
        Returns name of the node so if its path
 
        then only last part is returned.
 
        """
 
        org = self.path.rstrip('/').rsplit('/', 1)[-1]
 
        return u'%s @ %s' % (org, self.changeset.short_id)
kallithea/lib/vcs/utils/__init__.py
Show inline comments
 
@@ -111,111 +111,111 @@ def safe_bytes(s):
 
    assert isinstance(s, unicode), repr(s)  # bytes cannot coerse with __str__ or handle None or int
 

	
 
    from kallithea.lib.vcs.conf import settings
 
    for enc in settings.DEFAULT_ENCODINGS:
 
        try:
 
            return s.encode(enc)
 
        except UnicodeEncodeError:
 
            pass
 

	
 
    return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
 

	
 

	
 
safe_str = safe_bytes  # safe_str is deprecated - it will be redefined when changing to py3
 

	
 

	
 
def ascii_bytes(s):
 
    """
 
    Simple conversion from unicode/str to bytes, *assuming* all codepoints are
 
    7-bit and it thus is pure ASCII.
 
    Will fail badly with UnicodeError on invalid input.
 
    This should be used where enocding and "safe" ambiguity should be avoided.
 
    Where strings already have been encoded in other ways but still are unicode
 
    string - for example to hex, base64, json, urlencoding, or are known to be
 
    identifiers.
 

	
 
    >>> ascii_bytes('a')
 
    'a'
 
    >>> ascii_bytes(u'a')
 
    'a'
 
    >>> ascii_bytes('å')
 
    Traceback (most recent call last):
 
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
 
    >>> ascii_bytes(u'å')
 
    Traceback (most recent call last):
 
    UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
 
    """
 
    assert isinstance(s, (unicode, str)), repr(s)
 
    return s.encode('ascii')
 

	
 

	
 
def ascii_str(s):
 
    r"""
 
    Simple conversion from bytes to str, *assuming* all codepoints are
 
    7-bit and it thus is pure ASCII.
 
    Will fail badly with UnicodeError on invalid input.
 
    This should be used where enocding and "safe" ambiguity should be avoided.
 
    Where strings are encoded but also in other ways are known to be ASCII, and
 
    where a unicode string is wanted without caring about encoding. For example
 
    to hex, base64, urlencoding, or are known to be identifiers.
 

	
 
    >>> ascii_str('a')
 
    'a'
 
    >>> ascii_str(u'a')
 
    Traceback (most recent call last):
 
    AssertionError: u'a'
 
    >>> ascii_str('å')
 
    Traceback (most recent call last):
 
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
 
    >>> ascii_str(u'å')
 
    Traceback (most recent call last):
 
    AssertionError: u'\xc3\xa5'
 
    """
 
    assert isinstance(s, bytes), repr(s)
 
    # Note: we use "encode", even though we really *should* use "decode". But
 
    # we are in py2 and don't want py2, and encode is doing what we need for the
 
    # ascii subset.
 
    return s.encode('ascii')
 

	
 

	
 
# Regex taken from http://www.regular-expressions.info/email.html
 
email_re = re.compile(
 
    r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""
 
    r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""",
 
    re.IGNORECASE)
 

	
 

	
 
def author_email(author):
 
    """
 
    Returns email address of given author string.
 
    If author contains <> brackets, only look inside that.
 
    If any RFC valid email address is found, return that.
 
    Else, return empty string.
 

	
 
    """
 
    if not author:
 
        return ''
 

	
 
    l = author.find('<') + 1
 
    if l != 0:
 
        r = author.find('>', l)
 
        if r != -1:
 
            author = author[l:r]
 

	
 
    m = email_re.search(author)
 
    if m is None:
 
        return ''
 
    return safe_str(m.group(0))
 
    return m.group(0)
 

	
 

	
 
def author_name(author):
 
    """
 
    get name of author, or else username.
 
    It'll try to find an email in the author string and just cut it off
 
    to get the username
 
    """
 
    if not author:
 
        return ''
 
    if '@' not in author:
 
        return author
 
    return author.replace(author_email(author), '').replace('<', '') \
 
        .replace('>', '').strip()

Changeset was too big and was cut off... Show full diff anyway

0 comments (0 inline, 0 general)