Changeset - ce2a4ef8cd5f
[Not reviewed]
default
0 7 1
Mads Kiilerich - 7 years ago 2019-01-07 01:50:56
mads@kiilerich.com
Grafted from: fbbda81038e8
middleware: move handling of permanent repo URLs to separate middleware

This is about the handling of repo URLs like '_123' for the repo with repo_id
123. The de-mangling of such URLs was spread out across multiple layers. It
fits much more nicely as a middleware layer. The code in routing and simplehg /
simplegit can thus be removed.

The base _get_by_id function was confusing - fix it by removing it. To do that,
refactor utils introducing fix_repo_id_name to replace get_repo_by_id.

We now assume in the application that we never have any extra leading '/' in
URL paths.

And while trailing extra '/' might be fine in actual URLs, they must be handled
at the routing level, not propagated through all layers. This changeset is not
really changing that.
8 files changed with 96 insertions and 76 deletions:
0 comments (0 inline, 0 general)
kallithea/config/app_cfg.py
Show inline comments
 
@@ -9,48 +9,49 @@
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
Global configuration file for TurboGears2 specific settings in Kallithea.
 

	
 
This file complements the .ini file.
 
"""
 

	
 
import platform
 
import os, sys, logging
 

	
 
import tg
 
from tg import hooks
 
from tg.configuration import AppConfig
 
from tg.support.converters import asbool
 
import alembic.config
 
from alembic.script.base import ScriptDirectory
 
from alembic.migration import MigrationContext
 
from sqlalchemy import create_engine
 
import mercurial
 

	
 
from kallithea.lib.middleware.permanent_repo_url import PermanentRepoUrl
 
from kallithea.lib.middleware.https_fixup import HttpsFixup
 
from kallithea.lib.middleware.simplegit import SimpleGit
 
from kallithea.lib.middleware.simplehg import SimpleHg
 
from kallithea.lib.auth import set_available_permissions
 
from kallithea.lib.utils import load_rcextensions, make_ui, set_app_settings, set_vcs_config, \
 
    set_indexer_config, check_git_version, repo2db_mapper
 
from kallithea.lib.utils2 import str2bool
 
import kallithea.model.base
 
from kallithea.model.scm import ScmModel
 

	
 
import formencode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class KallitheaAppConfig(AppConfig):
 
    # Note: AppConfig has a misleading name, as it's not the application
 
    # configuration, but the application configurator. The AppConfig values are
 
    # used as a template to create the actual configuration, which might
 
    # overwrite or extend the one provided by the configurator template.
 

	
 
    # To make it clear, AppConfig creates the config and sets into it the same
 
    # values that AppConfig itself has. Then the values from the config file and
 
    # gearbox options are loaded and merged into the configuration. Then an
 
@@ -187,28 +188,30 @@ def setup_configuration(app):
 

	
 
    # configure vcs and indexer libraries (they are supposed to be independent
 
    # as much as possible and thus avoid importing tg.config or
 
    # kallithea.CONFIG).
 
    set_vcs_config(kallithea.CONFIG)
 
    set_indexer_config(kallithea.CONFIG)
 

	
 
    check_git_version()
 

	
 

	
 
hooks.register('configure_new_app', setup_configuration)
 

	
 

	
 
def setup_application(app):
 
    config = app.config
 

	
 
    # we want our low level middleware to get to the request ASAP. We don't
 
    # need any stack middleware in them - especially no StatusCodeRedirect buffering
 
    app = SimpleHg(app, config)
 
    app = SimpleGit(app, config)
 

	
 
    # Enable https redirects based on HTTP_X_URL_SCHEME set by proxy
 
    if any(asbool(config.get(x)) for x in ['https_fixup', 'force_https', 'use_htsts']):
 
        app = HttpsFixup(app, config)
 

	
 
    app = PermanentRepoUrl(app, config)
 
    return app
 

	
 

	
 
hooks.register('before_config', setup_application)
kallithea/config/routing.py
Show inline comments
 
@@ -12,70 +12,60 @@
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
Routes configuration
 

	
 
The more specific and detailed routes should be defined first so they
 
may take precedent over the more generic routes. For more information
 
refer to the routes manual at http://routes.groovie.org/docs/
 
"""
 

	
 
from tg import request
 
from routes import Mapper
 

	
 
# prefix for non repository related links needs to be prefixed with `/`
 
ADMIN_PREFIX = '/_admin'
 

	
 

	
 
def make_map(config):
 
    """Create, configure and return the routes Mapper"""
 
    rmap = Mapper(directory=config['paths']['controllers'],
 
                  always_scan=config['debug'])
 
    rmap.minimization = False
 
    rmap.explicit = False
 

	
 
    from kallithea.lib.utils import (is_valid_repo, is_valid_repo_group,
 
                                     get_repo_by_id)
 
    from kallithea.lib.utils import is_valid_repo, is_valid_repo_group
 

	
 
    def check_repo(environ, match_dict):
 
        """
 
        check for valid repository for proper 404 handling
 

	
 
        :param environ:
 
        :param match_dict:
 
        Check for valid repository for proper 404 handling.
 
        Also, a bit of side effect modifying match_dict ...
 
        """
 
        repo_name = match_dict.get('repo_name')
 

	
 
        if match_dict.get('f_path'):
 
            # fix for multiple initial slashes that causes errors
 
            match_dict['f_path'] = match_dict['f_path'].lstrip('/')
 

	
 
        by_id_match = get_repo_by_id(repo_name)
 
        if by_id_match:
 
            repo_name = by_id_match
 
            match_dict['repo_name'] = repo_name
 

	
 
        return is_valid_repo(repo_name, config['base_path'])
 
        return is_valid_repo(match_dict['repo_name'], config['base_path'])
 

	
 
    def check_group(environ, match_dict):
 
        """
 
        check for valid repository group for proper 404 handling
 

	
 
        :param environ:
 
        :param match_dict:
 
        """
 
        repo_group_name = match_dict.get('group_name')
 
        return is_valid_repo_group(repo_group_name, config['base_path'])
 

	
 
    def check_group_skip_path(environ, match_dict):
 
        """
 
        check for valid repository group for proper 404 handling, but skips
 
        verification of existing path
 

	
 
        :param environ:
 
        :param match_dict:
 
        """
 
        repo_group_name = match_dict.get('group_name')
 
        return is_valid_repo_group(repo_group_name, config['base_path'],
 
                                   skip_path_check=True)
 

	
 
    def check_user_group(environ, match_dict):
kallithea/lib/base.py
Show inline comments
 
@@ -245,65 +245,48 @@ class BaseVCSController(object):
 
                username = result
 
            else:
 
                return None, result.wsgi_application
 

	
 
        #==============================================================
 
        # CHECK PERMISSIONS FOR THIS REQUEST USING GIVEN USERNAME
 
        #==============================================================
 
        try:
 
            user = User.get_by_username_or_email(username)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            return None, webob.exc.HTTPInternalServerError()
 

	
 
        authuser = AuthUser.make(dbuser=user, ip_addr=ip_addr)
 
        if authuser is None:
 
            return None, webob.exc.HTTPForbidden()
 
        if not self._check_permission(action, authuser, repo_name):
 
            return None, webob.exc.HTTPForbidden()
 

	
 
        return user, None
 

	
 
    def _handle_request(self, environ, start_response):
 
        raise NotImplementedError()
 

	
 
    def _get_by_id(self, repo_name):
 
        """
 
        Gets a special pattern _<ID> from clone url and tries to replace it
 
        with a repository_name for support of _<ID> permanent URLs
 

	
 
        :param repo_name:
 
        """
 

	
 
        data = repo_name.split('/')
 
        if len(data) >= 2:
 
            from kallithea.lib.utils import get_repo_by_id
 
            by_id_match = get_repo_by_id(repo_name)
 
            if by_id_match:
 
                data[1] = safe_str(by_id_match)
 

	
 
        return '/'.join(data)
 

	
 
    def _check_permission(self, action, authuser, repo_name):
 
        """
 
        Checks permissions using action (push/pull) user and repository
 
        name
 

	
 
        :param action: 'push' or 'pull' action
 
        :param user: `User` instance
 
        :param repo_name: repository name
 
        """
 
        if action == 'push':
 
            if not HasPermissionAnyMiddleware('repository.write',
 
                                              'repository.admin')(authuser,
 
                                                                  repo_name):
 
                return False
 

	
 
        else:
 
            #any other action need at least read permission
 
            if not HasPermissionAnyMiddleware('repository.read',
 
                                              'repository.write',
 
                                              'repository.admin')(authuser,
 
                                                                  repo_name):
 
                return False
 

	
 
        return True
kallithea/lib/middleware/permanent_repo_url.py
Show inline comments
 
new file 100644
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.middleware.permanent_repo_url
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
middleware to handle permanent repo URLs, replacing PATH_INFO '/_123/yada' with
 
'/name/of/repo/yada' after looking 123 up in the database.
 
"""
 

	
 

	
 
from kallithea.lib.utils import safe_str, fix_repo_id_name
 

	
 

	
 
class PermanentRepoUrl(object):
 

	
 
    def __init__(self, app, config):
 
        self.application = app
 
        self.config = config
 

	
 
    def __call__(self, environ, start_response):
 
        path_info = environ['PATH_INFO']
 
        if path_info.startswith('/'): # it must
 
            path_info = '/' + safe_str(fix_repo_id_name(path_info[1:]))
 
            environ['PATH_INFO'] = path_info
 

	
 
        return self.application(environ, start_response)
kallithea/lib/middleware/simplegit.py
Show inline comments
 
@@ -120,56 +120,53 @@ class SimpleGit(BaseVCSController):
 

	
 
        try:
 
            self._handle_githooks(repo_name, action, baseui, environ)
 
            log.info('%s action on Git repo "%s" by "%s" from %s',
 
                     action, str_repo_name, safe_str(user.username), ip_addr)
 
            app = self.__make_app(repo_name)
 
            return app(environ, start_response)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            return HTTPInternalServerError()(environ, start_response)
 

	
 
    def __make_app(self, repo_name):
 
        """
 
        Return a pygrack wsgi application.
 
        """
 
        return make_wsgi_app(repo_name, safe_str(self.basepath)) # FIXME: safe_str???
 

	
 
    def __get_repository(self, environ):
 
        """
 
        Gets repository name out of PATH_INFO header
 

	
 
        :param environ: environ where PATH_INFO is stored
 
        """
 
        try:
 
            environ['PATH_INFO'] = self._get_by_id(environ['PATH_INFO'])
 
            repo_name = GIT_PROTO_PAT.match(environ['PATH_INFO']).group(1)
 
            return GIT_PROTO_PAT.match(environ['PATH_INFO']).group(1)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            raise
 

	
 
        return repo_name
 

	
 
    def __get_action(self, environ):
 
        """
 
        Maps Git request commands into a pull or push command.
 

	
 
        :param environ:
 
        """
 
        service = environ['QUERY_STRING'].split('=')
 

	
 
        if len(service) > 1:
 
            service_cmd = service[1]
 
            mapping = {
 
                'git-receive-pack': 'push',
 
                'git-upload-pack': 'pull',
 
            }
 
            op = mapping[service_cmd]
 
            self._git_stored_op = op
 
            # try to fallback to stored variable as we don't know if the last
 
            # operation is pull/push
 
        return self._git_stored_op
 

	
 
    def _handle_githooks(self, repo_name, action, baseui, environ):
 
        """
 
        Handles pull action, push is handled by post-receive hook
 
        """
kallithea/lib/middleware/simplehg.py
Show inline comments
 
@@ -146,58 +146,55 @@ class SimpleHg(BaseVCSController):
 

	
 
        try:
 
            log.info('%s action on Mercurial repo "%s" by "%s" from %s',
 
                     action, str_repo_name, safe_str(user.username), ip_addr)
 
            environ['REPO_NAME'] = str_repo_name # used by hgweb_mod.hgweb
 
            app = self.__make_app(repo_path, baseui)
 
            return app(environ, start_response)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            return HTTPInternalServerError()(environ, start_response)
 

	
 
    def __make_app(self, repo_name, baseui):
 
        """
 
        Make an hgweb wsgi application using baseui.
 
        """
 
        return hgweb_mod.hgweb(repo_name, name=repo_name, baseui=baseui)
 

	
 
    def __get_repository(self, environ):
 
        """
 
        Gets repository name out of PATH_INFO header
 

	
 
        :param environ: environ where PATH_INFO is stored
 
        """
 
        try:
 
            environ['PATH_INFO'] = self._get_by_id(environ['PATH_INFO'])
 
            repo_name = '/'.join(environ['PATH_INFO'].split('/')[1:])
 
            if repo_name.endswith('/'):
 
                repo_name = repo_name.rstrip('/')
 
            path_info = environ['PATH_INFO']
 
            if path_info.startswith('/'):
 
                return path_info[1:].rstrip('/')
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            raise
 

	
 
        return repo_name
 

	
 
    def __get_action(self, environ):
 
        """
 
        Maps Mercurial request commands into 'pull' or 'push'.
 

	
 
        Raises HTTPBadRequest if the request environment doesn't look like a hg client.
 
        """
 
        mapping = {
 
            # 'batch' is not in this list - it is handled explicitly
 
            'between': 'pull',
 
            'branches': 'pull',
 
            'branchmap': 'pull',
 
            'capabilities': 'pull',
 
            'changegroup': 'pull',
 
            'changegroupsubset': 'pull',
 
            'changesetdata': 'pull',
 
            'clonebundles': 'pull',
 
            'debugwireargs': 'pull',
 
            'filedata': 'pull',
 
            'getbundle': 'pull',
 
            'getlfile': 'pull',
 
            'heads': 'pull',
 
            'hello': 'pull',
 
            'known': 'pull',
 
            'lheads': 'pull',
kallithea/lib/utils.py
Show inline comments
 
@@ -57,71 +57,77 @@ REMOVED_REPO_PAT = re.compile(r'rm__\d{8
 
# PERM DECORATOR HELPERS FOR EXTRACTING NAMES FOR PERM CHECKS
 
#==============================================================================
 
def get_repo_slug(request):
 
    _repo = request.environ['pylons.routes_dict'].get('repo_name')
 
    if _repo:
 
        _repo = _repo.rstrip('/')
 
    return _repo
 

	
 

	
 
def get_repo_group_slug(request):
 
    _group = request.environ['pylons.routes_dict'].get('group_name')
 
    if _group:
 
        _group = _group.rstrip('/')
 
    return _group
 

	
 

	
 
def get_user_group_slug(request):
 
    _group = request.environ['pylons.routes_dict'].get('id')
 
    _group = UserGroup.get(_group)
 
    if _group:
 
        return _group.users_group_name
 
    return None
 

	
 

	
 
def _extract_id_from_repo_name(repo_name):
 
    if repo_name.startswith('/'):
 
        repo_name = repo_name.lstrip('/')
 
    by_id_match = re.match(r'^_(\d{1,})', repo_name)
 
    if by_id_match:
 
        return by_id_match.groups()[0]
 
def _get_permanent_id(s):
 
    """Helper for decoding stable URLs with repo ID. For a string like '_123'
 
    return 123.
 
    """
 
    by_id_match = re.match(r'^_(\d+)$', s)
 
    if by_id_match is None:
 
        return None
 
    return int(by_id_match.group(1))
 

	
 

	
 
def get_repo_by_id(repo_name):
 
def fix_repo_id_name(path):
 
    """
 
    Extracts repo_name by id from special urls. Example url is _11/repo_name
 
    Rewrite repo_name for _<ID> permanent URLs.
 

	
 
    :param repo_name:
 
    :return: repo_name if matched else None
 
    Given a path, if the first path element is like _<ID>, return the path with
 
    this part expanded to the corresponding full repo name, else return the
 
    provided path.
 
    """
 
    _repo_id = _extract_id_from_repo_name(repo_name)
 
    if _repo_id:
 
    first, rest = path, ''
 
    if '/' in path:
 
        first, rest_ = path.split('/', 1)
 
        rest = '/' + rest_
 
    repo_id = _get_permanent_id(first)
 
    if repo_id is not None:
 
        from kallithea.model.db import Repository
 
        repo = Repository.get(_repo_id)
 
        if repo:
 
            # TODO: return repo instead of reponame? or would that be a layering violation?
 
            return repo.repo_name
 
    return None
 
        repo = Repository.get(repo_id)
 
        if repo is not None:
 
            return repo.repo_name + rest
 
    return path
 

	
 

	
 
def action_logger(user, action, repo, ipaddr='', commit=False):
 
    """
 
    Action logger for various actions made by users
 

	
 
    :param user: user that made this action, can be a unique username string or
 
        object containing user_id attribute
 
    :param action: action to log, should be on of predefined unique actions for
 
        easy translations
 
    :param repo: string name of repository or object containing repo_id,
 
        that action was made on
 
    :param ipaddr: optional IP address from what the action was made
 

	
 
    """
 

	
 
    # if we don't get explicit IP address try to get one from registered user
 
    # in tmpl context var
 
    if not ipaddr:
 
        ipaddr = getattr(get_current_authuser(), 'ip_addr', '')
 

	
 
    if getattr(user, 'user_id', None):
 
        user_obj = User.get(user.user_id)
 
    elif isinstance(user, basestring):
kallithea/tests/other/test_libs.py
Show inline comments
 
@@ -511,68 +511,74 @@ class TestLibs(TestController):
 
        config_stub = {
 
            'sqlalchemy.url': 'foo',
 
            'issue_pat': 'X(\d+)',
 
            'issue_server_link': 'http://main/{repo}/main/\\1/',
 
            'issue_sub': '#\\1',
 
            'issue_pat_pr': '(?:pullrequest|pull request|PR|pr) ?#?(\d+)',
 
            'issue_server_link_pr': 'http://pr/{repo}/pr/\\1',
 
            'issue_sub_pr': 'PR#\\1',
 
            'issue_pat_bug': '(?:BUG|bug|issue) ?#?(\d+)',
 
            'issue_server_link_bug': 'http://bug/{repo}/bug/\\1',
 
            'issue_sub_bug': 'bug#\\1',
 
            'issue_pat_empty_prefix': 'FAIL(\d+)',
 
            'issue_server_link_empty_prefix': 'http://fail/{repo}/\\1',
 
            'issue_sub_empty_prefix': '',
 
            'issue_pat_absent_prefix': 'FAILMORE(\d+)',
 
            'issue_server_link_absent_prefix': 'http://failmore/{repo}/\\1',
 
        }
 
        # force recreation of lazy function
 
        with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):
 
            with mock.patch('kallithea.CONFIG', config_stub):
 
                assert urlify_text(sample, 'repo_name') == expected
 

	
 
    @parametrize('test,expected', [
 
      ("", None),
 
      ("/_2", '2'),
 
      ("_2", '2'),
 
      ("/_2/", '2'),
 
      ("_2/", '2'),
 

	
 
      ("/_21", '21'),
 
      ("_21", '21'),
 
      ("/_21/", '21'),
 
      ("_21/", '21'),
 
      ("/_2", None),
 
      ("_2", 2),
 
      ("_2/", None),
 
    ])
 
    def test_get_permanent_id(self, test, expected):
 
        from kallithea.lib.utils import _get_permanent_id
 
        extracted = _get_permanent_id(test)
 
        assert extracted == expected, 'url:%s, got:`%s` expected: `%s`' % (test, _test, expected)
 

	
 
      ("/_21/foobar", '21'),
 
      ("_21/121", '21'),
 
      ("/_21/_12", '21'),
 
      ("_21/prefix/foo", '21'),
 
    @parametrize('test,expected', [
 
      ("", ""),
 
      ("/", "/"),
 
      ("/_ID", '/_ID'),
 
      ("ID", "ID"),
 
      ("_ID", 'NAME'),
 
      ("_ID/", 'NAME/'),
 
      ("_ID/1/2", 'NAME/1/2'),
 
      ("_IDa", '_IDa'),
 
    ])
 
    def test_get_repo_by_id(self, test, expected):
 
        from kallithea.lib.utils import _extract_id_from_repo_name
 
        _test = _extract_id_from_repo_name(test)
 
        assert _test == expected, 'url:%s, got:`%s` expected: `%s`' % (test, _test, expected)
 

	
 
    def test_fix_repo_id_name(self, test, expected):
 
        repo = Repository.get_by_repo_name(HG_REPO)
 
        test = test.replace('ID', str(repo.repo_id))
 
        expected = expected.replace('NAME', repo.repo_name).replace('ID', str(repo.repo_id))
 
        from kallithea.lib.utils import fix_repo_id_name
 
        replaced = fix_repo_id_name(test)
 
        assert replaced == expected, 'url:%s, got:`%s` expected: `%s`' % (test, replaced, expected)
 

	
 
    @parametrize('canonical,test,expected', [
 
        ('http://www.example.org/', '/abc/xyz', 'http://www.example.org/abc/xyz'),
 
        ('http://www.example.org', '/abc/xyz', 'http://www.example.org/abc/xyz'),
 
        ('http://www.example.org', '/abc/xyz/', 'http://www.example.org/abc/xyz/'),
 
        ('http://www.example.org', 'abc/xyz/', 'http://www.example.org/abc/xyz/'),
 
        ('http://www.example.org', 'about', 'http://www.example.org/about-page'),
 
        ('http://www.example.org/repos/', 'abc/xyz/', 'http://www.example.org/repos/abc/xyz/'),
 
        ('http://www.example.org/kallithea/repos/', 'abc/xyz/', 'http://www.example.org/kallithea/repos/abc/xyz/'),
 
    ])
 
    def test_canonical_url(self, canonical, test, expected):
 
        from kallithea.lib.helpers import canonical_url
 
        from tg import request
 

	
 
        # setup url(), used by canonical_url
 
        import routes
 
        m = routes.Mapper()
 
        m.connect('about', '/about-page')
 
        url = routes.URLGenerator(m, {'HTTP_HOST': 'http_host.example.org'})
 

	
 
        config_mock = {
 
            'canonical_url': canonical,
 
        }
 

	
0 comments (0 inline, 0 general)