Changeset - f4fed0b32103
[Not reviewed]
beta
0 2 0
Marcin Kuzminski - 14 years ago 2011-09-29 22:34:47
marcin@python-works.com
Rewrote git middleware with the same pattern as recent fix for #176
- additionally fixed dulwich server bug, that used old deprecated method. This fix caused huge improvement of speed for fetching git repos
- moved simplehg and simplegit middleware into begining of pylons callstack. This low level middleware
don't need to run any of the middlewares except each other.
2 files changed with 59 insertions and 44 deletions:
0 comments (0 inline, 0 general)
rhodecode/config/middleware.py
Show inline comments
 
"""Pylons middleware initialization"""
 

	
 
from beaker.middleware import SessionMiddleware
 
from routes.middleware import RoutesMiddleware
 
from paste.cascade import Cascade
 
from paste.registry import RegistryManager
 
from paste.urlparser import StaticURLParser
 
from paste.deploy.converters import asbool
 
from paste.gzipper import make_gzip_middleware
 

	
 
from pylons.middleware import ErrorHandler, StatusCodeRedirect
 
from pylons.wsgiapp import PylonsApp
 

	
 
from rhodecode.lib.middleware.simplehg import SimpleHg
 
from rhodecode.lib.middleware.simplegit import SimpleGit
 
from rhodecode.lib.middleware.https_fixup import HttpsFixup
 
from rhodecode.config.environment import load_environment
 

	
 

	
 
def make_app(global_conf, full_stack=True, static_files=True, **app_conf):
 
    """Create a Pylons WSGI application and return it
 

	
 
    ``global_conf``
 
        The inherited configuration for this application. Normally from
 
        the [DEFAULT] section of the Paste ini file.
 

	
 
    ``full_stack``
 
        Whether or not this application provides a full WSGI stack (by
 
        default, meaning it handles its own exceptions and errors).
 
        Disable full_stack when this application is "managed" by
 
        another WSGI middleware.
 

	
 
    ``app_conf``
 
        The application's local configuration. Normally specified in
 
        the [app:<name>] section of the Paste ini file (where <name>
 
        defaults to main).
 

	
 
    """
 
    # Configure the Pylons environment
 
    config = load_environment(global_conf, app_conf)
 

	
 
    # The Pylons WSGI app
 
    app = PylonsApp(config=config)
 

	
 
    # Routing/Session/Cache Middleware
 
    app = RoutesMiddleware(app, config['routes.map'])
 
    app = SessionMiddleware(app, config)
 

	
 
    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
 
    if asbool(config['pdebug']):
 
        from rhodecode.lib.profiler import ProfilingMiddleware
 
        app = ProfilingMiddleware(app)
 

	
 
    app = SimpleHg(app, config)
 
    app = SimpleGit(app, config)
 

	
 
    if asbool(full_stack):
 
        # Handle Python exceptions
 
        app = ErrorHandler(app, global_conf, **config['pylons.errorware'])
 

	
 
        # Display error documents for 401, 403, 404 status codes (and
 
        # 500 when debug is disabled)
 
        if asbool(config['debug']):
 
            app = StatusCodeRedirect(app)
 
        else:
 
            app = StatusCodeRedirect(app, [400, 401, 403, 404, 500])
 

	
 
    #enable https redirets based on HTTP_X_URL_SCHEME set by proxy
 
    app = HttpsFixup(app, config)
 

	
 
    # Establish the Registry for this application
 
    app = RegistryManager(app)
 

	
 
    if asbool(static_files):
 
        # Serve static files
 
        static_app = StaticURLParser(config['pylons.paths']['static_files'])
 
        app = Cascade([static_app, app])
 
        app = make_gzip_middleware(app, global_conf, compress_level=1)
 

	
 
    # we want our low level middleware to get to the request ASAP. We don't
 
    # need any pylons stack middleware in them
 
    app = SimpleHg(app, config)
 
    app = SimpleGit(app, config)
 

	
 
    app.config = config
 

	
 
    return app
rhodecode/lib/middleware/simplegit.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    rhodecode.lib.middleware.simplegit
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    SimpleGit middleware for handling git protocol request (push/clone etc.)
 
    It's implemented with basic auth function
 

	
 
    :created_on: Apr 28, 2010
 
    :author: marcink
 
    :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
    :license: GPLv3, see COPYING for more details.
 
"""
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
import os
 
import logging
 
import traceback
 

	
 
from dulwich import server as dulserver
 

	
 

	
 
class SimpleGitUploadPackHandler(dulserver.UploadPackHandler):
 

	
 
    def handle(self):
 
        write = lambda x: self.proto.write_sideband(1, x)
 

	
 
        graph_walker = dulserver.ProtocolGraphWalker(self,
 
                                                     self.repo.object_store,
 
                                                     self.repo.get_peeled)
 
        objects_iter = self.repo.fetch_objects(
 
          graph_walker.determine_wants, graph_walker, self.progress,
 
          get_tagged=self.get_tagged)
 

	
 
        # Do they want any objects?
 
        if len(objects_iter) == 0:
 
        if objects_iter is None or len(objects_iter) == 0:
 
            return
 

	
 
        self.progress("counting objects: %d, done.\n" % len(objects_iter))
 
        dulserver.write_pack_data(dulserver.ProtocolFile(None, write),
 
        dulserver.write_pack_objects(dulserver.ProtocolFile(None, write),
 
                                  objects_iter, len(objects_iter))
 
        messages = []
 
        messages.append('thank you for using rhodecode')
 

	
 
        for msg in messages:
 
            self.progress(msg + "\n")
 
        # we are done
 
        self.proto.write("0000")
 

	
 
dulserver.DEFAULT_HANDLERS = {
 
  'git-upload-pack': SimpleGitUploadPackHandler,
 
  'git-receive-pack': dulserver.ReceivePackHandler,
 
}
 

	
 
from dulwich.repo import Repo
 
from dulwich.web import HTTPGitApplication
 

	
 
from paste.auth.basic import AuthBasicAuthenticator
 
from paste.httpheaders import REMOTE_USER, AUTH_TYPE
 

	
 
from rhodecode.lib import safe_str
 
from rhodecode.lib.auth import authfunc, HasPermissionAnyMiddleware
 
from rhodecode.lib.utils import invalidate_cache, check_repo_fast
 
from rhodecode.model.user import UserModel
 

	
 
from webob.exc import HTTPNotFound, HTTPForbidden, HTTPInternalServerError
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def is_git(environ):
 
    """Returns True if request's target is git server.
 
    ``HTTP_USER_AGENT`` would then have git client version given.
 

	
 
    :param environ:
 
    """
 
    http_user_agent = environ.get('HTTP_USER_AGENT')
 
    if http_user_agent and http_user_agent.startswith('git'):
 
        return True
 
    return False
 

	
 

	
 
class SimpleGit(object):
 

	
 
    def __init__(self, application, config):
 
        self.application = application
 
        self.config = config
 
        #authenticate this git request using
 
        # base path of repo locations
 
        self.basepath = self.config['base_path']
 
        #authenticate this mercurial request using authfunc
 
        self.authenticate = AuthBasicAuthenticator('', authfunc)
 
        self.ipaddr = '0.0.0.0'
 
        self.repo_name = None
 
        self.username = None
 
        self.action = None
 

	
 
    def __call__(self, environ, start_response):
 
        if not is_git(environ):
 
            return self.application(environ, start_response)
 

	
 
        proxy_key = 'HTTP_X_REAL_IP'
 
        def_key = 'REMOTE_ADDR'
 
        self.ipaddr = environ.get(proxy_key, environ.get(def_key, '0.0.0.0'))
 
        ipaddr = environ.get(proxy_key, environ.get(def_key, '0.0.0.0'))
 
        username = None
 
        # skip passing error to error controller
 
        environ['pylons.status_code_redirect'] = True
 

	
 
        #======================================================================
 
        # EXTRACT REPOSITORY NAME FROM ENV
 
        #======================================================================
 
        try:
 
            repo_name = self.__get_repository(environ)
 
            log.debug('Extracted repo name is %s' % repo_name)
 
        except:
 
            return HTTPInternalServerError()(environ, start_response)
 

	
 
        #======================================================================
 
        # GET ACTION PULL or PUSH
 
        #======================================================================
 
        self.action = self.__get_action(environ)
 
        try:
 
            #==================================================================
 
            # GET REPOSITORY NAME
 
            #==================================================================
 
            self.repo_name = self.__get_repository(environ)
 
        except:
 
            return HTTPInternalServerError()(environ, start_response)
 
        action = self.__get_action(environ)
 

	
 
        #======================================================================
 
        # CHECK ANONYMOUS PERMISSION
 
        #======================================================================
 
        if self.action in ['pull', 'push']:
 
        if action in ['pull', 'push']:
 
            anonymous_user = self.__get_user('default')
 
            self.username = anonymous_user.username
 
            anonymous_perm = self.__check_permission(self.action,
 
            username = anonymous_user.username
 
            anonymous_perm = self.__check_permission(action,
 
                                                     anonymous_user,
 
                                                     self.repo_name)
 
                                                     repo_name)
 

	
 
            if anonymous_perm is not True or anonymous_user.active is False:
 
                if anonymous_perm is not True:
 
                    log.debug('Not enough credentials to access this '
 
                              'repository as anonymous user')
 
                if anonymous_user.active is False:
 
                    log.debug('Anonymous access is disabled, running '
 
                              'authentication')
 
                #==============================================================
 
                # DEFAULT PERM FAILED OR ANONYMOUS ACCESS IS DISABLED SO WE
 
                # NEED TO AUTHENTICATE AND ASK FOR AUTH USER PERMISSIONS
 
                #==============================================================
 

	
 
                if not REMOTE_USER(environ):
 
                    self.authenticate.realm = \
 
                        safe_str(self.config['rhodecode_realm'])
 
                    result = self.authenticate(environ)
 
                    if isinstance(result, str):
 
                        AUTH_TYPE.update(environ, 'basic')
 
                        REMOTE_USER.update(environ, result)
 
                    else:
 
                        return result.wsgi_application(environ, start_response)
 

	
 
                #==============================================================
 
                # CHECK PERMISSIONS FOR THIS REQUEST USING GIVEN USERNAME FROM
 
                # BASIC AUTH
 
                #==============================================================
 

	
 
                if self.action in ['pull', 'push']:
 
                if action in ['pull', 'push']:
 
                    username = REMOTE_USER(environ)
 
                    try:
 
                        user = self.__get_user(username)
 
                        self.username = user.username
 
                        username = user.username
 
                    except:
 
                        log.error(traceback.format_exc())
 
                        return HTTPInternalServerError()(environ,
 
                                                         start_response)
 

	
 
                    #check permissions for this repository
 
                    perm = self.__check_permission(self.action, user,
 
                                                   self.repo_name)
 
                    perm = self.__check_permission(action, user,
 
                                                   repo_name)
 
                    if perm is not True:
 
                        return HTTPForbidden()(environ, start_response)
 

	
 
        self.extras = {'ip': self.ipaddr,
 
                       'username': self.username,
 
                       'action': self.action,
 
                       'repository': self.repo_name}
 
        extras = {'ip': ipaddr,
 
                  'username': username,
 
                  'action': action,
 
                  'repository': repo_name}
 

	
 
        #===================================================================
 
        # GIT REQUEST HANDLING
 
        #===================================================================
 
        self.basepath = self.config['base_path']
 
        self.repo_path = os.path.join(self.basepath, self.repo_name)
 

	
 
        repo_path = safe_str(os.path.join(self.basepath, repo_name))
 
        log.debug('Repository path is %s' % repo_path)
 

	
 
        #quick check if that dir exists...
 
        if check_repo_fast(self.repo_name, self.basepath):
 
        if check_repo_fast(repo_name, self.basepath):
 
            return HTTPNotFound()(environ, start_response)
 

	
 
        try:
 
            app = self.__make_app()
 
        except:
 
            #invalidate cache on push
 
            if action == 'push':
 
                self.__invalidate_cache(repo_name)
 

	
 
            app = self.__make_app(repo_name, repo_path)
 
            return app(environ, start_response)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            return HTTPInternalServerError()(environ, start_response)
 

	
 
        #invalidate cache on push
 
        if self.action == 'push':
 
            self.__invalidate_cache(self.repo_name)
 
    def __make_app(self, repo_name, repo_path):
 
        """
 
        Make an wsgi application using dulserver
 

	
 
        return app(environ, start_response)
 
        :param repo_name: name of the repository
 
        :param repo_path: full path to the repository
 
        """
 

	
 
    def __make_app(self):
 
        _d = {'/' + self.repo_name: Repo(self.repo_path)}
 
        _d = {'/' + repo_name: Repo(repo_path)}
 
        backend = dulserver.DictBackend(_d)
 
        gitserve = HTTPGitApplication(backend)
 

	
 
        return gitserve
 

	
 
    def __check_permission(self, action, user, repo_name):
 
        """Checks permissions using action (push/pull) user and repository
 
        """
 
        Checks permissions using action (push/pull) user and repository
 
        name
 

	
 
        :param action: push or pull action
 
        :param user: user instance
 
        :param repo_name: repository name
 
        """
 
        if action == 'push':
 
            if not HasPermissionAnyMiddleware('repository.write',
 
                                              'repository.admin')(user,
 
                                                                  repo_name):
 
                return False
 

	
 
        else:
 
            #any other action need at least read permission
 
            if not HasPermissionAnyMiddleware('repository.read',
 
                                              'repository.write',
 
                                              'repository.admin')(user,
 
                                                                  repo_name):
 
                return False
 

	
 
        return True
 

	
 
    def __get_repository(self, environ):
 
        """Get's repository name out of PATH_INFO header
 
        """
 
        Get's repository name out of PATH_INFO header
 

	
 
        :param environ: environ where PATH_INFO is stored
 
        """
 
        try:
 
            repo_name = '/'.join(environ['PATH_INFO'].split('/')[1:])
 
            if repo_name.endswith('/'):
 
                repo_name = repo_name.rstrip('/')
 
        except:
 
            log.error(traceback.format_exc())
 
            raise
 
        repo_name = repo_name.split('/')[0]
 
        return repo_name
 

	
 
    def __get_user(self, username):
 
        return UserModel().get_by_username(username, cache=True)
 

	
 
    def __get_action(self, environ):
 
        """Maps git request commands into a pull or push command.
 

	
 
        :param environ:
 
        """
 
        service = environ['QUERY_STRING'].split('=')
 
        if len(service) > 1:
 
            service_cmd = service[1]
 
            mapping = {'git-receive-pack': 'push',
 
                       'git-upload-pack': 'pull',
 
                       }
 

	
 
            return mapping.get(service_cmd,
 
                               service_cmd if service_cmd else 'other')
 
        else:
 
            return 'other'
 

	
 
    def __invalidate_cache(self, repo_name):
 
        """we know that some change was made to repositories and we should
 
        invalidate the cache to see the changes right away but only for
 
        push requests"""
 
        invalidate_cache('get_repo_cached_%s' % repo_name)
 

	
0 comments (0 inline, 0 general)