Changeset - f4fed0b32103
[Not reviewed]
beta
0 2 0
Marcin Kuzminski - 14 years ago 2011-09-29 22:34:47
marcin@python-works.com
Rewrote git middleware with the same pattern as recent fix for #176
- additionally fixed dulwich server bug, that used old deprecated method. This fix caused huge improvement of speed for fetching git repos
- moved simplehg and simplegit middleware into begining of pylons callstack. This low level middleware
don't need to run any of the middlewares except each other.
2 files changed with 61 insertions and 46 deletions:
0 comments (0 inline, 0 general)
rhodecode/config/middleware.py
Show inline comments
 
@@ -6,77 +6,79 @@ from paste.cascade import Cascade
 
from paste.registry import RegistryManager
 
from paste.urlparser import StaticURLParser
 
from paste.deploy.converters import asbool
 
from paste.gzipper import make_gzip_middleware
 

	
 
from pylons.middleware import ErrorHandler, StatusCodeRedirect
 
from pylons.wsgiapp import PylonsApp
 

	
 
from rhodecode.lib.middleware.simplehg import SimpleHg
 
from rhodecode.lib.middleware.simplegit import SimpleGit
 
from rhodecode.lib.middleware.https_fixup import HttpsFixup
 
from rhodecode.config.environment import load_environment
 

	
 

	
 
def make_app(global_conf, full_stack=True, static_files=True, **app_conf):
 
    """Create a Pylons WSGI application and return it
 

	
 
    ``global_conf``
 
        The inherited configuration for this application. Normally from
 
        the [DEFAULT] section of the Paste ini file.
 

	
 
    ``full_stack``
 
        Whether or not this application provides a full WSGI stack (by
 
        default, meaning it handles its own exceptions and errors).
 
        Disable full_stack when this application is "managed" by
 
        another WSGI middleware.
 

	
 
    ``app_conf``
 
        The application's local configuration. Normally specified in
 
        the [app:<name>] section of the Paste ini file (where <name>
 
        defaults to main).
 

	
 
    """
 
    # Configure the Pylons environment
 
    config = load_environment(global_conf, app_conf)
 

	
 
    # The Pylons WSGI app
 
    app = PylonsApp(config=config)
 

	
 
    # Routing/Session/Cache Middleware
 
    app = RoutesMiddleware(app, config['routes.map'])
 
    app = SessionMiddleware(app, config)
 

	
 
    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
 
    if asbool(config['pdebug']):
 
        from rhodecode.lib.profiler import ProfilingMiddleware
 
        app = ProfilingMiddleware(app)
 

	
 
    app = SimpleHg(app, config)
 
    app = SimpleGit(app, config)
 

	
 
    if asbool(full_stack):
 
        # Handle Python exceptions
 
        app = ErrorHandler(app, global_conf, **config['pylons.errorware'])
 

	
 
        # Display error documents for 401, 403, 404 status codes (and
 
        # 500 when debug is disabled)
 
        if asbool(config['debug']):
 
            app = StatusCodeRedirect(app)
 
        else:
 
            app = StatusCodeRedirect(app, [400, 401, 403, 404, 500])
 

	
 
    #enable https redirets based on HTTP_X_URL_SCHEME set by proxy
 
    app = HttpsFixup(app, config)
 

	
 
    # Establish the Registry for this application
 
    app = RegistryManager(app)
 

	
 
    if asbool(static_files):
 
        # Serve static files
 
        static_app = StaticURLParser(config['pylons.paths']['static_files'])
 
        app = Cascade([static_app, app])
 
        app = make_gzip_middleware(app, global_conf, compress_level=1)
 

	
 
    # we want our low level middleware to get to the request ASAP. We don't
 
    # need any pylons stack middleware in them
 
    app = SimpleHg(app, config)
 
    app = SimpleGit(app, config)
 

	
 
    app.config = config
 

	
 
    return app
rhodecode/lib/middleware/simplegit.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    rhodecode.lib.middleware.simplegit
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    SimpleGit middleware for handling git protocol request (push/clone etc.)
 
    It's implemented with basic auth function
 

	
 
    :created_on: Apr 28, 2010
 
    :author: marcink
 
    :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
    :license: GPLv3, see COPYING for more details.
 
"""
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
import os
 
import logging
 
import traceback
 

	
 
from dulwich import server as dulserver
 

	
 

	
 
class SimpleGitUploadPackHandler(dulserver.UploadPackHandler):
 

	
 
    def handle(self):
 
        write = lambda x: self.proto.write_sideband(1, x)
 

	
 
        graph_walker = dulserver.ProtocolGraphWalker(self,
 
                                                     self.repo.object_store,
 
                                                     self.repo.get_peeled)
 
        objects_iter = self.repo.fetch_objects(
 
          graph_walker.determine_wants, graph_walker, self.progress,
 
          get_tagged=self.get_tagged)
 

	
 
        # Do they want any objects?
 
        if len(objects_iter) == 0:
 
        if objects_iter is None or len(objects_iter) == 0:
 
            return
 

	
 
        self.progress("counting objects: %d, done.\n" % len(objects_iter))
 
        dulserver.write_pack_data(dulserver.ProtocolFile(None, write),
 
        dulserver.write_pack_objects(dulserver.ProtocolFile(None, write),
 
                                  objects_iter, len(objects_iter))
 
        messages = []
 
        messages.append('thank you for using rhodecode')
 

	
 
        for msg in messages:
 
            self.progress(msg + "\n")
 
        # we are done
 
        self.proto.write("0000")
 

	
 
dulserver.DEFAULT_HANDLERS = {
 
  'git-upload-pack': SimpleGitUploadPackHandler,
 
  'git-receive-pack': dulserver.ReceivePackHandler,
 
}
 

	
 
from dulwich.repo import Repo
 
from dulwich.web import HTTPGitApplication
 

	
 
from paste.auth.basic import AuthBasicAuthenticator
 
from paste.httpheaders import REMOTE_USER, AUTH_TYPE
 

	
 
from rhodecode.lib import safe_str
 
from rhodecode.lib.auth import authfunc, HasPermissionAnyMiddleware
 
from rhodecode.lib.utils import invalidate_cache, check_repo_fast
 
from rhodecode.model.user import UserModel
 

	
 
from webob.exc import HTTPNotFound, HTTPForbidden, HTTPInternalServerError
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def is_git(environ):
 
    """Returns True if request's target is git server.
 
    ``HTTP_USER_AGENT`` would then have git client version given.
 

	
 
    :param environ:
 
    """
 
    http_user_agent = environ.get('HTTP_USER_AGENT')
 
    if http_user_agent and http_user_agent.startswith('git'):
 
        return True
 
    return False
 

	
 

	
 
class SimpleGit(object):
 

	
 
    def __init__(self, application, config):
 
        self.application = application
 
        self.config = config
 
        #authenticate this git request using
 
        # base path of repo locations
 
        self.basepath = self.config['base_path']
 
        #authenticate this mercurial request using authfunc
 
        self.authenticate = AuthBasicAuthenticator('', authfunc)
 
        self.ipaddr = '0.0.0.0'
 
        self.repo_name = None
 
        self.username = None
 
        self.action = None
 

	
 
    def __call__(self, environ, start_response):
 
        if not is_git(environ):
 
            return self.application(environ, start_response)
 

	
 
        proxy_key = 'HTTP_X_REAL_IP'
 
        def_key = 'REMOTE_ADDR'
 
        self.ipaddr = environ.get(proxy_key, environ.get(def_key, '0.0.0.0'))
 
        ipaddr = environ.get(proxy_key, environ.get(def_key, '0.0.0.0'))
 
        username = None
 
        # skip passing error to error controller
 
        environ['pylons.status_code_redirect'] = True
 

	
 
        #======================================================================
 
        # EXTRACT REPOSITORY NAME FROM ENV
 
        #======================================================================
 
        try:
 
            repo_name = self.__get_repository(environ)
 
            log.debug('Extracted repo name is %s' % repo_name)
 
        except:
 
            return HTTPInternalServerError()(environ, start_response)
 

	
 
        #======================================================================
 
        # GET ACTION PULL or PUSH
 
        #======================================================================
 
        self.action = self.__get_action(environ)
 
        try:
 
            #==================================================================
 
            # GET REPOSITORY NAME
 
            #==================================================================
 
            self.repo_name = self.__get_repository(environ)
 
        except:
 
            return HTTPInternalServerError()(environ, start_response)
 
        action = self.__get_action(environ)
 

	
 
        #======================================================================
 
        # CHECK ANONYMOUS PERMISSION
 
        #======================================================================
 
        if self.action in ['pull', 'push']:
 
        if action in ['pull', 'push']:
 
            anonymous_user = self.__get_user('default')
 
            self.username = anonymous_user.username
 
            anonymous_perm = self.__check_permission(self.action,
 
            username = anonymous_user.username
 
            anonymous_perm = self.__check_permission(action,
 
                                                     anonymous_user,
 
                                                     self.repo_name)
 
                                                     repo_name)
 

	
 
            if anonymous_perm is not True or anonymous_user.active is False:
 
                if anonymous_perm is not True:
 
                    log.debug('Not enough credentials to access this '
 
                              'repository as anonymous user')
 
                if anonymous_user.active is False:
 
                    log.debug('Anonymous access is disabled, running '
 
                              'authentication')
 
                #==============================================================
 
                # DEFAULT PERM FAILED OR ANONYMOUS ACCESS IS DISABLED SO WE
 
                # NEED TO AUTHENTICATE AND ASK FOR AUTH USER PERMISSIONS
 
                #==============================================================
 

	
 
                if not REMOTE_USER(environ):
 
                    self.authenticate.realm = \
 
                        safe_str(self.config['rhodecode_realm'])
 
                    result = self.authenticate(environ)
 
                    if isinstance(result, str):
 
                        AUTH_TYPE.update(environ, 'basic')
 
                        REMOTE_USER.update(environ, result)
 
                    else:
 
                        return result.wsgi_application(environ, start_response)
 

	
 
                #==============================================================
 
                # CHECK PERMISSIONS FOR THIS REQUEST USING GIVEN USERNAME FROM
 
                # BASIC AUTH
 
                #==============================================================
 

	
 
                if self.action in ['pull', 'push']:
 
                if action in ['pull', 'push']:
 
                    username = REMOTE_USER(environ)
 
                    try:
 
                        user = self.__get_user(username)
 
                        self.username = user.username
 
                        username = user.username
 
                    except:
 
                        log.error(traceback.format_exc())
 
                        return HTTPInternalServerError()(environ,
 
                                                         start_response)
 

	
 
                    #check permissions for this repository
 
                    perm = self.__check_permission(self.action, user,
 
                                                   self.repo_name)
 
                    perm = self.__check_permission(action, user,
 
                                                   repo_name)
 
                    if perm is not True:
 
                        return HTTPForbidden()(environ, start_response)
 

	
 
        self.extras = {'ip': self.ipaddr,
 
                       'username': self.username,
 
                       'action': self.action,
 
                       'repository': self.repo_name}
 
        extras = {'ip': ipaddr,
 
                  'username': username,
 
                  'action': action,
 
                  'repository': repo_name}
 

	
 
        #===================================================================
 
        # GIT REQUEST HANDLING
 
        #===================================================================
 
        self.basepath = self.config['base_path']
 
        self.repo_path = os.path.join(self.basepath, self.repo_name)
 
        #quick check if that dir exists...
 
        if check_repo_fast(self.repo_name, self.basepath):
 

	
 
        repo_path = safe_str(os.path.join(self.basepath, repo_name))
 
        log.debug('Repository path is %s' % repo_path)
 

	
 
        # quick check if that dir exists...
 
        if check_repo_fast(repo_name, self.basepath):
 
            return HTTPNotFound()(environ, start_response)
 

	
 
        try:
 
            app = self.__make_app()
 
        except:
 
            #invalidate cache on push
 
            if action == 'push':
 
                self.__invalidate_cache(repo_name)
 

	
 
            app = self.__make_app(repo_name, repo_path)
 
            return app(environ, start_response)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            return HTTPInternalServerError()(environ, start_response)
 

	
 
        #invalidate cache on push
 
        if self.action == 'push':
 
            self.__invalidate_cache(self.repo_name)
 
    def __make_app(self, repo_name, repo_path):
 
        """
 
        Make an wsgi application using dulserver
 
        
 
        :param repo_name: name of the repository
 
        :param repo_path: full path to the repository
 
        """
 

	
 
        return app(environ, start_response)
 

	
 
    def __make_app(self):
 
        _d = {'/' + self.repo_name: Repo(self.repo_path)}
 
        _d = {'/' + repo_name: Repo(repo_path)}
 
        backend = dulserver.DictBackend(_d)
 
        gitserve = HTTPGitApplication(backend)
 

	
 
        return gitserve
 

	
 
    def __check_permission(self, action, user, repo_name):
 
        """Checks permissions using action (push/pull) user and repository
 
        """
 
        Checks permissions using action (push/pull) user and repository
 
        name
 

	
 
        :param action: push or pull action
 
        :param user: user instance
 
        :param repo_name: repository name
 
        """
 
        if action == 'push':
 
            if not HasPermissionAnyMiddleware('repository.write',
 
                                              'repository.admin')(user,
 
                                                                  repo_name):
 
                return False
 

	
 
        else:
 
            #any other action need at least read permission
 
            if not HasPermissionAnyMiddleware('repository.read',
 
                                              'repository.write',
 
                                              'repository.admin')(user,
 
                                                                  repo_name):
 
                return False
 

	
 
        return True
 

	
 
    def __get_repository(self, environ):
 
        """Get's repository name out of PATH_INFO header
 
        """
 
        Get's repository name out of PATH_INFO header
 

	
 
        :param environ: environ where PATH_INFO is stored
 
        """
 
        try:
 
            repo_name = '/'.join(environ['PATH_INFO'].split('/')[1:])
 
            if repo_name.endswith('/'):
 
                repo_name = repo_name.rstrip('/')
 
        except:
 
            log.error(traceback.format_exc())
 
            raise
 
        repo_name = repo_name.split('/')[0]
 
        return repo_name
 

	
 
    def __get_user(self, username):
 
        return UserModel().get_by_username(username, cache=True)
 

	
 
    def __get_action(self, environ):
 
        """Maps git request commands into a pull or push command.
 

	
 
        :param environ:
 
        """
 
        service = environ['QUERY_STRING'].split('=')
 
        if len(service) > 1:
 
            service_cmd = service[1]
 
            mapping = {'git-receive-pack': 'push',
 
                       'git-upload-pack': 'pull',
 
                       }
 

	
 
            return mapping.get(service_cmd,
 
                               service_cmd if service_cmd else 'other')
 
        else:
 
            return 'other'
 

	
 
    def __invalidate_cache(self, repo_name):
 
        """we know that some change was made to repositories and we should
 
        invalidate the cache to see the changes right away but only for
 
        push requests"""
 
        invalidate_cache('get_repo_cached_%s' % repo_name)
 

	
0 comments (0 inline, 0 general)