Changeset - 6c381371d106
[Not reviewed]
default
0 3 0
Mads Kiilerich - 6 years ago 2019-12-23 00:56:45
mads@kiilerich.com
Grafted from: 989d0d9d0607
py3: fix non-ASCII URLs - decode unicode correctly before passing them to controllers as unicode strings

This is needed for supporting localized repo path names in the path of URLs.

Some references:

https://www.python.org/dev/peps/pep-0333/#unicode-issues
https://bugs.python.org/issue16679
http://lucumr.pocoo.org/2010/5/25/wsgi-on-python-3/
https://bugs.launchpad.net/pecan/+bug/1451842
https://github.com/tipabu/eventlet/commit/a5a7751b013fe99b6d30acbca79e819770e9ae5d
3 files changed with 30 insertions and 5 deletions:
0 comments (0 inline, 0 general)
kallithea/config/routing.py
Show inline comments
 
@@ -16,20 +16,40 @@ Routes configuration
 

	
 
The more specific and detailed routes should be defined first so they
 
may take precedence over the more generic routes. For more information
 
refer to the routes manual at http://routes.groovie.org/docs/
 
"""
 

	
 
from routes import Mapper
 
import routes
 
from tg import request
 

	
 
from kallithea.lib.utils2 import safe_str
 

	
 

	
 
# prefix for non-repository-related links; it must start with `/`
 
ADMIN_PREFIX = '/_admin'
 

	
 

	
 
class Mapper(routes.Mapper):
    """
    Subclassed Mapper with routematch patched to decode "unicode" str url to
    *real* unicode str before applying matches and invoking controller methods.
    """

    def routematch(self, url=None, environ=None):
        """
        routematch that also decodes url from "fake bytes" to a real unicode
        string before matching and invoking controllers.

        `url` is expected to be a WSGI style str where each code point holds
        one byte of the encoded path. It is turned back into bytes with the
        1:1 latin1 encoding and then decoded with safe_str.

        If `url` is not given, PATH_INFO is taken from `environ` (when an
        environ is provided) so the same decoding is applied to it.
        NOTE(review): this mirrors the fallback the base routes.Mapper is
        understood to perform itself - confirm against the installed Routes
        version. If neither `url` nor `environ` is usable, the call is passed
        through untouched and the base class decides how to fail.
        """
        # Process url like get_path_info does ... but PATH_INFO has usually
        # already been retrieved from environ and is passed, so - let's just
        # use that instead.
        if url is None and environ:
            url = environ['PATH_INFO']
        if url is not None:
            # Avoid AttributeError from None.encode when url was omitted
            url = safe_str(url.encode('latin1'))
        return super().routematch(url=url, environ=environ)
 

	
 

	
 
def make_map(config):
 
    """Create, configure and return the routes Mapper"""
 
    rmap = Mapper(directory=config['paths']['controllers'],
 
                  always_scan=config['debug'])
 
    rmap.minimization = False
 
    rmap.explicit = False
kallithea/lib/base.py
Show inline comments
 
@@ -94,18 +94,23 @@ def _get_ip_addr(environ):
 

	
 
    ip = environ.get(def_key, '0.0.0.0')
 
    return _filter_proxy(ip)
 

	
 

	
 
def get_path_info(environ):
    """Return PATH_INFO from environ ... using tg.original_request if available.

    In Python 3 WSGI, PATH_INFO is a unicode str, but kind of contains encoded
    bytes. The code points are guaranteed to only use the lower 8 bits, and
    encoding the string with the 1:1 encoding latin1 will give the
    corresponding byte string ... which then can be decoded to proper unicode.

    `environ` is the WSGI environ mapping. If it carries a
    'tg.original_request' entry, the environ of that request object is used
    instead. The latin1-encoded bytes are passed through safe_str, and its
    result is returned.
    """
    org_req = environ.get('tg.original_request')
    if org_req is not None:
        # Prefer the environ of the original request when one is recorded
        environ = org_req.environ
    return safe_str(environ['PATH_INFO'].encode('latin1'))
 

	
 

	
 
def log_in_user(user, remember, is_external_auth, ip_addr):
 
    """
 
    Log a `User` in and update session and cookies. If `remember` is True,
 
    the session cookie is set to expire in a year; otherwise, it expires at
kallithea/lib/middleware/permanent_repo_url.py
Show inline comments
 
@@ -30,12 +30,12 @@ class PermanentRepoUrl(object):
 
        self.application = app
 
        self.config = config
 

	
 
    def __call__(self, environ, start_response):
        """Rewrite PATH_INFO in `environ` and delegate to the wrapped application.

        The path (without its leading '/') is passed through fix_repo_id_name
        and the result is written back to environ['PATH_INFO'] before
        self.application is invoked with the same `environ` and
        `start_response`. Its return value is returned unchanged.
        """
        # Extract path_info as get_path_info does, but do it explicitly because
        # we also have to do the reverse operation when patching it back in.
        # PATH_INFO is a str carrying one encoded byte per code point, so
        # latin1 gives the raw bytes back for safe_str to decode.
        path_info = safe_str(environ['PATH_INFO'].encode('latin1'))
        if path_info.startswith('/'):  # it must
            path_info = '/' + fix_repo_id_name(path_info[1:])
            # Reverse operation: back to bytes, then to the 1:1 latin1 str
            # form, so PATH_INFO never holds a bytes object.
            environ['PATH_INFO'] = safe_bytes(path_info).decode('latin1')

        return self.application(environ, start_response)
0 comments (0 inline, 0 general)