Changeset - 6c381371d106
[Not reviewed]
default
0 3 0
Mads Kiilerich - 6 years ago 2019-12-23 00:56:45
mads@kiilerich.com
Grafted from: 989d0d9d0607
py3: fix non-ASCII URLs - decode unicode correctly before passing them to controllers as unicode strings

This is needed for supporting localized repo path names in the path of URLs.

Some references:

https://www.python.org/dev/peps/pep-0333/#unicode-issues
https://bugs.python.org/issue16679
http://lucumr.pocoo.org/2010/5/25/wsgi-on-python-3/
https://bugs.launchpad.net/pecan/+bug/1451842
https://github.com/tipabu/eventlet/commit/a5a7751b013fe99b6d30acbca79e819770e9ae5d
3 files changed with 30 insertions and 5 deletions:
0 comments (0 inline, 0 general)
kallithea/config/routing.py
Show inline comments
 
@@ -19,14 +19,34 @@ may take precedent over the more generic
 
refer to the routes manual at http://routes.groovie.org/docs/
 
"""
 

	
 
from routes import Mapper
 
import routes
 
from tg import request
 

	
 
from kallithea.lib.utils2 import safe_str
 

	
 

	
 
# prefix for non repository related links needs to be prefixed with `/`
 
ADMIN_PREFIX = '/_admin'
 

	
 

	
 
class Mapper(routes.Mapper):
    """
    Subclassed Mapper with routematch patched to decode "unicode" str url to
    *real* unicode str before applying matches and invoking controller methods.
    """

    def routematch(self, url=None, environ=None):
        """
        routematch that also decodes url from "fake bytes" to a real unicode
        string before matching and invoking controllers.

        In Python 3 WSGI, the path is a str whose code points each stand for
        one raw byte. Encoding it as latin1 recovers those bytes, and safe_str
        then turns them into a properly decoded string.

        url is normally the PATH_INFO value that the caller already pulled out
        of environ. If it is omitted, PATH_INFO is looked up in environ here so
        that it gets the same decoding instead of failing on None.
        The result of the parent class routematch is returned unchanged.
        """
        # Process url like get_path_info does ... but PATH_INFO has usually
        # already been retrieved from environ and is passed, so - let's just
        # use that instead.
        if url is None and environ:
            # No explicit url: take the raw path from environ (if present) so
            # it is decoded the same way as an explicitly passed one.
            url = environ.get('PATH_INFO')
        if url is not None:
            url = safe_str(url.encode('latin1'))
        # A url that is still None is handed on as-is, leaving it to the
        # parent class to report the missing url.
        return super().routematch(url=url, environ=environ)
 

	
 

	
 
def make_map(config):
 
    """Create, configure and return the routes Mapper"""
 
    rmap = Mapper(directory=config['paths']['controllers'],
kallithea/lib/base.py
Show inline comments
 
@@ -97,12 +97,17 @@ def _get_ip_addr(environ):
 

	
 

	
 
def get_path_info(environ):
 
    """Return unicode PATH_INFO from environ ... using tg.original_request if available.
 
    """Return PATH_INFO from environ ... using tg.original_request if available.
 

	
 
    In Python 3 WSGI, PATH_INFO is a unicode str, but kind of contains encoded
 
    bytes. The code points are guaranteed to only use the lower 8 bits, and
 
    encoding the string with the 1:1 encoding latin1 will give the
 
    corresponding byte string ... which then can be decoded to proper unicode.
 
    """
 
    org_req = environ.get('tg.original_request')
 
    if org_req is not None:
 
        environ = org_req.environ
 
    return safe_str(environ['PATH_INFO'])
 
    return safe_str(environ['PATH_INFO'].encode('latin1'))
 

	
 

	
 
def log_in_user(user, remember, is_external_auth, ip_addr):
kallithea/lib/middleware/permanent_repo_url.py
Show inline comments
 
@@ -33,9 +33,9 @@ class PermanentRepoUrl(object):
 
    def __call__(self, environ, start_response):
 
        # Extract path_info as get_path_info does, but do it explicitly because
 
        # we also have to do the reverse operation when patching it back in
 
        path_info = safe_str(environ['PATH_INFO'])
 
        path_info = safe_str(environ['PATH_INFO'].encode('latin1'))
 
        if path_info.startswith('/'): # it must
 
            path_info = '/' + fix_repo_id_name(path_info[1:])
 
            environ['PATH_INFO'] = safe_bytes(path_info)
 
            environ['PATH_INFO'] = safe_bytes(path_info).decode('latin1')
 

	
 
        return self.application(environ, start_response)
0 comments (0 inline, 0 general)