Changeset - 6e76b4892d72
[Not reviewed]
beta
0 5 0
Marcin Kuzminski - 13 years ago 2012-11-15 00:57:52
marcin@python-works.com
Implemented #647, option to pass list of default encoding used to encode to/decode from unicode
5 files changed with 59 insertions and 22 deletions:
0 comments (0 inline, 0 general)
development.ini
Show inline comments
 
@@ -55,48 +55,50 @@ full_stack = true
 
static_files = true
 
# Optional Languages
 
# en, fr, ja, pt_BR, zh_CN, zh_TW
 
lang = en
 
cache_dir = %(here)s/data
 
index_dir = %(here)s/data/index
 
app_instance_uuid = rc-develop
 
cut_off_limit = 256000
 
force_https = false
 
commit_parse_limit = 25
 
use_gravatar = true
 

	
 
## alternative_gravatar_url allows you to use your own avatar server application
 
## the following parts of the URL will be replaced
 
## {email}        user email
 
## {md5email}     md5 hash of the user email (like at gravatar.com)
 
## {size}         size of the image that is expected from the server application
 
## {scheme}       http/https from RhodeCode server
 
## {netloc}       network location from RhodeCode server
 
#alternative_gravatar_url = http://myavatarserver.com/getbyemail/{email}/{size}
 
#alternative_gravatar_url = http://myavatarserver.com/getbymd5/{md5email}?s={size}
 

	
 
container_auth_enabled = false
 
proxypass_auth_enabled = false
 
## default encoding used to convert from and to unicode
 
## can be also a comma seperated list of encoding in case of mixed encodings
 
default_encoding = utf8
 

	
 
## overwrite schema of clone url
 
## available vars:
 
## scheme - http/https
 
## user - current user
 
## pass - password 
 
## netloc - network location
 
## path - usually repo_name
 

	
 
#clone_uri = {scheme}://{user}{pass}{netloc}{path}
 

	
 
## issue tracking mapping for commits messages
 
## comment out issue_pat, issue_server, issue_prefix to enable
 

	
 
## pattern to get the issues from commit messages
 
## default one used here is #<numbers> with a regex passive group for `#`
 
## {id} will be all groups matched from this pattern
 

	
 
issue_pat = (?:\s*#)(\d+)
 

	
 
## server url to the issue, each {id} will be replaced with match
 
## fetched from the regex and {repo} is replaced with full repository name
 
## including groups {repo_name} is replaced with just name of repo
production.ini
Show inline comments
 
@@ -55,48 +55,50 @@ full_stack = true
 
static_files = true
 
# Optional Languages
 
# en, fr, ja, pt_BR, zh_CN, zh_TW
 
lang = en
 
cache_dir = %(here)s/data
 
index_dir = %(here)s/data/index
 
app_instance_uuid = rc-production
 
cut_off_limit = 256000
 
force_https = false
 
commit_parse_limit = 50
 
use_gravatar = true
 

	
 
## alternative_gravatar_url allows you to use your own avatar server application
 
## the following parts of the URL will be replaced
 
## {email}        user email
 
## {md5email}     md5 hash of the user email (like at gravatar.com)
 
## {size}         size of the image that is expected from the server application
 
## {scheme}       http/https from RhodeCode server
 
## {netloc}       network location from RhodeCode server
 
#alternative_gravatar_url = http://myavatarserver.com/getbyemail/{email}/{size}
 
#alternative_gravatar_url = http://myavatarserver.com/getbymd5/{md5email}?s={size}
 

	
 
container_auth_enabled = false
 
proxypass_auth_enabled = false
 
## default encoding used to convert from and to unicode
 
## can be also a comma seperated list of encoding in case of mixed encodings
 
default_encoding = utf8
 

	
 
## overwrite schema of clone url
 
## available vars:
 
## scheme - http/https
 
## user - current user
 
## pass - password 
 
## netloc - network location
 
## path - usually repo_name
 

	
 
#clone_uri = {scheme}://{user}{pass}{netloc}{path}
 

	
 
## issue tracking mapping for commits messages
 
## comment out issue_pat, issue_server, issue_prefix to enable
 

	
 
## pattern to get the issues from commit messages
 
## default one used here is #<numbers> with a regex passive group for `#`
 
## {id} will be all groups matched from this pattern
 

	
 
issue_pat = (?:\s*#)(\d+)
 

	
 
## server url to the issue, each {id} will be replaced with match
 
## fetched from the regex and {repo} is replaced with full repository name
 
## including groups {repo_name} is replaced with just name of repo
rhodecode/config/deployment.ini_tmpl
Show inline comments
 
@@ -55,48 +55,50 @@ full_stack = true
 
static_files = true
 
# Optional Languages
 
# en, fr, ja, pt_BR, zh_CN, zh_TW
 
lang = en
 
cache_dir = %(here)s/data
 
index_dir = %(here)s/data/index
 
app_instance_uuid = ${app_instance_uuid}
 
cut_off_limit = 256000
 
force_https = false
 
commit_parse_limit = 50
 
use_gravatar = true
 

	
 
## alternative_gravatar_url allows you to use your own avatar server application
 
## the following parts of the URL will be replaced
 
## {email}        user email
 
## {md5email}     md5 hash of the user email (like at gravatar.com)
 
## {size}         size of the image that is expected from the server application
 
## {scheme}       http/https from RhodeCode server
 
## {netloc}       network location from RhodeCode server
 
#alternative_gravatar_url = http://myavatarserver.com/getbyemail/{email}/{size}
 
#alternative_gravatar_url = http://myavatarserver.com/getbymd5/{md5email}?s={size}
 

	
 
container_auth_enabled = false
 
proxypass_auth_enabled = false
 
## default encoding used to convert from and to unicode
 
## can be also a comma seperated list of encoding in case of mixed encodings
 
default_encoding = utf8
 

	
 
## overwrite schema of clone url
 
## available vars:
 
## scheme - http/https
 
## user - current user
 
## pass - password 
 
## netloc - network location
 
## path - usually repo_name
 

	
 
#clone_uri = {scheme}://{user}{pass}{netloc}{path}
 

	
 
## issue tracking mapping for commits messages
 
## comment out issue_pat, issue_server, issue_prefix to enable
 

	
 
## pattern to get the issues from commit messages
 
## default one used here is #<numbers> with a regex passive group for `#`
 
## {id} will be all groups matched from this pattern
 

	
 
issue_pat = (?:\s*#)(\d+)
 

	
 
## server url to the issue, each {id} will be replaced with match
 
## fetched from the regex and {repo} is replaced with full repository name
 
## including groups {repo_name} is replaced with just name of repo
rhodecode/lib/utils2.py
Show inline comments
 
@@ -45,65 +45,87 @@ def __get_lem():
 
    def __clean(s):
 
        s = s.lstrip('*')
 
        s = s.lstrip('.')
 

	
 
        if s.find('[') != -1:
 
            exts = []
 
            start, stop = s.find('['), s.find(']')
 

	
 
            for suffix in s[start + 1:stop]:
 
                exts.append(s[:s.find('[')] + suffix)
 
            return map(lower, exts)
 
        else:
 
            return map(lower, [s])
 

	
 
    for lx, t in sorted(lexers.LEXERS.items()):
 
        m = map(__clean, t[-2])
 
        if m:
 
            m = reduce(lambda x, y: x + y, m)
 
            for ext in m:
 
                desc = lx.replace('Lexer', '')
 
                d[ext].append(desc)
 

	
 
    return dict(d)
 

	
 

	
 
def str2bool(_str):
 
    """
 
    returs True/False value from given string, it tries to translate the
 
    string into boolean
 

	
 
    :param _str: string value to translate into boolean
 
    :rtype: boolean
 
    :returns: boolean from given string
 
    """
 
    if _str is None:
 
        return False
 
    if _str in (True, False):
 
        return _str
 
    _str = str(_str).strip().lower()
 
    return _str in ('t', 'true', 'y', 'yes', 'on', '1')
 

	
 

	
 
def aslist(obj, sep=None, strip=True):
 
    """
 
    Returns given string separated by sep as list
 

	
 
    :param obj:
 
    :param sep:
 
    :param strip:
 
    """
 
    if isinstance(obj, (basestring)):
 
        lst = obj.split(sep)
 
        if strip:
 
            lst = [v.strip() for v in lst]
 
        return lst
 
    elif isinstance(obj, (list, tuple)):
 
        return obj
 
    elif obj is None:
 
        return []
 
    else:
 
        return [obj]
 

	
 

	
 
def convert_line_endings(line, mode):
 
    """
 
    Converts a given line  "line end" accordingly to given mode
 

	
 
    Available modes are::
 
        0 - Unix
 
        1 - Mac
 
        2 - DOS
 

	
 
    :param line: given line to convert
 
    :param mode: mode to convert to
 
    :rtype: str
 
    :return: converted line according to mode
 
    """
 
    from string import replace
 

	
 
    if mode == 0:
 
            line = replace(line, '\r\n', '\n')
 
            line = replace(line, '\r', '\n')
 
    elif mode == 1:
 
            line = replace(line, '\r\n', '\r')
 
            line = replace(line, '\n', '\r')
 
    elif mode == 2:
 
            line = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", line)
 
@@ -161,109 +183,119 @@ def safe_int(val, default=None):
 
    try:
 
        val = int(val)
 
    except ValueError:
 
        val = default
 

	
 
    return val
 

	
 

	
 
def safe_unicode(str_, from_encoding=None):
 
    """
 
    safe unicode function. Does few trick to turn str_ into unicode
 

	
 
    In case of UnicodeDecode error we try to return it with encoding detected
 
    by chardet library if it fails fallback to unicode with errors replaced
 

	
 
    :param str_: string to decode
 
    :rtype: unicode
 
    :returns: unicode object
 
    """
 
    if isinstance(str_, unicode):
 
        return str_
 

	
 
    if not from_encoding:
 
        import rhodecode
 
        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
 
        from_encoding = DEFAULT_ENCODING
 
        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
 
                                                        'utf8'), sep=',')
 
        from_encoding = DEFAULT_ENCODINGS
 

	
 
    if not isinstance(from_encoding, (list, tuple)):
 
        from_encoding = [from_encoding]
 

	
 
    try:
 
        return unicode(str_)
 
    except UnicodeDecodeError:
 
        pass
 

	
 
    try:
 
        return unicode(str_, from_encoding)
 
    except UnicodeDecodeError:
 
        pass
 
    for enc in from_encoding:
 
        try:
 
            return unicode(str_, enc)
 
        except UnicodeDecodeError:
 
            pass
 

	
 
    try:
 
        import chardet
 
        encoding = chardet.detect(str_)['encoding']
 
        if encoding is None:
 
            raise Exception()
 
        return str_.decode(encoding)
 
    except (ImportError, UnicodeDecodeError, Exception):
 
        return unicode(str_, from_encoding, 'replace')
 
        return unicode(str_, from_encoding[0], 'replace')
 

	
 

	
 
def safe_str(unicode_, to_encoding=None):
 
    """
 
    safe str function. Does few trick to turn unicode_ into string
 

	
 
    In case of UnicodeEncodeError we try to return it with encoding detected
 
    by chardet library if it fails fallback to string with errors replaced
 

	
 
    :param unicode_: unicode to encode
 
    :rtype: str
 
    :returns: str object
 
    """
 

	
 
    # if it's not basestr cast to str
 
    if not isinstance(unicode_, basestring):
 
        return str(unicode_)
 

	
 
    if isinstance(unicode_, str):
 
        return unicode_
 

	
 
    if not to_encoding:
 
        import rhodecode
 
        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
 
        to_encoding = DEFAULT_ENCODING
 
        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
 
                                                        'utf8'), sep=',')
 
        to_encoding = DEFAULT_ENCODINGS
 

	
 
    try:
 
        return unicode_.encode(to_encoding)
 
    except UnicodeEncodeError:
 
        pass
 
    if not isinstance(to_encoding, (list, tuple)):
 
        to_encoding = [to_encoding]
 

	
 
    for enc in to_encoding:
 
        try:
 
            return unicode_.encode(enc)
 
        except UnicodeEncodeError:
 
            pass
 

	
 
    try:
 
        import chardet
 
        encoding = chardet.detect(unicode_)['encoding']
 
        if encoding is None:
 
            raise UnicodeEncodeError()
 

	
 
        return unicode_.encode(encoding)
 
    except (ImportError, UnicodeEncodeError):
 
        return unicode_.encode(to_encoding, 'replace')
 
        return unicode_.encode(to_encoding[0], 'replace')
 

	
 
    return safe_str
 

	
 

	
 
def engine_from_config(configuration, prefix='sqlalchemy.', **kwargs):
 
    """
 
    Custom engine_from_config functions that makes sure we use NullPool for
 
    file based sqlite databases. This prevents errors on sqlite. This only
 
    applies to sqlalchemy versions < 0.7.0
 

	
 
    """
 
    import sqlalchemy
 
    from sqlalchemy import engine_from_config as efc
 
    import logging
 

	
 
    if int(sqlalchemy.__version__.split('.')[1]) < 7:
 

	
 
        # This solution should work for sqlalchemy < 0.7.0, and should use
 
        # proxy=TimerProxy() for execution time profiling
 

	
 
        from sqlalchemy.pool import NullPool
 
        url = configuration[prefix + 'url']
 

	
 
        if url.startswith('sqlite'):
rhodecode/lib/vcs/utils/__init__.py
Show inline comments
 
@@ -17,92 +17,91 @@ def makedate():
 

	
 

	
 
def date_fromtimestamp(unixts, tzoffset=0):
 
    """
 
    Makes a local datetime object out of unix timestamp
 

	
 
    :param unixts:
 
    :param tzoffset:
 
    """
 

	
 
    return datetime.datetime.fromtimestamp(float(unixts))
 

	
 

	
 
def safe_unicode(str_, from_encoding=None):
 
    """
 
    safe unicode function. Does few trick to turn str_ into unicode
 

	
 
    In case of UnicodeDecode error we try to return it with encoding detected
 
    by chardet library if it fails fallback to unicode with errors replaced
 

	
 
    :param str_: string to decode
 
    :rtype: unicode
 
    :returns: unicode object
 
    """
 
    from rhodecode.lib.utils2 import safe_unicode
 
    return safe_unicode(str_, from_encoding)
 

	
 
    if isinstance(str_, unicode):
 
        return str_
 
    if not from_encoding:
 
        import rhodecode
 
        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
 
        from_encoding = DEFAULT_ENCODING
 

	
 
    try:
 
        return unicode(str_)
 
    except UnicodeDecodeError:
 
        pass
 

	
 
    try:
 
        return unicode(str_, from_encoding)
 
    except UnicodeDecodeError:
 
        pass
 

	
 
    try:
 
        import chardet
 
        encoding = chardet.detect(str_)['encoding']
 
        if encoding is None:
 
            raise Exception()
 
        return str_.decode(encoding)
 
    except (ImportError, UnicodeDecodeError, Exception):
 
        return unicode(str_, from_encoding, 'replace')
 

	
 

	
 
def safe_str(unicode_, to_encoding=None):
 
    """
 
    safe str function. Does few trick to turn unicode_ into string
 

	
 
    In case of UnicodeEncodeError we try to return it with encoding detected
 
    by chardet library if it fails fallback to string with errors replaced
 

	
 
    :param unicode_: unicode to encode
 
    :rtype: str
 
    :returns: str object
 
    """
 
    from rhodecode.lib.utils2 import safe_str
 
    return safe_str(unicode_, to_encoding)
 

	
 
    if isinstance(unicode_, str):
 
        return unicode_
 
    if not to_encoding:
 
        import rhodecode
 
        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
 
        to_encoding = DEFAULT_ENCODING
 

	
 
    try:
 
        return unicode_.encode(to_encoding)
 
    except UnicodeEncodeError:
 
        pass
 

	
 
    try:
 
        import chardet
 
        encoding = chardet.detect(unicode_)['encoding']
 
        if encoding is None:
 
            raise UnicodeEncodeError()
 

	
 
        return unicode_.encode(encoding)
 
    except (ImportError, UnicodeEncodeError):
 
        return unicode_.encode(to_encoding, 'replace')
 

	
 
    return safe_str
 

	
 

	
 
def author_email(author):
 
    """
 
    returns email address of given author.
 
    If any of <,> sign are found, it fallbacks to regex findall()
 
    and returns first found result or empty string
 

	
0 comments (0 inline, 0 general)