Files
@ 6e76b4892d72
Branch filter:
Location: kallithea/rhodecode/lib/vcs/utils/__init__.py
6e76b4892d72
3.6 KiB
text/x-python
Implemented #647: option to pass a list of default encodings used to encode to/decode from unicode
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | """
This module provides some useful tools for ``vcs`` like annotate/diff html
output. It also includes some internal helpers.
"""
import sys
import time
import datetime
def makedate():
    """
    Returns the current local time together with the matching UTC offset

    :rtype: tuple
    :returns: ``(timestamp, tz)`` - ``timestamp`` is ``time.mktime()`` of the
        current local time, ``tz`` is ``time.altzone`` when the DST flag of
        the local time is set and a DST zone is defined, ``time.timezone``
        otherwise
    """
    now = time.localtime()
    # item 8 of a struct_time is the tm_isdst flag
    dst_active = now[8] == 1 and time.daylight
    offset = time.altzone if dst_active else time.timezone
    return time.mktime(now), offset
def date_fromtimestamp(unixts, tzoffset=0):
    """
    Builds a naive local datetime object out of a unix timestamp

    :param unixts: unix timestamp, anything that ``float()`` accepts
    :param tzoffset: part of the signature but not used by this function
    :rtype: datetime.datetime
    :returns: result of ``datetime.datetime.fromtimestamp`` for ``unixts``
    """
    seconds = float(unixts)
    return datetime.datetime.fromtimestamp(seconds)
def safe_unicode(str_, from_encoding=None):
    """
    safe unicode function. Thin wrapper: the conversion itself is done by
    ``rhodecode.lib.utils2.safe_unicode``, both arguments are passed through
    unchanged and its result is returned as is.

    :param str_: string to decode
    :param from_encoding: encoding hint forwarded to the ``utils2``
        implementation (``None`` by default)
    :returns: whatever ``rhodecode.lib.utils2.safe_unicode`` returns
        (expected to be a unicode object)
    """
    # Imported at call time, as the original did.
    # NOTE(review): presumably deferred to avoid an import cycle between
    # rhodecode.lib.vcs and rhodecode.lib - confirm before moving it to the top.
    from rhodecode.lib.utils2 import safe_unicode as _safe_unicode
    return _safe_unicode(str_, from_encoding)
def safe_str(unicode_, to_encoding=None):
    """
    safe str function. Thin wrapper: the conversion itself is done by
    ``rhodecode.lib.utils2.safe_str``, both arguments are passed through
    unchanged and its result is returned as is.

    :param unicode_: unicode to encode
    :param to_encoding: encoding hint forwarded to the ``utils2``
        implementation (``None`` by default)
    :returns: whatever ``rhodecode.lib.utils2.safe_str`` returns
        (expected to be a str object)
    """
    # Imported at call time, as the original did.
    # NOTE(review): presumably deferred to avoid an import cycle between
    # rhodecode.lib.vcs and rhodecode.lib - confirm before moving it to the top.
    from rhodecode.lib.utils2 import safe_str as _safe_str
    return _safe_str(unicode_, to_encoding)
def author_email(author):
    """
    returns email address of given author.
    If the string contains a ``<`` followed later by a ``>``, the stripped
    text between the first ``<`` and the first ``>`` after it is returned.
    Otherwise it fallbacks to regex findall() and returns first found result
    or empty string

    Regex taken from http://www.regular-expressions.info/email.html

    :param author: author string, e.g. ``John Doe <john@example.com>``
    :rtype: str
    :returns: email address, or empty string when none could be found
    """
    import re

    start = author.find('<')
    # Search for the closing bracket only *after* the opening one: a stray
    # '>' earlier in the string would otherwise give an inverted slice and
    # silently return '' even though an email is present.
    end = author.find('>', start + 1) if start != -1 else -1
    if start == -1 or end == -1:
        # fallback to regex match of email out of a string
        email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!"""
                              r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z"""
                              r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]"""
                              r"""*[a-z0-9])?""", re.IGNORECASE)
        found = email_re.findall(author)
        return found[0] if found else ''
    return author[start + 1:end].strip()
def author_name(author):
    """
    get name of author, or else username.
    A string without ``@`` is returned untouched. Otherwise the result of
    ``author_email()`` is cut out of the string, the ``<`` and ``>`` signs
    are dropped and surrounding whitespace is stripped

    :param author: author string, e.g. ``John Doe <john@example.com>``
    :returns: author string with the email part removed
    """
    if '@' not in author:
        return author
    remainder = author.replace(author_email(author), '')
    for bracket in ('<', '>'):
        remainder = remainder.replace(bracket, '')
    return remainder.strip()
|