kallithea Changeset - b1a3e6df8bae

Changeset - b1a3e6df8bae

Parent rev.

Child rev.

[Not reviewed]

default

0 1 0

Mads Kiilerich - 6 years ago 2019-12-19 22:49:43
mads@kiilerich.com

Grafted from: daf599d6bdaa

py3: update safe_* functions for how unicode pretty much has been renamed to str

safe_str and safe_unicode are thus the same thing.

1 file changed with 20 insertions and 23 deletions:

kallithea/lib/vcs/utils/__init__.py

0 comments (0 inline, 0 general)

kallithea/lib/vcs/utils/__init__.py

➞

Show inline comments

@@ @@ -61,129 +61,126 @@ def safe_int(val, default=None): @@
     """
     try:
         val = int(val)
     except (ValueError, TypeError):
         val = default
     return val
 def safe_unicode(s):
     """
-    Safe unicode function. Use a few tricks to turn s into unicode string:
+    Safe unicode str function. Use a few tricks to turn s into str:
     In case of UnicodeDecodeError with configured default encodings, try to
     detect encoding with chardet library, then fall back to first encoding with
     errors replaced.
     """
-    if isinstance(s, unicode):
+    if isinstance(s, str):
         return s
-    if not isinstance(s, bytes):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
-        return unicode(s)
+    if not isinstance(s, bytes):  # use __str__ and don't expect UnicodeDecodeError
+        return str(s)
     from kallithea.lib.vcs.conf import settings
     for enc in settings.DEFAULT_ENCODINGS:
         try:
-            return unicode(s, enc)
+            return str(s, enc)
         except UnicodeDecodeError:
             pass
     try:
         import chardet
         encoding = chardet.detect(s)['encoding']
         if encoding is not None:
             return s.decode(encoding)
     except (ImportError, UnicodeDecodeError):
         pass
-    return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
+    return str(s, settings.DEFAULT_ENCODINGS[0], 'replace')
 def safe_bytes(s):
     """
     Safe bytes function. Use a few tricks to turn s into bytes string:
     In case of UnicodeEncodeError with configured default encodings, fall back
     to first configured encoding with errors replaced.
     """
     if isinstance(s, bytes):
         return s
-    assert isinstance(s, unicode), repr(s)  # bytes cannot coerse with __str__ or handle None or int
+    assert isinstance(s, str), repr(s)  # bytes cannot coerse with __str__ or handle None or int
     from kallithea.lib.vcs.conf import settings
     for enc in settings.DEFAULT_ENCODINGS:
         try:
             return s.encode(enc)
         except UnicodeEncodeError:
             pass
     return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
-safe_str = safe_bytes  # safe_str is deprecated - it will be redefined when changing to py3
+safe_str = safe_unicode
 def ascii_bytes(s):
     """
-    Simple conversion from unicode/str to bytes, *assuming* all codepoints are
+    Simple conversion from str to bytes, *assuming* all codepoints are
     7-bit and it thus is pure ASCII.
     Will fail badly with UnicodeError on invalid input.
     This should be used where enocding and "safe" ambiguity should be avoided.
     Where strings already have been encoded in other ways but still are unicode
     string - for example to hex, base64, json, urlencoding, or are known to be
     identifiers.
     >>> ascii_bytes('a')
-    'a'
+    b'a'
     >>> ascii_bytes(u'a')
-    'a'
+    b'a'
     >>> ascii_bytes('å')
     Traceback (most recent call last):
     UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
     >>> ascii_bytes(u'å')
     UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)
     >>> ascii_bytes('å'.encode('utf8'))
     Traceback (most recent call last):
-    UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
+    AssertionError: b'\xc3\xa5'
     """
-    assert isinstance(s, (unicode, str)), repr(s)
+    assert isinstance(s, str), repr(s)
     return s.encode('ascii')
 def ascii_str(s):
     r"""
     Simple conversion from bytes to str, *assuming* all codepoints are
     7-bit and it thus is pure ASCII.
     Will fail badly with UnicodeError on invalid input.
     This should be used where enocding and "safe" ambiguity should be avoided.
     Where strings are encoded but also in other ways are known to be ASCII, and
     where a unicode string is wanted without caring about encoding. For example
     to hex, base64, urlencoding, or are known to be identifiers.
     >>> ascii_str('a')
+    >>> ascii_str(b'a')
     'a'
     >>> ascii_str(u'a')
     Traceback (most recent call last):
     AssertionError: u'a'
     >>> ascii_str('å')
     AssertionError: 'a'
     >>> ascii_str('å'.encode('utf8'))
     Traceback (most recent call last):
     UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
     >>> ascii_str(u'å')
     Traceback (most recent call last):
-    AssertionError: u'\xc3\xa5'
+    AssertionError: 'å'
     """
     assert isinstance(s, bytes), repr(s)
-    # Note: we use "encode", even though we really *should* use "decode". But
-    # we are in py2 and don't want py2, and encode is doing what we need for the
-    # ascii subset.
-    return s.encode('ascii')
+    return s.decode('ascii')
 # Regex taken from http://www.regular-expressions.info/email.html
 email_re = re.compile(
     r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""
     r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""",
     re.IGNORECASE)
 def author_email(author):
     """
     Returns email address of given author string.

0 comments (0 inline, 0 general)