kallithea Changeset - 4e565c5d7b7d

Changeset - 4e565c5d7b7d

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

Mads Kiilerich - 6 years ago 2019-12-15 20:00:38
mads@kiilerich.com

Grafted from: 78a342d95b59

lib: establish py3 compatible strategy for string handling: introducing safe_bytes and deprecating safe_str

The meaning of safe_str will change when moving to py3. All use of safe_str is
thus tech debt that we have to chop off, mostly by moving to either
safe_unicode or safe_bytes ... or dropping because we know what we are doing
and rely on the improved type safety in py3.

2 files changed with 9 insertions and 6 deletions:

kallithea/lib/utils2.py

kallithea/lib/vcs/utils/__init__.py

0 comments (0 inline, 0 general)

kallithea/lib/utils2.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.lib.utils2
 ~~~~~~~~~~~~~~~~~~~~
 Some simple helper functions.
 Note: all these functions should be independent of Kallithea classes, i.e.
 models, controllers, etc.  to prevent import cycles.
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Jan 5, 2011
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 from __future__ import print_function
 import binascii
 import datetime
 import os
 import pwd
 import re
 import time
 import urllib
 import urlobject
 from tg.i18n import ugettext as _
 from tg.i18n import ungettext
 from webhelpers2.text import collapse, remove_formatting, strip_tags
 from kallithea.lib.compat import json
 from kallithea.lib.vcs.utils import safe_str, safe_unicode  # re-export
+from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode  # re-export
 from kallithea.lib.vcs.utils.lazy import LazyProperty
 def str2bool(_str):
     """
     Translate a loosely typed value into a boolean.
     ``None`` yields False. A value that compares equal to True or False is
     handed back unchanged. Anything else is converted with ``str()``,
     stripped, lower-cased and compared against the accepted "true" spellings.
     :param _str: value to translate into boolean
     :rtype: boolean
     :returns: True when the normalized text is one of 't', 'true', 'y',
         'yes', 'on' or '1'; False for any other text
     """
     if _str is None:
         return False
     if _str in (True, False):
         # covers real booleans and anything that compares equal to them
         return _str
     normalized = str(_str).strip().lower()
     truthy_words = ('t', 'true', 'y', 'yes', 'on', '1')
     return normalized in truthy_words
 def aslist(obj, sep=None, strip=True):
     """
     Coerce obj into a list-like value.
     Strings are split on sep, lists and tuples are returned as they are,
     None becomes an empty list and any other object is wrapped in a list.
     :param obj: string, list, tuple, None or arbitrary object
     :param sep: separator handed to ``split``; None splits on whitespace
     :param strip: strip surrounding whitespace from every split item
     """
     if obj is None:
         return []
     if isinstance(obj, (list, tuple)):
         return obj
     if not isinstance(obj, basestring):
         return [obj]
     parts = obj.split(sep)
     return [part.strip() for part in parts] if strip else parts
 def convert_line_endings(line, mode):
     """
     Convert the line endings in line according to the given mode.
     Available modes are::
         0 - Unix (every '\\r\\n' and lone '\\r' becomes '\\n')
         1 - Mac  (every '\\r\\n' and lone '\\n' becomes '\\r')
         2 - DOS  (every lone '\\r' or lone '\\n' becomes '\\r\\n')
     Any other mode returns line unchanged.
     :param line: given line to convert
     :param mode: mode to convert to
     :rtype: str
     :return: converted line according to mode
     """
     # Use the str.replace method: the module level string.replace function
     # is deprecated and is gone in Python 3.
     if mode == 0:
         # '\r\n' must be collapsed first so it does not turn into two '\n'
         line = line.replace('\r\n', '\n')
         line = line.replace('\r', '\n')
     elif mode == 1:
         line = line.replace('\r\n', '\r')
         line = line.replace('\n', '\r')
     elif mode == 2:
         # only touch a '\r' not followed by '\n' and a '\n' not preceded by '\r'
         line = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", line)
     return line
 def detect_mode(line, default):
     """
     Report which kind of line break terminates line.
     :param line: str line
     :param default: value returned when line ends with no line break
     :rtype: int
     :return: 2 for DOS ('\\r\\n'), 0 for Unix ('\\n'), 1 for Mac ('\\r'),
         otherwise default
     """
     # '\r\n' has to be probed before '\n' because both end with '\n'
     for ending, mode in (('\r\n', 2), ('\n', 0), ('\r', 1)):
         if line.endswith(ending):
             return mode
     return default
 def generate_api_key():
     """
     Produce a random (presumably unique) API key.
     The key travels in URLs and in "Bearer" HTTP Authorization headers, so
     in practice it should consist of URL-safe characters only (RFC 3986):
         unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
     """
     # 20 random bytes from the OS; their hex form is certainly URL-safe.
     random_bytes = os.urandom(20)
     return binascii.hexlify(random_bytes)
 def safe_int(val, default=None):
     """
     Convert val with ``int()``, falling back to default when the conversion
     raises ValueError or TypeError.
     :param val: value to convert
     :param default: value returned when val is not convertible
     """
     try:
         return int(val)
     except (ValueError, TypeError):
         return default
 def remove_suffix(s, suffix):
     """
     Return s without the trailing suffix, or s unchanged if it does not end
     with suffix.
     :param s: string to trim
     :param suffix: ending to remove; an empty suffix leaves s untouched
     """
     # An empty suffix must be skipped: s[:-0] is s[:0] and would wipe s.
     if suffix and s.endswith(suffix):
         s = s[:-len(suffix)]
     return s
 def remove_prefix(s, prefix):
     """
     Return s without the leading prefix, or s unchanged if it does not start
     with prefix.
     :param s: string to trim
     :param prefix: beginning to remove
     """
     if not s.startswith(prefix):
         return s
     return s[len(prefix):]
 def age(prevdate, show_short_version=False, now=None):
     """
     turns a datetime into an age string.
     If show_short_version is True, then it will generate a not so accurate but shorter string,
     example: 2days ago, instead of 2 days and 23 hours ago.
     :param prevdate: datetime object
     :param show_short_version: if it should approximate the date and return a shorter string
     :rtype: unicode
     :returns: unicode words describing age
     """
     now = now or datetime.datetime.now()
     order = ['year', 'month', 'day', 'hour', 'minute', 'second']
     deltas = {}
     future = False
     if prevdate > now:
         now, prevdate = prevdate, now
         future = True
     if future:
         prevdate = prevdate.replace(microsecond=0)
     # Get date parts deltas
     from dateutil import relativedelta
     for part in order:
         d = relativedelta.relativedelta(now, prevdate)
         deltas[part] = getattr(d, part + 's')
     # Fix negative offsets (there is 1 second between 10:59:59 and 11:00:00,
     # not 1 hour, -59 minutes and -59 seconds)
     for num, length in [(5, 60), (4, 60), (3, 24)]:  # seconds, minutes, hours
         part = order[num]
         carry_part = order[num - 1]
         if deltas[part] < 0:
             deltas[part] += length
             deltas[carry_part] -= 1
     # Same thing for days except that the increment depends on the (variable)
     # number of days in the month
     month_lengths = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
     if deltas['day'] < 0:
         if prevdate.month == 2 and (prevdate.year % 4 == 0 and
             (prevdate.year % 100 != 0 or prevdate.year % 400 == 0)
         ):
             deltas['day'] += 29
         else:
             deltas['day'] += month_lengths[prevdate.month - 1]
         deltas['month'] -= 1
     if deltas['month'] < 0:
         deltas['month'] += 12
         deltas['year'] -= 1
     # In short version, we want nicer handling of ages of more than a year
     if show_short_version:
         if deltas['year'] == 1:
             # ages between 1 and 2 years: show as months
             deltas['month'] += 12
             deltas['year'] = 0
         if deltas['year'] >= 2:
             # ages 2+ years: round
             if deltas['month'] > 6:

kallithea/lib/vcs/utils/__init__.py

➞

Show inline comments

 """
 This module provides some useful tools for ``vcs`` like annotate/diff html
 output. It also includes some internal helpers.
 """
 import datetime
 import re
 import time
 def makedate():
     """
     Return the current local time as a ``(timestamp, tz_offset)`` pair.
     The offset is ``time.altzone`` when the local time reports DST (field 8
     of the struct_time is 1) and ``time.daylight`` is set, otherwise it is
     ``time.timezone``.
     """
     local_now = time.localtime()
     dst_active = local_now[8] == 1 and time.daylight
     offset = time.altzone if dst_active else time.timezone
     return time.mktime(local_now), offset
 def aslist(obj, sep=None, strip=True):
     """
     Turn obj into a list-like value.
     A string is split on sep (optionally stripping each item), a list or
     tuple is passed through untouched, None gives an empty list, and every
     other object ends up as the sole element of a new list.
     :param obj: string, list, tuple, None or arbitrary object
     :param sep: separator handed to ``split``; None splits on whitespace
     :param strip: strip surrounding whitespace from every split item
     """
     if isinstance(obj, basestring):
         pieces = obj.split(sep)
         return [piece.strip() for piece in pieces] if strip else pieces
     if isinstance(obj, (list, tuple)):
         return obj
     return [] if obj is None else [obj]
 def date_fromtimestamp(unixts, tzoffset=0):
     """
     Build a local datetime object from a unix timestamp.
     :param unixts: timestamp, anything ``float()`` accepts
     :param tzoffset: accepted for interface compatibility; not used here
     """
     seconds = float(unixts)
     return datetime.datetime.fromtimestamp(seconds)
 def safe_int(val, default=None):
     """
     Return ``int(val)``; when that raises ValueError or TypeError, return
     default instead.
     :param val: value to convert
     :param default: fallback for values that are not convertible
     """
     try:
         converted = int(val)
     except (ValueError, TypeError):
         return default
     return converted
 def safe_unicode(s):
     """
     Safe unicode function. Use a few tricks to turn s into unicode string:
     In case of UnicodeDecodeError with configured default encodings, try to
     detect encoding with chardet library, then fall back to first encoding with
     errors replaced.
     Unicode input is returned unchanged, and anything that is not a byte
     string is converted with ``unicode(s)``.
     """
     if isinstance(s, unicode):
         return s
-    if not isinstance(s, str):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
+    if not isinstance(s, bytes):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
         return unicode(s)
     from kallithea.lib.vcs.conf import settings
     # first choice: the configured encodings, tried in order
     for enc in settings.DEFAULT_ENCODINGS:
         try:
             return unicode(s, enc)
         except UnicodeDecodeError:
             pass
     # second choice: let chardet guess; a missing chardet module or a guess
     # that fails to decode is tolerated
     try:
         import chardet
         encoding = chardet.detect(s)['encoding']
         if encoding is not None:
             return s.decode(encoding)
     except (ImportError, UnicodeDecodeError):
         pass
     # last resort: first configured encoding, replacing undecodable bytes
     return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
-def safe_str(s):
+def safe_bytes(s):
     """
-    Safe str function. Use a few tricks to turn s into bytes string:
+    Safe bytes function. Use a few tricks to turn s into bytes string:
     In case of UnicodeEncodeError with configured default encodings, fall back
     to first configured encoding with errors replaced.
     Byte string input is returned unchanged; any input that is neither a byte
     string nor unicode trips the assertion.
     """
-    if isinstance(s, str):
+    if isinstance(s, bytes):
         return s
-    assert isinstance(s, unicode), s  # don't use safe_str to coerce non-strings
+    assert isinstance(s, unicode), repr(s)  # bytes cannot coerce with __str__ or handle None or int
     from kallithea.lib.vcs.conf import settings
     # try the configured encodings in order and use the first that works
     for enc in settings.DEFAULT_ENCODINGS:
         try:
             return s.encode(enc)
         except UnicodeEncodeError:
             pass
     # last resort: first configured encoding, replacing unencodable characters
     return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
 safe_str = safe_bytes  # safe_str is deprecated - it will be redefined when changing to py3
 # Regex taken from http://www.regular-expressions.info/email.html
 # It matches a dot-separated local part, an '@', and a domain made of
 # dot-separated labels that start and end with a letter or digit. The
 # character classes list lower case letters only; re.IGNORECASE makes them
 # accept upper case as well.
 email_re = re.compile(
     r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""
     r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""",
     re.IGNORECASE)
 def author_email(author):
     """
     Extract the email address from an author string.
     When the string contains a '<' followed by a '>', only the text between
     them is searched. The first match of ``email_re`` is returned, passed
     through ``safe_str``. An empty author or no match gives an empty string.
     """
     if not author:
         return ''
     start = author.find('<') + 1
     if start != 0:
         # a '<' was found; narrow the search if it has a closing '>'
         end = author.find('>', start)
         if end != -1:
             author = author[start:end]
     match = email_re.search(author)
     return '' if match is None else safe_str(match.group(0))
 def author_name(author):
     """
     Return the name part of an author string, or else the username.
     An author string without '@' is returned unchanged. Otherwise the email
     found by ``author_email`` and any '<' and '>' characters are removed,
     and the result is stripped of surrounding whitespace.
     """
     if not author:
         return ''
     if '@' not in author:
         return author
     without_email = author.replace(author_email(author), '')
     without_brackets = without_email.replace('<', '').replace('>', '')
     return without_brackets.strip()

0 comments (0 inline, 0 general)