Changeset - 1fd82c81118d
[Not reviewed]
default
0 3 0
Mads Kiilerich - 10 years ago 2016-02-15 19:29:26
madski@unity3d.com
vcs: better handling of invalid email addresses: don't consider them email addresses

13da89053853 was in principle right in always returning email adresses as
string ... but unfortunately the function also returned invalid email addresses
that didn't fit into strings.

To fix this, the function is refactored to always use regexp matching of valid
email addresses ... and to be simpler. The behaviour should be the same as
before for all valid email addresses.
3 files changed with 23 insertions and 19 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/vcs/utils/__init__.py
Show inline comments
 
"""
 
This module provides some useful tools for ``vcs`` like annotate/diff html
 
output. It also includes some internal helpers.
 
"""
 

	
 
import time
 
import datetime
 
import re
 

	
 

	
 
def makedate():
 
    lt = time.localtime()
 
    if lt[8] == 1 and time.daylight:
 
        tz = time.altzone
 
    else:
 
        tz = time.timezone
 
    return time.mktime(lt), tz
 

	
 

	
 
def aslist(obj, sep=None, strip=True):
 
    """
 
    Returns given string separated by sep as list
 

	
 
    :param obj:
 
    :param sep:
 
    :param strip:
 
    """
 
    if isinstance(obj, (basestring)):
 
        lst = obj.split(sep)
 
        if strip:
 
            lst = [v.strip() for v in lst]
 
        return lst
 
@@ -129,62 +130,65 @@ def safe_str(unicode_, to_encoding=None)
 
    if not to_encoding:
 
        from kallithea.lib.vcs.conf import settings
 
        to_encoding = settings.DEFAULT_ENCODINGS
 

	
 
    if not isinstance(to_encoding, (list, tuple)):
 
        to_encoding = [to_encoding]
 

	
 
    for enc in to_encoding:
 
        try:
 
            return unicode_.encode(enc)
 
        except UnicodeEncodeError:
 
            pass
 

	
 
    try:
 
        import chardet
 
        encoding = chardet.detect(unicode_)['encoding']
 
        if encoding is None:
 
            raise UnicodeEncodeError()
 

	
 
        return unicode_.encode(encoding)
 
    except (ImportError, UnicodeEncodeError):
 
        return unicode_.encode(to_encoding[0], 'replace')
 

	
 

	
 
# Regex taken from http://www.regular-expressions.info/email.html
 
email_re = re.compile(
 
    r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""
 
    r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""",
 
    re.IGNORECASE)
 

	
 
def author_email(author):
 
    """
 
    returns email address of given author.
 
    If any of <,> sign are found, it fallbacks to regex findall()
 
    and returns first found result or empty string
 
    Returns email address of given author string.
 
    If author contains <> brackets, only look inside that.
 
    If any RFC valid email address is found, return that.
 
    Else, return empty string.
 

	
 
    Regex taken from http://www.regular-expressions.info/email.html
 
    """
 
    if not author:
 
        return ''
 
    import re
 
    r = author.find('>')
 
    l = author.find('<')
 

	
 
    if l == -1 or r == -1:
 
        # fallback to regex match of email out of a string
 
        email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!"""
 
                              r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z"""
 
                              r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]"""
 
                              r"""*[a-z0-9])?""", re.IGNORECASE)
 
        m = re.findall(email_re, author)
 
        return safe_str(m[0]) if m else ''
 
    l = author.find('<') + 1
 
    if l != 0:
 
        r = author.find('>', l)
 
        if r != -1:
 
            author = author[l:r]
 

	
 
    return safe_str(author[l + 1:r].strip())
 
    m = email_re.search(author)
 
    if m is None:
 
        return ''
 
    return safe_str(m.group(0))
 

	
 

	
 
def author_name(author):
 
    """
 
    get name of author, or else username.
 
    It'll try to find an email in the author string and just cut it off
 
    to get the username
 
    """
 
    if not author:
 
        return ''
 
    if not '@' in author:
 
        return author
 
    return author.replace(author_email(author), '').replace('<', '') \
 
        .replace('>', '').strip()
kallithea/tests/vcs/test_git.py
Show inline comments
 
@@ -583,60 +583,60 @@ class GitChangesetTest(unittest.TestCase
 

	
 
    def test_changeset_author_is_unicode(self):
 
        for cs in self.repo:
 
            self.assertEqual(type(cs.author), unicode)
 

	
 
    def test_repo_files_content_is_unicode(self):
 
        changeset = self.repo.get_changeset()
 
        for node in changeset.get_node('/'):
 
            if node.is_file():
 
                self.assertEqual(type(node.content), unicode)
 

	
 
    def test_wrong_path(self):
 
        # There is 'setup.py' in the root dir but not there:
 
        path = 'foo/bar/setup.py'
 
        tip = self.repo.get_changeset()
 
        self.assertRaises(VCSError, tip.get_node, path)
 

	
 
    def test_author_email(self):
 
        self.assertEqual('marcin@python-blog.com',
 
          self.repo.get_changeset('c1214f7e79e02fc37156ff215cd71275450cffc3') \
 
          .author_email)
 
        self.assertEqual('lukasz.balcerzak@python-center.pl',
 
          self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \
 
          .author_email)
 
        self.assertEqual('none@none',
 
        self.assertEqual('',
 
          self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \
 
          .author_email)
 

	
 
    def test_author_username(self):
 
        self.assertEqual('Marcin Kuzminski',
 
          self.repo.get_changeset('c1214f7e79e02fc37156ff215cd71275450cffc3') \
 
          .author_name)
 
        self.assertEqual('Lukasz Balcerzak',
 
          self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \
 
          .author_name)
 
        self.assertEqual('marcink',
 
        self.assertEqual('marcink none@none',
 
          self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \
 
          .author_name)
 

	
 

	
 
class GitSpecificTest(unittest.TestCase):
 

	
 
    def test_error_is_raised_for_added_if_diff_name_status_is_wrong(self):
 
        repo = mock.MagicMock()
 
        changeset = GitChangeset(repo, 'foobar')
 
        changeset._diff_name_status = 'foobar'
 
        with self.assertRaises(VCSError):
 
            changeset.added
 

	
 
    def test_error_is_raised_for_changed_if_diff_name_status_is_wrong(self):
 
        repo = mock.MagicMock()
 
        changeset = GitChangeset(repo, 'foobar')
 
        changeset._diff_name_status = 'foobar'
 
        with self.assertRaises(VCSError):
 
            changeset.added
 

	
 
    def test_error_is_raised_for_removed_if_diff_name_status_is_wrong(self):
 
        repo = mock.MagicMock()
 
        changeset = GitChangeset(repo, 'foobar')
 
        changeset._diff_name_status = 'foobar'
kallithea/tests/vcs/test_utils.py
Show inline comments
 
@@ -185,49 +185,49 @@ class TestParseDatetime(unittest.TestCas
 

	
 

	
 
class TestAuthorExtractors(unittest.TestCase):
 
    TEST_AUTHORS = [("Username Last'o'Name <username@example.com>",
 
                    ("Username Last'o'Name", "username@example.com")),
 
                  ("Username Last'o'Name Spaces < username@example.com >",
 
                    ("Username Last'o'Name Spaces", "username@example.com")),
 
                  ("Username Last'o'Name <username.lastname@example.com>",
 
                    ("Username Last'o'Name", "username.lastname@example.com")),
 
                  ('mrf RFC_SPEC <username+lastname@example.com>',
 
                    ('mrf RFC_SPEC', 'username+lastname@example.com')),
 
                  ('username <user@example.com>',
 
                    ('username', 'user@example.com')),
 
                  ('username <user@example.com',
 
                   ('username', 'user@example.com')),
 
                  ('broken missing@example.com',
 
                   ('broken', 'missing@example.com')),
 
                  ('<justemail@example.com>',
 
                   ('', 'justemail@example.com')),
 
                  ('justname',
 
                   ('justname', '')),
 
                  ('Mr Double Name withemail@example.com ',
 
                   ('Mr Double Name', 'withemail@example.com')),
 
                  (u'John Doe <джондо à éẋàṁṗłê.ç°ḿ>',
 
                   (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '\xd0\xb4\xd0\xb6\xd0\xbe\xd0\xbd\xd0\xb4\xd0\xbe \xc3\xa0 \xc3\xa9\xe1\xba\x8b\xc3\xa0\xe1\xb9\x81\xe1\xb9\x97\xc5\x82\xc3\xaa.\xc3\xa7\xc2\xb0\xe1\xb8\xbf')),
 
                   (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '')),
 
                  ]
 

	
 
    def test_author_email(self):
 

	
 
        for test_str, result in self.TEST_AUTHORS:
 
            self.assertEqual(result[1], author_email(test_str))
 

	
 

	
 
    def test_author_name(self):
 

	
 
        for test_str, result in self.TEST_AUTHORS:
 
            self.assertEqual(result[0], author_name(test_str))
 

	
 

	
 
class TestGetDictForAttrs(unittest.TestCase):
 

	
 
    def test_returned_dict_has_expected_attrs(self):
 
        obj = mock.Mock()
 
        obj.NOT_INCLUDED = 'this key/value should not be included'
 
        obj.CONST = True
 
        obj.foo = 'aaa'
 
        obj.attrs = {'foo': 'bar'}
 
        obj.date = datetime.datetime(2010, 12, 31)
 
        obj.count = 1001
0 comments (0 inline, 0 general)