diff --git a/kallithea/lib/vcs/utils/__init__.py b/kallithea/lib/vcs/utils/__init__.py --- a/kallithea/lib/vcs/utils/__init__.py +++ b/kallithea/lib/vcs/utils/__init__.py @@ -5,6 +5,7 @@ output. It also includes some internal h import time import datetime +import re def makedate(): @@ -150,30 +151,33 @@ def safe_str(unicode_, to_encoding=None) return unicode_.encode(to_encoding[0], 'replace') +# Regex taken from http://www.regular-expressions.info/email.html +email_re = re.compile( + r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@""" + r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""", + re.IGNORECASE) + def author_email(author): """ - returns email address of given author. - If any of <,> sign are found, it fallbacks to regex findall() - and returns first found result or empty string + Returns email address of given author string. + If author contains <> brackets, only look inside that. + If any RFC valid email address is found, return that. + Else, return empty string. - Regex taken from http://www.regular-expressions.info/email.html """ if not author: return '' - import re - r = author.find('>') - l = author.find('<') - if l == -1 or r == -1: - # fallback to regex match of email out of a string - email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!""" - r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z""" - r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]""" - r"""*[a-z0-9])?""", re.IGNORECASE) - m = re.findall(email_re, author) - return safe_str(m[0]) if m else '' + l = author.find('<') + 1 + if l != 0: + r = author.find('>', l) + if r != -1: + author = author[l:r] - return safe_str(author[l + 1:r].strip()) + m = email_re.search(author) + if m is None: + return '' + return safe_str(m.group(0)) def author_name(author): diff --git a/kallithea/tests/vcs/test_git.py b/kallithea/tests/vcs/test_git.py --- a/kallithea/tests/vcs/test_git.py +++ b/kallithea/tests/vcs/test_git.py @@ -604,7 +604,7 @@ class GitChangesetTest(unittest.TestCase self.assertEqual('lukasz.balcerzak@python-center.pl', self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \ .author_email) - self.assertEqual('none@none', + self.assertEqual('', self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \ .author_email) @@ -615,7 +615,7 @@ class GitChangesetTest(unittest.TestCase self.assertEqual('Lukasz Balcerzak', self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \ .author_name) - self.assertEqual('marcink', + self.assertEqual('marcink none@none', self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \ .author_name) diff --git a/kallithea/tests/vcs/test_utils.py b/kallithea/tests/vcs/test_utils.py --- a/kallithea/tests/vcs/test_utils.py +++ b/kallithea/tests/vcs/test_utils.py @@ -206,7 +206,7 @@ class TestAuthorExtractors(unittest.Test ('Mr Double Name withemail@example.com ', ('Mr Double Name', 'withemail@example.com')), (u'John Doe <джондо à éẋàṁṗłê.ç°ḿ>', - (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '\xd0\xb4\xd0\xb6\xd0\xbe\xd0\xbd\xd0\xb4\xd0\xbe \xc3\xa0 \xc3\xa9\xe1\xba\x8b\xc3\xa0\xe1\xb9\x81\xe1\xb9\x97\xc5\x82\xc3\xaa.\xc3\xa7\xc2\xb0\xe1\xb8\xbf')), + (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '')), ] def test_author_email(self):