Changeset - 1fd82c81118d
[Not reviewed]
default
0 3 0
Mads Kiilerich - 10 years ago 2016-02-15 19:29:26
madski@unity3d.com
vcs: better handling of invalid email addresses: don't consider them email addresses

13da89053853 was in principle right in always returning email addresses as
string ... but unfortunately the function also returned invalid email addresses
that didn't fit into strings.

To fix this, the function is refactored to always use regexp matching of valid
email addresses ... and to be simpler. The behaviour should be the same as
before for all valid email addresses.
3 files changed with 23 insertions and 19 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/vcs/utils/__init__.py
Show inline comments
 
@@ -5,6 +5,7 @@ output. It also includes some internal h
 

	
 
import time
 
import datetime
 
import re
 

	
 

	
 
def makedate():
 
@@ -150,30 +151,33 @@ def safe_str(unicode_, to_encoding=None)
 
        return unicode_.encode(to_encoding[0], 'replace')
 

	
 

	
 
# Regex taken from http://www.regular-expressions.info/email.html
 
email_re = re.compile(
 
    r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""
 
    r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""",
 
    re.IGNORECASE)
 

	
 
def author_email(author):
 
    """
 
    returns email address of given author.
 
    If any of <,> sign are found, it fallbacks to regex findall()
 
    and returns first found result or empty string
 
    Returns email address of given author string.
 
    If author contains <> brackets, only look inside that.
 
    If any RFC valid email address is found, return that.
 
    Else, return empty string.
 

	
 
    Regex taken from http://www.regular-expressions.info/email.html
 
    """
 
    if not author:
 
        return ''
 
    import re
 
    r = author.find('>')
 
    l = author.find('<')
 

	
 
    if l == -1 or r == -1:
 
        # fallback to regex match of email out of a string
 
        email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!"""
 
                              r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z"""
 
                              r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]"""
 
                              r"""*[a-z0-9])?""", re.IGNORECASE)
 
        m = re.findall(email_re, author)
 
        return safe_str(m[0]) if m else ''
 
    l = author.find('<') + 1
 
    if l != 0:
 
        r = author.find('>', l)
 
        if r != -1:
 
            author = author[l:r]
 

	
 
    return safe_str(author[l + 1:r].strip())
 
    m = email_re.search(author)
 
    if m is None:
 
        return ''
 
    return safe_str(m.group(0))
 

	
 

	
 
def author_name(author):
kallithea/tests/vcs/test_git.py
Show inline comments
 
@@ -604,7 +604,7 @@ class GitChangesetTest(unittest.TestCase
 
        self.assertEqual('lukasz.balcerzak@python-center.pl',
 
          self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \
 
          .author_email)
 
        self.assertEqual('none@none',
 
        self.assertEqual('',
 
          self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \
 
          .author_email)
 

	
 
@@ -615,7 +615,7 @@ class GitChangesetTest(unittest.TestCase
 
        self.assertEqual('Lukasz Balcerzak',
 
          self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \
 
          .author_name)
 
        self.assertEqual('marcink',
 
        self.assertEqual('marcink none@none',
 
          self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \
 
          .author_name)
 

	
kallithea/tests/vcs/test_utils.py
Show inline comments
 
@@ -206,7 +206,7 @@ class TestAuthorExtractors(unittest.Test
 
                  ('Mr Double Name withemail@example.com ',
 
                   ('Mr Double Name', 'withemail@example.com')),
 
                  (u'John Doe <джондо à éẋàṁṗłê.ç°ḿ>',
 
                   (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '\xd0\xb4\xd0\xb6\xd0\xbe\xd0\xbd\xd0\xb4\xd0\xbe \xc3\xa0 \xc3\xa9\xe1\xba\x8b\xc3\xa0\xe1\xb9\x81\xe1\xb9\x97\xc5\x82\xc3\xaa.\xc3\xa7\xc2\xb0\xe1\xb8\xbf')),
 
                   (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '')),
 
                  ]
 

	
 
    def test_author_email(self):
0 comments (0 inline, 0 general)