Changeset - f91844b26269
[Not reviewed]
default
0 2 0
Thomas De Schampheleire - 8 years ago 2018-02-14 09:12:17
thomas.de_schampheleire@nokia.com
lib: fix detection of ' as issue reference

Commit 494c793cc160 changed HTML escaping to please HTML 4 email readers.
The HTML entity ''' was replaced by '''.
Unfortunately, the pound character '#' is often used to mark issue
references, like 'bug #56'. While this depends on the issue patterns
actually configured, this pattern is so common that we cannot expect users
to set their issue_pat regular expressions such that '{' is not
matched.

Instead, keep the original ''' replacement at first in method html_escape,
but introduce a final step that just replaces ''' with '''.

The order of replacement in urlify_text then changes from:
html_escape (to HTML4)
urlify_issues
to
html_escape (to HTML5)
urlify_issues
make HTML5 more like HTML4

Test coverage show the problem case being solved.
2 files changed with 8 insertions and 6 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
@@ -40,97 +40,97 @@ from webhelpers.text import chop_at, tru
 
from webhelpers.html.tags import _set_input_attrs, _set_id_attr, \
 
    convert_boolean_attrs, NotGiven, _make_safe_id_component
 

	
 
from kallithea.config.routing import url
 
from kallithea.lib.annotate import annotate_highlight
 
from kallithea.lib.pygmentsutils import get_custom_lexer
 
from kallithea.lib.utils2 import str2bool, safe_unicode, safe_str, \
 
    time_to_datetime, AttributeDict, safe_int, MENTIONS_REGEX
 
from kallithea.lib.markup_renderer import url_re
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def canonical_url(*args, **kargs):
 
    '''Like url(x, qualified=True), but returns url that not only is qualified
 
    but also canonical, as configured in canonical_url'''
 
    from kallithea import CONFIG
 
    try:
 
        parts = CONFIG.get('canonical_url', '').split('://', 1)
 
        kargs['host'] = parts[1].split('/', 1)[0]
 
        kargs['protocol'] = parts[0]
 
    except IndexError:
 
        kargs['qualified'] = True
 
    return url(*args, **kargs)
 

	
 

	
 
def canonical_hostname():
 
    '''Return canonical hostname of system'''
 
    from kallithea import CONFIG
 
    try:
 
        parts = CONFIG.get('canonical_url', '').split('://', 1)
 
        return parts[1].split('/', 1)[0]
 
    except IndexError:
 
        parts = url('home', qualified=True).split('://', 1)
 
        return parts[1].split('/', 1)[0]
 

	
 

	
 
def html_escape(s):
 
    """Return string with all html escaped.
 
    This is also safe for javascript in html but not necessarily correct.
 
    """
 
    return (s
 
        .replace('&', '&')
 
        .replace(">", ">")
 
        .replace("<", "&lt;")
 
        .replace('"', "&quot;")
 
        .replace("'", "&#39;") # some mail readers use HTML 4 and doesn't support &apos;
 
        .replace("'", "&apos;") # Note: this is HTML5 not HTML4 and might not work in mails
 
        )
 

	
 
def js(value):
 
    """Convert Python value to the corresponding JavaScript representation.
 

	
 
    This is necessary to safely insert arbitrary values into HTML <script>
 
    sections e.g. using Mako template expression substitution.
 

	
 
    Note: Rather than using this function, it's preferable to avoid the
 
    insertion of values into HTML <script> sections altogether. Instead,
 
    data should (to the extent possible) be passed to JavaScript using
 
    data attributes or AJAX calls, eliminating the need for JS specific
 
    escaping.
 

	
 
    Note: This is not safe for use in attributes (e.g. onclick), because
 
    quotes are not escaped.
 

	
 
    Because the rules for parsing <script> varies between XHTML (where
 
    normal rules apply for any special characters) and HTML (where
 
    entities are not interpreted, but the literal string "</script>"
 
    is forbidden), the function ensures that the result never contains
 
    '&', '<' and '>', thus making it safe in both those contexts (but
 
    not in attributes).
 
    """
 
    return literal(
 
        ('(' + json.dumps(value) + ')')
 
        # In JSON, the following can only appear in string literals.
 
        .replace('&', r'\x26')
 
        .replace('<', r'\x3c')
 
        .replace('>', r'\x3e')
 
    )
 

	
 

	
 
def jshtml(val):
 
    """HTML escapes a string value, then converts the resulting string
 
    to its corresponding JavaScript representation (see `js`).
 

	
 
    This is used when a plain-text string (possibly containing special
 
    HTML characters) will be used by a script in an HTML context (e.g.
 
    element.innerHTML or jQuery's 'html' method).
 

	
 
    If in doubt, err on the side of using `jshtml` over `js`, since it's
 
    better to escape too much than too little.
 
    """
 
    return js(escape(val))
 

	
 

	
 
def shorter(s, size=20, firstline=False, postfix='...'):
 
@@ -1047,96 +1047,100 @@ def urlify_text(s, repo_name=None, link_
 
            return '<b>%s</b>' % mention
 
        hash_ = match_obj.group('hash')
 
        if hash_ is not None and repo_name is not None:
 
            from kallithea.config.routing import url  # doh, we need to re-import url to mock it later
 
            return '<a class="changeset_hash" href="%(url)s">%(hash)s</a>' % {
 
                 'url': url('changeset_home', repo_name=repo_name, revision=hash_),
 
                 'hash': hash_,
 
                }
 
        bold = match_obj.group('bold')
 
        if bold is not None:
 
            return '<b>*%s*</b>' % _urlify(bold[1:-1])
 
        if stylize:
 
            seen = match_obj.group('seen')
 
            if seen:
 
                return '<div class="label label-meta" data-tag="see">see =&gt; %s</div>' % seen
 
            license = match_obj.group('license')
 
            if license:
 
                return '<div class="label label-meta" data-tag="license"><a href="http:\/\/www.opensource.org/licenses/%s">%s</a></div>' % (license, license)
 
            tagtype = match_obj.group('tagtype')
 
            if tagtype:
 
                tagvalue = match_obj.group('tagvalue')
 
                return '<div class="label label-meta" data-tag="%s">%s =&gt; <a href="/%s">%s</a></div>' % (tagtype, tagtype, tagvalue, tagvalue)
 
            lang = match_obj.group('lang')
 
            if lang:
 
                return '<div class="label label-meta" data-tag="lang">%s</div>' % lang
 
            tag = match_obj.group('tag')
 
            if tag:
 
                return '<div class="label label-meta" data-tag="%s">%s</div>' % (tag, tag)
 
        return match_obj.group(0)
 

	
 
    def _urlify(s):
 
        """
 
        Extract urls from text and make html links out of them
 
        """
 
        return _URLIFY_RE.sub(_replace, s)
 

	
 
    if truncate is None:
 
        s = s.rstrip()
 
    else:
 
        s = truncatef(s, truncate, whole_word=True)
 
    s = html_escape(s)
 
    s = _urlify(s)
 
    if repo_name is not None:
 
        s = urlify_issues(s, repo_name)
 
    if link_ is not None:
 
        # make href around everything that isn't a href already
 
        s = linkify_others(s, link_)
 
    s = s.replace('\r\n', '<br/>').replace('\n', '<br/>')
 
    # Turn HTML5 into more valid HTML4 as required by some mail readers.
 
    # (This is not done in one step in html_escape, because character codes like
 
    # &#123; risk to be seen as an issue reference due to the presence of '#'.)
 
    s = s.replace("&apos;", "&#39;")
 
    return literal(s)
 

	
 

	
 
def linkify_others(t, l):
 
    """Add a default link to html with links.
 
    HTML doesn't allow nesting of links, so the outer link must be broken up
 
    in pieces and give space for other links.
 
    """
 
    urls = re.compile(r'(\<a.*?\<\/a\>)',)
 
    links = []
 
    for e in urls.split(t):
 
        if e.strip() and not urls.match(e):
 
            links.append('<a class="message-link" href="%s">%s</a>' % (l, e))
 
        else:
 
            links.append(e)
 

	
 
    return ''.join(links)
 

	
 

	
 
# Global variable that will hold the actual urlify_issues function body.
 
# Will be set on first use when the global configuration has been read.
 
_urlify_issues_f = None
 

	
 

	
 
def urlify_issues(newtext, repo_name):
 
    """Urlify issue references according to .ini configuration"""
 
    global _urlify_issues_f
 
    if _urlify_issues_f is None:
 
        from kallithea import CONFIG
 
        from kallithea.model.db import URL_SEP
 
        assert CONFIG['sqlalchemy.url'] # make sure config has been loaded
 

	
 
        # Build chain of urlify functions, starting with not doing any transformation
 
        tmp_urlify_issues_f = lambda s: s
 

	
 
        issue_pat_re = re.compile(r'issue_pat(.*)')
 
        for k in CONFIG.keys():
 
            # Find all issue_pat* settings that also have corresponding server_link and prefix configuration
 
            m = issue_pat_re.match(k)
 
            if m is None:
 
                continue
 
            suffix = m.group(1)
 
            issue_pat = CONFIG.get(k)
 
            issue_server_link = CONFIG.get('issue_server_link%s' % suffix)
 
            issue_prefix = CONFIG.get('issue_prefix%s' % suffix)
 
            if issue_pat and issue_server_link and issue_prefix is not None: # issue_prefix can be empty but should be present
 
                log.debug('issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_prefix)
 
            else:
kallithea/tests/other/test_libs.py
Show inline comments
 
@@ -336,140 +336,138 @@ class TestLibs(TestController):
 
        fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
 
        with mock.patch('kallithea.config.routing.url', fake_url):
 
            from kallithea.lib.helpers import urlify_text
 
            assert urlify_text(sample, 'repo_name') == expected
 

	
 
    @parametrize('sample,expected,url_', [
 
      ("",
 
       "",
 
       ""),
 
      ("https://svn.apache.org/repos",
 
       """url[https://svn.apache.org/repos]""",
 
       "https://svn.apache.org/repos"),
 
      ("http://svn.apache.org/repos",
 
       """url[http://svn.apache.org/repos]""",
 
       "http://svn.apache.org/repos"),
 
      ("from rev a also rev http://google.com",
 
       """from rev a also rev url[http://google.com]""",
 
       "http://google.com"),
 
      ("http://imgur.com/foo.gif inline http://imgur.com/foo.gif ending http://imgur.com/foo.gif",
 
       """url[http://imgur.com/foo.gif] inline url[http://imgur.com/foo.gif] ending url[http://imgur.com/foo.gif]""",
 
       "http://imgur.com/foo.gif"),
 
      ("""Multi line
 
       https://foo.bar.example.com
 
       some text lalala""",
 
       """Multi line<br/>"""
 
       """       url[https://foo.bar.example.com]<br/>"""
 
       """       some text lalala""",
 
       "https://foo.bar.example.com"),
 
      ("@mention @someone",
 
       """<b>@mention</b> <b>@someone</b>""",
 
       ""),
 
      ("deadbeefcafe 123412341234",
 
       """<a class="changeset_hash" href="/repo_name/changeset/deadbeefcafe">deadbeefcafe</a> <a class="changeset_hash" href="/repo_name/changeset/123412341234">123412341234</a>""",
 
       ""),
 
      ("We support * markup for *bold* markup of *single or multiple* words, "
 
       "*a bit @like http://slack.com*. "
 
       "The first * must come after whitespace and not be followed by whitespace, "
 
       "contain anything but * and newline until the next *, "
 
       "which must not come after whitespace "
 
       "and not be followed by * or alphanumerical *characters*.",
 
       """We support * markup for <b>*bold*</b> markup of <b>*single or multiple*</b> words, """
 
       """<b>*a bit <b>@like</b> <a href="http://slack.com">http://slack.com</a>*</b>. """
 
       """The first * must come after whitespace and not be followed by whitespace, """
 
       """contain anything but * and newline until the next *, """
 
       """which must not come after whitespace """
 
       """and not be followed by * or alphanumerical <b>*characters*</b>.""",
 
       "-"),
 
      ("HTML escaping: <abc> 'single' \"double\" &pointer",
 
       # problem: ' is encoded as &#39; which however is interpreted as #39 and expanded to a issue link
 
       """HTML escaping: &lt;abc&gt; &<a class="issue-tracker-link" href="https://issues.example.com/repo_name/issue/39">#39</a>;single&<a class="issue-tracker-link" href="https://issues.example.com/repo_name/issue/39">#39</a>; &quot;double&quot; &amp;pointer""",
 
       "HTML escaping: &lt;abc&gt; &#39;single&#39; &quot;double&quot; &amp;pointer",
 
       "-"),
 
      # tags are covered by test_tag_extractor
 
    ])
 
    def test_urlify_test(self, sample, expected, url_):
 
        expected = self._quick_url(expected,
 
                                   tmpl="""<a href="%s">%s</a>""", url_=url_)
 
        fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
 
        with mock.patch('kallithea.config.routing.url', fake_url):
 
            from kallithea.lib.helpers import urlify_text
 
            assert urlify_text(sample, 'repo_name', stylize=True) == expected
 

	
 
    @parametrize('sample,expected', [
 
      ("deadbeefcafe @mention, and http://foo.bar/ yo",
 
       """<a class="changeset_hash" href="/repo_name/changeset/deadbeefcafe">deadbeefcafe</a>"""
 
       """<a class="message-link" href="#the-link"> <b>@mention</b>, and </a>"""
 
       """<a href="http://foo.bar/">http://foo.bar/</a>"""
 
       """<a class="message-link" href="#the-link"> yo</a>"""),
 
    ])
 
    def test_urlify_link(self, sample, expected):
 
        fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
 
        with mock.patch('kallithea.config.routing.url', fake_url):
 
            from kallithea.lib.helpers import urlify_text
 
            assert urlify_text(sample, 'repo_name', link_='#the-link') == expected
 

	
 
    @parametrize('issue_pat,issue_server,issue_prefix,sample,expected', [
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            'issue #123', 'issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            'issue#456', 'issue<a class="issue-tracker-link" href="http://foo/repo_name/issue/456">#456</a>'),
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', 'PR',
 
            'interesting issue #123', 'interesting issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">PR123</a>'),
 
        (r'BUG\d{5}', 'https://bar/{repo}/{id}', 'BUG',
 
            'silly me, I did not parenthesize the {id}, BUG12345.', 'silly me, I did not parenthesize the {id}, <a class="issue-tracker-link" href="https://bar/repo_name/">BUG</a>.'),
 
        (r'BUG(\d{5})', 'https://bar/{repo}/', 'BUG',
 
            'silly me, the URL does not contain {id}, BUG12345.', 'silly me, the URL does not contain {id}, <a class="issue-tracker-link" href="https://bar/repo_name/">BUG12345</a>.'),
 
        (r'(PR-\d+)', 'http://foo/{repo}/issue/{id}', '',
 
            'interesting issue #123, err PR-56', 'interesting issue #123, err <a class="issue-tracker-link" href="http://foo/repo_name/issue/PR-56">PR-56</a>'),
 
        # problem: ' is encoded as &#39; which however is interpreted as #39 and expanded to a issue link
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            "some 'standard' text with apostrophes", 'some &<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>;standard&<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>; text with apostrophes'),
 
            "some 'standard' text with apostrophes", 'some &#39;standard&#39; text with apostrophes'),
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            "some 'standard' issue #123", 'some &<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>;standard&<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>; issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
 
            "some 'standard' issue #123", 'some &#39;standard&#39; issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
 
    ])
 
    def test_urlify_issues(self, issue_pat, issue_server, issue_prefix, sample, expected):
 
        from kallithea.lib.helpers import urlify_text
 
        config_stub = {
 
            'sqlalchemy.url': 'foo',
 
            'issue_pat': issue_pat,
 
            'issue_server_link': issue_server,
 
            'issue_prefix': issue_prefix,
 
        }
 
        # force recreation of lazy function
 
        with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):
 
            with mock.patch('kallithea.CONFIG', config_stub):
 
                assert urlify_text(sample, 'repo_name') == expected
 

	
 
    @parametrize('sample,expected', [
 
        ('abc X5', 'abc <a class="issue-tracker-link" href="http://main/repo_name/main/5/">#5</a>'),
 
        ('abc pullrequest #6 xyz', 'abc <a class="issue-tracker-link" href="http://pr/repo_name/pr/6">PR#6</a> xyz'),
 
        ('pull request7 #', '<a class="issue-tracker-link" href="http://pr/repo_name/pr/7">PR#7</a> #'),
 
        ('look PR9 and pr #11', 'look <a class="issue-tracker-link" href="http://pr/repo_name/pr/9">PR#9</a> and <a class="issue-tracker-link" href="http://pr/repo_name/pr/11">PR#11</a>'),
 
        ('pullrequest#10 solves issue 9', '<a class="issue-tracker-link" href="http://pr/repo_name/pr/10">PR#10</a> solves <a class="issue-tracker-link" href="http://bug/repo_name/bug/9">bug#9</a>'),
 
        ('issue FAIL67', 'issue <a class="issue-tracker-link" href="http://fail/repo_name/67">67</a>'),
 
        ('issue FAILMORE89', 'issue FAILMORE89'), # no match because absent prefix
 
    ])
 
    def test_urlify_issues_multiple_issue_patterns(self, sample, expected):
 
        from kallithea.lib.helpers import urlify_text
 
        config_stub = {
 
            'sqlalchemy.url': 'foo',
 
            'issue_pat': 'X(\d+)',
 
            'issue_server_link': 'http://main/{repo}/main/{id}/',
 
            'issue_prefix': '#',
 
            'issue_pat_pr': '(?:pullrequest|pull request|PR|pr) ?#?(\d+)',
 
            'issue_server_link_pr': 'http://pr/{repo}/pr/{id}',
 
            'issue_prefix_pr': 'PR#',
 
            'issue_pat_bug': '(?:BUG|bug|issue) ?#?(\d+)',
 
            'issue_server_link_bug': 'http://bug/{repo}/bug/{id}',
 
            'issue_prefix_bug': 'bug#',
 
            'issue_pat_empty_prefix': 'FAIL(\d+)',
 
            'issue_server_link_empty_prefix': 'http://fail/{repo}/{id}',
 
            'issue_prefix_empty_prefix': '',
 
            'issue_pat_absent_prefix': 'FAILMORE(\d+)',
 
            'issue_server_link_absent_prefix': 'http://failmore/{repo}/{id}',
 
        }
 
        # force recreation of lazy function
 
        with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):
 
            with mock.patch('kallithea.CONFIG', config_stub):
 
                assert urlify_text(sample, 'repo_name') == expected
 

	
 
    @parametrize('test,expected', [
0 comments (0 inline, 0 general)