Changeset - f91844b26269
[Not reviewed]
default
0 2 0
Thomas De Schampheleire - 8 years ago 2018-02-14 09:12:17
thomas.de_schampheleire@nokia.com
lib: fix detection of ' as issue reference

Commit 494c793cc160 changed HTML escaping to please HTML 4 email readers.
The HTML entity ''' was replaced by '''.
Unfortunately, the pound character '#' is often used to mark issue
references, like 'bug #56'. While this depends on the issue patterns
actually configured, this pattern is so common that we cannot expect users
to set their issue_pat regular expressions such that '{' is not
matched.

Instead, keep the original ''' replacement at first in method html_escape,
but introduce a final step that just replaces ''' with '''.

The order of replacement in urlify_text then changes from:
html_escape (to HTML4)
urlify_issues
to
html_escape (to HTML5)
urlify_issues
make HTML5 more like HTML4

Test coverage show the problem case being solved.
2 files changed with 8 insertions and 6 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
@@ -76,25 +76,25 @@ def canonical_hostname():
 
        return parts[1].split('/', 1)[0]
 

	
 

	
 
def html_escape(s):
 
    """Return string with all html escaped.
 
    This is also safe for javascript in html but not necessarily correct.
 
    """
 
    return (s
 
        .replace('&', '&')
 
        .replace(">", ">")
 
        .replace("<", "&lt;")
 
        .replace('"', "&quot;")
 
        .replace("'", "&#39;") # some mail readers use HTML 4 and doesn't support &apos;
 
        .replace("'", "&apos;") # Note: this is HTML5 not HTML4 and might not work in mails
 
        )
 

	
 
def js(value):
 
    """Convert Python value to the corresponding JavaScript representation.
 

	
 
    This is necessary to safely insert arbitrary values into HTML <script>
 
    sections e.g. using Mako template expression substitution.
 

	
 
    Note: Rather than using this function, it's preferable to avoid the
 
    insertion of values into HTML <script> sections altogether. Instead,
 
    data should (to the extent possible) be passed to JavaScript using
 
    data attributes or AJAX calls, eliminating the need for JS specific
 
@@ -1083,24 +1083,28 @@ def urlify_text(s, repo_name=None, link_
 
    if truncate is None:
 
        s = s.rstrip()
 
    else:
 
        s = truncatef(s, truncate, whole_word=True)
 
    s = html_escape(s)
 
    s = _urlify(s)
 
    if repo_name is not None:
 
        s = urlify_issues(s, repo_name)
 
    if link_ is not None:
 
        # make href around everything that isn't a href already
 
        s = linkify_others(s, link_)
 
    s = s.replace('\r\n', '<br/>').replace('\n', '<br/>')
 
    # Turn HTML5 into more valid HTML4 as required by some mail readers.
 
    # (This is not done in one step in html_escape, because character codes like
 
    # &#123; risk to be seen as an issue reference due to the presence of '#'.)
 
    s = s.replace("&apos;", "&#39;")
 
    return literal(s)
 

	
 

	
 
def linkify_others(t, l):
 
    """Add a default link to html with links.
 
    HTML doesn't allow nesting of links, so the outer link must be broken up
 
    in pieces and give space for other links.
 
    """
 
    urls = re.compile(r'(\<a.*?\<\/a\>)',)
 
    links = []
 
    for e in urls.split(t):
 
        if e.strip() and not urls.match(e):
kallithea/tests/other/test_libs.py
Show inline comments
 
@@ -372,26 +372,25 @@ class TestLibs(TestController):
 
       "The first * must come after whitespace and not be followed by whitespace, "
 
       "contain anything but * and newline until the next *, "
 
       "which must not come after whitespace "
 
       "and not be followed by * or alphanumerical *characters*.",
 
       """We support * markup for <b>*bold*</b> markup of <b>*single or multiple*</b> words, """
 
       """<b>*a bit <b>@like</b> <a href="http://slack.com">http://slack.com</a>*</b>. """
 
       """The first * must come after whitespace and not be followed by whitespace, """
 
       """contain anything but * and newline until the next *, """
 
       """which must not come after whitespace """
 
       """and not be followed by * or alphanumerical <b>*characters*</b>.""",
 
       "-"),
 
      ("HTML escaping: <abc> 'single' \"double\" &pointer",
 
       # problem: ' is encoded as &#39; which however is interpreted as #39 and expanded to a issue link
 
       """HTML escaping: &lt;abc&gt; &<a class="issue-tracker-link" href="https://issues.example.com/repo_name/issue/39">#39</a>;single&<a class="issue-tracker-link" href="https://issues.example.com/repo_name/issue/39">#39</a>; &quot;double&quot; &amp;pointer""",
 
       "HTML escaping: &lt;abc&gt; &#39;single&#39; &quot;double&quot; &amp;pointer",
 
       "-"),
 
      # tags are covered by test_tag_extractor
 
    ])
 
    def test_urlify_test(self, sample, expected, url_):
 
        expected = self._quick_url(expected,
 
                                   tmpl="""<a href="%s">%s</a>""", url_=url_)
 
        fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
 
        with mock.patch('kallithea.config.routing.url', fake_url):
 
            from kallithea.lib.helpers import urlify_text
 
            assert urlify_text(sample, 'repo_name', stylize=True) == expected
 

	
 
    @parametrize('sample,expected', [
 
@@ -411,29 +410,28 @@ class TestLibs(TestController):
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            'issue #123', 'issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            'issue#456', 'issue<a class="issue-tracker-link" href="http://foo/repo_name/issue/456">#456</a>'),
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', 'PR',
 
            'interesting issue #123', 'interesting issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">PR123</a>'),
 
        (r'BUG\d{5}', 'https://bar/{repo}/{id}', 'BUG',
 
            'silly me, I did not parenthesize the {id}, BUG12345.', 'silly me, I did not parenthesize the {id}, <a class="issue-tracker-link" href="https://bar/repo_name/">BUG</a>.'),
 
        (r'BUG(\d{5})', 'https://bar/{repo}/', 'BUG',
 
            'silly me, the URL does not contain {id}, BUG12345.', 'silly me, the URL does not contain {id}, <a class="issue-tracker-link" href="https://bar/repo_name/">BUG12345</a>.'),
 
        (r'(PR-\d+)', 'http://foo/{repo}/issue/{id}', '',
 
            'interesting issue #123, err PR-56', 'interesting issue #123, err <a class="issue-tracker-link" href="http://foo/repo_name/issue/PR-56">PR-56</a>'),
 
        # problem: ' is encoded as &#39; which however is interpreted as #39 and expanded to a issue link
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            "some 'standard' text with apostrophes", 'some &<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>;standard&<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>; text with apostrophes'),
 
            "some 'standard' text with apostrophes", 'some &#39;standard&#39; text with apostrophes'),
 
        (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
 
            "some 'standard' issue #123", 'some &<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>;standard&<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>; issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
 
            "some 'standard' issue #123", 'some &#39;standard&#39; issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
 
    ])
 
    def test_urlify_issues(self, issue_pat, issue_server, issue_prefix, sample, expected):
 
        from kallithea.lib.helpers import urlify_text
 
        config_stub = {
 
            'sqlalchemy.url': 'foo',
 
            'issue_pat': issue_pat,
 
            'issue_server_link': issue_server,
 
            'issue_prefix': issue_prefix,
 
        }
 
        # force recreation of lazy function
 
        with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):
 
            with mock.patch('kallithea.CONFIG', config_stub):
0 comments (0 inline, 0 general)