kallithea Changeset - f91844b26269

Changeset - f91844b26269

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

Thomas De Schampheleire - 8 years ago 2018-02-14 09:12:17
thomas.de_schampheleire@nokia.com

lib: fix detection of &#39; as issue reference

Commit 494c793cc160 changed HTML escaping to please HTML 4 email readers.
The HTML entity '&apos;' was replaced by '&#39;'.
Unfortunately, the pound character '#' is often used to mark issue
references, like 'bug #56'. While this depends on the issue patterns
actually configured, this pattern is so common that we cannot expect users
to set their issue_pat regular expressions such that '&#123;' is not
matched.

Instead, keep the original '&apos;' replacement at first in method html_escape,
but introduce a final step that just replaces '&apos;' with '&#39;'.

The order of replacement in urlify_text then changes from:
html_escape (to HTML4)
urlify_issues
to
html_escape (to HTML5)
urlify_issues
make HTML5 more like HTML4

Test coverage shows the problem case being solved.

2 files changed with 8 insertions and 6 deletions:

kallithea/lib/helpers.py

kallithea/tests/other/test_libs.py

0 comments (0 inline, 0 general)

kallithea/lib/helpers.py

➞

Show inline comments

@@ @@ -40,97 +40,97 @@ from webhelpers.text import chop_at, tru @@
 from webhelpers.html.tags import _set_input_attrs, _set_id_attr, \
     convert_boolean_attrs, NotGiven, _make_safe_id_component
 from kallithea.config.routing import url
 from kallithea.lib.annotate import annotate_highlight
 from kallithea.lib.pygmentsutils import get_custom_lexer
 from kallithea.lib.utils2 import str2bool, safe_unicode, safe_str, \
     time_to_datetime, AttributeDict, safe_int, MENTIONS_REGEX
 from kallithea.lib.markup_renderer import url_re
 from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 log = logging.getLogger(__name__)
 def canonical_url(*args, **kargs):
     '''Like url(x, qualified=True), but returns url that not only is qualified
     but also canonical, as configured in canonical_url.
     All positional and keyword arguments are passed on to url(). When the
     canonical_url setting has the form protocol://host[/...], its protocol
     and host are added as the 'protocol' and 'host' keyword arguments;
     otherwise qualified=True is added instead.'''
     from kallithea import CONFIG
     try:
         # Without '://' in the setting (or with no setting at all), split()
         # gives a single element and parts[1] below raises IndexError.
         parts = CONFIG.get('canonical_url', '').split('://', 1)
         # The host is everything after '://' up to the first '/'.
         kargs['host'] = parts[1].split('/', 1)[0]
         kargs['protocol'] = parts[0]
     except IndexError:
         # No usable canonical_url configured: just ask url() for a qualified url.
         kargs['qualified'] = True
     return url(*args, **kargs)
 def canonical_hostname():
     '''Return canonical hostname of system.
     The hostname is the part of the canonical_url setting between '://' and
     the next '/'. If the setting is missing or contains no '://', the same
     part is taken from the qualified url of the 'home' route instead.'''
     from kallithea import CONFIG
     try:
         # parts[1] raises IndexError when the setting contains no '://'.
         parts = CONFIG.get('canonical_url', '').split('://', 1)
         return parts[1].split('/', 1)[0]
     except IndexError:
         # Fall back to the host of the qualified 'home' url.
         parts = url('home', qualified=True).split('://', 1)
         return parts[1].split('/', 1)[0]
 def html_escape(s):
     """Return string with all html escaped.
     This is also safe for javascript in html but not necessarily correct.
     The characters & > < " and ' are replaced by HTML entities. '&' is
     replaced first, so the ampersands of the entities added by the later
     replacements are not escaped a second time.
     """
     return (s
         .replace('&', '&amp;')
         .replace(">", "&gt;")
         .replace("<", "&lt;")
         .replace('"', "&quot;")
-        .replace("'", "&#39;") # some mail readers use HTML 4 and doesn't support &apos;
+        .replace("'", "&apos;") # Note: this is HTML5 not HTML4 and might not work in mails
+        )
 def js(value):
     """Convert Python value to the corresponding JavaScript representation.
     This is necessary to safely insert arbitrary values into HTML <script>
     sections e.g. using Mako template expression substitution.
     Note: Rather than using this function, it's preferable to avoid the
     insertion of values into HTML <script> sections altogether. Instead,
     data should (to the extent possible) be passed to JavaScript using
     data attributes or AJAX calls, eliminating the need for JS specific
     escaping.
     Note: This is not safe for use in attributes (e.g. onclick), because
     quotes are not escaped.
     Because the rules for parsing <script> varies between XHTML (where
     normal rules apply for any special characters) and HTML (where
     entities are not interpreted, but the literal string "</script>"
     is forbidden), the function ensures that the result never contains
     '&', '<' and '>', thus making it safe in both those contexts (but
     not in attributes).
     The result is the json.dumps() output of value, wrapped in parentheses
     and passed through literal().
     """
     return literal(
         ('(' + json.dumps(value) + ')')
         # In JSON, the following can only appear in string literals.
         # Each one is replaced by the \xNN escape of its character code
         # (0x26, 0x3c, 0x3e), so the raw character never appears in the output.
         .replace('&', r'\x26')
         .replace('<', r'\x3c')
         .replace('>', r'\x3e')
+    )
 def jshtml(val):
     """HTML escapes a string value, then converts the resulting string
     to its corresponding JavaScript representation (see `js`).
     This is used when a plain-text string (possibly containing special
     HTML characters) will be used by a script in an HTML context (e.g.
     element.innerHTML or jQuery's 'html' method).
     If in doubt, err on the side of using `jshtml` over `js`, since it's
     better to escape too much than too little.
     val is the plain-text value; the return value is whatever `js` returns
     for the escaped value.
     """
     # escape() is not defined in this part of the file; presumably it is the
     # HTML escaping helper imported at module level - confirm.
     return js(escape(val))
 def shorter(s, size=20, firstline=False, postfix='...'):
@@ @@ -1047,96 +1047,100 @@ def urlify_text(s, repo_name=None, link_ @@
             return '<b>%s</b>' % mention
         hash_ = match_obj.group('hash')
         if hash_ is not None and repo_name is not None:
             from kallithea.config.routing import url  # doh, we need to re-import url to mock it later
             return '<a class="changeset_hash" href="%(url)s">%(hash)s</a>' % {
                  'url': url('changeset_home', repo_name=repo_name, revision=hash_),
                  'hash': hash_,
+                }
         bold = match_obj.group('bold')
         if bold is not None:
             return '<b>*%s*</b>' % _urlify(bold[1:-1])
         if stylize:
             seen = match_obj.group('seen')
             if seen:
                 return '<div class="label label-meta" data-tag="see">see =&gt; %s</div>' % seen
             license = match_obj.group('license')
             if license:
                 return '<div class="label label-meta" data-tag="license"><a href="http:\/\/www.opensource.org/licenses/%s">%s</a></div>' % (license, license)
             tagtype = match_obj.group('tagtype')
             if tagtype:
                 tagvalue = match_obj.group('tagvalue')
                 return '<div class="label label-meta" data-tag="%s">%s =&gt; <a href="/%s">%s</a></div>' % (tagtype, tagtype, tagvalue, tagvalue)
             lang = match_obj.group('lang')
             if lang:
                 return '<div class="label label-meta" data-tag="lang">%s</div>' % lang
             tag = match_obj.group('tag')
             if tag:
                 return '<div class="label label-meta" data-tag="%s">%s</div>' % (tag, tag)
         return match_obj.group(0)
     def _urlify(s):
         """
         Extract urls from text and make html links out of them
         """
         return _URLIFY_RE.sub(_replace, s)
     if truncate is None:
         s = s.rstrip()
     else:
         s = truncatef(s, truncate, whole_word=True)
     s = html_escape(s)
     s = _urlify(s)
     if repo_name is not None:
         s = urlify_issues(s, repo_name)
     if link_ is not None:
         # make href around everything that isn't a href already
         s = linkify_others(s, link_)
     s = s.replace('\r\n', '<br/>').replace('\n', '<br/>')
     # Turn HTML5 into more valid HTML4 as required by some mail readers.
     # (This is not done in one step in html_escape, because character codes like
     # &#123; risk to be seen as an issue reference due to the presence of '#'.)
     s = s.replace("&apos;", "&#39;")
     return literal(s)
 def linkify_others(t, l):
     """Add a default link to html with links.
     HTML doesn't allow nesting of links, so the outer link must be broken up
     in pieces and give space for other links.
     t is the html text and l is the href value used for the added links.
     Returns the pieces joined back into one string.
     """
     # The group around the whole pattern makes split() keep the matched
     # <a ...>...</a> pieces in the result, between the surrounding text pieces.
     urls = re.compile(r'(\<a.*?\<\/a\>)',)
     links = []
     for e in urls.split(t):
         # Wrap text that is neither whitespace-only nor already a link.
         if e.strip() and not urls.match(e):
             # NOTE(review): l and e are inserted as-is; presumably the caller
             # has already escaped them - confirm.
             links.append('<a class="message-link" href="%s">%s</a>' % (l, e))
         else:
             # Existing links and empty/whitespace-only pieces are kept unchanged.
             links.append(e)
     return ''.join(links)
 # Global variable that will hold the actual urlify_issues function body.
 # Will be set on first use when the global configuration has been read.
 _urlify_issues_f = None
 def urlify_issues(newtext, repo_name):
     """Urlify issue references according to .ini configuration"""
     global _urlify_issues_f
     if _urlify_issues_f is None:
         from kallithea import CONFIG
         from kallithea.model.db import URL_SEP
         assert CONFIG['sqlalchemy.url'] # make sure config has been loaded
         # Build chain of urlify functions, starting with not doing any transformation
         tmp_urlify_issues_f = lambda s: s
         issue_pat_re = re.compile(r'issue_pat(.*)')
         for k in CONFIG.keys():
             # Find all issue_pat* settings that also have corresponding server_link and prefix configuration
             m = issue_pat_re.match(k)
             if m is None:
                 continue
             suffix = m.group(1)
             issue_pat = CONFIG.get(k)
             issue_server_link = CONFIG.get('issue_server_link%s' % suffix)
             issue_prefix = CONFIG.get('issue_prefix%s' % suffix)
             if issue_pat and issue_server_link and issue_prefix is not None: # issue_prefix can be empty but should be present
                 log.debug('issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_prefix)
             else:

kallithea/tests/other/test_libs.py

➞

Show inline comments

@@ @@ -336,140 +336,138 @@ class TestLibs(TestController): @@
         fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
         with mock.patch('kallithea.config.routing.url', fake_url):
             from kallithea.lib.helpers import urlify_text
             assert urlify_text(sample, 'repo_name') == expected
     # Each case is (sample, expected, url_): sample is the text given to
     # urlify_text, expected is the html it must produce. The 'url[...]'
     # placeholders in expected are presumably expanded by self._quick_url
     # using tmpl and url_ (helper not shown here - confirm).
     @parametrize('sample,expected,url_', [
       ("",
        "",
        ""),
       ("https://svn.apache.org/repos",
        """url[https://svn.apache.org/repos]""",
        "https://svn.apache.org/repos"),
       ("http://svn.apache.org/repos",
        """url[http://svn.apache.org/repos]""",
        "http://svn.apache.org/repos"),
       ("from rev a also rev http://google.com",
        """from rev a also rev url[http://google.com]""",
        "http://google.com"),
       ("http://imgur.com/foo.gif inline http://imgur.com/foo.gif ending http://imgur.com/foo.gif",
        """url[http://imgur.com/foo.gif] inline url[http://imgur.com/foo.gif] ending url[http://imgur.com/foo.gif]""",
        "http://imgur.com/foo.gif"),
       ("""Multi line
        https://foo.bar.example.com
        some text lalala""",
        """Multi line<br/>"""
        """       url[https://foo.bar.example.com]<br/>"""
        """       some text lalala""",
        "https://foo.bar.example.com"),
       ("@mention @someone",
        """<b>@mention</b> <b>@someone</b>""",
        ""),
       ("deadbeefcafe 123412341234",
        """<a class="changeset_hash" href="/repo_name/changeset/deadbeefcafe">deadbeefcafe</a> <a class="changeset_hash" href="/repo_name/changeset/123412341234">123412341234</a>""",
        ""),
       ("We support * markup for *bold* markup of *single or multiple* words, "
        "*a bit @like http://slack.com*. "
        "The first * must come after whitespace and not be followed by whitespace, "
        "contain anything but * and newline until the next *, "
        "which must not come after whitespace "
        "and not be followed by * or alphanumerical *characters*.",
        """We support * markup for <b>*bold*</b> markup of <b>*single or multiple*</b> words, """
        """<b>*a bit <b>@like</b> <a href="http://slack.com">http://slack.com</a>*</b>. """
        """The first * must come after whitespace and not be followed by whitespace, """
        """contain anything but * and newline until the next *, """
        """which must not come after whitespace """
        """and not be followed by * or alphanumerical <b>*characters*</b>.""",
        "-"),
       ("HTML escaping: <abc> 'single' \"double\" &pointer",
        # problem: ' is encoded as &#39; which however is interpreted as #39 and expanded to a issue link
        # NOTE(review): this case lists two expected strings; the first looks
        # like the value removed by this changeset and the second like its
        # replacement, with the -/+ diff markers lost - confirm.
        """HTML escaping: &lt;abc&gt; &<a class="issue-tracker-link" href="https://issues.example.com/repo_name/issue/39">#39</a>;single&<a class="issue-tracker-link" href="https://issues.example.com/repo_name/issue/39">#39</a>; &quot;double&quot; &amp;pointer""",
        "HTML escaping: &lt;abc&gt; &#39;single&#39; &quot;double&quot; &amp;pointer",
        "-"),
       # tags are covered by test_tag_extractor
     ])
     def test_urlify_test(self, sample, expected, url_):
         expected = self._quick_url(expected,
                                    tmpl="""<a href="%s">%s</a>""", url_=url_)
         # Replace the routing url generator with a fake that formats changeset
         # links from a fixed template, so the expected hrefs are predictable.
         fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
         with mock.patch('kallithea.config.routing.url', fake_url):
             from kallithea.lib.helpers import urlify_text
             assert urlify_text(sample, 'repo_name', stylize=True) == expected
     # Checks urlify_text with link_ set: text that is not already a link gets
     # wrapped in a "message-link" anchor pointing at link_, while the changeset
     # hash link and the url link are kept as separate anchors.
     @parametrize('sample,expected', [
       ("deadbeefcafe @mention, and http://foo.bar/ yo",
        """<a class="changeset_hash" href="/repo_name/changeset/deadbeefcafe">deadbeefcafe</a>"""
        """<a class="message-link" href="#the-link"> <b>@mention</b>, and </a>"""
        """<a href="http://foo.bar/">http://foo.bar/</a>"""
        """<a class="message-link" href="#the-link"> yo</a>"""),
     ])
     def test_urlify_link(self, sample, expected):
         # Replace the routing url generator with a fake that formats changeset
         # links from a fixed template, so the expected hrefs are predictable.
         fake_url = FakeUrlGenerator(changeset_home='/%(repo_name)s/changeset/%(revision)s')
         with mock.patch('kallithea.config.routing.url', fake_url):
             from kallithea.lib.helpers import urlify_text
             assert urlify_text(sample, 'repo_name', link_='#the-link') == expected
     # Each case configures a single issue pattern (issue_pat, issue_server_link,
     # issue_prefix) and checks the html urlify_text produces for sample.
     @parametrize('issue_pat,issue_server,issue_prefix,sample,expected', [
         (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
             'issue #123', 'issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
         (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
             'issue#456', 'issue<a class="issue-tracker-link" href="http://foo/repo_name/issue/456">#456</a>'),
         (r'#(\d+)', 'http://foo/{repo}/issue/{id}', 'PR',
             'interesting issue #123', 'interesting issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">PR123</a>'),
         (r'BUG\d{5}', 'https://bar/{repo}/{id}', 'BUG',
             'silly me, I did not parenthesize the {id}, BUG12345.', 'silly me, I did not parenthesize the {id}, <a class="issue-tracker-link" href="https://bar/repo_name/">BUG</a>.'),
         (r'BUG(\d{5})', 'https://bar/{repo}/', 'BUG',
             'silly me, the URL does not contain {id}, BUG12345.', 'silly me, the URL does not contain {id}, <a class="issue-tracker-link" href="https://bar/repo_name/">BUG12345</a>.'),
         (r'(PR-\d+)', 'http://foo/{repo}/issue/{id}', '',
             'interesting issue #123, err PR-56', 'interesting issue #123, err <a class="issue-tracker-link" href="http://foo/repo_name/issue/PR-56">PR-56</a>'),
         # problem: ' is encoded as &#39; which however is interpreted as #39 and expanded to a issue link
         (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
-            "some 'standard' text with apostrophes", 'some &<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>;standard&<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>; text with apostrophes'),
             "some 'standard' text with apostrophes", 'some &#39;standard&#39; text with apostrophes'),
         (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#',
-            "some 'standard' issue #123", 'some &<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>;standard&<a class="issue-tracker-link" href="http://foo/repo_name/issue/39">#39</a>; issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
             "some 'standard' issue #123", 'some &#39;standard&#39; issue <a class="issue-tracker-link" href="http://foo/repo_name/issue/123">#123</a>'),
     ])
     def test_urlify_issues(self, issue_pat, issue_server, issue_prefix, sample, expected):
         from kallithea.lib.helpers import urlify_text
         # Minimal configuration: 'sqlalchemy.url' only has to be truthy to get
         # past the "config has been loaded" assert in urlify_issues.
         config_stub = {
             'sqlalchemy.url': 'foo',
             'issue_pat': issue_pat,
             'issue_server_link': issue_server,
             'issue_prefix': issue_prefix,
+        }
         # force recreation of lazy function
         # (urlify_issues only builds its function from CONFIG while
         # _urlify_issues_f is None)
         with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):
             with mock.patch('kallithea.CONFIG', config_stub):
                 assert urlify_text(sample, 'repo_name') == expected
     # Checks urlify_text with several issue_pat* / issue_server_link* /
     # issue_prefix* groups configured at the same time (see config_stub below).
     @parametrize('sample,expected', [
         ('abc X5', 'abc <a class="issue-tracker-link" href="http://main/repo_name/main/5/">#5</a>'),
         ('abc pullrequest #6 xyz', 'abc <a class="issue-tracker-link" href="http://pr/repo_name/pr/6">PR#6</a> xyz'),
         ('pull request7 #', '<a class="issue-tracker-link" href="http://pr/repo_name/pr/7">PR#7</a> #'),
         ('look PR9 and pr #11', 'look <a class="issue-tracker-link" href="http://pr/repo_name/pr/9">PR#9</a> and <a class="issue-tracker-link" href="http://pr/repo_name/pr/11">PR#11</a>'),
         ('pullrequest#10 solves issue 9', '<a class="issue-tracker-link" href="http://pr/repo_name/pr/10">PR#10</a> solves <a class="issue-tracker-link" href="http://bug/repo_name/bug/9">bug#9</a>'),
         ('issue FAIL67', 'issue <a class="issue-tracker-link" href="http://fail/repo_name/67">67</a>'),
         ('issue FAILMORE89', 'issue FAILMORE89'), # no match because absent prefix
     ])
     def test_urlify_issues_multiple_issue_patterns(self, sample, expected):
         from kallithea.lib.helpers import urlify_text
         # NOTE(review): the patterns below are not raw strings; '\d' is kept
         # as backslash + d, but newer Python versions warn about such escapes.
         config_stub = {
             'sqlalchemy.url': 'foo',
             'issue_pat': 'X(\d+)',
             'issue_server_link': 'http://main/{repo}/main/{id}/',
             'issue_prefix': '#',
             'issue_pat_pr': '(?:pullrequest|pull request|PR|pr) ?#?(\d+)',
             'issue_server_link_pr': 'http://pr/{repo}/pr/{id}',
             'issue_prefix_pr': 'PR#',
             'issue_pat_bug': '(?:BUG|bug|issue) ?#?(\d+)',
             'issue_server_link_bug': 'http://bug/{repo}/bug/{id}',
             'issue_prefix_bug': 'bug#',
             'issue_pat_empty_prefix': 'FAIL(\d+)',
             'issue_server_link_empty_prefix': 'http://fail/{repo}/{id}',
             'issue_prefix_empty_prefix': '',
             # The last group has no 'issue_prefix_absent_prefix' key, which is
             # what the 'issue FAILMORE89' case above relies on.
             'issue_pat_absent_prefix': 'FAILMORE(\d+)',
             'issue_server_link_absent_prefix': 'http://failmore/{repo}/{id}',
+        }
         # force recreation of lazy function
         # (urlify_issues only builds its function from CONFIG while
         # _urlify_issues_f is None)
         with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):
             with mock.patch('kallithea.CONFIG', config_stub):
                 assert urlify_text(sample, 'repo_name') == expected
     @parametrize('test,expected', [

0 comments (0 inline, 0 general)