kallithea Changeset - 563708f2275d

Changeset - 563708f2275d

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

Mads Kiilerich - 6 years ago 2019-11-29 11:47:32
mads@kiilerich.com

Grafted from: 3ce63212133f

tests: add some doctests for urlify and markup

2 files changed with 6 insertions and 0 deletions:

kallithea/lib/helpers.py

kallithea/lib/markup_renderer.py

0 comments (0 inline, 0 general)

kallithea/lib/helpers.py

➞

Show inline comments

@@ @@ -969,192 +969,194 @@ def fancy_file_stats(stats): @@
         MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE
     a, d = stats['added'], stats['deleted']
     width = 100
     if stats['binary']:
         # binary mode
         lbl = ''
         bin_op = 1
         if BIN_FILENODE in stats['ops']:
             lbl = 'bin+'
         if NEW_FILENODE in stats['ops']:
             lbl += _('new file')
             bin_op = NEW_FILENODE
         elif MOD_FILENODE in stats['ops']:
             lbl += _('mod')
             bin_op = MOD_FILENODE
         elif DEL_FILENODE in stats['ops']:
             lbl += _('del')
             bin_op = DEL_FILENODE
         elif RENAMED_FILENODE in stats['ops']:
             lbl += _('rename')
             bin_op = RENAMED_FILENODE
         # chmod can go with other operations
         if CHMOD_FILENODE in stats['ops']:
             _org_lbl = _('chmod')
             lbl += _org_lbl if lbl.endswith('+') else '+%s' % _org_lbl
         #import ipdb;ipdb.set_trace()
         b_d = '<div class="bin bin%s progress-bar" style="width:100%%">%s</div>' % (bin_op, lbl)
         b_a = '<div class="bin bin1" style="width:0%"></div>'
         return literal('<div style="width:%spx" class="progress">%s%s</div>' % (width, b_a, b_d))
     t = stats['added'] + stats['deleted']
     unit = float(width) / (t or 1)
     # needs > 9% of width to be visible or 0 to be hidden
     a_p = max(9, unit * a) if a > 0 else 0
     d_p = max(9, unit * d) if d > 0 else 0
     p_sum = a_p + d_p
     if p_sum > width:
         # adjust the percentage to be == 100% since we adjusted to 9
         if a_p > d_p:
             a_p = a_p - (p_sum - width)
         else:
             d_p = d_p - (p_sum - width)
     a_v = a if a > 0 else ''
     d_v = d if d > 0 else ''
     d_a = '<div class="added progress-bar" style="width:%s%%">%s</div>' % (
         a_p, a_v
+    )
     d_d = '<div class="deleted progress-bar" style="width:%s%%">%s</div>' % (
         d_p, d_v
+    )
     return literal('<div class="progress" style="width:%spx">%s%s</div>' % (width, d_a, d_d))
 # Single combined pattern that urlify_text applies (through _URLIFY_RE.sub)
 # to find everything it marks up in one pass. Each alternative has its own
 # named group so the replacement callback can tell which kind of markup
 # matched: url, mention, hash, bold, and the "stylize" groups seen, license,
 # tagtype/tagvalue, lang and tag.
 # The url and mention alternatives are interpolated from url_re.pattern and
 # MENTIONS_REGEX.pattern. The "stylize" alternatives look for "=&gt;" rather
 # than "=>" because urlify_text HTML-escapes the text before matching.
 # Compiled with VERBOSE (whitespace and "#" comments inside the pattern are
 # ignored), MULTILINE and IGNORECASE.
 _URLIFY_RE = re.compile(r'''
 # URL markup
 (?P<url>%s) |
 # @mention markup
 (?P<mention>%s) |
 # Changeset hash markup
 (?<!\w|[-_])
   (?P<hash>[0-9a-f]{12,40})
 (?!\w|[-_]) |
 # Markup of *bold text*
 (?:
   (?:^|(?<=\s))
   (?P<bold> [*] (?!\s) [^*\n]* (?<!\s) [*] )
   (?![*\w])
 ) |
 # "Stylize" markup
 \[see\ \=&gt;\ *(?P<seen>[a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\] |
 \[license\ \=&gt;\ *(?P<license>[a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\] |
 \[(?P<tagtype>requires|recommends|conflicts|base)\ \=&gt;\ *(?P<tagvalue>[a-zA-Z0-9\-\/]*)\] |
 \[(?:lang|language)\ \=&gt;\ *(?P<lang>[a-zA-Z\-\/\#\+]*)\] |
 \[(?P<tag>[a-z]+)\]
 ''' % (url_re.pattern, MENTIONS_REGEX.pattern),
     re.VERBOSE | re.MULTILINE | re.IGNORECASE)
 def urlify_text(s, repo_name=None, link_=None, truncate=None, stylize=False, truncatef=truncate):
     """
     Parse the given text message and return it as literal HTML with markup.
     The text is HTML-escaped before any markup is added.
     If truncate is given, the text is first shortened by calling
     truncatef(s, truncate, whole_word=True); otherwise only trailing
     whitespace is stripped.
     Hashes are turned into changeset links to the specified repository
     (only when repo_name is given).
     URLs link to what they say.
     Issue references are passed through urlify_issues when repo_name is given.
     If link_ is provided, all text not already linking somewhere will link there.
     If stylize is true, "[tag]"-style markers are turned into label <div>s.
     truncatef defaults to the module-level name ``truncate``; that default is
     bound when the function is defined, because the ``truncate`` parameter
     hides the module-level name inside the function body.
     NOTE(review): the example below shows an escaped quote directly after a
     URL being partly absorbed by the link ("&apos" ends up inside the href and
     the ";" is left behind) - presumably a known limitation that this doctest
     pins down; confirm before relying on it.
     >>> urlify_text("Urlify http://example.com/ and 'https://example.com' *and* <b>markup/b>")
     literal('Urlify <a href="http://example.com/">http://example.com/</a> and &#39;<a href="https://example.com&apos">https://example.com&apos</a>; <b>*and*</b> &lt;b&gt;markup/b&gt;')
     """
     def _replace(match_obj):
         # Replacement callback for _URLIFY_RE: check the named groups in
         # priority order and return the HTML for the first one that matched.
         url = match_obj.group('url')
         if url is not None:
             return '<a href="%(url)s">%(url)s</a>' % {'url': url}
         mention = match_obj.group('mention')
         if mention is not None:
             return '<b>%s</b>' % mention
         hash_ = match_obj.group('hash')
         # A hash is only linked when a repository is known; without repo_name
         # the match falls through and ends up returned unchanged below.
         if hash_ is not None and repo_name is not None:
             # This import rebinds the local name ``url``, which held the
             # (here always None) matched URL text up to this point.
             from kallithea.config.routing import url  # doh, we need to re-import url to mock it later
             return '<a class="changeset_hash" href="%(url)s">%(hash)s</a>' % {
                  'url': url('changeset_home', repo_name=repo_name, revision=hash_),
                  'hash': hash_,
+                }
         bold = match_obj.group('bold')
         if bold is not None:
             # Strip the surrounding asterisks, mark up the inner text
             # recursively, then put the asterisks back inside the <b> tag.
             return '<b>*%s*</b>' % _urlify(bold[1:-1])
         # The "[...]" label markup is only rendered when stylize is requested.
         if stylize:
             seen = match_obj.group('seen')
             if seen:
                 return '<div class="label label-meta" data-tag="see">see =&gt; %s</div>' % seen
             license = match_obj.group('license')
             if license:
                 return '<div class="label label-meta" data-tag="license"><a href="http://www.opensource.org/licenses/%s">%s</a></div>' % (license, license)
             tagtype = match_obj.group('tagtype')
             if tagtype:
                 tagvalue = match_obj.group('tagvalue')
                 return '<div class="label label-meta" data-tag="%s">%s =&gt; <a href="/%s">%s</a></div>' % (tagtype, tagtype, tagvalue, tagvalue)
             lang = match_obj.group('lang')
             if lang:
                 return '<div class="label label-meta" data-tag="lang">%s</div>' % lang
             tag = match_obj.group('tag')
             if tag:
                 return '<div class="label label-meta" data-tag="%s">%s</div>' % (tag, tag)
         # Nothing applicable matched: leave the text exactly as it was.
         return match_obj.group(0)
     def _urlify(s):
         """
         Extract urls from text and make html links out of them
         """
         return _URLIFY_RE.sub(_replace, s)
     if truncate is None:
         s = s.rstrip()
     else:
         s = truncatef(s, truncate, whole_word=True)
     # Escape first, so that everything added after this point is the only
     # real HTML in the result.
     s = html_escape(s)
     s = _urlify(s)
     if repo_name is not None:
         s = urlify_issues(s, repo_name)
     if link_ is not None:
         # make href around everything that isn't a href already
         s = linkify_others(s, link_)
     # Line breaks become <br/>; "\r\n" is handled first so it yields one tag.
     s = s.replace('\r\n', '<br/>').replace('\n', '<br/>')
     # Turn HTML5 into more valid HTML4 as required by some mail readers.
     # (This is not done in one step in html_escape, because character codes like
     # &#123; risk to be seen as an issue reference due to the presence of '#'.)
     s = s.replace("&apos;", "&#39;")
     return literal(s)
 def linkify_others(t, l):
     """Wrap the non-link parts of an HTML snippet in a default link.
     Links cannot be nested in HTML, so the text is cut at every existing
     <a ...>...</a> element: those elements are kept as they are, and each
     non-blank piece between them gets its own link to l. Pieces that are
     empty or only whitespace are passed through unchanged.
     """
     anchor_re = re.compile(r'(\<a.*?\<\/a\>)',)
     def _wrap(piece):
         # Existing anchors and blank pieces are emitted untouched.
         if not piece.strip() or anchor_re.match(piece):
             return piece
         return '<a class="message-link" href="%s">%s</a>' % (l, piece)
     # The capturing group makes split() return the anchors along with the text.
     return ''.join(_wrap(piece) for piece in anchor_re.split(t))
 # Module-level cache for the function that does the actual work of
 # urlify_issues. It starts out as None; urlify_issues checks for that and
 # builds the real function on first use, once the global configuration has
 # been read.
 _urlify_issues_f = None
 def urlify_issues(newtext, repo_name):
     """Urlify issue references according to .ini configuration"""
     global _urlify_issues_f
     if _urlify_issues_f is None:
         from kallithea import CONFIG
         from kallithea.model.db import URL_SEP
         assert CONFIG['sqlalchemy.url'] # make sure config has been loaded
         # Build chain of urlify functions, starting with not doing any transformation
         tmp_urlify_issues_f = lambda s: s
         issue_pat_re = re.compile(r'issue_pat(.*)')

kallithea/lib/markup_renderer.py

➞

Show inline comments

@@ @@ -57,192 +57,196 @@ class MarkupRenderer(object): @@
         from a markup language
         filename can be also explicitly a renderer name
         """
         if cls.MARKDOWN_PAT.findall(filename):
             return cls.markdown
         elif cls.RST_PAT.findall(filename):
             return cls.rst
         elif cls.PLAIN_PAT.findall(filename):
             return cls.rst
         return cls.plain
     @classmethod
     def _flavored_markdown(cls, text):
         """
         Github style flavored markdown
         :param text:
         """
         from hashlib import md5
         # Extract pre blocks.
         extractions = {}
         def pre_extraction_callback(matchobj):
             digest = md5(matchobj.group(0)).hexdigest()
             extractions[digest] = matchobj.group(0)
             return "{gfm-extraction-%s}" % digest
         pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
         text = re.sub(pattern, pre_extraction_callback, text)
         # Prevent foo_bar_baz from ending up with an italic word in the middle.
         def italic_callback(matchobj):
             s = matchobj.group(0)
             if list(s).count('_') >= 2:
                 return s.replace('_', r'\_')
             return s
         text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
         # In very clear cases, let newlines become <br /> tags.
         def newline_callback(matchobj):
             if len(matchobj.group(1)) == 1:
                 return matchobj.group(0).rstrip() + '  \n'
             else:
                 return matchobj.group(0)
         pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
         text = re.sub(pattern, newline_callback, text)
         # Insert pre block extractions.
         def pre_insert_callback(matchobj):
             return '\n\n' + extractions[matchobj.group(1)]
         text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}',
                       pre_insert_callback, text)
         return text
     @classmethod
     def render(cls, source, filename=None):
         """
         Render source to HTML and sanitize the result.
         The renderer is picked by cls._detect_renderer(source, filename) and
         called with source; its output is then passed through bleach.clean,
         which only keeps the tags, attributes, styles and URL protocols
         listed in the call below.
         The examples show the effect of the sanitizing: a <script> tag comes
         out escaped, and onclick attributes, javascript: and file: URLs and
         style properties other than color are dropped.
         :param source: markup text to render
         :param filename: name handed to _detect_renderer to choose a renderer
         :returns: the sanitized HTML
         >>> MarkupRenderer.render('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''', '.md')
         '<p><img id="a" src="http://example.com/test.jpg" style="color: red;"></p>'
         >>> MarkupRenderer.render('''<img class="c d" src="file://localhost/test.jpg">''', 'b.mkd')
         '<p><img class="c d"></p>'
         >>> MarkupRenderer.render('''<a href="foo">foo</a>''', 'c.mkdn')
         '<p><a href="foo">foo</a></p>'
         >>> MarkupRenderer.render('''<script>alert(1)</script>''', 'd.mdown')
         '&lt;script&gt;alert(1)&lt;/script&gt;'
         >>> MarkupRenderer.render('''<div onclick="alert(2)">yo</div>''', 'markdown')
         '<div>yo</div>'
         >>> MarkupRenderer.render('''<a href="javascript:alert(3)">yo</a>''', 'md')
         '<p><a>yo</a></p>'
         """
         renderer = cls._detect_renderer(source, filename)
         # The renderers return unsanitized HTML; nothing below may be skipped.
         readme_data = renderer(source)
         # Allow most HTML, while preventing XSS issues:
         # no <script> tags, no onclick attributes, no javascript
         # "protocol", and also limit styling to prevent defacing.
         return bleach.clean(readme_data,
             tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
                   'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
                   'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
                   'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
                   'thead', 'tr', 'ul'],
             attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
             styles=['color'],
             protocols=['http', 'https', 'mailto'],
+            )
     @classmethod
     def plain(cls, source, universal_newline=True):
         """
         Render source as simple HTML: every URL found by url_re becomes a
         link to itself and every newline becomes a <br /> tag, with one more
         <br /> placed in front of the result.
         With universal_newline set, the lines from splitlines() are first
         re-joined with a single newline character each.
         The text itself is not HTML-escaped here.
         >>> MarkupRenderer.plain('https://example.com/')
         '<br /><a href="https://example.com/">https://example.com/</a>'
         """
         text = safe_str(source)
         if universal_newline:
             text = '\n'.join(text.splitlines())
         def _as_link(match_obj):
             # The whole match is used both as link target and as link text.
             return '<a href="%(url)s">%(url)s</a>' % ({'url': match_obj.group(0)})
         linked = url_re.sub(_as_link, text)
         return '<br />' + linked.replace("\n", '<br />')
     @classmethod
     def markdown(cls, source, safe=True, flavored=False):
         """
         Convert Markdown (possibly GitHub Flavored) to INSECURE HTML, possibly
         with "safe" fall-back to plaintext. Output from this method should be sanitized before use.
         >>> MarkupRenderer.markdown('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''')
         '<p><img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg"></p>'
         >>> MarkupRenderer.markdown('''<img class="c d" src="file://localhost/test.jpg">''')
         '<p><img class="c d" src="file://localhost/test.jpg"></p>'
         >>> MarkupRenderer.markdown('''<a href="foo">foo</a>''')
         '<p><a href="foo">foo</a></p>'
         >>> MarkupRenderer.markdown('''<script>alert(1)</script>''')
         '<script>alert(1)</script>'
         >>> MarkupRenderer.markdown('''<div onclick="alert(2)">yo</div>''')
         '<div onclick="alert(2)">yo</div>'
         >>> MarkupRenderer.markdown('''<a href="javascript:alert(3)">yo</a>''')
         '<p><a href="javascript:alert(3)">yo</a></p>'
         >>> MarkupRenderer.markdown('''## Foo''')
         '<h2>Foo</h2>'
         >>> print(MarkupRenderer.markdown('''
         ...     #!/bin/bash
         ...     echo "hello"
         ... '''))
         <table class="code-highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1
 </pre></div></td><td class="code"><div class="code-highlight"><pre><span></span><span class="ch">#!/bin/bash</span>
         <span class="nb">echo</span> <span class="s2">&quot;hello&quot;</span>
         </pre></div>
         </td></tr></table>
         """
         source = safe_str(source)
         try:
             if flavored:
                 source = cls._flavored_markdown(source)
             return markdown_mod.markdown(
                 source,
                 extensions=['markdown.extensions.codehilite', 'markdown.extensions.extra'],
                 extension_configs={'markdown.extensions.codehilite': {'css_class': 'code-highlight'}})
         except Exception:
             log.error(traceback.format_exc())
             if safe:
                 log.debug('Falling back to render in plain mode')
                 return cls.plain(source)
             else:
                 raise
     @classmethod
     def rst(cls, source, safe=True):
         """
         Render reStructuredText source to HTML with docutils and return the
         html_title and fragment parts joined together.
         Falls back to cls.plain when docutils cannot be imported, or when
         rendering fails and safe is true; with safe false the rendering
         error is re-raised.
         """
         text = safe_str(source)
         try:
             from docutils.core import publish_parts
             from docutils.parsers.rst import directives
             # Each disallowed directive name is mapped to None.
             overrides = dict.fromkeys(cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES)
             overrides['input_encoding'] = 'unicode'
             overrides['report_level'] = 4
             # NOTE(review): the loop runs over the whole dict, so the two
             # settings added just above are registered as directives as
             # well - confirm that this is intended.
             for name, handler in overrides.items():
                 directives.register_directive(name, handler)
             rendered = publish_parts(source=text,
                                      writer_name="html4css1",
                                      settings_overrides=overrides)
             return rendered['html_title'] + rendered["fragment"]
         except ImportError:
             log.warning('Install docutils to use this function')
             return cls.plain(text)
         except Exception:
             log.error(traceback.format_exc())
             if not safe:
                 raise
             log.debug('Falling back to render in plain mode')
             return cls.plain(text)
     @classmethod
     def rst_with_mentions(cls, source):
         """
         Render source with cls.rst after every match of MENTIONS_REGEX has
         been rewritten to bold reStructuredText markup.
         The rewritten text is stripped of leading and trailing whitespace
         before it is rendered.
         """
         def _embolden(match_obj):
             # The first captured group is used as the user name.
             return r'\ **@%(uname)s**\ ' % {'uname': match_obj.group(1)}
         highlighted = MENTIONS_REGEX.sub(_embolden, source)
         return cls.rst(highlighted.strip())

0 comments (0 inline, 0 general)