Changeset - 563708f2275d
[Not reviewed]
default
0 2 0
Mads Kiilerich - 6 years ago 2019-11-29 11:47:32
mads@kiilerich.com
Grafted from: 3ce63212133f
tests: add some doctests for urlify and markup
2 files changed with 6 insertions and 0 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
@@ -969,192 +969,194 @@ def fancy_file_stats(stats):
 
        MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE
 

	
 
    a, d = stats['added'], stats['deleted']
 
    width = 100
 

	
 
    if stats['binary']:
 
        # binary mode
 
        lbl = ''
 
        bin_op = 1
 

	
 
        if BIN_FILENODE in stats['ops']:
 
            lbl = 'bin+'
 

	
 
        if NEW_FILENODE in stats['ops']:
 
            lbl += _('new file')
 
            bin_op = NEW_FILENODE
 
        elif MOD_FILENODE in stats['ops']:
 
            lbl += _('mod')
 
            bin_op = MOD_FILENODE
 
        elif DEL_FILENODE in stats['ops']:
 
            lbl += _('del')
 
            bin_op = DEL_FILENODE
 
        elif RENAMED_FILENODE in stats['ops']:
 
            lbl += _('rename')
 
            bin_op = RENAMED_FILENODE
 

	
 
        # chmod can go with other operations
 
        if CHMOD_FILENODE in stats['ops']:
 
            _org_lbl = _('chmod')
 
            lbl += _org_lbl if lbl.endswith('+') else '+%s' % _org_lbl
 

	
 
        #import ipdb;ipdb.set_trace()
 
        b_d = '<div class="bin bin%s progress-bar" style="width:100%%">%s</div>' % (bin_op, lbl)
 
        b_a = '<div class="bin bin1" style="width:0%"></div>'
 
        return literal('<div style="width:%spx" class="progress">%s%s</div>' % (width, b_a, b_d))
 

	
 
    t = stats['added'] + stats['deleted']
 
    unit = float(width) / (t or 1)
 

	
 
    # needs > 9% of width to be visible or 0 to be hidden
 
    a_p = max(9, unit * a) if a > 0 else 0
 
    d_p = max(9, unit * d) if d > 0 else 0
 
    p_sum = a_p + d_p
 

	
 
    if p_sum > width:
 
        # adjust the percentage to be == 100% since we adjusted to 9
 
        if a_p > d_p:
 
            a_p = a_p - (p_sum - width)
 
        else:
 
            d_p = d_p - (p_sum - width)
 

	
 
    a_v = a if a > 0 else ''
 
    d_v = d if d > 0 else ''
 

	
 
    d_a = '<div class="added progress-bar" style="width:%s%%">%s</div>' % (
 
        a_p, a_v
 
    )
 
    d_d = '<div class="deleted progress-bar" style="width:%s%%">%s</div>' % (
 
        d_p, d_v
 
    )
 
    return literal('<div class="progress" style="width:%spx">%s%s</div>' % (width, d_a, d_d))
 

	
 

	
 
_URLIFY_RE = re.compile(r'''
 
# URL markup
 
(?P<url>%s) |
 
# @mention markup
 
(?P<mention>%s) |
 
# Changeset hash markup
 
(?<!\w|[-_])
 
  (?P<hash>[0-9a-f]{12,40})
 
(?!\w|[-_]) |
 
# Markup of *bold text*
 
(?:
 
  (?:^|(?<=\s))
 
  (?P<bold> [*] (?!\s) [^*\n]* (?<!\s) [*] )
 
  (?![*\w])
 
) |
 
# "Stylize" markup
 
\[see\ \=&gt;\ *(?P<seen>[a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\] |
 
\[license\ \=&gt;\ *(?P<license>[a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\] |
 
\[(?P<tagtype>requires|recommends|conflicts|base)\ \=&gt;\ *(?P<tagvalue>[a-zA-Z0-9\-\/]*)\] |
 
\[(?:lang|language)\ \=&gt;\ *(?P<lang>[a-zA-Z\-\/\#\+]*)\] |
 
\[(?P<tag>[a-z]+)\]
 
''' % (url_re.pattern, MENTIONS_REGEX.pattern),
 
    re.VERBOSE | re.MULTILINE | re.IGNORECASE)
 

	
 

	
 
def urlify_text(s, repo_name=None, link_=None, truncate=None, stylize=False, truncatef=truncate):
 
    """
 
    Parses given text message and make literal html with markup.
 
    The text will be truncated to the specified length.
 
    Hashes are turned into changeset links to specified repository.
 
    URLs links to what they say.
 
    Issues are linked to given issue-server.
 
    If link_ is provided, all text not already linking somewhere will link there.
 
    >>> urlify_text("Urlify http://example.com/ and 'https://example.com' *and* <b>markup/b>")
 
    literal('Urlify <a href="http://example.com/">http://example.com/</a> and &#39;<a href="https://example.com&apos">https://example.com&apos</a>; <b>*and*</b> &lt;b&gt;markup/b&gt;')
 
    """
 

	
 
    def _replace(match_obj):
 
        url = match_obj.group('url')
 
        if url is not None:
 
            return '<a href="%(url)s">%(url)s</a>' % {'url': url}
 
        mention = match_obj.group('mention')
 
        if mention is not None:
 
            return '<b>%s</b>' % mention
 
        hash_ = match_obj.group('hash')
 
        if hash_ is not None and repo_name is not None:
 
            from kallithea.config.routing import url  # doh, we need to re-import url to mock it later
 
            return '<a class="changeset_hash" href="%(url)s">%(hash)s</a>' % {
 
                 'url': url('changeset_home', repo_name=repo_name, revision=hash_),
 
                 'hash': hash_,
 
                }
 
        bold = match_obj.group('bold')
 
        if bold is not None:
 
            return '<b>*%s*</b>' % _urlify(bold[1:-1])
 
        if stylize:
 
            seen = match_obj.group('seen')
 
            if seen:
 
                return '<div class="label label-meta" data-tag="see">see =&gt; %s</div>' % seen
 
            license = match_obj.group('license')
 
            if license:
 
                return '<div class="label label-meta" data-tag="license"><a href="http://www.opensource.org/licenses/%s">%s</a></div>' % (license, license)
 
            tagtype = match_obj.group('tagtype')
 
            if tagtype:
 
                tagvalue = match_obj.group('tagvalue')
 
                return '<div class="label label-meta" data-tag="%s">%s =&gt; <a href="/%s">%s</a></div>' % (tagtype, tagtype, tagvalue, tagvalue)
 
            lang = match_obj.group('lang')
 
            if lang:
 
                return '<div class="label label-meta" data-tag="lang">%s</div>' % lang
 
            tag = match_obj.group('tag')
 
            if tag:
 
                return '<div class="label label-meta" data-tag="%s">%s</div>' % (tag, tag)
 
        return match_obj.group(0)
 

	
 
    def _urlify(s):
 
        """
 
        Extract urls from text and make html links out of them
 
        """
 
        return _URLIFY_RE.sub(_replace, s)
 

	
 
    if truncate is None:
 
        s = s.rstrip()
 
    else:
 
        s = truncatef(s, truncate, whole_word=True)
 
    s = html_escape(s)
 
    s = _urlify(s)
 
    if repo_name is not None:
 
        s = urlify_issues(s, repo_name)
 
    if link_ is not None:
 
        # make href around everything that isn't a href already
 
        s = linkify_others(s, link_)
 
    s = s.replace('\r\n', '<br/>').replace('\n', '<br/>')
 
    # Turn HTML5 into more valid HTML4 as required by some mail readers.
 
    # (This is not done in one step in html_escape, because character codes like
 
    # &#123; risk to be seen as an issue reference due to the presence of '#'.)
 
    s = s.replace("&apos;", "&#39;")
 
    return literal(s)
 

	
 

	
 
def linkify_others(t, l):
 
    """Add a default link to html with links.
 
    HTML doesn't allow nesting of links, so the outer link must be broken up
 
    in pieces and give space for other links.
 
    """
 
    urls = re.compile(r'(\<a.*?\<\/a\>)',)
 
    links = []
 
    for e in urls.split(t):
 
        if e.strip() and not urls.match(e):
 
            links.append('<a class="message-link" href="%s">%s</a>' % (l, e))
 
        else:
 
            links.append(e)
 

	
 
    return ''.join(links)
 

	
 

	
 
# Global variable that will hold the actual urlify_issues function body.
 
# Will be set on first use when the global configuration has been read.
 
_urlify_issues_f = None
 

	
 

	
 
def urlify_issues(newtext, repo_name):
 
    """Urlify issue references according to .ini configuration"""
 
    global _urlify_issues_f
 
    if _urlify_issues_f is None:
 
        from kallithea import CONFIG
 
        from kallithea.model.db import URL_SEP
 
        assert CONFIG['sqlalchemy.url'] # make sure config has been loaded
 

	
 
        # Build chain of urlify functions, starting with not doing any transformation
 
        tmp_urlify_issues_f = lambda s: s
 

	
 
        issue_pat_re = re.compile(r'issue_pat(.*)')
kallithea/lib/markup_renderer.py
Show inline comments
 
@@ -57,192 +57,196 @@ class MarkupRenderer(object):
 
        from a markup language
 

	
 
        filename can be also explicitly a renderer name
 
        """
 

	
 
        if cls.MARKDOWN_PAT.findall(filename):
 
            return cls.markdown
 
        elif cls.RST_PAT.findall(filename):
 
            return cls.rst
 
        elif cls.PLAIN_PAT.findall(filename):
 
            return cls.rst
 
        return cls.plain
 

	
 
    @classmethod
 
    def _flavored_markdown(cls, text):
 
        """
 
        Github style flavored markdown
 

	
 
        :param text:
 
        """
 
        from hashlib import md5
 

	
 
        # Extract pre blocks.
 
        extractions = {}
 

	
 
        def pre_extraction_callback(matchobj):
 
            digest = md5(matchobj.group(0)).hexdigest()
 
            extractions[digest] = matchobj.group(0)
 
            return "{gfm-extraction-%s}" % digest
 
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
 
        text = re.sub(pattern, pre_extraction_callback, text)
 

	
 
        # Prevent foo_bar_baz from ending up with an italic word in the middle.
 
        def italic_callback(matchobj):
 
            s = matchobj.group(0)
 
            if list(s).count('_') >= 2:
 
                return s.replace('_', r'\_')
 
            return s
 
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
 

	
 
        # In very clear cases, let newlines become <br /> tags.
 
        def newline_callback(matchobj):
 
            if len(matchobj.group(1)) == 1:
 
                return matchobj.group(0).rstrip() + '  \n'
 
            else:
 
                return matchobj.group(0)
 
        pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
 
        text = re.sub(pattern, newline_callback, text)
 

	
 
        # Insert pre block extractions.
 
        def pre_insert_callback(matchobj):
 
            return '\n\n' + extractions[matchobj.group(1)]
 
        text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}',
 
                      pre_insert_callback, text)
 

	
 
        return text
 

	
 
    @classmethod
 
    def render(cls, source, filename=None):
 
        """
 
        Renders a given filename using detected renderer
 
        it detects renderers based on file extension or mimetype.
 
        At last it will just do a simple html replacing new lines with <br/>
 

	
 
        >>> MarkupRenderer.render('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''', '.md')
 
        '<p><img id="a" src="http://example.com/test.jpg" style="color: red;"></p>'
 
        >>> MarkupRenderer.render('''<img class="c d" src="file://localhost/test.jpg">''', 'b.mkd')
 
        '<p><img class="c d"></p>'
 
        >>> MarkupRenderer.render('''<a href="foo">foo</a>''', 'c.mkdn')
 
        '<p><a href="foo">foo</a></p>'
 
        >>> MarkupRenderer.render('''<script>alert(1)</script>''', 'd.mdown')
 
        '&lt;script&gt;alert(1)&lt;/script&gt;'
 
        >>> MarkupRenderer.render('''<div onclick="alert(2)">yo</div>''', 'markdown')
 
        '<div>yo</div>'
 
        >>> MarkupRenderer.render('''<a href="javascript:alert(3)">yo</a>''', 'md')
 
        '<p><a>yo</a></p>'
 
        """
 

	
 
        renderer = cls._detect_renderer(source, filename)
 
        readme_data = renderer(source)
 
        # Allow most HTML, while preventing XSS issues:
 
        # no <script> tags, no onclick attributes, no javascript
 
        # "protocol", and also limit styling to prevent defacing.
 
        return bleach.clean(readme_data,
 
            tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
 
                  'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
 
                  'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
 
                  'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
 
                  'thead', 'tr', 'ul'],
 
            attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
 
            styles=['color'],
 
            protocols=['http', 'https', 'mailto'],
 
            )
 

	
 
    @classmethod
 
    def plain(cls, source, universal_newline=True):
 
        """
 
        >>> MarkupRenderer.plain('https://example.com/')
 
        '<br /><a href="https://example.com/">https://example.com/</a>'
 
        """
 
        source = safe_str(source)
 
        if universal_newline:
 
            newline = '\n'
 
            source = newline.join(source.splitlines())
 

	
 
        def url_func(match_obj):
 
            url_full = match_obj.group(0)
 
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 
        source = url_re.sub(url_func, source)
 
        return '<br />' + source.replace("\n", '<br />')
 

	
 
    @classmethod
 
    def markdown(cls, source, safe=True, flavored=False):
 
        """
 
        Convert Markdown (possibly GitHub Flavored) to INSECURE HTML, possibly
 
        with "safe" fall-back to plaintext. Output from this method should be sanitized before use.
 

	
 
        >>> MarkupRenderer.markdown('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''')
 
        '<p><img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg"></p>'
 
        >>> MarkupRenderer.markdown('''<img class="c d" src="file://localhost/test.jpg">''')
 
        '<p><img class="c d" src="file://localhost/test.jpg"></p>'
 
        >>> MarkupRenderer.markdown('''<a href="foo">foo</a>''')
 
        '<p><a href="foo">foo</a></p>'
 
        >>> MarkupRenderer.markdown('''<script>alert(1)</script>''')
 
        '<script>alert(1)</script>'
 
        >>> MarkupRenderer.markdown('''<div onclick="alert(2)">yo</div>''')
 
        '<div onclick="alert(2)">yo</div>'
 
        >>> MarkupRenderer.markdown('''<a href="javascript:alert(3)">yo</a>''')
 
        '<p><a href="javascript:alert(3)">yo</a></p>'
 
        >>> MarkupRenderer.markdown('''## Foo''')
 
        '<h2>Foo</h2>'
 
        >>> print(MarkupRenderer.markdown('''
 
        ...     #!/bin/bash
 
        ...     echo "hello"
 
        ... '''))
 
        <table class="code-highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1
 
        2</pre></div></td><td class="code"><div class="code-highlight"><pre><span></span><span class="ch">#!/bin/bash</span>
 
        <span class="nb">echo</span> <span class="s2">&quot;hello&quot;</span>
 
        </pre></div>
 
        </td></tr></table>
 
        """
 
        source = safe_str(source)
 
        try:
 
            if flavored:
 
                source = cls._flavored_markdown(source)
 
            return markdown_mod.markdown(
 
                source,
 
                extensions=['markdown.extensions.codehilite', 'markdown.extensions.extra'],
 
                extension_configs={'markdown.extensions.codehilite': {'css_class': 'code-highlight'}})
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            if safe:
 
                log.debug('Falling back to render in plain mode')
 
                return cls.plain(source)
 
            else:
 
                raise
 

	
 
    @classmethod
 
    def rst(cls, source, safe=True):
 
        source = safe_str(source)
 
        try:
 
            from docutils.core import publish_parts
 
            from docutils.parsers.rst import directives
 
            docutils_settings = dict([(alias, None) for alias in
 
                                cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
 

	
 
            docutils_settings.update({'input_encoding': 'unicode',
 
                                      'report_level': 4})
 

	
 
            for k, v in docutils_settings.items():
 
                directives.register_directive(k, v)
 

	
 
            parts = publish_parts(source=source,
 
                                  writer_name="html4css1",
 
                                  settings_overrides=docutils_settings)
 

	
 
            return parts['html_title'] + parts["fragment"]
 
        except ImportError:
 
            log.warning('Install docutils to use this function')
 
            return cls.plain(source)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            if safe:
 
                log.debug('Falling back to render in plain mode')
 
                return cls.plain(source)
 
            else:
 
                raise
 

	
 
    @classmethod
 
    def rst_with_mentions(cls, source):
 

	
 
        def wrapp(match_obj):
 
            uname = match_obj.groups()[0]
 
            return r'\ **@%(uname)s**\ ' % {'uname': uname}
 
        mention_hl = MENTIONS_REGEX.sub(wrapp, source).strip()
 
        return cls.rst(mention_hl)
0 comments (0 inline, 0 general)