Changeset - 5746cc3b3fa5
[Not reviewed]
stable
0 2 0
Mads Kiilerich - 7 years ago 2018-10-21 17:44:06
mads@kiilerich.com
Grafted from: 35cfc37c3c9b
lib: use bleach to sanitize HTML generated from markdown - fix XSS issue when repo front page shows README.md

Reported by Bob Hogg <wombat@rwhogg.site>.
2 files changed with 21 insertions and 7 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/markup_renderer.py
Show inline comments
 
@@ -22,24 +22,25 @@ Original author and date, and relevant c
 
:created_on: Oct 27, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import re
 
import logging
 
import traceback
 

	
 
import markdown as markdown_mod
 
import bleach
 

	
 
from kallithea.lib.utils2 import safe_unicode, MENTIONS_REGEX
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
# Matches http:// and https:// URLs embedded in text; group 1 is the whole URL.
# The optional path/query part may contain punctuation such as ".,:;?!*()",
# but it must end in one of "/a-zA-Z0-9_=@#~", so punctuation directly
# following a URL is left out of the match.
url_re = re.compile(r'''(\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
                    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:;?!*()-]*[/a-zA-Z0-9_=@#~])?)''')
 

	
 
class MarkupRenderer(object):
 
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
 

	
 
@@ -133,46 +134,58 @@ class MarkupRenderer(object):
 
            newline = '\n'
 
            source = newline.join(source.splitlines())
 

	
 
        def url_func(match_obj):
 
            url_full = match_obj.groups()[0]
 
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 
        source = url_re.sub(url_func, source)
 
        return '<br />' + source.replace("\n", '<br />')
 

	
 
    @classmethod
    def markdown(cls, source, safe=True, flavored=False):
        """
        Convert Markdown (possibly GitHub Flavored) to XSS safe HTML, possibly
        with "safe" fall-back to plaintext.

        The HTML produced by the markdown library is always passed through
        ``bleach.clean`` before it is returned, so only whitelisted tags,
        attributes, CSS properties and URL protocols survive.

        :param source: markdown text; it is converted with ``safe_unicode``
            before rendering.
        :param safe: if true, any exception raised while rendering is logged
            and the result of ``cls.plain(source)`` is returned instead; if
            false, the exception is re-raised.
        :param flavored: if true, the source is first preprocessed with
            ``cls._flavored_markdown``.
        :returns: the sanitized HTML (or the plain rendering on fall-back).

        >>> MarkupRenderer.markdown('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''')
        u'<p><img id="a" src="http://example.com/test.jpg" style="color: red;"></p>'
        >>> MarkupRenderer.markdown('''<img class="c d" src="file://localhost/test.jpg">''')
        u'<p><img class="c d"></p>'
        >>> MarkupRenderer.markdown('''<a href="foo">foo</a>''')
        u'<p><a href="foo">foo</a></p>'
        >>> MarkupRenderer.markdown('''<script>alert(1)</script>''')
        u'&lt;script&gt;alert(1)&lt;/script&gt;'
        >>> MarkupRenderer.markdown('''<div onclick="alert(2)">yo</div>''')
        u'<div>yo</div>'
        >>> MarkupRenderer.markdown('''<a href="javascript:alert(3)">yo</a>''')
        u'<p><a>yo</a></p>'
        """
        source = safe_unicode(source)
        try:
            if flavored:
                source = cls._flavored_markdown(source)
            markdown_html = markdown_mod.markdown(source, ['codehilite', 'extra'])
            # Never return markdown_html directly: markdown passes raw HTML
            # from the source through, so it must be sanitized first.
            # Allow most HTML, while preventing XSS issues:
            # no <script> tags, no onclick attributes, no javascript
            # "protocol", and also limit styling to prevent defacing.
            return bleach.clean(markdown_html,
                tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
                      'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
                      'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
                      'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
                      'thead', 'tr', 'ul'],
                attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
                styles=['color'],
                protocols=['http', 'https', 'mailto'],
                )
        except Exception:
            log.error(traceback.format_exc())
            if safe:
                # Best-effort fall-back: render the source as plain text
                # rather than failing the whole page.
                log.debug('Falling back to render in plain mode')
                return cls.plain(source)
            else:
                raise
 

	
 
    @classmethod
 
    def rst(cls, source, safe=True):
 
        source = safe_unicode(source)
 
        try:
setup.py
Show inline comments
 
@@ -48,24 +48,25 @@ requirements = [
 
    "pygments>=1.5",
 
    "whoosh>=2.4.0,<=2.5.7",
 
    "celery>=2.2.5,<2.3",
 
    "babel>=0.9.6,<=1.3",
 
    "python-dateutil>=1.5.0,<2.0.0",
 
    "markdown==2.2.1",
 
    "docutils>=0.8.1,<=0.11",
 
    "mock",
 
    "URLObject==2.3.4",
 
    "Routes==1.13",
 
    "dulwich>=0.9.9,<=0.9.9",
 
    "mercurial>=2.9,<4.3",
 
    "bleach >= 3.0, < 3.1",
 
]
 

	
 
# Extra packages needed only on interpreters older than Python 2.7
# (presumably backports of modules that ship with 2.7 and later).
if sys.version_info < (2, 7):
    requirements.extend([
        "importlib==1.0.1",
        "unittest2",
        "argparse",
    ])

# py-bcrypt is only required when not running on Windows.
if not is_windows:
    requirements.append("py-bcrypt>=0.3.0,<=0.4")
 

	
 

	
 
dependency_links = [
0 comments (0 inline, 0 general)