Changeset - 94f25a680aad
[Not reviewed]
default
0 2 0
Mads Kiilerich - 11 years ago 2014-12-15 13:47:36
madski@unity3d.com
helpers: tweak URL matching patterns - don't include trailing punctuation
2 files changed with 6 insertions and 12 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
@@ -12,97 +12,97 @@
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
Helper functions
 

	
 
Consists of functions to typically be used within templates, but also
 
available to Controllers. This module is available to both as 'h'.
 
"""
 
import random
 
import hashlib
 
import StringIO
 
import math
 
import logging
 
import re
 
import urlparse
 
import textwrap
 

	
 
from pygments.formatters.html import HtmlFormatter
 
from pygments import highlight as code_highlight
 
from pylons import url
 
from pylons.i18n.translation import _, ungettext
 
from hashlib import md5
 

	
 
from webhelpers.html import literal, HTML, escape
 
from webhelpers.html.tools import *
 
from webhelpers.html.builder import make_tag
 
from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
 
    end_form, file, form, hidden, image, javascript_link, link_to, \
 
    link_to_if, link_to_unless, ol, required_legend, select, stylesheet_link, \
 
    submit, text, password, textarea, title, ul, xml_declaration, radio
 
from webhelpers.html.tools import auto_link, button_to, highlight, \
 
    js_obfuscate, mail_to, strip_links, strip_tags, tag_re
 
from webhelpers.number import format_byte_size, format_bit_size
 
from webhelpers.pylonslib import Flash as _Flash
 
from webhelpers.pylonslib.secure_form import secure_form
 
from webhelpers.text import chop_at, collapse, convert_accented_entities, \
 
    convert_misc_entities, lchop, plural, rchop, remove_formatting, \
 
    replace_whitespace, urlify, truncate, wrap_paragraphs
 
from webhelpers.date import time_ago_in_words
 
from webhelpers.paginate import Page as _Page
 
from webhelpers.html.tags import _set_input_attrs, _set_id_attr, \
 
    convert_boolean_attrs, NotGiven, _make_safe_id_component
 

	
 
from kallithea.lib.annotate import annotate_highlight
 
from kallithea.lib.utils import repo_name_slug, get_custom_lexer
 
from kallithea.lib.utils2 import str2bool, safe_unicode, safe_str, \
 
    get_changeset_safe, datetime_to_time, time_to_datetime, AttributeDict,\
 
    safe_int
 
from kallithea.lib.markup_renderer import MarkupRenderer
 
from kallithea.lib.markup_renderer import MarkupRenderer, url_re
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 
from kallithea.config.conf import DATE_FORMAT, DATETIME_FORMAT
 
from kallithea.model.changeset_status import ChangesetStatusModel
 
from kallithea.model.db import URL_SEP, Permission
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def canonical_url(*args, **kargs):
 
    '''Like url(x, qualified=True), but returns url that not only is qualified
 
    but also canonical, as configured in canonical_url'''
 
    from kallithea import CONFIG
 
    try:
 
        parts = CONFIG.get('canonical_url', '').split('://', 1)
 
        kargs['host'] = parts[1].split('/', 1)[0]
 
        kargs['protocol'] = parts[0]
 
    except IndexError:
 
        kargs['qualified'] = True
 
    return url(*args, **kargs)
 

	
 
def canonical_hostname():
 
    '''Return canonical hostname of system'''
 
    from kallithea import CONFIG
 
    try:
 
        parts = CONFIG.get('canonical_url', '').split('://', 1)
 
        return parts[1].split('/', 1)[0]
 
    except IndexError:
 
        parts = url('home', qualified=True).split('://', 1)
 
        return parts[1].split('/', 1)[0]
 

	
 
def html_escape(text, html_escape_table=None):
 
    """Produce entities within text."""
 
    if not html_escape_table:
 
        html_escape_table = {
 
            "&": "&amp;",
 
            '"': "&quot;",
 
            "'": "&apos;",
 
            ">": "&gt;",
 
            "<": "&lt;",
 
        }
 
    return "".join(html_escape_table.get(c, c) for c in text)
 

	
 

	
 
def shorter(text, size=20):
 
    postfix = '...'
 
    if len(text) > size:
 
        return text[:size - len(postfix)] + postfix
 
@@ -1211,103 +1211,100 @@ def fancy_file_stats(stats):
 
        elif RENAMED_FILENODE in stats['ops']:
 
            lbl += _('rename')
 
            bin_op = RENAMED_FILENODE
 

	
 
        #chmod can go with other operations
 
        if CHMOD_FILENODE in stats['ops']:
 
            _org_lbl = _('chmod')
 
            lbl += _org_lbl if lbl.endswith('+') else '+%s' % _org_lbl
 

	
 
        #import ipdb;ipdb.set_trace()
 
        b_d = '<div class="bin bin%s %s" style="width:100%%">%s</div>' % (bin_op, cgen('a', a_v='', d_v=0), lbl)
 
        b_a = '<div class="bin bin1" style="width:0%%"></div>'
 
        return literal('<div style="width:%spx">%s%s</div>' % (width, b_a, b_d))
 

	
 
    t = stats['added'] + stats['deleted']
 
    unit = float(width) / (t or 1)
 

	
 
    # needs > 9% of width to be visible or 0 to be hidden
 
    a_p = max(9, unit * a) if a > 0 else 0
 
    d_p = max(9, unit * d) if d > 0 else 0
 
    p_sum = a_p + d_p
 

	
 
    if p_sum > width:
 
        #adjust the percentage to be == 100% since we adjusted to 9
 
        if a_p > d_p:
 
            a_p = a_p - (p_sum - width)
 
        else:
 
            d_p = d_p - (p_sum - width)
 

	
 
    a_v = a if a > 0 else ''
 
    d_v = d if d > 0 else ''
 

	
 
    d_a = '<div class="added %s" style="width:%s%%">%s</div>' % (
 
        cgen('a', a_v, d_v), a_p, a_v
 
    )
 
    d_d = '<div class="deleted %s" style="width:%s%%">%s</div>' % (
 
        cgen('d', a_v, d_v), d_p, d_v
 
    )
 
    return literal('<div style="width:%spx">%s%s</div>' % (width, d_a, d_d))
 

	
 

	
 
def urlify_text(text_, safe=True):
 
    """
 
    Extrac urls from text and make html links out of them
 

	
 
    :param text_:
 
    """
 

	
 
    url_pat = re.compile(r'''(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]'''
 
                         '''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''')
 

	
 
    def url_func(match_obj):
 
        url_full = match_obj.groups()[0]
 
        return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 
    _newtext = url_pat.sub(url_func, text_)
 
    _newtext = url_re.sub(url_func, text_)
 
    if safe:
 
        return literal(_newtext)
 
    return _newtext
 

	
 

	
 
def urlify_changesets(text_, repository):
 
    """
 
    Extract revision ids from changeset and make link from them
 

	
 
    :param text_:
 
    :param repository: repo name to build the URL with
 
    """
 
    from pylons import url  # doh, we need to re-import url to mock it later
 

	
 
    def url_func(match_obj):
 
        rev = match_obj.group(0)
 
        return '<a class="revision-link" href="%(url)s">%(rev)s</a>' % {
 
         'url': url('changeset_home', repo_name=repository, revision=rev),
 
         'rev': rev,
 
        }
 

	
 
    return re.sub(r'(?:^|(?<=\s))([0-9a-fA-F]{12,40})(?=$|\s|[.,:])', url_func, text_)
 

	
 

	
 
def urlify_commit(text_, repository, link_=None):
 
    """
 
    Parses given text message and makes proper links.
 
    issues are linked to given issue-server, and rest is a changeset link
 
    if link_ is given, in other case it's a plain text
 

	
 
    :param text_:
 
    :param repository:
 
    :param link_: changeset link
 
    """
 
    import traceback
 
    from pylons import url  # doh, we need to re-import url to mock it later
 

	
 
    def escaper(string):
 
        return string.replace('<', '&lt;').replace('>', '&gt;')
 

	
 
    def linkify_others(t, l):
 
        urls = re.compile(r'(\<a.*?\<\/a\>)',)
 
        links = []
 
        for e in urls.split(t):
 
            if not urls.match(e):
 
                links.append('<a class="message-link" href="%s">%s</a>' % (l, e))
 
            else:
 
                links.append(e)
kallithea/lib/markup_renderer.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.markup_renderer
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Renderer for markup languages with ability to parse using rst or markdown
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Oct 27, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import re
 
import logging
 
import traceback
 

	
 
from kallithea.lib.utils2 import safe_unicode, MENTIONS_REGEX
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
url_re = re.compile(r'''(\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
 
                    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:?!*()-]*[/a-zA-Z0-9_=@#~])?)''')
 

	
 
class MarkupRenderer(object):
 
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
 

	
 
    MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
 
    RST_PAT = re.compile(r're?st', re.IGNORECASE)
 
    PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)
 

	
 
    def _detect_renderer(self, source, filename=None):
 
        """
 
        runs detection of what renderer should be used for generating html
 
        from a markup language
 

	
 
        filename can be also explicitly a renderer name
 

	
 
        :param source:
 
        :param filename:
 
        """
 

	
 
        if MarkupRenderer.MARKDOWN_PAT.findall(filename):
 
            detected_renderer = 'markdown'
 
        elif MarkupRenderer.RST_PAT.findall(filename):
 
            detected_renderer = 'rst'
 
        elif MarkupRenderer.PLAIN_PAT.findall(filename):
 
            detected_renderer = 'rst'
 
        else:
 
            detected_renderer = 'plain'
 

	
 
        return getattr(MarkupRenderer, detected_renderer)
 

	
 
    @classmethod
 
    def _flavored_markdown(cls, text):
 
        """
 
        Github style flavored markdown
 

	
 
        :param text:
 
        """
 
        from hashlib import md5
 

	
 
        # Extract pre blocks.
 
        extractions = {}
 
        def pre_extraction_callback(matchobj):
 
            digest = md5(matchobj.group(0)).hexdigest()
 
            extractions[digest] = matchobj.group(0)
 
            return "{gfm-extraction-%s}" % digest
 
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
 
        text = re.sub(pattern, pre_extraction_callback, text)
 

	
 
        # Prevent foo_bar_baz from ending up with an italic word in the middle.
 
        def italic_callback(matchobj):
 
            s = matchobj.group(0)
 
            if list(s).count('_') >= 2:
 
                return s.replace('_', '\_')
 
            return s
 
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
 

	
 
        # In very clear cases, let newlines become <br /> tags.
 
        def newline_callback(matchobj):
 
            if len(matchobj.group(1)) == 1:
 
                return matchobj.group(0).rstrip() + '  \n'
 
            else:
 
                return matchobj.group(0)
 
        pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
 
        text = re.sub(pattern, newline_callback, text)
 

	
 
        # Insert pre block extractions.
 
        def pre_insert_callback(matchobj):
 
            return '\n\n' + extractions[matchobj.group(1)]
 
        text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}',
 
                      pre_insert_callback, text)
 

	
 
        return text
 

	
 
    def render(self, source, filename=None):
 
        """
 
        Renders a given filename using detected renderer
 
        it detects renderers based on file extension or mimetype.
 
        At last it will just do a simple html replacing new lines with <br/>
 

	
 
        :param file_name:
 
        :param source:
 
        """
 

	
 
        renderer = self._detect_renderer(source, filename)
 
        readme_data = renderer(source)
 
        return readme_data
 

	
 
    @classmethod
 
    def plain(cls, source, universal_newline=True):
 
        source = safe_unicode(source)
 
        if universal_newline:
 
            newline = '\n'
 
            source = newline.join(source.splitlines())
 
        def urlify_text(text):
 
            url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]'
 
                                 '|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
 

	
 
            def url_func(match_obj):
 
                url_full = match_obj.groups()[0]
 
                return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 

	
 
            return url_pat.sub(url_func, text)
 

	
 
        source = urlify_text(source)
 
        source = url_re.sub(url_func, source)
 
        return '<br />' + source.replace("\n", '<br />')
 

	
 
    @classmethod
 
    def markdown(cls, source, safe=True, flavored=False):
 
        source = safe_unicode(source)
 
        try:
 
            import markdown as __markdown
 
            if flavored:
 
                source = cls._flavored_markdown(source)
 
            return __markdown.markdown(source, ['codehilite', 'extra'])
 
        except ImportError:
 
            log.warning('Install markdown to use this function')
 
            return cls.plain(source)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            if safe:
 
                log.debug('Fallbacking to render in plain mode')
 
                return cls.plain(source)
 
            else:
 
                raise
 

	
 
    @classmethod
 
    def rst(cls, source, safe=True):
 
        source = safe_unicode(source)
 
        try:
 
            from docutils.core import publish_parts
 
            from docutils.parsers.rst import directives
 
            docutils_settings = dict([(alias, None) for alias in
 
                                cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
 

	
 
            docutils_settings.update({'input_encoding': 'unicode',
 
                                      'report_level': 4})
 

	
 
            for k, v in docutils_settings.iteritems():
 
                directives.register_directive(k, v)
 

	
 
            parts = publish_parts(source=source,
 
                                  writer_name="html4css1",
 
                                  settings_overrides=docutils_settings)
 

	
 
            return parts['html_title'] + parts["fragment"]
 
        except ImportError:
 
            log.warning('Install docutils to use this function')
 
            return cls.plain(source)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            if safe:
 
                log.debug('Fallbacking to render in plain mode')
0 comments (0 inline, 0 general)