Changeset - 94f25a680aad
[Not reviewed]
default
0 2 0
Mads Kiilerich - 11 years ago 2014-12-15 13:47:36
madski@unity3d.com
helpers: tweak URL matching patterns - don't include trailing punctuation
2 files changed with 6 insertions and 12 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
Helper functions
 

	
 
Consists of functions to typically be used within templates, but also
 
available to Controllers. This module is available to both as 'h'.
 
"""
 
import random
 
import hashlib
 
import StringIO
 
import math
 
import logging
 
import re
 
import urlparse
 
import textwrap
 

	
 
from pygments.formatters.html import HtmlFormatter
 
from pygments import highlight as code_highlight
 
from pylons import url
 
from pylons.i18n.translation import _, ungettext
 
from hashlib import md5
 

	
 
from webhelpers.html import literal, HTML, escape
 
from webhelpers.html.tools import *
 
from webhelpers.html.builder import make_tag
 
from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
 
    end_form, file, form, hidden, image, javascript_link, link_to, \
 
    link_to_if, link_to_unless, ol, required_legend, select, stylesheet_link, \
 
    submit, text, password, textarea, title, ul, xml_declaration, radio
 
from webhelpers.html.tools import auto_link, button_to, highlight, \
 
    js_obfuscate, mail_to, strip_links, strip_tags, tag_re
 
from webhelpers.number import format_byte_size, format_bit_size
 
from webhelpers.pylonslib import Flash as _Flash
 
from webhelpers.pylonslib.secure_form import secure_form
 
from webhelpers.text import chop_at, collapse, convert_accented_entities, \
 
    convert_misc_entities, lchop, plural, rchop, remove_formatting, \
 
    replace_whitespace, urlify, truncate, wrap_paragraphs
 
from webhelpers.date import time_ago_in_words
 
from webhelpers.paginate import Page as _Page
 
from webhelpers.html.tags import _set_input_attrs, _set_id_attr, \
 
    convert_boolean_attrs, NotGiven, _make_safe_id_component
 

	
 
from kallithea.lib.annotate import annotate_highlight
 
from kallithea.lib.utils import repo_name_slug, get_custom_lexer
 
from kallithea.lib.utils2 import str2bool, safe_unicode, safe_str, \
 
    get_changeset_safe, datetime_to_time, time_to_datetime, AttributeDict,\
 
    safe_int
 
from kallithea.lib.markup_renderer import MarkupRenderer
 
from kallithea.lib.markup_renderer import MarkupRenderer, url_re
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 
from kallithea.config.conf import DATE_FORMAT, DATETIME_FORMAT
 
from kallithea.model.changeset_status import ChangesetStatusModel
 
from kallithea.model.db import URL_SEP, Permission
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def canonical_url(*args, **kargs):
 
    '''Like url(x, qualified=True), but returns url that not only is qualified
 
    but also canonical, as configured in canonical_url'''
 
    from kallithea import CONFIG
 
    try:
 
        parts = CONFIG.get('canonical_url', '').split('://', 1)
 
        kargs['host'] = parts[1].split('/', 1)[0]
 
        kargs['protocol'] = parts[0]
 
    except IndexError:
 
        kargs['qualified'] = True
 
    return url(*args, **kargs)
 

	
 
def canonical_hostname():
 
    '''Return canonical hostname of system'''
 
    from kallithea import CONFIG
 
    try:
 
        parts = CONFIG.get('canonical_url', '').split('://', 1)
 
        return parts[1].split('/', 1)[0]
 
    except IndexError:
 
        parts = url('home', qualified=True).split('://', 1)
 
        return parts[1].split('/', 1)[0]
 

	
 
def html_escape(text, html_escape_table=None):
 
    """Produce entities within text."""
 
    if not html_escape_table:
 
        html_escape_table = {
 
            "&": "&amp;",
 
            '"': "&quot;",
 
            "'": "&apos;",
 
            ">": "&gt;",
 
            "<": "&lt;",
 
        }
 
    return "".join(html_escape_table.get(c, c) for c in text)
 

	
 

	
 
def shorter(text, size=20):
 
    postfix = '...'
 
    if len(text) > size:
 
        return text[:size - len(postfix)] + postfix
 
    return text
 

	
 

	
 
def _reset(name, value=None, id=NotGiven, type="reset", **attrs):
 
    """
 
    Reset button
 
    """
 
    _set_input_attrs(attrs, type, name, value)
 
    _set_id_attr(attrs, id, name)
 
    convert_boolean_attrs(attrs, ["disabled"])
 
    return HTML.input(**attrs)
 

	
 
reset = _reset
 
safeid = _make_safe_id_component
 

	
 

	
 
def FID(raw_id, path):
 
    """
 
    Creates a uniqe ID for filenode based on it's hash of path and revision
 
    it's safe to use in urls
 

	
 
    :param raw_id:
 
    :param path:
 
    """
 

	
 
    return 'C-%s-%s' % (short_id(raw_id), md5(safe_str(path)).hexdigest()[:12])
 

	
 

	
 
def get_token():
 
    """Return the current authentication token, creating one if one doesn't
 
    already exist.
 
    """
 
    token_key = "_authentication_token"
 
    from pylons import session
 
    if not token_key in session:
 
        try:
 
            token = hashlib.sha1(str(random.getrandbits(128))).hexdigest()
 
        except AttributeError:  # Python < 2.4
 
            token = hashlib.sha1(str(random.randrange(2 ** 128))).hexdigest()
 
        session[token_key] = token
 
        if hasattr(session, 'save'):
 
            session.save()
 
    return session[token_key]
 

	
 

	
 
class _GetError(object):
 
    """Get error from form_errors, and represent it as span wrapped error
 
    message
 

	
 
    :param field_name: field to fetch errors for
 
    :param form_errors: form errors dict
 
    """
 

	
 
    def __call__(self, field_name, form_errors):
 
        tmpl = """<span class="error_msg">%s</span>"""
 
        if form_errors and field_name in form_errors:
 
            return literal(tmpl % form_errors.get(field_name))
 

	
 
get_error = _GetError()
 

	
 

	
 
class _ToolTip(object):
 

	
 
    def __call__(self, tooltip_title, trim_at=50):
 
        """
 
        Special function just to wrap our text into nice formatted
 
        autowrapped text
 

	
 
        :param tooltip_title:
 
        """
 
        tooltip_title = escape(tooltip_title)
 
        tooltip_title = tooltip_title.replace('<', '&lt;').replace('>', '&gt;')
 
        return tooltip_title
 
tooltip = _ToolTip()
 

	
 

	
 
class _FilesBreadCrumbs(object):
 

	
 
    def __call__(self, repo_name, rev, paths):
 
        if isinstance(paths, str):
 
            paths = safe_unicode(paths)
 
        url_l = [link_to(repo_name, url('files_home',
 
                                        repo_name=repo_name,
 
                                        revision=rev, f_path=''),
 
                         class_='ypjax-link')]
 
        paths_l = paths.split('/')
 
        for cnt, p in enumerate(paths_l):
 
            if p != '':
 
                url_l.append(link_to(p,
 
                                     url('files_home',
 
                                         repo_name=repo_name,
 
                                         revision=rev,
 
                                         f_path='/'.join(paths_l[:cnt + 1])
 
                                         ),
 
                                     class_='ypjax-link'
 
                                     )
 
                             )
 

	
 
        return literal('/'.join(url_l))
 

	
 
files_breadcrumbs = _FilesBreadCrumbs()
 

	
 

	
 
class CodeHtmlFormatter(HtmlFormatter):
 
    """
 
    My code Html Formatter for source codes
 
    """
 

	
 
    def wrap(self, source, outfile):
 
        return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
 

	
 
    def _wrap_code(self, source):
 
        for cnt, it in enumerate(source):
 
            i, t = it
 
            t = '<div id="L%s">%s</div>' % (cnt + 1, t)
 
            yield i, t
 

	
 
    def _wrap_tablelinenos(self, inner):
 
        dummyoutfile = StringIO.StringIO()
 
        lncount = 0
 
        for t, line in inner:
 
            if t:
 
                lncount += 1
 
            dummyoutfile.write(line)
 

	
 
        fl = self.linenostart
 
        mw = len(str(lncount + fl - 1))
 
        sp = self.linenospecial
 
        st = self.linenostep
 
        la = self.lineanchors
 
        aln = self.anchorlinenos
 
        nocls = self.noclasses
 
        if sp:
 
            lines = []
 

	
 
            for i in range(fl, fl + lncount):
 
                if i % st == 0:
 
                    if i % sp == 0:
 
                        if aln:
 
                            lines.append('<a href="#%s%d" class="special">%*d</a>' %
 
                                         (la, i, mw, i))
 
                        else:
 
                            lines.append('<span class="special">%*d</span>' % (mw, i))
 
                    else:
 
@@ -1067,391 +1067,388 @@ class RepoPage(Page):
 
        # we calculate that ourselves.
 
        if item_count is not None:
 
            self.item_count = item_count
 
        else:
 
            self.item_count = len(self.collection)
 

	
 
        # Compute the number of the first and last available page
 
        if self.item_count > 0:
 
            self.first_page = 1
 
            self.page_count = int(math.ceil(float(self.item_count) /
 
                                            self.items_per_page))
 
            self.last_page = self.first_page + self.page_count - 1
 

	
 
            # Make sure that the requested page number is the range of
 
            # valid pages
 
            if self.page > self.last_page:
 
                self.page = self.last_page
 
            elif self.page < self.first_page:
 
                self.page = self.first_page
 

	
 
            # Note: the number of items on this page can be less than
 
            #       items_per_page if the last page is not full
 
            self.first_item = max(0, (self.item_count) - (self.page *
 
                                                          items_per_page))
 
            self.last_item = ((self.item_count - 1) - items_per_page *
 
                              (self.page - 1))
 

	
 
            self.items = list(self.collection[self.first_item:self.last_item + 1])
 

	
 
            # Links to previous and next page
 
            if self.page > self.first_page:
 
                self.previous_page = self.page - 1
 
            else:
 
                self.previous_page = None
 

	
 
            if self.page < self.last_page:
 
                self.next_page = self.page + 1
 
            else:
 
                self.next_page = None
 

	
 
        # No items available
 
        else:
 
            self.first_page = None
 
            self.page_count = 0
 
            self.last_page = None
 
            self.first_item = None
 
            self.last_item = None
 
            self.previous_page = None
 
            self.next_page = None
 
            self.items = []
 

	
 
        # This is a subclass of the 'list' type. Initialise the list now.
 
        list.__init__(self, reversed(self.items))
 

	
 

	
 
def changed_tooltip(nodes):
 
    """
 
    Generates a html string for changed nodes in changeset page.
 
    It limits the output to 30 entries
 

	
 
    :param nodes: LazyNodesGenerator
 
    """
 
    if nodes:
 
        pref = ': <br/> '
 
        suf = ''
 
        if len(nodes) > 30:
 
            suf = '<br/>' + _(' and %s more') % (len(nodes) - 30)
 
        return literal(pref + '<br/> '.join([safe_unicode(x.path)
 
                                             for x in nodes[:30]]) + suf)
 
    else:
 
        return ': ' + _('No Files')
 

	
 

	
 
def repo_link(groups_and_repos):
 
    """
 
    Makes a breadcrumbs link to repo within a group
 
    joins &raquo; on each group to create a fancy link
 

	
 
    ex::
 
        group >> subgroup >> repo
 

	
 
    :param groups_and_repos:
 
    :param last_url:
 
    """
 
    groups, just_name, repo_name = groups_and_repos
 
    last_url = url('summary_home', repo_name=repo_name)
 
    last_link = link_to(just_name, last_url)
 

	
 
    def make_link(group):
 
        return link_to(group.name,
 
                       url('repos_group_home', group_name=group.group_name))
 
    return literal(' &raquo; '.join(map(make_link, groups) + ['<span>%s</span>' % last_link]))
 

	
 

	
 
def fancy_file_stats(stats):
 
    """
 
    Displays a fancy two colored bar for number of added/deleted
 
    lines of code on file
 

	
 
    :param stats: two element list of added/deleted lines of code
 
    """
 
    from kallithea.lib.diffs import NEW_FILENODE, DEL_FILENODE, \
 
        MOD_FILENODE, RENAMED_FILENODE, CHMOD_FILENODE, BIN_FILENODE
 

	
 
    def cgen(l_type, a_v, d_v):
 
        mapping = {'tr': 'top-right-rounded-corner-mid',
 
                   'tl': 'top-left-rounded-corner-mid',
 
                   'br': 'bottom-right-rounded-corner-mid',
 
                   'bl': 'bottom-left-rounded-corner-mid'}
 
        map_getter = lambda x: mapping[x]
 

	
 
        if l_type == 'a' and d_v:
 
            #case when added and deleted are present
 
            return ' '.join(map(map_getter, ['tl', 'bl']))
 

	
 
        if l_type == 'a' and not d_v:
 
            return ' '.join(map(map_getter, ['tr', 'br', 'tl', 'bl']))
 

	
 
        if l_type == 'd' and a_v:
 
            return ' '.join(map(map_getter, ['tr', 'br']))
 

	
 
        if l_type == 'd' and not a_v:
 
            return ' '.join(map(map_getter, ['tr', 'br', 'tl', 'bl']))
 

	
 
    a, d = stats['added'], stats['deleted']
 
    width = 100
 

	
 
    if stats['binary']:
 
        #binary mode
 
        lbl = ''
 
        bin_op = 1
 

	
 
        if BIN_FILENODE in stats['ops']:
 
            lbl = 'bin+'
 

	
 
        if NEW_FILENODE in stats['ops']:
 
            lbl += _('new file')
 
            bin_op = NEW_FILENODE
 
        elif MOD_FILENODE in stats['ops']:
 
            lbl += _('mod')
 
            bin_op = MOD_FILENODE
 
        elif DEL_FILENODE in stats['ops']:
 
            lbl += _('del')
 
            bin_op = DEL_FILENODE
 
        elif RENAMED_FILENODE in stats['ops']:
 
            lbl += _('rename')
 
            bin_op = RENAMED_FILENODE
 

	
 
        #chmod can go with other operations
 
        if CHMOD_FILENODE in stats['ops']:
 
            _org_lbl = _('chmod')
 
            lbl += _org_lbl if lbl.endswith('+') else '+%s' % _org_lbl
 

	
 
        #import ipdb;ipdb.set_trace()
 
        b_d = '<div class="bin bin%s %s" style="width:100%%">%s</div>' % (bin_op, cgen('a', a_v='', d_v=0), lbl)
 
        b_a = '<div class="bin bin1" style="width:0%%"></div>'
 
        return literal('<div style="width:%spx">%s%s</div>' % (width, b_a, b_d))
 

	
 
    t = stats['added'] + stats['deleted']
 
    unit = float(width) / (t or 1)
 

	
 
    # needs > 9% of width to be visible or 0 to be hidden
 
    a_p = max(9, unit * a) if a > 0 else 0
 
    d_p = max(9, unit * d) if d > 0 else 0
 
    p_sum = a_p + d_p
 

	
 
    if p_sum > width:
 
        #adjust the percentage to be == 100% since we adjusted to 9
 
        if a_p > d_p:
 
            a_p = a_p - (p_sum - width)
 
        else:
 
            d_p = d_p - (p_sum - width)
 

	
 
    a_v = a if a > 0 else ''
 
    d_v = d if d > 0 else ''
 

	
 
    d_a = '<div class="added %s" style="width:%s%%">%s</div>' % (
 
        cgen('a', a_v, d_v), a_p, a_v
 
    )
 
    d_d = '<div class="deleted %s" style="width:%s%%">%s</div>' % (
 
        cgen('d', a_v, d_v), d_p, d_v
 
    )
 
    return literal('<div style="width:%spx">%s%s</div>' % (width, d_a, d_d))
 

	
 

	
 
def urlify_text(text_, safe=True):
 
    """
 
    Extrac urls from text and make html links out of them
 

	
 
    :param text_:
 
    """
 

	
 
    url_pat = re.compile(r'''(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]'''
 
                         '''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''')
 

	
 
    def url_func(match_obj):
 
        url_full = match_obj.groups()[0]
 
        return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 
    _newtext = url_pat.sub(url_func, text_)
 
    _newtext = url_re.sub(url_func, text_)
 
    if safe:
 
        return literal(_newtext)
 
    return _newtext
 

	
 

	
 
def urlify_changesets(text_, repository):
 
    """
 
    Extract revision ids from changeset and make link from them
 

	
 
    :param text_:
 
    :param repository: repo name to build the URL with
 
    """
 
    from pylons import url  # doh, we need to re-import url to mock it later
 

	
 
    def url_func(match_obj):
 
        rev = match_obj.group(0)
 
        return '<a class="revision-link" href="%(url)s">%(rev)s</a>' % {
 
         'url': url('changeset_home', repo_name=repository, revision=rev),
 
         'rev': rev,
 
        }
 

	
 
    return re.sub(r'(?:^|(?<=\s))([0-9a-fA-F]{12,40})(?=$|\s|[.,:])', url_func, text_)
 

	
 

	
 
def urlify_commit(text_, repository, link_=None):
 
    """
 
    Parses given text message and makes proper links.
 
    issues are linked to given issue-server, and rest is a changeset link
 
    if link_ is given, in other case it's a plain text
 

	
 
    :param text_:
 
    :param repository:
 
    :param link_: changeset link
 
    """
 
    import traceback
 
    from pylons import url  # doh, we need to re-import url to mock it later
 

	
 
    def escaper(string):
 
        return string.replace('<', '&lt;').replace('>', '&gt;')
 

	
 
    def linkify_others(t, l):
 
        urls = re.compile(r'(\<a.*?\<\/a\>)',)
 
        links = []
 
        for e in urls.split(t):
 
            if not urls.match(e):
 
                links.append('<a class="message-link" href="%s">%s</a>' % (l, e))
 
            else:
 
                links.append(e)
 

	
 
        return ''.join(links)
 

	
 
    # urlify changesets - extract revisions and make link out of them
 
    newtext = urlify_changesets(escaper(text_), repository)
 

	
 
    # extract http/https links and make them real urls
 
    newtext = urlify_text(newtext, safe=False)
 

	
 
    try:
 
        from kallithea import CONFIG
 
        conf = CONFIG
 

	
 
        # allow multiple issue servers to be used
 
        valid_indices = [
 
            x.group(1)
 
            for x in map(lambda x: re.match(r'issue_pat(.*)', x), conf.keys())
 
            if x and 'issue_server_link%s' % x.group(1) in conf
 
            and 'issue_prefix%s' % x.group(1) in conf
 
        ]
 

	
 
        log.debug('found issue server suffixes `%s` during valuation of: %s'
 
                  % (','.join(valid_indices), newtext))
 

	
 
        for pattern_index in valid_indices:
 
            ISSUE_PATTERN = conf.get('issue_pat%s' % pattern_index)
 
            ISSUE_SERVER_LNK = conf.get('issue_server_link%s' % pattern_index)
 
            ISSUE_PREFIX = conf.get('issue_prefix%s' % pattern_index)
 

	
 
            log.debug('pattern suffix `%s` PAT:%s SERVER_LINK:%s PREFIX:%s'
 
                      % (pattern_index, ISSUE_PATTERN, ISSUE_SERVER_LNK,
 
                         ISSUE_PREFIX))
 

	
 
            URL_PAT = re.compile(r'%s' % ISSUE_PATTERN)
 

	
 
            def url_func(match_obj):
 
                pref = ''
 
                if match_obj.group().startswith(' '):
 
                    pref = ' '
 

	
 
                issue_id = ''.join(match_obj.groups())
 
                issue_url = ISSUE_SERVER_LNK.replace('{id}', issue_id)
 
                if repository:
 
                    issue_url = issue_url.replace('{repo}', repository)
 
                    repo_name = repository.split(URL_SEP)[-1]
 
                    issue_url = issue_url.replace('{repo_name}', repo_name)
 

	
 
                return (
 
                    '%(pref)s<a class="%(cls)s" href="%(url)s">'
 
                    '%(issue-prefix)s%(id-repr)s'
 
                    '</a>'
 
                    ) % {
 
                     'pref': pref,
 
                     'cls': 'issue-tracker-link',
 
                     'url': issue_url,
 
                     'id-repr': issue_id,
 
                     'issue-prefix': ISSUE_PREFIX,
 
                     'serv': ISSUE_SERVER_LNK,
 
                    }
 
            newtext = URL_PAT.sub(url_func, newtext)
 
            log.debug('processed prefix:`%s` => %s' % (pattern_index, newtext))
 

	
 
        # if we actually did something above
 
        if link_:
 
            # wrap not links into final link => link_
 
            newtext = linkify_others(newtext, link_)
 
    except Exception:
 
        log.error(traceback.format_exc())
 
        pass
 

	
 
    return literal(newtext)
 

	
 

	
 
def rst(source):
 
    return literal('<div class="rst-block">%s</div>' %
 
                   MarkupRenderer.rst(source))
 

	
 

	
 
def rst_w_mentions(source):
 
    """
 
    Wrapped rst renderer with @mention highlighting
 

	
 
    :param source:
 
    """
 
    return literal('<div class="rst-block">%s</div>' %
 
                   MarkupRenderer.rst_with_mentions(source))
 

	
 
def short_ref(ref_type, ref_name):
 
    if ref_type == 'rev':
 
        return short_id(ref_name)
 
    return ref_name
 

	
 
def link_to_ref(repo_name, ref_type, ref_name, rev=None):
 
    """
 
    Return full markup for a href to changeset_home for a changeset.
 
    If ref_type is branch it will link to changelog.
 
    ref_name is shortened if ref_type is 'rev'.
 
    if rev is specified show it too, explicitly linking to that revision.
 
    """
 
    txt = short_ref(ref_type, ref_name)
 
    if ref_type == 'branch':
 
        u = url('changelog_home', repo_name=repo_name, branch=ref_name)
 
    else:
 
        u = url('changeset_home', repo_name=repo_name, revision=ref_name)
 
    l = link_to(repo_name + '#' + txt, u)
 
    if rev and ref_type != 'rev':
 
        l = literal('%s (%s)' % (l, link_to(short_id(rev), url('changeset_home', repo_name=repo_name, revision=rev))))
 
    return l
 

	
 
def changeset_status(repo, revision):
 
    return ChangesetStatusModel().get_status(repo, revision)
 

	
 

	
 
def changeset_status_lbl(changeset_status):
 
    return dict(ChangesetStatus.STATUSES).get(changeset_status)
 

	
 

	
 
def get_permission_name(key):
 
    return dict(Permission.PERMS).get(key)
 

	
 

	
 
def journal_filter_help():
 
    return _(textwrap.dedent('''
 
        Example filter terms:
 
            repository:vcs
 
            username:developer
 
            action:*push*
 
            ip:127.0.0.1
 
            date:20120101
 
            date:[20120101100000 TO 20120102]
 

	
 
        Generate wildcards using '*' character:
 
            "repositroy:vcs*" - search everything starting with 'vcs'
 
            "repository:*vcs*" - search for repository containing 'vcs'
 

	
 
        Optional AND / OR operators in queries
 
            "repository:vcs OR repository:test"
 
            "username:test AND repository:test*"
 
    '''))
 

	
 

	
 
def not_mapped_error(repo_name):
 
    flash(_('%s repository is not mapped to db perhaps'
 
            ' it was created or renamed from the filesystem'
kallithea/lib/markup_renderer.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.markup_renderer
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Renderer for markup languages with ability to parse using rst or markdown
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Oct 27, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import re
 
import logging
 
import traceback
 

	
 
from kallithea.lib.utils2 import safe_unicode, MENTIONS_REGEX
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
url_re = re.compile(r'''(\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
 
                    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:?!*()-]*[/a-zA-Z0-9_=@#~])?)''')
 

	
 
class MarkupRenderer(object):
 
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
 

	
 
    MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
 
    RST_PAT = re.compile(r're?st', re.IGNORECASE)
 
    PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)
 

	
 
    def _detect_renderer(self, source, filename=None):
 
        """
 
        runs detection of what renderer should be used for generating html
 
        from a markup language
 

	
 
        filename can be also explicitly a renderer name
 

	
 
        :param source:
 
        :param filename:
 
        """
 

	
 
        if MarkupRenderer.MARKDOWN_PAT.findall(filename):
 
            detected_renderer = 'markdown'
 
        elif MarkupRenderer.RST_PAT.findall(filename):
 
            detected_renderer = 'rst'
 
        elif MarkupRenderer.PLAIN_PAT.findall(filename):
 
            detected_renderer = 'rst'
 
        else:
 
            detected_renderer = 'plain'
 

	
 
        return getattr(MarkupRenderer, detected_renderer)
 

	
 
    @classmethod
 
    def _flavored_markdown(cls, text):
 
        """
 
        Github style flavored markdown
 

	
 
        :param text:
 
        """
 
        from hashlib import md5
 

	
 
        # Extract pre blocks.
 
        extractions = {}
 
        def pre_extraction_callback(matchobj):
 
            digest = md5(matchobj.group(0)).hexdigest()
 
            extractions[digest] = matchobj.group(0)
 
            return "{gfm-extraction-%s}" % digest
 
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
 
        text = re.sub(pattern, pre_extraction_callback, text)
 

	
 
        # Prevent foo_bar_baz from ending up with an italic word in the middle.
 
        def italic_callback(matchobj):
 
            s = matchobj.group(0)
 
            if list(s).count('_') >= 2:
 
                return s.replace('_', '\_')
 
            return s
 
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
 

	
 
        # In very clear cases, let newlines become <br /> tags.
 
        def newline_callback(matchobj):
 
            if len(matchobj.group(1)) == 1:
 
                return matchobj.group(0).rstrip() + '  \n'
 
            else:
 
                return matchobj.group(0)
 
        pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
 
        text = re.sub(pattern, newline_callback, text)
 

	
 
        # Insert pre block extractions.
 
        def pre_insert_callback(matchobj):
 
            return '\n\n' + extractions[matchobj.group(1)]
 
        text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}',
 
                      pre_insert_callback, text)
 

	
 
        return text
 

	
 
    def render(self, source, filename=None):
 
        """
 
        Renders a given filename using detected renderer
 
        it detects renderers based on file extension or mimetype.
 
        At last it will just do a simple html replacing new lines with <br/>
 

	
 
        :param file_name:
 
        :param source:
 
        """
 

	
 
        renderer = self._detect_renderer(source, filename)
 
        readme_data = renderer(source)
 
        return readme_data
 

	
 
    @classmethod
 
    def plain(cls, source, universal_newline=True):
 
        source = safe_unicode(source)
 
        if universal_newline:
 
            newline = '\n'
 
            source = newline.join(source.splitlines())
 
        def urlify_text(text):
 
            url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]'
 
                                 '|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
 

	
 
            def url_func(match_obj):
 
                url_full = match_obj.groups()[0]
 
                return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 

	
 
            return url_pat.sub(url_func, text)
 

	
 
        source = urlify_text(source)
 
        source = url_re.sub(url_func, source)
 
        return '<br />' + source.replace("\n", '<br />')
 

	
 
    @classmethod
 
    def markdown(cls, source, safe=True, flavored=False):
 
        source = safe_unicode(source)
 
        try:
 
            import markdown as __markdown
 
            if flavored:
 
                source = cls._flavored_markdown(source)
 
            return __markdown.markdown(source, ['codehilite', 'extra'])
 
        except ImportError:
 
            log.warning('Install markdown to use this function')
 
            return cls.plain(source)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            if safe:
 
                log.debug('Fallbacking to render in plain mode')
 
                return cls.plain(source)
 
            else:
 
                raise
 

	
 
    @classmethod
 
    def rst(cls, source, safe=True):
 
        source = safe_unicode(source)
 
        try:
 
            from docutils.core import publish_parts
 
            from docutils.parsers.rst import directives
 
            docutils_settings = dict([(alias, None) for alias in
 
                                cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
 

	
 
            docutils_settings.update({'input_encoding': 'unicode',
 
                                      'report_level': 4})
 

	
 
            for k, v in docutils_settings.iteritems():
 
                directives.register_directive(k, v)
 

	
 
            parts = publish_parts(source=source,
 
                                  writer_name="html4css1",
 
                                  settings_overrides=docutils_settings)
 

	
 
            return parts['html_title'] + parts["fragment"]
 
        except ImportError:
 
            log.warning('Install docutils to use this function')
 
            return cls.plain(source)
 
        except Exception:
 
            log.error(traceback.format_exc())
 
            if safe:
 
                log.debug('Fallbacking to render in plain mode')
 
                return cls.plain(source)
 
            else:
 
                raise
 

	
 
    @classmethod
 
    def rst_with_mentions(cls, source):
 
        mention_pat = re.compile(MENTIONS_REGEX)
 

	
 
        def wrapp(match_obj):
 
            uname = match_obj.groups()[0]
 
            return ' **@%(uname)s** ' % {'uname': uname}
 
        mention_hl = mention_pat.sub(wrapp, source).strip()
 
        return cls.rst(mention_hl)
0 comments (0 inline, 0 general)