Changeset - 39a59e6915bb
[Not reviewed]
default
0 2 0
Mads Kiilerich - 9 years ago 2016-09-25 17:21:07
madski@unity3d.com
helpers: refactor and optimize urlify_issues

Avoid parsing the configuration and compiling regexps every time a string is
processed.

Instead, the configuration is parsed once and turned into a function that apply
the compiled regexps in a chain.

A next iteration can perhaps run them in parallel and integrate with the
general urlify machinery.
2 files changed with 56 insertions and 50 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
@@ -1307,153 +1307,155 @@ def urlify_text(s, repo_name=None, link_
 
            if tagtype:
 
                tagvalue = match_obj.group('tagvalue')
 
                return '<div class="metatag" tag="%s">%s =&gt; <a href="/%s">%s</a></div>' % (tagtype, tagtype, tagvalue, tagvalue)
 
            lang = match_obj.group('lang')
 
            if lang:
 
                return '<div class="metatag" tag="lang">%s</div>' % lang
 
            tag = match_obj.group('tag')
 
            if tag:
 
                return '<div class="metatag" tag="%s">%s</div>' % (tag, tag)
 
        return match_obj.group(0)
 

	
 
    def _urlify(s):
 
        """
 
        Extract urls from text and make html links out of them
 
        """
 
        return _URLIFY_RE.sub(_replace, s)
 

	
 
    if truncate is None:
 
        s = s.rstrip()
 
    else:
 
        s = truncatef(s, truncate, whole_word=True)
 
    s = html_escape(s)
 
    s = _urlify(s)
 
    if repo_name is not None:
 
        s = urlify_issues(s, repo_name)
 
    if link_ is not None:
 
        # make href around everything that isn't a href already
 
        s = linkify_others(s, link_)
 
    s = s.replace('\r\n', '<br/>').replace('\n', '<br/>')
 
    return literal(s)
 

	
 

	
 
def linkify_others(t, l):
 
    """Add a default link to html with links.
 
    HTML doesn't allow nesting of links, so the outer link must be broken up
 
    in pieces and give space for other links.
 
    """
 
    urls = re.compile(r'(\<a.*?\<\/a\>)',)
 
    links = []
 
    for e in urls.split(t):
 
        if e.strip() and not urls.match(e):
 
            links.append('<a class="message-link" href="%s">%s</a>' % (l, e))
 
        else:
 
            links.append(e)
 

	
 
    return ''.join(links)
 

	
 

	
 
def _urlify_issues_replace_f(repo_name, ISSUE_SERVER_LNK, ISSUE_PREFIX):
 
    def urlify_issues_replace(match_obj):
 
        pref = ''
 
        if match_obj.group().startswith(' '):
 
            pref = ' '
 

	
 
        issue_id = ''.join(match_obj.groups())
 
        issue_url = ISSUE_SERVER_LNK.replace('{id}', issue_id)
 
        issue_url = issue_url.replace('{repo}', repo_name)
 
        issue_url = issue_url.replace('{repo_name}', repo_name.split(URL_SEP)[-1])
 

	
 
        return (
 
            '%(pref)s<a class="%(cls)s" href="%(url)s">'
 
            '%(issue-prefix)s%(id-repr)s'
 
            '</a>'
 
            ) % {
 
             'pref': pref,
 
             'cls': 'issue-tracker-link',
 
             'url': issue_url,
 
             'id-repr': issue_id,
 
             'issue-prefix': ISSUE_PREFIX,
 
             'serv': ISSUE_SERVER_LNK,
 
            }
 
    return urlify_issues_replace
 
# Global variable that will hold the actual urlify_issues function body.
 
# Will be set on first use when the global configuration has been read.
 
_urlify_issues_f = None
 

	
 

	
 
def urlify_issues(newtext, repo_name):
 
    from kallithea import CONFIG as conf
 
    """Urlify issue references according to .ini configuration"""
 
    global _urlify_issues_f
 
    if _urlify_issues_f is None:
 
        from kallithea import CONFIG
 
        assert CONFIG['sqlalchemy.url'] # make sure config has been loaded
 

	
 
        # Build chain of urlify functions, starting with not doing any transformation
 
        tmp_urlify_issues_f = lambda s: s
 

	
 
    # allow multiple issue servers to be used
 
    valid_indices = [
 
        x.group(1)
 
        for x in map(lambda x: re.match(r'issue_pat(.*)', x), conf.keys())
 
        if x and 'issue_server_link%s' % x.group(1) in conf
 
        and 'issue_prefix%s' % x.group(1) in conf
 
    ]
 

	
 
    if valid_indices:
 
        log.debug('found issue server suffixes `%s` during valuation of: %s',
 
                  ','.join(valid_indices), newtext)
 
        issue_pat_re = re.compile(r'issue_pat(.*)')
 
        for k in CONFIG.keys():
 
            # Find all issue_pat* settings that also have corresponding server_link and prefix configuration
 
            m = issue_pat_re.match(k)
 
            if m is None:
 
                continue
 
            suffix = m.group(1)
 
            issue_pat = CONFIG.get(k)
 
            issue_server_link = CONFIG.get('issue_server_link%s' % suffix)
 
            issue_prefix = CONFIG.get('issue_prefix%s' % suffix)
 
            if issue_pat and issue_server_link and issue_prefix:
 
                log.debug('issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_prefix)
 
            else:
 
                log.error('skipping incomplete issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_prefix)
 
                continue
 

	
 
    for pattern_index in valid_indices:
 
        ISSUE_PATTERN = conf.get('issue_pat%s' % pattern_index)
 
        ISSUE_SERVER_LNK = conf.get('issue_server_link%s' % pattern_index)
 
        ISSUE_PREFIX = conf.get('issue_prefix%s' % pattern_index)
 
            # Wrap tmp_urlify_issues_f with substitution of this pattern, while making sure all loop variables (and compiled regexpes) are bound
 
            issue_re = re.compile(issue_pat)
 
            def issues_replace(match_obj,
 
                               issue_server_link=issue_server_link, issue_prefix=issue_prefix):
 
                leadingspace = ' ' if match_obj.group().startswith(' ') else ''
 
                issue_id = ''.join(match_obj.groups())
 
                issue_url = issue_server_link.replace('{id}', issue_id)
 
                issue_url = issue_url.replace('{repo}', repo_name)
 
                issue_url = issue_url.replace('{repo_name}', repo_name.split(URL_SEP)[-1])
 
                return (
 
                    '%(leadingspace)s<a class="issue-tracker-link" href="%(url)s">'
 
                    '%(issue-prefix)s%(id-repr)s'
 
                    '</a>'
 
                    ) % {
 
                     'leadingspace': leadingspace,
 
                     'url': issue_url,
 
                     'id-repr': issue_id,
 
                     'issue-prefix': issue_prefix,
 
                     'serv': issue_server_link,
 
                    }
 
            tmp_urlify_issues_f = (lambda s,
 
                                          issue_re=issue_re, issues_replace=issues_replace, chain_f=tmp_urlify_issues_f:
 
                                   issue_re.sub(issues_replace, chain_f(s)))
 

	
 
        log.debug('pattern suffix `%s` PAT:%s SERVER_LINK:%s PREFIX:%s',
 
                  pattern_index, ISSUE_PATTERN, ISSUE_SERVER_LNK,
 
                  ISSUE_PREFIX)
 

	
 
        URL_PAT = re.compile(ISSUE_PATTERN)
 
        # Set tmp function globally - atomically
 
        _urlify_issues_f = tmp_urlify_issues_f
 

	
 
        urlify_issues_replace = _urlify_issues_replace_f(repo_name, ISSUE_SERVER_LNK, ISSUE_PREFIX)
 
        newtext = URL_PAT.sub(urlify_issues_replace, newtext)
 
        log.debug('processed prefix:`%s` => %s', pattern_index, newtext)
 

	
 
    return newtext
 
    return _urlify_issues_f(newtext)
 

	
 

	
 
def render_w_mentions(source, repo_name=None):
 
    """
 
    Render plain text with revision hashes and issue references urlified
 
    and with @mention highlighting.
 
    """
 
    s = safe_unicode(source)
 
    s = urlify_text(s, repo_name=repo_name)
 
    return literal('<div class="formatted-fixed">%s</div>' % s)
 

	
 

	
 
def short_ref(ref_type, ref_name):
 
    if ref_type == 'rev':
 
        return short_id(ref_name)
 
    return ref_name
 

	
 
def link_to_ref(repo_name, ref_type, ref_name, rev=None):
 
    """
 
    Return full markup for a href to changeset_home for a changeset.
 
    If ref_type is branch it will link to changelog.
 
    ref_name is shortened if ref_type is 'rev'.
 
    if rev is specified show it too, explicitly linking to that revision.
 
    """
 
    txt = short_ref(ref_type, ref_name)
 
    if ref_type == 'branch':
 
        u = url('changelog_home', repo_name=repo_name, branch=ref_name)
 
    else:
 
        u = url('changeset_home', repo_name=repo_name, revision=ref_name)
 
    l = link_to(repo_name + '#' + txt, u)
 
    if rev and ref_type != 'rev':
 
        l = literal('%s (%s)' % (l, link_to(short_id(rev), url('changeset_home', repo_name=repo_name, revision=rev))))
 
    return l
 

	
 
def changeset_status(repo, revision):
 
    return ChangesetStatusModel().get_status(repo, revision)
 

	
 

	
 
def changeset_status_lbl(changeset_status):
 
    return ChangesetStatus.get_status_lbl(changeset_status)
 

	
 

	
 
def get_permission_name(key):
 
    return dict(Permission.PERMS).get(key)
 

	
 

	
 
def journal_filter_help():
 
    return _(textwrap.dedent('''
kallithea/tests/functional/test_files.py
Show inline comments
 
@@ -47,96 +47,100 @@ class TestFilesController(TestController
 
        response.mustcontain('<a class="browser-file ypjax-link" href="/%s/files/96507bd11ecc815ebc6270fdf6db110928c09c1e/setup.py"><i class="icon-doc"></i><span>setup.py</span></a>' % HG_REPO)
 
        response.mustcontain('<a class="browser-file ypjax-link" href="/%s/files/96507bd11ecc815ebc6270fdf6db110928c09c1e/test_and_report.sh"><i class="icon-doc"></i><span>test_and_report.sh</span></a>' % HG_REPO)
 
        response.mustcontain('<a class="browser-file ypjax-link" href="/%s/files/96507bd11ecc815ebc6270fdf6db110928c09c1e/tox.ini"><i class="icon-doc"></i><span>tox.ini</span></a>' % HG_REPO)
 

	
 
    def test_index_revision(self):
 
        self.log_user()
 

	
 
        response = self.app.get(
 
            url(controller='files', action='index',
 
                repo_name=HG_REPO,
 
                revision='7ba66bec8d6dbba14a2155be32408c435c5f4492',
 
                f_path='/')
 
        )
 

	
 
        #Test response...
 

	
 
        response.mustcontain('<a class="browser-dir ypjax-link" href="/%s/files/7ba66bec8d6dbba14a2155be32408c435c5f4492/docs"><i class="icon-folder-open"></i><span>docs</span></a>' % HG_REPO)
 
        response.mustcontain('<a class="browser-dir ypjax-link" href="/%s/files/7ba66bec8d6dbba14a2155be32408c435c5f4492/tests"><i class="icon-folder-open"></i><span>tests</span></a>' % HG_REPO)
 
        response.mustcontain('<a class="browser-file ypjax-link" href="/%s/files/7ba66bec8d6dbba14a2155be32408c435c5f4492/README.rst"><i class="icon-doc"></i><span>README.rst</span></a>' % HG_REPO)
 
        response.mustcontain('1.1 KiB')
 

	
 
    def test_index_different_branch(self):
 
        self.log_user()
 

	
 
        response = self.app.get(url(controller='files', action='index',
 
                                    repo_name=HG_REPO,
 
                                    revision='97e8b885c04894463c51898e14387d80c30ed1ee',
 
                                    f_path='/'))
 

	
 
        response.mustcontain("""<option selected="selected" value="97e8b885c04894463c51898e14387d80c30ed1ee">git at 97e8b885c048</option>""")
 

	
 
    def test_index_paging(self):
 
        self.log_user()
 

	
 
        for r in [(73, 'a066b25d5df7016b45a41b7e2a78c33b57adc235'),
 
                  (92, 'cc66b61b8455b264a7a8a2d8ddc80fcfc58c221e'),
 
                  (109, '75feb4c33e81186c87eac740cee2447330288412'),
 
                  (1, '3d8f361e72ab303da48d799ff1ac40d5ac37c67e'),
 
                  (0, 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]:
 

	
 
            response = self.app.get(url(controller='files', action='index',
 
                                    repo_name=HG_REPO,
 
                                    revision=r[1],
 
                                    f_path='/'))
 

	
 
            response.mustcontain("""@ r%s:%s""" % (r[0], r[1][:12]))
 

	
 
    def test_file_source(self):
 
        # Force the global cache to be populated now when we know the right .ini has been loaded.
 
        # (Without this, the test would fail.)
 
        import kallithea.lib.helpers
 
        kallithea.lib.helpers._urlify_issues_f = None
 
        self.log_user()
 
        response = self.app.get(url(controller='files', action='index',
 
                                    repo_name=HG_REPO,
 
                                    revision='8911406ad776fdd3d0b9932a2e89677e57405a48',
 
                                    f_path='vcs/nodes.py'))
 

	
 
        response.mustcontain("""<div class="commit">Partially implemented <a class="issue-tracker-link" href="https://issues.example.com/vcs_test_hg/issue/16">#16</a>. filecontent/commit message/author/node name are safe_unicode now.<br/>"""
 
"""In addition some other __str__ are unicode as well<br/>"""
 
"""Added test for unicode<br/>"""
 
"""Improved test to clone into uniq repository.<br/>"""
 
"""removed extra unicode conversion in diff.</div>
 
""")
 

	
 
        response.mustcontain("""<option selected="selected" value="8911406ad776fdd3d0b9932a2e89677e57405a48">default at 8911406ad776</option>""")
 

	
 
    def test_file_source_history(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='files', action='history',
 
                                    repo_name=HG_REPO,
 
                                    revision='tip',
 
                                    f_path='vcs/nodes.py'),
 
                                extra_environ={'HTTP_X_PARTIAL_XHR': '1'},)
 
        assert response.body == HG_NODE_HISTORY
 

	
 
    def test_file_source_history_git(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='files', action='history',
 
                                    repo_name=GIT_REPO,
 
                                    revision='master',
 
                                    f_path='vcs/nodes.py'),
 
                                extra_environ={'HTTP_X_PARTIAL_XHR': '1'},)
 
        assert response.body == GIT_NODE_HISTORY
 

	
 
    def test_file_annotation(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='files', action='index',
 
                                    repo_name=HG_REPO,
 
                                    revision='tip',
 
                                    f_path='vcs/nodes.py',
 
                                    annotate=True))
 

	
 
        response.mustcontain("""r356:25213a5fbb04""")
 

	
 
    def test_file_annotation_git(self):
 
        self.log_user()
 
        response = self.app.get(url(controller='files', action='index',
 
                                    repo_name=GIT_REPO,
 
                                    revision='master',
0 comments (0 inline, 0 general)