kallithea Changeset - ea5ff843b200

Changeset - ea5ff843b200

Parent rev.

Child rev.

[Not reviewed]

beta

0 3 0

Marcin Kuzminski - 14 years ago 2012-04-18 02:07:22
marcin@python-works.com

#426 fixed mention extracting regex

3 files changed with 22 insertions and 11 deletions:

rhodecode/lib/markup_renderer.py

rhodecode/lib/utils2.py

rhodecode/tests/test_libs.py

0 comments (0 inline, 0 general)

rhodecode/lib/markup_renderer.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 """
     rhodecode.lib.markup_renderer
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Renderer for markup languages with ability to parse using rst or markdown
     :created_on: Oct 27, 2011
     :author: marcink
     :copyright: (C) 2011-2012 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import re
 import logging
 from rhodecode.lib.utils2 import safe_unicode
+from rhodecode.lib.utils2 import safe_unicode, MENTIONS_REGEX
 log = logging.getLogger(__name__)
 class MarkupRenderer(object):
     """
     Renders markup source (reStructuredText, markdown or plain text) to html.
     :meth:`render` picks the renderer from a file name (or an explicit
     renderer name); every renderer is also available as a classmethod.
     """
     # directive names that :meth:`rst` registers with a ``None`` handler
     RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
     # patterns are searched anywhere inside the given file name
     MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
     RST_PAT = re.compile(r're?st', re.IGNORECASE)
     PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)
     def __detect_renderer(self, source, filename=None):
         """
         runs detection of what renderer should be used for generating html
         from a markup language
         filename can be also explicitly a renderer name
         :param source: unused, kept for interface compatibility
         :param filename: file name or renderer name; ``None`` or an empty
             value selects the plain renderer
         :returns: the matching renderer classmethod
         """
         # the default filename=None would make findall() raise TypeError
         filename = filename or ''
         if MarkupRenderer.MARKDOWN_PAT.findall(filename):
             detected_renderer = 'markdown'
         elif MarkupRenderer.RST_PAT.findall(filename):
             detected_renderer = 'rst'
         elif MarkupRenderer.PLAIN_PAT.findall(filename):
             # a bare "readme" is rendered as rst
             detected_renderer = 'rst'
         else:
             detected_renderer = 'plain'
         return getattr(MarkupRenderer, detected_renderer)
     def render(self, source, filename=None):
         """
         Renders a given source using the renderer detected from filename.
         When nothing matches it will just do a simple html rendering,
         replacing new lines with <br/>
         :param source: markup text to render
         :param filename: file name or renderer name used for detection
         :returns: rendered html
         """
         renderer = self.__detect_renderer(source, filename)
         return renderer(source)
     @classmethod
     def plain(cls, source):
         """
         Renders source as plain text: http(s) urls are turned into links and
         new lines into ``<br />``; the result is prefixed with ``<br />``
         NOTE(review): source is not html-escaped here - confirm that callers
         escape untrusted input before rendering
         :param source: text to render
         :returns: rendered html
         """
         source = safe_unicode(source)
         # both halves are raw strings so that ``\(`` stays a regex escape
         url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                              r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
         def url_func(match_obj):
             url_full = match_obj.group(1)
             return '<a href="%(url)s">%(url)s</a>' % {'url': url_full}
         source = url_pat.sub(url_func, source)
         return '<br />' + source.replace("\n", '<br />')
     @classmethod
     def markdown(cls, source):
         """
         Renders source with the ``markdown`` package using the codehilite
         extension; falls back to :meth:`plain` when markdown is missing
         :param source: markdown text
         :returns: rendered html
         """
         source = safe_unicode(source)
         try:
             import markdown as __markdown
             # extensions are passed by keyword, positional form is not
             # accepted by newer markdown releases
             return __markdown.markdown(source, extensions=['codehilite'])
         except ImportError:
             log.warning('Install markdown to use this function')
             return cls.plain(source)
     @classmethod
     def rst(cls, source):
         """
         Renders source with docutils (html4css1 writer) and returns the html
         title followed by the document fragment; falls back to :meth:`plain`
         when docutils is missing
         :param source: reStructuredText text
         :returns: rendered html
         """
         source = safe_unicode(source)
         try:
             from docutils.core import publish_parts
             from docutils.parsers.rst import directives
             docutils_settings = dict([(alias, None) for alias in
                                 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
             docutils_settings.update({'input_encoding': 'unicode',
                                       'report_level': 4})
             # NOTE(review): this loop also registers the 'input_encoding'
             # and 'report_level' settings keys as directive names - confirm
             # whether only the disallowed directives were meant
             for k, v in docutils_settings.items():
                 directives.register_directive(k, v)
             parts = publish_parts(source=source,
                                   writer_name="html4css1",
                                   settings_overrides=docutils_settings)
             return parts['html_title'] + parts["fragment"]
         except ImportError:
             log.warning('Install docutils to use this function')
             return cls.plain(source)
     @classmethod
     def rst_with_mentions(cls, source):
         """
         Wraps every @mention matched by MENTIONS_REGEX in rst bold markup
         and renders the result with :meth:`rst`
         :param source: reStructuredText text that may contain @mentions
         :returns: rendered html
         """
         mention_pat = re.compile(MENTIONS_REGEX)
         def wrapp(match_obj):
             uname = match_obj.groups()[0]
             # the match includes surrounding whitespace, so pad the result
             return ' **@%(uname)s** ' % {'uname': uname}
         mention_hl = mention_pat.sub(wrapp, source).strip()
         return cls.rst(mention_hl)

rhodecode/lib/utils2.py

➞

Show inline comments

@@ @@ -203,203 +203,207 @@ def safe_str(unicode_, to_encoding=None) @@
         return unicode_
     if not to_encoding:
         import rhodecode
         DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
         to_encoding = DEFAULT_ENCODING
     try:
         return unicode_.encode(to_encoding)
     except UnicodeEncodeError:
         pass
     try:
         import chardet
         encoding = chardet.detect(unicode_)['encoding']
         print encoding
         if encoding is None:
             raise UnicodeEncodeError()
         return unicode_.encode(encoding)
     except (ImportError, UnicodeEncodeError):
         return unicode_.encode(to_encoding, 'replace')
     return safe_str
 def engine_from_config(configuration, prefix='sqlalchemy.', **kwargs):
     """
     Custom engine_from_config function that makes sure we use NullPool for
     file based sqlite databases. This prevents errors on sqlite. This only
     applies to sqlalchemy versions < 0.7.0
     For sqlalchemy >= 0.7.0 the engine is created as-is and, when
     ``configuration['debug']`` is true, cursor-execute listeners are
     attached that log the start of each query and its total time
     :param configuration: mapping with the sqlalchemy settings; the
         ``prefix + 'url'`` key is read for sqlalchemy < 0.7.0 and the
         ``'debug'`` key otherwise
     :param prefix: prefix of the sqlalchemy keys in configuration
     :param kwargs: extra keyword arguments passed to sqlalchemy's
         engine_from_config
     :returns: sqlalchemy engine
     """
     import sqlalchemy
     from sqlalchemy import engine_from_config as efc
     import logging
     # compare (major, minor) as a tuple; looking at the minor number alone
     # would treat e.g. 1.3 as older than 0.7
     sa_version = tuple(int(x) for x in sqlalchemy.__version__.split('.')[:2])
     if sa_version < (0, 7):
         # This solution should work for sqlalchemy < 0.7.0, and should use
         # proxy=TimerProxy() for execution time profiling
         from sqlalchemy.pool import NullPool
         url = configuration[prefix + 'url']
         if url.startswith('sqlite'):
             kwargs.update({'poolclass': NullPool})
         return efc(configuration, prefix, **kwargs)
     import time
     from sqlalchemy import event
     log = logging.getLogger('sqlalchemy.engine')
     # ANSI foreground colour code used for the timing log lines
     YELLOW = 33
     engine = efc(configuration, prefix, **kwargs)
     def color_sql(sql):
         COLOR_SEQ = "\033[1;%dm"
         COLOR_SQL = YELLOW
         normal = '\x1b[0m'
         return ''.join([COLOR_SEQ % COLOR_SQL, sql, normal])
     if configuration['debug']:
         #attach events only for debug configuration
         def before_cursor_execute(conn, cursor, statement,
                                 parameters, context, executemany):
             # remember the start time on the execution context
             context._query_start_time = time.time()
             log.info(color_sql(">>>>> STARTING QUERY >>>>>"))
         def after_cursor_execute(conn, cursor, statement,
                                 parameters, context, executemany):
             total = time.time() - context._query_start_time
             log.info(color_sql("<<<<< TOTAL TIME: %f <<<<<" % total))
         event.listen(engine, "before_cursor_execute",
                      before_cursor_execute)
         event.listen(engine, "after_cursor_execute",
                      after_cursor_execute)
     return engine
 def age(curdate):
     """
     Describes how long ago the given datetime was, in words.
     :param curdate: datetime object; a false value gives an empty string
     :rtype: unicode
     :returns: words describing the age followed by "ago", or "just now"
         when less than one second has passed
     """
     from datetime import datetime
     from webhelpers.date import time_ago_in_words
     # translation placeholder, returns its argument unchanged
     _ = lambda s: s
     if not curdate:
         return ''
     day_seconds = 3600 * 24
     # scales ordered from the coarsest to the finest
     agescales = [(_(u"year"), day_seconds * 365),
                  (_(u"month"), day_seconds * 30),
                  (_(u"day"), day_seconds),
                  (_(u"hour"), 3600),
                  (_(u"minute"), 60),
                  (_(u"second"), 1), ]
     delta = datetime.now() - curdate
     age_seconds = (delta.days * day_seconds) + delta.seconds
     finest = len(agescales) - 1
     for idx, scale in enumerate(agescales):
         if scale[1] > age_seconds:
             continue
         # granularity is one step finer than the first scale that fits,
         # but never finer than seconds
         granularity = agescales[min(idx + 1, finest)][0]
         words = time_ago_in_words(curdate, granularity)
         return '%s %s' % (words, _('ago'))
     return _(u'just now')
 def uri_filter(uri):
     """
     Splits an url into protocol, host and port, dropping any
     ``user:password@`` part.
     :param uri: url string, with or without ``http://`` / ``https://``
     :rtype: unicode
     :returns: the non-empty parts out of [protocol, host, port]; an empty
         string when uri is empty
     """
     if not uri:
         return ''
     scheme = ''
     for prefix in ('https://', 'http://'):
         if uri.startswith(prefix):
             scheme, uri = prefix, uri[len(prefix):]
             break
     # everything up to and including the first '@' is credentials
     at_pos = uri.find('@')
     location = uri[at_pos + 1:]
     # the first ':' separates host from port
     host, colon, port = location.partition(':')
     if not colon:
         port = None
     return filter(None, [scheme, host, port])
 def credentials_filter(uri):
     """
     Returns a url with removed credentials
     :param uri: url string, with or without ``http://`` / ``https://``
     :returns: protocol (when given) followed by host and, when present,
         ``:port``
     """
     parts = list(uri_filter(uri))
     proto = ''
     # uri_filter drops empty parts, so the protocol is only present when
     # the first part is one of the prefixes it recognizes; relying on a
     # fixed index would join "host" and "port" without ':' for urls that
     # have no protocol
     if parts and parts[0] in ('https://', 'http://'):
         proto = parts.pop(0)
     # what is left is the host and the optional port
     return proto + ':'.join(parts)
 def get_changeset_safe(repo, rev):
     """
     Safe version of get_changeset if this changeset doesn't exists for a
     repo it returns a Dummy one instead
     :param repo: BaseRepository instance
     :param rev: revision to fetch
     :returns: the changeset, or an EmptyChangeset created with
         ``requested_revision=rev`` when the lookup raises RepositoryError
     :raises Exception: when repo is not a BaseRepository instance
     """
     from rhodecode.lib.vcs.backends.base import BaseRepository
     from rhodecode.lib.vcs.exceptions import RepositoryError
     if not isinstance(repo, BaseRepository):
         # interpolate the type into the message instead of passing it as a
         # second exception argument
         raise Exception('You must pass an Repository '
                         'object as first argument got %s' % (type(repo),))
     try:
         cs = repo.get_changeset(rev)
     except RepositoryError:
         from rhodecode.lib.utils import EmptyChangeset
         cs = EmptyChangeset(requested_revision=rev)
     return cs
 # Matches "@name" at the start of the string or after whitespace; the name
 # starts with a letter or digit, is at least two characters long and must be
 # followed by one whitespace character. That trailing whitespace is part of
 # the match, so a mention at the very end of the string, or one separated
 # from the previous mention by a single whitespace character, is not found.
 MENTIONS_REGEX = r'(?:^@|\s@)([a-zA-Z0-9]{1}[a-zA-Z0-9\-\_\.]+)(?:\s{1})'
 def extract_mentioned_users(s):
     """
     Returns unique usernames from given string s that have @mention
     :param s: string to get mentions
     :returns: list of usernames sorted case-insensitively; names that
         differ only by case are ordered by their exact spelling so the
         result does not depend on set iteration order
     """
     usrs = set(re.findall(MENTIONS_REGEX, s))
     return sorted(usrs, key=lambda k: (k.lower(), k))

rhodecode/tests/test_libs.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 """
     rhodecode.tests.test_libs
     ~~~~~~~~~~~~~~~~~~~~~~~~~
     Package for testing various lib/helper functions in rhodecode
     :created_on: Jun 9, 2011
     :copyright: (C) 2011-2012 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import unittest
 from rhodecode.tests import *
 def _url_cases(proto):
     """
     Builds the url test cases for one protocol.
     :param proto: protocol name without ``://``
     :returns: list of (url, expected uri_filter result,
         expected credentials_filter result) tuples
     """
     return [
         ('%s://127.0.0.1' % proto, ['%s://' % proto, '127.0.0.1'],
          '%s://127.0.0.1' % proto),
         ('%s://marcink@127.0.0.1' % proto, ['%s://' % proto, '127.0.0.1'],
          '%s://127.0.0.1' % proto),
         ('%s://marcink:pass@127.0.0.1' % proto, ['%s://' % proto, '127.0.0.1'],
          '%s://127.0.0.1' % proto),
         ('%s://127.0.0.1:8080' % proto, ['%s://' % proto, '127.0.0.1', '8080'],
          '%s://127.0.0.1:8080' % proto),
         ('%s://domain.org' % proto, ['%s://' % proto, 'domain.org'],
          '%s://domain.org' % proto),
         ('%s://user:pass@domain.org:8080' % proto, ['%s://' % proto, 'domain.org',
                                                     '8080'],
          '%s://domain.org:8080' % proto),
     ]
 # the same cases are checked for http first and then https
 TEST_URLS = []
 for proto in ('http', 'https'):
     TEST_URLS += _url_cases(proto)
 class TestLibs(unittest.TestCase):
     """
     Tests for helper functions from rhodecode.lib.utils2
     """
     def test_uri_filter(self):
         # every url must split into the expected [protocol, host, port] parts
         from rhodecode.lib.utils2 import uri_filter
         for url in TEST_URLS:
             self.assertEqual(uri_filter(url[0]), url[1])
     def test_credentials_filter(self):
         # every url must come back without its user:password part
         from rhodecode.lib.utils2 import credentials_filter
         for url in TEST_URLS:
             self.assertEqual(credentials_filter(url[0]), url[2])
     def test_str2bool(self):
         from rhodecode.lib.utils2 import str2bool
         test_cases = [
             ('t', True),
             ('true', True),
             ('y', True),
             ('yes', True),
             ('on', True),
             ('1', True),
             ('Y', True),
             ('yeS', True),
             ('Y', True),
             ('TRUE', True),
             ('T', True),
             ('False', False),
             ('F', False),
             ('FALSE', False),
             ('0', False),
             ('-1', False),
             ('', False), ]
         for case in test_cases:
             self.assertEqual(str2bool(case[0]), case[1])
     def test_mention_extractor(self):
         from rhodecode.lib.utils2 import extract_mentioned_users
         sample = (
             "@first hi there @marcink here's my email marcin@email.com "
             "@lukaszb check @one_more22 it pls @ ttwelve @D[] @one@two@three "
             "@MARCIN    @maRCiN @2one_more22 @john please see this http://org.pl "
             "@marian.user just do it @marco-polo and next extract @marco_polo "
             "user.dot  hej ! not-needed maril@domain.org"
         )
         # "@2one_more22" is not expected: the whitespace before it is
         # consumed by the "@maRCiN" match of MENTIONS_REGEX
         s = sorted([
         'first', 'marcink', 'lukaszb', 'one_more22', 'MARCIN', 'maRCiN', 'john',
         'marian.user', 'marco-polo', 'marco_polo'
         ], key=lambda k: k.lower())
         self.assertEqual(s, extract_mentioned_users(sample))

0 comments (0 inline, 0 general)