kallithea Changeset - 36fe593dfe4b

Changeset - 36fe593dfe4b

Parent rev.

Child rev.

[Not reviewed]

beta

0 3 0

Marcin Kuzminski - 15 years ago 2011-03-18 19:39:48
marcin@python-works.com

simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function

3 files changed with 43 insertions and 35 deletions:

rhodecode/lib/__init__.py

rhodecode/lib/helpers.py

rhodecode/lib/indexers/daemon.py

0 comments (0 inline, 0 general)

rhodecode/lib/__init__.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 """
     rhodecode.lib.__init__
     ~~~~~~~~~~~~~~~~~~~~~~~
     Some simple helper functions
     :created_on: Jan 5, 2011
     :author: marcink
     :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; version 2
 # of the License or (at your option) any later version of the license.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 # MA  02110-1301, USA.
 def str2bool(s):
     """
     Convert a loosely typed flag (usually a config string) to a boolean.

     :param s: value to interpret; ``None`` is treated as ``False``
     :returns: ``True`` for true values and for the strings 't', 'true',
         'y', 'yes', 'on', '1' (case-insensitive, surrounding whitespace
         ignored); ``False`` for everything else
     """
     if s is None:
         return False
     # values equal to True/False (this includes 1 and 0) map straight to
     # the corresponding bool, so callers always get a real boolean back
     if s in (True, False):
         return bool(s)
     s = str(s).strip().lower()
     return s in ('t', 'true', 'y', 'yes', 'on', '1')
 def generate_api_key(username, salt=None):
     """
     Build an API key for the given user as a SHA1 hex digest.

     :param username: username as string
     :param salt: value appended to the username before hashing; when it
         is None a name produced by ``tempfile._RandomNameSequence`` is used
     :returns: hex digest of ``username + salt``
     """
     from tempfile import _RandomNameSequence
     import hashlib
     if salt is None:
         random_names = _RandomNameSequence()
         salt = random_names.next()
     hasher = hashlib.sha1()
     hasher.update(username + salt)
     return hasher.hexdigest()
 def safe_unicode(str):
     """
     safe unicode function. In case of UnicodeDecode error we try to return
     unicode with errors replace, if this fails we return unicode with
     string_escape decoding

     :param str: value to convert (the name hides the builtin ``str`` and is
         kept only so existing callers keep working)
     :returns: unicode representation of the value
     """
     try:
         u_str = unicode(str)
     except UnicodeDecodeError:
         try:
             u_str = unicode(str, 'utf-8', 'replace')
         except UnicodeDecodeError:
             # in case we have a decode error just represent as byte string.
             # The builtin ``str`` is hidden by the parameter, so calling
             # ``str(str)`` would call the argument itself; %-formatting
             # gives the same byte string without needing the builtin.
             u_str = unicode(('%s' % (str,)).encode('string_escape'))
     return u_str

rhodecode/lib/helpers.py

➞

Show inline comments

 """Helper functions
 Consists of functions to typically be used within templates, but also
 available to Controllers. This module is available to both as 'h'.
 """
 import random
 import hashlib
 import StringIO
 import urllib
 from datetime import datetime
 from pygments.formatters import HtmlFormatter
 from pygments import highlight as code_highlight
 from pylons import url, request, config
 from pylons.i18n.translation import _, ungettext
 from webhelpers.html import literal, HTML, escape
 from webhelpers.html.tools import *
 from webhelpers.html.builder import make_tag
 from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
     end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
     link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
     password, textarea, title, ul, xml_declaration, radio
 from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
     mail_to, strip_links, strip_tags, tag_re
 from webhelpers.number import format_byte_size, format_bit_size
 from webhelpers.pylonslib import Flash as _Flash
 from webhelpers.pylonslib.secure_form import secure_form
 from webhelpers.text import chop_at, collapse, convert_accented_entities, \
     convert_misc_entities, lchop, plural, rchop, remove_formatting, \
     replace_whitespace, urlify, truncate, wrap_paragraphs
 from webhelpers.date import time_ago_in_words
 from webhelpers.paginate import Page
 from webhelpers.html.tags import _set_input_attrs, _set_id_attr, \
     convert_boolean_attrs, NotGiven
 from vcs.utils.annotate import annotate_highlight
 from rhodecode.lib.utils import repo_name_slug
 from rhodecode.lib import str2bool
 def _reset(name, value=None, id=NotGiven, type="reset", **attrs):
     """Reset button

     :param name: name of the input
     :param value: value of the input
     :param id: id attribute; handed to ``_set_id_attr`` together with
         ``name``
     :param type: input type, "reset" by default
     :param attrs: additional html attributes; ``disabled`` is treated as a
         boolean attribute
     :returns: result of ``HTML.input`` called with the collected attributes
     """
     _set_input_attrs(attrs, type, name, value)
     _set_id_attr(attrs, id, name)
     convert_boolean_attrs(attrs, ["disabled"])
     return HTML.input(**attrs)
 reset = _reset
 def get_token():
     """Return the current authentication token, creating one if one doesn't
     already exist.

     :returns: the token stored in the pylons session under
         ``_authentication_token``
     """
     token_key = "_authentication_token"
     from pylons import session
     if token_key not in session:
         # this is an authentication token, so take the random bits from the
         # operating system entropy source rather than the predictable
         # default generator; fall back only where no such source exists
         try:
             random_bits = random.SystemRandom().getrandbits(128)
         except NotImplementedError:
             random_bits = random.getrandbits(128)
         session[token_key] = hashlib.sha1(str(random_bits)).hexdigest()
         # persist the new token when the session object supports it
         if hasattr(session, 'save'):
             session.save()
     return session[token_key]
 class _GetError(object):
     """Get error from form_errors, and represent it as span wrapped error
     message
     :param field_name: field to fetch errors for
     :param form_errors: form errors dict
     """
     def __call__(self, field_name, form_errors):
         tmpl = """<span class="error_msg">%s</span>"""
         if form_errors and form_errors.has_key(field_name):
             return literal(tmpl % form_errors.get(field_name))
 get_error = _GetError()
 class _ToolTip(object):
     """Tooltip helper: calling the instance formats a tooltip text,
     ``activate`` returns the javascript that turns marked elements into
     YUI tooltips
     """
     def __call__(self, tooltip_title, trim_at=50):
         """Special function just to wrap our text into nice formatted
         autowrapped text

         :param tooltip_title: text of the tooltip, html-escaped before
             wrapping
         :param trim_at: width passed to ``wrap_paragraphs``
         :returns: escaped, wrapped text with newlines replaced by ``<br/>``
         """
         return wrap_paragraphs(escape(tooltip_title), trim_at)\
                        .replace('\n', '<br/>')
     def activate(self):
         """Adds tooltip mechanism to the given Html all tooltips have to have
         set class `tooltip` and set attribute `tooltip_title`.
         Then a tooltip will be generated based on that. All with yui js tooltip

         :returns: the javascript source as ``literal``
         """
         js = '''
         YAHOO.util.Event.onDOMReady(function(){
             function toolTipsId(){
                 var ids = [];
                 var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');
                 for (var i = 0; i < tts.length; i++) {
                     //if element doesn't not have and id autogenerate one for tooltip
                     if (!tts[i].id){
                         tts[i].id='tt'+i*100;
                     }
                     ids.push(tts[i].id);
                 }
                 return ids
             };
             var myToolTips = new YAHOO.widget.Tooltip("tooltip", {
                 context: toolTipsId(),
                 monitorresize:false,
                 xyoffset :[0,0],
                 autodismissdelay:300000,
                 hidedelay:5,
                 showdelay:20,
             });
             // Set the text for the tooltip just before we display it. Lazy method
             myToolTips.contextTriggerEvent.subscribe(
                  function(type, args) {
                         var context = args[0];
                         //positioning of tooltip
                         var tt_w = this.element.clientWidth;//tooltip width
                         var tt_h = this.element.clientHeight;//tooltip height
                         var context_w = context.offsetWidth;
                         var context_h = context.offsetHeight;
                         var pos_x = YAHOO.util.Dom.getX(context);
                         var pos_y = YAHOO.util.Dom.getY(context);
                         var display_strategy = 'right';
                         var xy_pos = [0,0];
                         switch (display_strategy){
                             case 'top':
                                 var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                 var cur_y = (pos_y-tt_h-4);
                                 xy_pos = [cur_x,cur_y];
                                 break;
                             case 'bottom':
                                 var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                 var cur_y = pos_y+context_h+4;
                                 xy_pos = [cur_x,cur_y];
                                 break;
                             case 'left':
                                 var cur_x = (pos_x-tt_w-4);
                                 var cur_y = pos_y-((tt_h/2)-context_h/2);
                                 xy_pos = [cur_x,cur_y];
                                 break;
                             case 'right':
                                 var cur_x = (pos_x+context_w+4);
                                 var cur_y = pos_y-((tt_h/2)-context_h/2);
                                 xy_pos = [cur_x,cur_y];
                                 break;
                              default:
                                 var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                 var cur_y = pos_y-tt_h-4;
                                 xy_pos = [cur_x,cur_y];
                                 break;
                         }
                         this.cfg.setProperty("xy",xy_pos);
                   });
             //Mouse out
             myToolTips.contextMouseOutEvent.subscribe(
                 function(type, args) {
                     var context = args[0];
                 });
         });
         '''
         return literal(js)
 tooltip = _ToolTip()
 class _FilesBreadCrumbs(object):
     """Callable that renders a path inside a repository as a trail of
     links, one per path segment
     """
     def __call__(self, repo_name, rev, paths):
         """Build the breadcrumb html

         :param repo_name: repository name, used as the first crumb
         :param rev: revision the links point to
         :param paths: '/' separated path; byte strings are decoded as
             utf-8 with replacement
         :returns: ``literal`` of the links joined with '/'
         """
         if isinstance(paths, str):
             paths = paths.decode('utf-8', 'replace')
         def files_link(label, f_path):
             # link to the files view of ``f_path`` at the given revision
             return link_to(label, url('files_home',
                                       repo_name=repo_name,
                                       revision=rev, f_path=f_path))
         crumbs = [files_link(repo_name, '')]
         segments = paths.split('/')
         for idx, segment in enumerate(segments):
             # empty segments (leading, trailing or doubled '/') get no crumb
             if segment == '':
                 continue
             crumbs.append(files_link(segment, '/'.join(segments[:idx + 1])))
         return literal('/'.join(crumbs))
 files_breadcrumbs = _FilesBreadCrumbs()
 class CodeHtmlFormatter(HtmlFormatter):
     """My code Html Formatter for source codes

     Subclass of pygments ``HtmlFormatter``: every source line is wrapped in
     a ``<div id="L<n>">`` and the line number column is built by
     ``_wrap_tablelinenos``.
     """
     def wrap(self, source, outfile):
         # wrapping order, innermost first: per-line divs, <pre>, outer <div>;
         # ``outfile`` is not used here
         return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
     def _wrap_code(self, source):
         """Wrap each item of ``source`` in a div with id ``L<number>``

         ``source`` yields ``(i, t)`` pairs; ``i`` is passed through unchanged.
         """
         for cnt, it in enumerate(source):
             i, t = it
             # ids are 1-based: L1, L2, ...
             t = '<div id="L%s">%s</div>' % (cnt + 1, t)
             yield i, t
     def _wrap_tablelinenos(self, inner):
         """Yield the html table holding the line numbers next to the code

         :param inner: iterable of ``(t, line)`` pairs; lines with a true
             ``t`` are counted as source lines
         """
         # buffer the code so the number of lines is known before the line
         # number column is written
         dummyoutfile = StringIO.StringIO()
         lncount = 0
         for t, line in inner:
             if t:
                 lncount += 1
             dummyoutfile.write(line)
         fl = self.linenostart
         # width of the widest line number, used to right-align with %*d
         mw = len(str(lncount + fl - 1))
         sp = self.linenospecial
         st = self.linenostep
         la = self.lineanchors
         aln = self.anchorlinenos
         nocls = self.noclasses
         if sp:
             # every ``sp``-th number gets the "special" css class
             lines = []
             for i in range(fl, fl + lncount):
                 if i % st == 0:
                     if i % sp == 0:
                         if aln:
                             lines.append('<a href="#%s%d" class="special">%*d</a>' %
                                          (la, i, mw, i))
                         else:
                             lines.append('<span class="special">%*d</span>' % (mw, i))
                     else:
                         if aln:
                             lines.append('<a href="#%s%d">%*d</a>' % (la, i, mw, i))
                         else:
                             lines.append('%*d' % (mw, i))
                 else:
                     # numbers that are not a multiple of the step stay blank
                     lines.append('')
             ls = '\n'.join(lines)
         else:
             lines = []
             for i in range(fl, fl + lncount):
                 if i % st == 0:
                     if aln:
                         lines.append('<a href="#%s%d">%*d</a>' % (la, i, mw, i))
                     else:
                         lines.append('%*d' % (mw, i))
                 else:
                     lines.append('')
             ls = '\n'.join(lines)
         # in case you wonder about the seemingly redundant <div> here: since the
         # content in the other cell also is wrapped in a div, some browsers in
         # some configurations seem to mess up the formatting...
         if nocls:
             # no css classes in use: style the line number cell inline
             yield 0, ('<table class="%stable">' % self.cssclass +
                       '<tr><td><div class="linenodiv" '
                       'style="background-color: #f0f0f0; padding-right: 10px">'
                       '<pre style="line-height: 125%">' +
                       ls + '</pre></div></td><td class="code">')
         else:
             yield 0, ('<table class="%stable">' % self.cssclass +
                       '<tr><td class="linenos"><div class="linenodiv"><pre>' +
                       ls + '</pre></div></td><td class="code">')
         # second cell: the buffered code, then close the table
         yield 0, dummyoutfile.getvalue()
         yield 0, '</td></tr></table>'
 def pygmentize(filenode, **kwargs):
     """Highlight the content of a file node with pygments

     :param filenode: node providing ``content`` and ``lexer``
     :param kwargs: options passed to ``CodeHtmlFormatter``
     :returns: highlighted html as ``literal``
     """
     content = filenode.content
     lexer = filenode.lexer
     formatter = CodeHtmlFormatter(**kwargs)
     highlighted = code_highlight(content, lexer, formatter)
     return literal(highlighted)
 def pygmentize_annotation(filenode, **kwargs):
     """pygmentize function for annotation

     :param filenode: file node to annotate
     :param kwargs: extra options passed through to ``annotate_highlight``
     :returns: annotated html as ``literal``
     """
     # raw_id -> color, so one changeset keeps the same color on every line
     color_dict = {}
     def gen_color(n=10000):
         """generator for getting n of evenly distributed colors using
         hsv color and golden ratio. It always return same order of colors

         :returns: RGB tuple
         """
         import colorsys
         golden_ratio = 0.618033988749895
         h = 0.22717784590367374
         for c in xrange(n):
             h += golden_ratio
             h %= 1
             HSV_tuple = [h, 0.95, 0.95]
             RGB_tuple = colorsys.hsv_to_rgb(*HSV_tuple)
             yield [str(int(x * 256)) for x in RGB_tuple]
     cgenerator = gen_color()
     def get_color_string(cs):
         # hand out the next color the first time a changeset id is seen
         if cs in color_dict:
             col = color_dict[cs]
         else:
             col = color_dict[cs] = cgenerator.next()
         return "color: rgb(%s)! important;" % (', '.join(col))
     def url_func(changeset):
         # tooltip markup; the stray closing </b> after the date is removed
         tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>" + \
         " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"
         tooltip_html = tooltip_html % (changeset.author,
                                        changeset.date,
                                        tooltip(changeset.message))
         lnk_format = '%5s:%s' % ('r%s' % changeset.revision,
                                  short_id(changeset.raw_id))
         uri = link_to(
                 lnk_format,
                 url('changeset_home', repo_name=changeset.repository.name,
                     revision=changeset.raw_id),
                 style=get_color_string(changeset.raw_id),
                 class_='tooltip',
                 title=tooltip_html
               )
         uri += '\n'
         return uri
     return literal(annotate_highlight(filenode, url_func, **kwargs))
 def get_changeset_safe(repo, rev):
     """Return the changeset for ``rev``, or an ``EmptyChangeset`` when the
     repository raises ``RepositoryError``

     :param repo: instance of vcs ``BaseRepository``
     :param rev: revision to fetch
     :raises Exception: when ``repo`` is not a ``BaseRepository``
     """
     from vcs.backends.base import BaseRepository
     from vcs.exceptions import RepositoryError
     if not isinstance(repo, BaseRepository):
         # format with % - passing the type as a second argument left the
         # placeholder unfilled in the message
         raise Exception('You must pass an Repository '
                         'object as first argument got %s' % (type(repo),))
     try:
         cs = repo.get_changeset(rev)
     except RepositoryError:
         from rhodecode.lib.utils import EmptyChangeset
         cs = EmptyChangeset()
     return cs
 def is_following_repo(repo_name, user_id):
     """Ask ``ScmModel`` whether the user follows the repository

     :param repo_name: name of the repository
     :param user_id: id of the user
     """
     from rhodecode.model.scm import ScmModel
     scm_model = ScmModel()
     return scm_model.is_following_repo(repo_name, user_id)
 flash = _Flash()
 #==============================================================================
 # MERCURIAL FILTERS available via h.
 #==============================================================================
 from mercurial import util
 from mercurial.templatefilters import person as _person
 def _age(curdate):
     """turns a datetime into an age string."""
     if not curdate:
         return ''
     from datetime import timedelta, datetime
     agescales = [("year", 3600 * 24 * 365),
                  ("month", 3600 * 24 * 30),
                  ("day", 3600 * 24),
                  ("hour", 3600),
                  ("minute", 60),
                  ("second", 1), ]
     age = datetime.now() - curdate
     age_seconds = (age.days * agescales[2][1]) + age.seconds
     pos = 1
     for scale in agescales:
         if scale[1] <= age_seconds:
             if pos == 6:pos = 5
             return time_ago_in_words(curdate, agescales[pos][0]) + ' ' + _('ago')
         pos += 1
     return _('just now')
 # age string of a datetime, see _age
 age = lambda  x:_age(x)
 # value with its capitalize() method applied
 capitalize = lambda x: x.capitalize()
 # mercurial's util.email, exposed unchanged
 email = util.email
 # util.email(x), or None when util.email returns its input unchanged
 email_or_none = lambda x: util.email(x) if util.email(x) != x else None
 # mercurial's ``person`` template filter
 person = lambda x: _person(x)
 # first 12 characters of the given id
 short_id = lambda x: x[:12]
 def bool2icon(value):
     """Represent True/False as a small html image of an accept/cancel icon

     :param value: bool value
     :returns: ``img`` tag for ``True`` or ``False``; any other value is
         returned unchanged
     """
     # only the two bool singletons get an icon
     if value is not True and value is not False:
         return value
     if value is True:
         icon_src = url("/images/icons/accept.png")
         alt_text = _('True')
     else:
         icon_src = url("/images/icons/cancel.png")
         alt_text = _('False')
     return HTML.tag('img', src=icon_src, alt=alt_text)
 def action_parser(user_log, feed=False):
     """This helper will action_map the specified string action into translated
     fancy names with icons and links

     :param user_log: user log instance
     :param feed: use output for feeds (no html and fancy icons)
     :returns: two element list: the action label as ``literal`` and a
         callable that builds the html for the action parameters
     """
     action = user_log.action
     action_params = ' '
     # split on the first colon only, so parameters containing ':' cannot
     # break the unpacking
     x = action.split(':', 1)
     if len(x) > 1:
         action, action_params = x
     def get_cs_links():
         revs_limit = 5 #display this amount always
         revs_top_limit = 50 #show upto this amount of changesets hidden
         revs = action_params.split(',')
         repo_name = user_log.repository.repo_name
         from rhodecode.model.scm import ScmModel
         repo, dbrepo = ScmModel().get(repo_name, retval='repo',
                                       invalidation_list=[])
         message = lambda rev: get_changeset_safe(repo, rev).message
         def rev_link(rev):
             # every revision link carries the escaped commit message as
             # its tooltip
             return link_to(rev,
                            url('changeset_home',
                                repo_name=repo_name,
                                revision=rev),
                            title=tooltip(message(rev)),
                            class_='tooltip')
         cs_links = " " + ', '.join([rev_link(rev)
                                     for rev in revs[:revs_limit]])
         # translate the template first and fill in the revisions after,
         # otherwise the catalog lookup is done on the formatted string
         compare_view = (' <div class="compare_view tooltip" title="%s">'
                         '<a href="%s">%s</a> '
                         '</div>' % (_('Show all combined changesets %s->%s')
                                     % (revs[0], revs[-1]),
                                     url('changeset_home', repo_name=repo_name,
                                         revision='%s...%s' % (revs[0], revs[-1])
                                     ),
                                     _('compare view'))
                         )
         if len(revs) > revs_limit:
             uniq_id = revs[0]
             if not feed:
                 # "and N more revisions" toggle, then the hidden extra links
                 html_tmpl = ('<span> %s '
                 '<a class="show_more" id="_%s" href="#more">%s</a> '
                 '%s</span>')
                 cs_links += html_tmpl % (_('and'), uniq_id, _('%s more') \
                                         % (len(revs) - revs_limit),
                                         _('revisions'))
                 html_tmpl = '<span id="%s" style="display:none"> %s </span>'
             else:
                 html_tmpl = '<span id="%s"> %s </span>'
             cs_links += html_tmpl % (uniq_id, ', '.join(
                 [rev_link(rev) for rev in revs[revs_limit:revs_top_limit]]))
         if len(revs) > 1:
             cs_links += compare_view
         return cs_links
     def get_fork_name():
         repo_name = action_params
         return _('fork name ') + str(link_to(action_params, url('summary_home',
                                           repo_name=repo_name,)))
     # action -> (label, callable building the parameters html or None)
     action_map = {'user_deleted_repo':(_('[deleted] repository'), None),
            'user_created_repo':(_('[created] repository'), None),
            'user_forked_repo':(_('[forked] repository'), get_fork_name),
            'user_updated_repo':(_('[updated] repository'), None),
            'admin_deleted_repo':(_('[delete] repository'), None),
            'admin_created_repo':(_('[created] repository'), None),
            'admin_forked_repo':(_('[forked] repository'), None),
            'admin_updated_repo':(_('[updated] repository'), None),
            'push':(_('[pushed] into'), get_cs_links),
            'push_remote':(_('[pulled from remote] into'), get_cs_links),
            'pull':(_('[pulled] from'), None),
            'started_following_repo':(_('[started following] repository'), None),
            'stopped_following_repo':(_('[stopped following] repository'), None),
            }
     # unknown actions are shown under their own name; the default has the
     # same (label, callable) shape as the mapped entries
     action_str = action_map.get(action, (action, None))
     if feed:
         action = action_str[0].replace('[', '').replace(']', '')
     else:
         action = action_str[0].replace('[', '<span class="journal_highlight">')\
                    .replace(']', '</span>')
     action_params_func = lambda :""
     if callable(action_str[1]):
         action_params_func = action_str[1]
     return [literal(action), action_params_func]
 def action_parser_icon(user_log):
     """Return the icon ``img`` tag for the action of a user log entry

     :param user_log: user log instance; the part of ``action`` before the
         first ':' selects the icon
     :returns: ``literal`` img tag; unknown actions use the action name
         itself as the file name
     """
     # only the action name matters here, parameters after ':' are dropped
     action = user_log.action.split(':', 1)[0]
     tmpl = """<img src="%s%s" alt="%s"/>"""
     icon_map = {'user_deleted_repo':'database_delete.png',
            'user_created_repo':'database_add.png',
            'user_forked_repo':'arrow_divide.png',
            'user_updated_repo':'database_edit.png',
            'admin_deleted_repo':'database_delete.png',
            'admin_created_repo':'database_add.png',
            'admin_forked_repo':'arrow_divide.png',
            'admin_updated_repo':'database_edit.png',
            'push':'script_add.png',
            'push_remote':'connect.png',
            'pull':'down_16.png',
            'started_following_repo':'heart_add.png',
            'stopped_following_repo':'heart_delete.png',
            }
     return literal(tmpl % ((url('/images/icons/')),
                            icon_map.get(action, action), action))
 #==============================================================================
 # PERMS
 #==============================================================================
 from rhodecode.lib.auth import HasPermissionAny, HasPermissionAll, \
 HasRepoPermissionAny, HasRepoPermissionAll
 #==============================================================================
 # GRAVATAR URL
 #==============================================================================
 def gravatar_url(email_address, size=30):
     """Return the avatar url for an email address

     :param email_address: address to look up
     :param size: avatar size, 30 by default
     :returns: gravatar url (https base when the request scheme is https),
         or the local ``/images/user<size>.png`` when ``use_gravatar`` is
         off in the application config
     """
     use_gravatar = str2bool(config['app_conf'].get('use_gravatar'))
     if not use_gravatar:
         return "/images/user%s.png" % size
     if request.environ.get('wsgi.url_scheme') == 'https':
         avatar_base = "https://secure.gravatar.com/avatar/"
     else:
         avatar_base = "http://www.gravatar.com/avatar/"
     if isinstance(email_address, unicode):
         #hashlib crashes on unicode items
         email_address = email_address.encode('utf8', 'replace')
     # construct the url: md5 of the lowercased address plus the query
     email_hash = hashlib.md5(email_address.lower()).hexdigest()
     query = urllib.urlencode({'d':'identicon', 's':str(size)})
     return avatar_base + email_hash + "?" + query
 #==============================================================================
 # REPO PAGER
 #==============================================================================
 class RepoPage(Page):
     def __init__(self, collection, page=1, items_per_page=20,
         item_count=None, url=None, branch_name=None, **kwargs):
         """Create a "RepoPage" instance. special pager for paging
         repository

         :param collection: object providing ``get_changesets`` (and
             ``len()`` when ``item_count`` is not given)
         :param page: number of the requested page, 1-based; values that
             cannot be converted to int fall back to 1
         :param items_per_page: maximum number of items on one page
         :param item_count: total number of items, computed with ``len()``
             when None
         :param url: stored as the url generator of the pager
         :param branch_name: passed on to ``get_changesets``
         :param kwargs: kept on the instance for the pager() method
         """
         self._url_generator = url
         # Save the kwargs class-wide so they can be used in the pager() method
         self.kwargs = kwargs
         # Save a reference to the collection
         self.original_collection = collection
         self.collection = collection
         # The self.page is the number of the current page.
         # The first page has the number 1!
         try:
             self.page = int(page) # make it int() if we get it as a string
         except (ValueError, TypeError):
             self.page = 1
         self.items_per_page = items_per_page
         # Unless the user tells us how many items the collections has
         # we calculate that ourselves.
         if item_count is not None:
             self.item_count = item_count
         else:
             self.item_count = len(self.collection)
         # Compute the number of the first and last available page
         if self.item_count > 0:
             self.first_page = 1
             self.page_count = ((self.item_count - 1) / self.items_per_page) + 1
             self.last_page = self.first_page + self.page_count - 1
             # Make sure that the requested page number is the range of valid pages
             if self.page > self.last_page:
                 self.page = self.last_page
             elif self.page < self.first_page:
                 self.page = self.first_page
             # Note: the number of items on this page can be less than
             #       items_per_page if the last page is not full
             # Item indexes are counted from the end of the collection:
             # page 1 covers the highest indexes
             self.first_item = max(0, (self.item_count) - (self.page * items_per_page))
             self.last_item = ((self.item_count - 1) - items_per_page * (self.page - 1))
             iterator = self.collection.get_changesets(start=self.first_item,
                                                       end=self.last_item,
                                                       reverse=True,
                                                       branch_name=branch_name)
             self.items = list(iterator)
             # Links to previous and next page
             if self.page > self.first_page:
                 self.previous_page = self.page - 1
             else:
                 self.previous_page = None
             if self.page < self.last_page:
                 self.next_page = self.page + 1
             else:
                 self.next_page = None
         # No items available
         else:
             self.first_page = None
             self.page_count = 0
             self.last_page = None
             self.first_item = None
             self.last_item = None
             self.previous_page = None
             self.next_page = None
             self.items = []
         # This is a subclass of the 'list' type. Initialise the list now.
         list.__init__(self, self.items)
 def safe_unicode(str):
     """safe unicode function. In case of UnicodeDecode error we try to return
     unicode with errors replace, if this fails we return unicode with
     string_escape decoding

     :param str: value to convert (the name hides the builtin ``str`` and is
         kept only so existing callers keep working)
     :returns: unicode representation of the value
     """
     try:
         u_str = unicode(str)
     except UnicodeDecodeError:
         try:
             u_str = unicode(str, 'utf-8', 'replace')
         except UnicodeDecodeError:
             # in case we have a decode error just represent as byte string.
             # The builtin ``str`` is hidden by the parameter, so calling
             # ``str(str)`` would call the argument itself; %-formatting
             # gives the same byte string without needing the builtin.
             u_str = unicode(('%s' % (str,)).encode('string_escape'))
     return u_str
 def changed_tooltip(nodes):
     """Build the tooltip text listing the paths of changed nodes

     :param nodes: sequence of nodes with a byte string ``path``
     :returns: ``literal`` with up to 30 paths separated by ``<br/>`` and an
         "and N more" suffix when there are more; for an empty sequence the
         translated 'No Files' text
     """
     if not nodes:
         return ': ' + _('No Files')
     hidden = len(nodes) - 30
     suffix = ''
     if hidden > 0:
         suffix = '<br/>' + _(' and %s more') % hidden
     shown = [node.path.decode('utf-8', 'replace') for node in nodes[:30]]
     return literal(': <br/> ' + '<br/> '.join(shown) + suffix)

rhodecode/lib/indexers/daemon.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 """
     rhodecode.lib.indexers.daemon
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     A daemon that reads from the task table and runs tasks
     :created_on: Jan 26, 2010
     :author: marcink
     :copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
     :license: GPLv3, see COPYING for more details.
 """
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; version 2
 # of the License or (at your option) any later version of the license.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 # MA  02110-1301, USA.
 import os
 import sys
-import os
+import logging
 import traceback
 from shutil import rmtree
 from time import mktime
 from os.path import dirname as dn
 from os.path import join as jn
 #to get the rhodecode import
 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 sys.path.append(project_path)
 from rhodecode.model.scm import ScmModel
 from rhodecode.lib.helpers import safe_unicode
 from whoosh.index import create_in, open_dir
 from shutil import rmtree
 from rhodecode.lib import safe_unicode
 from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
 from time import mktime
 from vcs.exceptions import ChangesetError, RepositoryError
 import logging
 from whoosh.index import create_in, open_dir
 # dedicated logger for the whoosh indexer
 log = logging.getLogger('whooshIndexer')
 # handle everything from DEBUG up and do not pass records on to the
 # handlers of ancestor loggers
 log.setLevel(logging.DEBUG)
 log.propagate = False
 # create console handler and set level to debug
 ch = logging.StreamHandler()
 ch.setLevel(logging.DEBUG)
 # create formatter: timestamp, logger name, level and message
 formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 # add formatter to ch
 ch.setFormatter(formatter)
 # add ch to logger
 log.addHandler(ch)
 class WhooshIndexingDaemon(object):
     """
     Daemon for atomic jobs
     """
     def __init__(self, indexname='HG_INDEX', index_location=None,
                  repo_location=None, sa=None, repo_list=None):
         """Set up the daemon and make sure the index directory exists

         :param indexname: name of the index
         :param index_location: directory of the index, required
         :param repo_location: directory scanned for repositories, required
         :param sa: passed to ``ScmModel``
         :param repo_list: optional collection of repository names; when
             given only those repositories are kept
         :raises Exception: when index or repositories location is missing
         """
         self.indexname = indexname
         self.index_location = index_location
         if not index_location:
             raise Exception('You have to provide index location')
         self.repo_location = repo_location
         if not repo_location:
             raise Exception('You have to provide repositories location')
         scanned = ScmModel(sa).repo_scan(self.repo_location)
         self.repo_paths = scanned
         if repo_list:
             # keep only the requested repositories, keyed by repo.name
             self.repo_paths = dict(
                 (repo.name, repo)
                 for repo_name, repo in scanned.items()
                 if repo_name in repo_list)
         # a missing index directory means there is nothing to update
         # incrementally, so a full build is flagged
         index_missing = not os.path.isdir(self.index_location)
         self.initial = False
         if index_missing:
             os.makedirs(self.index_location)
             log.info('Cannot run incremental index since it does not'
                      ' yet exist running full build')
             self.initial = True
     def get_paths(self, repo):
         """recursive walk in root dir and return a set of all path in that dir
         based on repository walk function
         """
         index_paths_ = set()
         try:
             tip = repo.get_changeset('tip')
             for topnode, dirs, files in tip.walk('/'):
                 for f in files:
                     index_paths_.add(jn(repo.path, f.path))
                 for dir in dirs:
                     for f in files:
                         index_paths_.add(jn(repo.path, f.path))
         except RepositoryError, e:
             log.debug(traceback.format_exc())
             pass
         return index_paths_
     def get_node(self, repo, path):
         n_path = path[len(repo.path) + 1:]
         node = repo.get_changeset().get_node(n_path)
         return node
     def get_node_mtime(self, node):
         return mktime(node.last_changeset.date.timetuple())
     def add_doc(self, writer, path, repo):
         """Adding doc to writer this function itself fetches data from
         the instance of vcs backend"""
         node = self.get_node(repo, path)
         #we just index the content of chosen files, and skip binary files
         if node.extension in INDEX_EXTENSIONS and not node.is_binary:
             u_content = node.content
             if not isinstance(u_content, unicode):
                 log.warning('  >> %s Could not get this content as unicode '
                           'replacing with empty content', path)
                 u_content = u''
             else:
                 log.debug('    >> %s [WITH CONTENT]' % path)
         else:
             log.debug('    >> %s' % path)
             #just index file name without it's content
             u_content = u''
         writer.add_document(owner=unicode(repo.contact),
                         repository=safe_unicode(repo.name),
                         path=safe_unicode(path),
                         content=u_content,
                         modtime=self.get_node_mtime(node),
                         extension=node.extension)
     def build_index(self):
         if os.path.exists(self.index_location):
             log.debug('removing previous index')
             rmtree(self.index_location)
         if not os.path.exists(self.index_location):
             os.mkdir(self.index_location)
         idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
         writer = idx.writer()
         for repo in self.repo_paths.values():
             log.debug('building index @ %s' % repo.path)
             for idx_path in self.get_paths(repo):
                 self.add_doc(writer, idx_path, repo)
         log.debug('>> COMMITING CHANGES <<')
         writer.commit(merge=True)
         log.debug('>>> FINISHED BUILDING INDEX <<<')
     def update_index(self):
         log.debug('STARTING INCREMENTAL INDEXING UPDATE')
         idx = open_dir(self.index_location, indexname=self.indexname)
         # The set of all paths in the index
         indexed_paths = set()
         # The set of all paths we need to re-index
         to_index = set()
         reader = idx.reader()
         writer = idx.writer()
         # Loop over the stored fields in the index
         for fields in reader.all_stored_fields():
             indexed_path = fields['path']
             indexed_paths.add(indexed_path)
             repo = self.repo_paths[fields['repository']]
             try:
                 node = self.get_node(repo, indexed_path)
             except ChangesetError:
                 # This file was deleted since it was indexed
                 log.debug('removing from index %s' % indexed_path)
                 writer.delete_by_term('path', indexed_path)
             else:
                 # Check if this file was changed since it was indexed
                 indexed_time = fields['modtime']
                 mtime = self.get_node_mtime(node)
                 if mtime > indexed_time:
                     # The file has changed, delete it and add it to the list of
                     # files to reindex
                     log.debug('adding to reindex list %s' % indexed_path)
                     writer.delete_by_term('path', indexed_path)
                     to_index.add(indexed_path)
         # Loop over the files in the filesystem
         # Assume we have a function that gathers the filenames of the
         # documents to be indexed
         for repo in self.repo_paths.values():
             for path in self.get_paths(repo):
                 if path in to_index or path not in indexed_paths:
                     # This is either a file that's changed, or a new file
                     # that wasn't indexed before. So index it!
                     self.add_doc(writer, path, repo)
                     log.debug('re indexing %s' % path)
         log.debug('>> COMMITING CHANGES <<')
         writer.commit(merge=True)
         log.debug('>>> FINISHED REBUILDING INDEX <<<')
     def run(self, full_index=False):
         """Run daemon"""
         if full_index or self.initial:
             self.build_index()
         else:
             self.update_index()

0 comments (0 inline, 0 general)