kallithea Changeset - e5157e2a530e

Changeset - e5157e2a530e

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

Marcin Kuzminski - 15 years ago 2010-09-01 23:38:03
marcin@python-works.com

added safe unicode function, and implemented it in whoosh indexer

2 files changed with 18 insertions and 5 deletions:

pylons_app/lib/helpers.py

pylons_app/lib/indexers/daemon.py

0 comments (0 inline, 0 general)

pylons_app/lib/helpers.py

➞

Show inline comments

 """Helper functions
 Consists of functions to typically be used within templates, but also
 available to Controllers. This module is available to both as 'h'.
 """
 from pygments.formatters import HtmlFormatter
 from pygments import highlight as code_highlight
 from pylons import url, app_globals as g
 from pylons.i18n.translation import _, ungettext
 from vcs.utils.annotate import annotate_highlight
 from webhelpers.html import literal, HTML, escape
 from webhelpers.html.tools import *
 from webhelpers.html.builder import make_tag
 from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
     end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
     link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
     password, textarea, title, ul, xml_declaration, radio
 from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
     mail_to, strip_links, strip_tags, tag_re
 from webhelpers.number import format_byte_size, format_bit_size
 from webhelpers.pylonslib import Flash as _Flash
 from webhelpers.pylonslib.secure_form import secure_form
 from webhelpers.text import chop_at, collapse, convert_accented_entities, \
     convert_misc_entities, lchop, plural, rchop, remove_formatting, \
     replace_whitespace, urlify, truncate, wrap_paragraphs
 #Custom helpers here :)
 class _Link(object):
     '''
     Make a link based on label and url with help of url()
     @param label:name of link    if not defined (None or empty) the
         generated url itself is used as the link text
     @param url_: positional arguments passed on to url()
     @param urlargs: keyword arguments passed on to url()
     '''
     def __call__(self, label='', *url_, **urlargs):
         # build the target once so it can double as the fallback label
         target = url(*url_, **urlargs)
         # `label is None or ''` never matched the empty string, and the old
         # fallback assigned the url generator object instead of the
         # generated address
         if not label:
             label = target
         return link_to(label, target)
 link = _Link()
 class _GetError(object):
     """Callable rendering the validation error of a single form field."""
     def __call__(self, field_name, form_errors):
         """
         Return the error of given field wrapped in an error span
         @param field_name: key looked up in form_errors
         @param form_errors: mapping of field name to error message, may be
             empty or None
         @return: literal with the error markup, or None when there is no
             error stored for field_name
         """
         tmpl = """<span class="error_msg">%s</span>"""
         # `in` replaces the deprecated dict.has_key()
         if form_errors and field_name in form_errors:
             return literal(tmpl % form_errors.get(field_name))
         return None
 get_error = _GetError()
 def recursive_replace(str, replace=' '):
     """
     Collapse every run of the given sign into just one instance
     @param str: given string
     @param replace:char to find and replace multiple instances
     @return: str with no two consecutive occurrences of replace left
     Examples::
     >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
     'Mighty-Mighty-Bo-sstones'
     """
     # an empty sign is "found" everywhere, which used to recurse forever
     if not replace:
         return str
     doubled = replace * 2
     # each pass halves the runs; loop instead of recursing until none is left
     while doubled in str:
         str = str.replace(doubled, replace)
     return str
 class _ToolTip(object):
     """
     Tooltip helper: calling the instance formats tooltip text, activate()
     returns the javascript that turns marked elements into YUI tooltips.
     """
     def __call__(self, tooltip_title, trim_at=50):
         """
         Wrap the given text with wrap_paragraphs and turn the resulting
         newlines into <br/> tags
         @param tooltip_title: text of the tooltip
         @param trim_at: width passed to wrap_paragraphs
         @return: literal with the wrapped text
         """
         return literal(wrap_paragraphs(tooltip_title, trim_at)\
                        .replace('\n', '<br/>'))
     def activate(self):
         """
         Adds tooltip mechanism to the given Html all tooltips have to have
         set class tooltip and set attribute tooltip_title.
         Then a tooltip will be generated based on that
         All with yui js tooltip
         @return: literal with the javascript snippet; the snippet runs on
             DOM ready, gives an id to every element of class tooltip that
             lacks one and takes the tooltip text from the tooltip_title
             attribute just before the tooltip is shown
         """
         js = '''
         YAHOO.util.Event.onDOMReady(function(){
             function toolTipsId(){
                 var ids = [];
                 var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');
                 for (var i = 0; i < tts.length; i++) {
                     //if element doesn not have and id autgenerate one for tooltip
                     if (!tts[i].id){
                         tts[i].id='tt'+i*100;
+                    }
                     ids.push(tts[i].id);
+                }
                 return ids
             };
             var myToolTips = new YAHOO.widget.Tooltip("tooltip", {
                 context: toolTipsId(),
                 monitorresize:false,
                 xyoffset :[0,0],
                 autodismissdelay:300000,
                 hidedelay:5,
                 showdelay:20,
             });
             //Mouse Over event disabled for new repositories since they dont
             //have last commit message
             myToolTips.contextMouseOverEvent.subscribe(
                 function(type, args) {
                     var context = args[0];
                     var txt = context.getAttribute('tooltip_title');
                     if(txt){
                         return true;
+                    }
                     else{
                         return false;
+                    }
                 });
             // Set the text for the tooltip just before we display it. Lazy method
             myToolTips.contextTriggerEvent.subscribe(
                  function(type, args) {
                         var context = args[0];
                         var txt = context.getAttribute('tooltip_title');
                         this.cfg.setProperty("text", txt);
                         // positioning of tooltip
                         var tt_w = this.element.clientWidth;
                         var tt_h = this.element.clientHeight;
                         var context_w = context.offsetWidth;
                         var context_h = context.offsetHeight;
                         var pos_x = YAHOO.util.Dom.getX(context);
                         var pos_y = YAHOO.util.Dom.getY(context);
                         var display_strategy = 'top';
                         var xy_pos = [0,0];
                         switch (display_strategy){
                             case 'top':
                                 var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                 var cur_y = pos_y-tt_h-4;
                                 xy_pos = [cur_x,cur_y];
                                 break;
                             case 'bottom':
                                 var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                 var cur_y = pos_y+context_h+4;
                                 xy_pos = [cur_x,cur_y];
                                 break;
                             case 'left':
                                 var cur_x = (pos_x-tt_w-4);
                                 var cur_y = pos_y-((tt_h/2)-context_h/2);
                                 xy_pos = [cur_x,cur_y];
                                 break;
                             case 'right':
                                 var cur_x = (pos_x+context_w+4);
                                 var cur_y = pos_y-((tt_h/2)-context_h/2);
                                 xy_pos = [cur_x,cur_y];
                                 break;
                              default:
                                 var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                 var cur_y = pos_y-tt_h-4;
                                 xy_pos = [cur_x,cur_y];
                                 break;
+                        }
                         this.cfg.setProperty("xy",xy_pos);
                   });
             //Mouse out
             myToolTips.contextMouseOutEvent.subscribe(
                 function(type, args) {
                     var context = args[0];
                 });
         });
         '''
         # wrapped in literal so the template engine emits the script unescaped
         return literal(js)
 tooltip = _ToolTip()
 class _FilesBreadCrumbs(object):
     """Callable rendering a file path as a chain of files_home links."""
     def __call__(self, repo_name, rev, paths):
         """
         Build breadcrumb links for given path
         @param repo_name: repository name, also the text of the first crumb
         @param rev: revision put into every generated url
         @param paths: slash separated path; empty segments get no crumb
         @return: literal with all crumbs joined by ' / '
         """
         root_url = url('files_home', repo_name=repo_name, revision=rev,
                        f_path='')
         crumbs = [link_to(repo_name, root_url)]
         segments = paths.split('/')
         for idx, segment in enumerate(segments):
             if segment == '':
                 continue
             # every crumb points at the path accumulated up to this segment
             partial_path = '/'.join(segments[:idx + 1])
             crumb_url = url('files_home', repo_name=repo_name, revision=rev,
                             f_path=partial_path)
             crumbs.append(link_to(segment, crumb_url))
         return literal(' / '.join(crumbs))
 files_breadcrumbs = _FilesBreadCrumbs()
 def pygmentize(filenode, **kwargs):
     """
     Highlight the content of a file node with pygments
     @param filenode: node providing the content and lexer attributes
     @param kwargs: options handed over to HtmlFormatter
     @return: literal with the highlighted html
     """
     source = filenode.content
     lexer = filenode.lexer
     formatter = HtmlFormatter(**kwargs)
     return literal(code_highlight(source, lexer, formatter))
 def pygmentize_annotation(filenode, **kwargs):
     """
     pygmentize function for annotation, every changeset gets its own
     link colour and a tooltip with author, date and message
     @param filenode: node handed over to annotate_highlight
     @param kwargs: extra options handed over to annotate_highlight
     @return: literal with the annotated, highlighted html
     """
     # raw_id -> rgb components, so a changeset keeps one colour per call
     color_dict = {}
     def gen_color():
         """endless generator of evenly distibuted colors using hsv color
         and golden ratio. The sequence starts with the same colors, in the
         same order, as the former 10k limited version.
         """
         import colorsys
         golden_ratio = 0.618033988749895
         h = 0.22717784590367374
         while True:
             h += golden_ratio
             h %= 1
             rgb = colorsys.hsv_to_rgb(h, 0.95, 0.95)
             # a real list: it is stored in color_dict and joined repeatedly
             yield [str(int(x * 256)) for x in rgb]
     cgenerator = gen_color()
     def get_color_string(cs):
         """return css color declaration assigned to changeset id cs"""
         if cs not in color_dict:
             color_dict[cs] = next(cgenerator)
         return "color: rgb(%s)! important;" % (', '.join(color_dict[cs]))
     def url_func(changeset):
         """return the link (with tooltip) rendered next to annotated lines"""
         # NOTE(review): author and message are interpolated without html
         # escaping - confirm they are sanitized before being shown
         tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>"+\
         " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"
         tooltip_html = tooltip_html % (changeset.author,
                                        changeset.date,
                                        tooltip(changeset.message))
         lnk_format = 'r%-5s:%s' % (changeset.revision,
                                    changeset.raw_id)
         uri = link_to(
                 lnk_format,
                 url('changeset_home', repo_name=changeset.repository.name,
                     revision=changeset.raw_id),
                 style=get_color_string(changeset.raw_id),
                 class_='tooltip',
                 tooltip_title=tooltip_html
               )
         uri += '\n'
         return uri
     return literal(annotate_highlight(filenode, url_func, **kwargs))
 def repo_name_slug(value):
     """
     Return slug of name of repository: the urlified value with every
     special sign turned into a dash and runs of dashes collapsed
     """
     special_signs = """=[]\;'"<>,/~!@#$%^&*()+{}|:"""
     result = urlify(value)
     for sign in special_signs:
         result = result.replace(sign, '-')
     return recursive_replace(result, '-')
 flash = _Flash()
 #===============================================================================
 # MERCURIAL FILTERS available via h.
 #===============================================================================
 from mercurial import util
 from mercurial.templatefilters import age as _age, person as _person
 # thin wrapper around mercurial.templatefilters.age
 age = lambda  x:_age(x)
 # x.capitalize()
 capitalize = lambda x: x.capitalize()
 # util.datestr(x) with its default format
 date = lambda x: util.datestr(x)
 # direct alias of mercurial util.email
 email = util.email
 # util.email(x), or None when util.email returned x unchanged
 email_or_none = lambda x: util.email(x) if util.email(x) != x else None
 # thin wrapper around mercurial.templatefilters.person
 person = lambda x: _person(x)
 # x formatted with "%d %d", so x has to be a pair of numbers
 hgdate = lambda  x: "%d %d" % x
 # the *date filters below pass x to util.datestr with a fixed format
 isodate = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M %1%2')
 isodatesec = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M:%S %1%2')
 # first item of x paired with the second item of util.makedate()
 localdate = lambda  x: (x[0], util.makedate()[1])
 rfc822date = lambda  x: util.datestr(x, "%a, %d %b %Y %H:%M:%S %1%2")
 rfc3339date = lambda  x: util.datestr(x, "%Y-%m-%dT%H:%M:%S%1:%2")
 # NOTE(review): feeds the result of _age(x) into util.datestr, while every
 # other filter here passes x itself - confirm _age returns a value that
 # datestr accepts
 time_ago = lambda x: util.datestr(_age(x), "%a, %d %b %Y %H:%M:%S %1%2")
 #===============================================================================
 # PERMS
 #===============================================================================
 from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
 HasRepoPermissionAny, HasRepoPermissionAll
 #===============================================================================
 # GRAVATAR URL
 #===============================================================================
 import hashlib
 import urllib
 from pylons import request
 def gravatar_url(email_address, size=30):
     """
     Return gravatar image url for given email address
     @param email_address: address of the user, None is treated as empty
     @param size: requested image size, sent as the `s` query parameter
     @return: url on secure.gravatar.com when the HTTP_X_URL_SCHEME entry of
         the request environ is 'https', on www.gravatar.com otherwise
     """
     ssl_enabled = 'https' == request.environ.get('HTTP_X_URL_SCHEME')
     default = 'identicon'
     baseurl_nossl = "http://www.gravatar.com/avatar/"
     baseurl_ssl = "https://secure.gravatar.com/avatar/"
     baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl
     # gravatar hashes the trimmed, lower-cased address
     normalized = (email_address or '').strip().lower()
     # md5 needs bytes; encode explicitly so non-ascii unicode does not fail
     if not isinstance(normalized, bytes):
         normalized = normalized.encode('utf-8')
     # construct the url
     query = urllib.urlencode({'d':default, 's':str(size)})
     return baseurl + hashlib.md5(normalized).hexdigest() + "?" + query
 def safe_unicode(str):
     """safe unicode function. In case of UnicodeDecode error we try to return
     unicode decoded as utf-8 with errors replaced, if this fails we return
     unicode of the string_escape encoded byte representation
     @param str: byte string, unicode or any object to convert
     @return: unicode object
     """
     try:
         return unicode(str)
     except UnicodeDecodeError:
         pass
     try:
         return unicode(str, 'utf-8', 'replace')
     except (UnicodeDecodeError, TypeError):
         # TypeError: str is not a string or buffer, so it cannot be decoded
         # directly. The parameter shadows the builtin str (calling str(str)
         # used to raise TypeError), hence the bytes alias is used to get the
         # byte representation
         return unicode(bytes(str).encode('string_escape'))
@@ \ No newline at end of file @@

pylons_app/lib/indexers/daemon.py

➞

Show inline comments

 #!/usr/bin/env python
 # encoding: utf-8
 # whoosh indexer daemon for hg-app
 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
+#
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; version 2
 # of the License or (at your option) any later version of the license.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 # MA  02110-1301, USA.
 """
 Created on Jan 26, 2010
 @author: marcink
 A daemon will read from the task table and run tasks
 """
 import sys
 import os
 from os.path import dirname as dn
 from os.path import join as jn
 #to get the pylons_app import
 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 sys.path.append(project_path)
 from pidlock import LockHeld, DaemonLock
 import traceback
 from pylons_app.config.environment import load_environment
 from pylons_app.model.hg_model import HgModel
 from pylons_app.lib.helpers import safe_unicode
 from whoosh.index import create_in, open_dir
 from shutil import rmtree
 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
 SCHEMA, IDX_NAME
 import logging
 import logging.config
 logging.config.fileConfig(jn(project_path, 'development.ini'))
 log = logging.getLogger('whooshIndexer')
 def scan_paths(root_location):
     """Scan root_location for repositories with HgModel.repo_scan and
     return its result; callers in this module iterate over its .values()
     """
     repos = HgModel.repo_scan('/', root_location, None, True)
     return repos
 class WhooshIndexingDaemon(object):
     """Daemon for atomic jobs: builds the whoosh index of all scanned
     repositories from scratch or updates it incrementally"""
     def __init__(self, indexname='HG_INDEX', repo_location=None):
         """
         @param indexname: name of the index opened by update_index
         @param repo_location: location handed to scan_paths
         """
         self.indexname = indexname
         self.repo_location = repo_location
     def get_paths(self, root_dir):
         """recursive walk in root dir and return a set of all path in that dir
         excluding files in .hg dir"""
         index_paths_ = set()
         for path, dirs, files in os.walk(root_dir):
             # prune in place so os.walk never descends into .hg; a substring
             # test on the whole path also dropped dirs like 'my.hgstuff'
             dirs[:] = [d for d in dirs if d != '.hg']
             for f in files:
                 index_paths_.add(jn(path, f))
         return index_paths_
     def add_doc(self, writer, path, repo):
         """Adding doc to writer
         @param writer: index writer the document is added to
         @param path: path of the file to index
         @param repo: repository of the file, its contact and name are stored
         Files that do not exist (or are broken symlinks) are only logged,
         any other environment error is re-raised.
         """
         ext = unicode(path.split('/')[-1].split('.')[-1].lower())
         try:
             #we just index the content of choosen files
             if ext in INDEX_EXTENSIONS:
                 log.debug('    >> %s [WITH CONTENT]' % path)
                 # with-block closes the file also when reading fails
                 with open(path, 'rb') as fobj:
                     content = fobj.read()
                 u_content = safe_unicode(content)
             else:
                 log.debug('    >> %s' % path)
                 #just index file name without it's content
                 u_content = u''
             # getmtime stats the file itself and raises OSError if it is gone
             writer.add_document(owner=unicode(repo.contact),
                             repository=u"%s" % repo.name,
                             path=u"%s" % path,
                             content=u_content,
                             modtime=os.path.getmtime(path),
                             extension=ext)
         except EnvironmentError as e:
             # covers IOError from open() as well as OSError from getmtime()
             import errno
             if e.errno == errno.ENOENT:
                 log.debug('path %s does not exist or is a broken symlink' % path)
             else:
                 # bare raise keeps the original traceback
                 raise
     def build_index(self):
         """Remove the previous index, if any, and index every file of every
         scanned repository from scratch"""
         if os.path.exists(IDX_LOCATION):
             log.debug('removing previos index')
             rmtree(IDX_LOCATION)
         if not os.path.exists(IDX_LOCATION):
             os.mkdir(IDX_LOCATION)
         # NOTE(review): the index is created as IDX_NAME while update_index
         # opens self.indexname - confirm both always name the same index
         idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
         writer = idx.writer()
         for repo in scan_paths(self.repo_location).values():
             log.debug('building index @ %s' % repo.path)
             for idx_path in self.get_paths(repo.path):
                 self.add_doc(writer, idx_path, repo)
         writer.commit(merge=True)
         log.debug('>>> FINISHED BUILDING INDEX <<<')
     def update_index(self):
         """Incremental update: drop documents of deleted files, re-index
         files modified since they were indexed and add files not indexed
         yet"""
         log.debug('STARTING INCREMENTAL INDEXING UPDATE')
         idx = open_dir(IDX_LOCATION, indexname=self.indexname)
         # The set of all paths in the index
         indexed_paths = set()
         # The set of all paths we need to re-index
         to_index = set()
         reader = idx.reader()
         writer = idx.writer()
         # Loop over the stored fields in the index
         for fields in reader.all_stored_fields():
             indexed_path = fields['path']
             indexed_paths.add(indexed_path)
             if not os.path.exists(indexed_path):
                 # This file was deleted since it was indexed
                 log.debug('removing from index %s' % indexed_path)
                 writer.delete_by_term('path', indexed_path)
             else:
                 # Check if this file was changed since it
                 # was indexed
                 indexed_time = fields['modtime']
                 mtime = os.path.getmtime(indexed_path)
                 if mtime > indexed_time:
                     # The file has changed, delete it and add it to the list of
                     # files to reindex
                     log.debug('adding to reindex list %s' % indexed_path)
                     writer.delete_by_term('path', indexed_path)
                     to_index.add(indexed_path)
         # Loop over the files in the filesystem
         for repo in scan_paths(self.repo_location).values():
             for path in self.get_paths(repo.path):
                 if path in to_index or path not in indexed_paths:
                     # This is either a file that's changed, or a new file
                     # that wasn't indexed before. So index it!
                     self.add_doc(writer, path, repo)
                     log.debug('reindexing %s' % path)
         writer.commit(merge=True)
         log.debug('>>> FINISHED <<<')
     def run(self, full_index=False):
         """Run daemon
         @param full_index: rebuild the whole index when True, otherwise
             only look for changes
         """
         if full_index:
             self.build_index()
         else:
             self.update_index()
 if __name__ == "__main__":
     repo_location = '/home/marcink/hg_repos/*'
     full_index = True # False means looking just for changes
     try:
         l = DaemonLock()
     except LockHeld:
         # another indexer run holds the lock
         sys.exit(1)
     try:
         WhooshIndexingDaemon(repo_location=repo_location)\
             .run(full_index=full_index)
     finally:
         # release the lock also when indexing fails
         l.release()

0 comments (0 inline, 0 general)