Changeset - e5157e2a530e
[Not reviewed]
default
0 2 0
Marcin Kuzminski - 15 years ago 2010-09-01 23:38:03
marcin@python-works.com
added safe unicode funtion, and implemented it in whoosh indexer
2 files changed with 18 insertions and 5 deletions:
0 comments (0 inline, 0 general)
pylons_app/lib/helpers.py
Show inline comments
 
"""Helper functions
 

	
 
Consists of functions to typically be used within templates, but also
 
available to Controllers. This module is available to both as 'h'.
 
"""
 
from pygments.formatters import HtmlFormatter
 
from pygments import highlight as code_highlight
 
from pylons import url, app_globals as g
 
from pylons.i18n.translation import _, ungettext
 
from vcs.utils.annotate import annotate_highlight
 
from webhelpers.html import literal, HTML, escape
 
from webhelpers.html.tools import *
 
from webhelpers.html.builder import make_tag
 
from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
 
    end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
 
    link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
 
    password, textarea, title, ul, xml_declaration, radio
 
from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
 
    mail_to, strip_links, strip_tags, tag_re
 
from webhelpers.number import format_byte_size, format_bit_size
 
from webhelpers.pylonslib import Flash as _Flash
 
from webhelpers.pylonslib.secure_form import secure_form
 
from webhelpers.text import chop_at, collapse, convert_accented_entities, \
 
    convert_misc_entities, lchop, plural, rchop, remove_formatting, \
 
    replace_whitespace, urlify, truncate, wrap_paragraphs
 

	
 
#Custom helpers here :)
 
class _Link(object):
 
    '''
 
    Make a url based on label and url with help of url_for
 
    @param label:name of link    if not defined url is used
 
    @param url: the url for link
 
    '''
 

	
 
    def __call__(self, label='', *url_, **urlargs):
 
        if label is None or '':
 
            label = url
 
        link_fn = link_to(label, url(*url_, **urlargs))
 
        return link_fn
 

	
 
link = _Link()
 

	
 
class _GetError(object):
 

	
 
    def __call__(self, field_name, form_errors):
 
        tmpl = """<span class="error_msg">%s</span>"""
 
        if form_errors and form_errors.has_key(field_name):
 
            return literal(tmpl % form_errors.get(field_name))
 

	
 
get_error = _GetError()
 

	
 
def recursive_replace(str, replace=' '):
 
    """
 
    Recursive replace of given sign to just one instance
 
    @param str: given string
 
    @param replace:char to find and replace multiple instances
 
        
 
    Examples::
 
    >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
 
    'Mighty-Mighty-Bo-sstones'
 
    """
 

	
 
    if str.find(replace * 2) == -1:
 
        return str
 
    else:
 
        str = str.replace(replace * 2, replace)
 
        return recursive_replace(str, replace)  
 

	
 
class _ToolTip(object):
 
    
 
    def __call__(self, tooltip_title, trim_at=50):
 
        """
 
        Special function just to wrap our text into nice formatted autowrapped
 
        text
 
        @param tooltip_title:
 
        """
 
        
 
        return literal(wrap_paragraphs(tooltip_title, trim_at)\
 
                       .replace('\n', '<br/>')) 
 
    
 
    def activate(self):
 
        """
 
        Adds tooltip mechanism to the given Html all tooltips have to have 
 
        set class tooltip and set attribute tooltip_title.
 
        Then a tooltip will be generated based on that
 
        All with yui js tooltip
 
        """
 
        
 
        js = '''
 
        YAHOO.util.Event.onDOMReady(function(){
 
            function toolTipsId(){
 
                var ids = [];
 
                var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');
 
                
 
                for (var i = 0; i < tts.length; i++) {
 
                    //if element doesn not have and id autgenerate one for tooltip
 
                    
 
                    if (!tts[i].id){
 
                        tts[i].id='tt'+i*100;
 
                    }
 
                    ids.push(tts[i].id);
 
                }
 
                return ids        
 
            };
 
            var myToolTips = new YAHOO.widget.Tooltip("tooltip", { 
 
                context: toolTipsId(),
 
                monitorresize:false,
 
                xyoffset :[0,0],
 
                autodismissdelay:300000,
 
                hidedelay:5,
 
                showdelay:20,
 
            });
 
            
 
            //Mouse Over event disabled for new repositories since they dont
 
            //have last commit message
 
            myToolTips.contextMouseOverEvent.subscribe(
 
                function(type, args) {
 
                    var context = args[0];
 
                    var txt = context.getAttribute('tooltip_title');
 
                    if(txt){                                       
 
                        return true;
 
                    }
 
                    else{
 
                        return false;
 
                    }
 
                });
 
            
 
                            
 
            // Set the text for the tooltip just before we display it. Lazy method
 
            myToolTips.contextTriggerEvent.subscribe( 
 
                 function(type, args) { 
 

	
 
                 
 
                        var context = args[0]; 
 
                        
 
                        var txt = context.getAttribute('tooltip_title');
 
                        this.cfg.setProperty("text", txt);
 
                        
 
                        
 
                        // positioning of tooltip
 
                        var tt_w = this.element.clientWidth;
 
                        var tt_h = this.element.clientHeight;
 
                        
 
                        var context_w = context.offsetWidth;
 
                        var context_h = context.offsetHeight;
 
                        
 
                        var pos_x = YAHOO.util.Dom.getX(context);
 
                        var pos_y = YAHOO.util.Dom.getY(context);
 

	
 
                        var display_strategy = 'top';
 
                        var xy_pos = [0,0];
 
                        switch (display_strategy){
 
                        
 
                            case 'top':
 
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
 
                                var cur_y = pos_y-tt_h-4;
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                            case 'bottom':
 
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
 
                                var cur_y = pos_y+context_h+4;
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                            case 'left':
 
                                var cur_x = (pos_x-tt_w-4);
 
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                            case 'right':
 
                                var cur_x = (pos_x+context_w+4);
 
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                             default:
 
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
 
                                var cur_y = pos_y-tt_h-4;
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;                             
 
                                 
 
                        }
 

	
 
                        this.cfg.setProperty("xy",xy_pos);
 

	
 
                  });
 
                  
 
            //Mouse out 
 
            myToolTips.contextMouseOutEvent.subscribe(
 
                function(type, args) {
 
                    var context = args[0];
 
                    
 
                });
 
        });
 
        '''         
 
        return literal(js)
 

	
 
tooltip = _ToolTip()
 

	
 
class _FilesBreadCrumbs(object):
 
    
 
    def __call__(self, repo_name, rev, paths):
 
        url_l = [link_to(repo_name, url('files_home',
 
                                        repo_name=repo_name,
 
                                        revision=rev, f_path=''))]
 
        paths_l = paths.split('/')
 
        
 
        for cnt, p in enumerate(paths_l, 1):
 
            if p != '':
 
                url_l.append(link_to(p, url('files_home',
 
                                            repo_name=repo_name,
 
                                            revision=rev,
 
                                            f_path='/'.join(paths_l[:cnt]))))
 

	
 
        return literal(' / '.join(url_l))
 

	
 
files_breadcrumbs = _FilesBreadCrumbs()
 

	
 
def pygmentize(filenode, **kwargs):
 
    """
 
    pygmentize function using pygments
 
    @param filenode:
 
    """
 
    return literal(code_highlight(filenode.content, 
 
                                  filenode.lexer, HtmlFormatter(**kwargs)))
 

	
 
def pygmentize_annotation(filenode, **kwargs):
 
    """
 
    pygmentize function for annotation
 
    @param filenode:
 
    """
 
    
 
    color_dict = {}
 
    def gen_color():
 
        """generator for getting 10k of evenly distibuted colors using hsv color
 
        and golden ratio.
 
        """        
 
        import colorsys
 
        n = 10000
 
        golden_ratio = 0.618033988749895
 
        h = 0.22717784590367374
 
        #generate 10k nice web friendly colors in the same order
 
        for c in xrange(n):
 
            h +=golden_ratio
 
            h %= 1
 
            HSV_tuple = [h, 0.95, 0.95]
 
            RGB_tuple = colorsys.hsv_to_rgb(*HSV_tuple)
 
            yield map(lambda x:str(int(x*256)),RGB_tuple)           
 

	
 
    cgenerator = gen_color()
 
        
 
    def get_color_string(cs):
 
        if color_dict.has_key(cs):
 
            col = color_dict[cs]
 
        else:
 
            col = color_dict[cs] = cgenerator.next()
 
        return "color: rgb(%s)! important;" % (', '.join(col))
 
        
 
    def url_func(changeset):
 
        tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>"+\
 
        " %s<br/><b>Date:</b> %s</b><br/><b>Message:</b> %s<br/></div>" 
 
        
 
        tooltip_html = tooltip_html % (changeset.author,
 
                                               changeset.date,
 
                                               tooltip(changeset.message))
 
        lnk_format = 'r%-5s:%s' % (changeset.revision,
 
                                 changeset.raw_id)
 
        uri = link_to(
 
                lnk_format,
 
                url('changeset_home', repo_name=changeset.repository.name,
 
                    revision=changeset.raw_id),
 
                style=get_color_string(changeset.raw_id),
 
                class_='tooltip',
 
                tooltip_title=tooltip_html
 
              )
 
        
 
        uri += '\n'
 
        return uri   
 
    return literal(annotate_highlight(filenode, url_func, **kwargs))
 
      
 
def repo_name_slug(value):
 
    """
 
    Return slug of name of repository
 
    """
 
    slug = urlify(value)
 
    for c in """=[]\;'"<>,/~!@#$%^&*()+{}|:""":
 
        slug = slug.replace(c, '-')
 
    slug = recursive_replace(slug, '-')
 
    return slug
 

	
 
flash = _Flash()
 

	
 

	
 
#===============================================================================
 
# MERCURIAL FILTERS available via h.
 
#===============================================================================
 
from mercurial import util
 
from mercurial.templatefilters import age as _age, person as _person
 

	
 
age = lambda  x:_age(x)
 
capitalize = lambda x: x.capitalize()
 
date = lambda x: util.datestr(x)
 
email = util.email
 
email_or_none = lambda x: util.email(x) if util.email(x) != x else None
 
person = lambda x: _person(x)
 
hgdate = lambda  x: "%d %d" % x
 
isodate = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M %1%2')
 
isodatesec = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M:%S %1%2')
 
localdate = lambda  x: (x[0], util.makedate()[1])
 
rfc822date = lambda  x: util.datestr(x, "%a, %d %b %Y %H:%M:%S %1%2")
 
rfc3339date = lambda  x: util.datestr(x, "%Y-%m-%dT%H:%M:%S%1:%2")
 
time_ago = lambda x: util.datestr(_age(x), "%a, %d %b %Y %H:%M:%S %1%2")
 

	
 

	
 
#===============================================================================
 
# PERMS
 
#===============================================================================
 
from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
 
HasRepoPermissionAny, HasRepoPermissionAll
 

	
 
#===============================================================================
 
# GRAVATAR URL
 
#===============================================================================
 
import hashlib
 
import urllib
 
from pylons import request
 

	
 
def gravatar_url(email_address, size=30):
 
    ssl_enabled = 'https' == request.environ.get('HTTP_X_URL_SCHEME')
 
    default = 'identicon'
 
    baseurl_nossl = "http://www.gravatar.com/avatar/"
 
    baseurl_ssl = "https://secure.gravatar.com/avatar/"
 
    baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl
 
        
 
    
 
    # construct the url
 
    gravatar_url = baseurl + hashlib.md5(email_address.lower()).hexdigest() + "?"
 
    gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
 

	
 
    return gravatar_url
 

	
 
def safe_unicode(str):
 
    """safe unicode function. In case of UnicodeDecode error we try to return
 
    unicode with errors replace, if this failes we return unicode with 
 
    string_escape decoding """
 
    
 
    try:
 
        u_str = unicode(str)
 
    except UnicodeDecodeError:
 
        try:
 
            u_str = unicode(str, 'utf-8', 'replace')
 
        except UnicodeDecodeError:
 
            #incase we have a decode error just represent as byte string
 
            u_str = unicode(str(str).encode('string_escape'))
 
        
 
    return u_str
 
\ No newline at end of file
pylons_app/lib/indexers/daemon.py
Show inline comments
 
#!/usr/bin/env python
 
# encoding: utf-8
 
# whoosh indexer daemon for hg-app
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
#
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your opinion) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 
"""
 
Created on Jan 26, 2010
 

	
 
@author: marcink
 
A deamon will read from task table and run tasks
 
"""
 
import sys
 
import os
 
from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
#to get the pylons_app import
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 
from pidlock import LockHeld, DaemonLock
 
import traceback
 
from pylons_app.config.environment import load_environment
 
from pylons_app.model.hg_model import HgModel
 
from pylons_app.lib.helpers import safe_unicode
 
from whoosh.index import create_in, open_dir
 
from shutil import rmtree
 
from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
 
SCHEMA, IDX_NAME
 

	
 
import logging
 
import logging.config
 
logging.config.fileConfig(jn(project_path, 'development.ini'))
 
log = logging.getLogger('whooshIndexer')
 

	
 
def scan_paths(root_location):
 
    return HgModel.repo_scan('/', root_location, None, True)
 

	
 
class WhooshIndexingDaemon(object):
 
    """Deamon for atomic jobs"""
 

	
 
    def __init__(self, indexname='HG_INDEX', repo_location=None):
 
        self.indexname = indexname
 
        self.repo_location = repo_location
 
    
 
    def get_paths(self, root_dir):
 
        """recursive walk in root dir and return a set of all path in that dir
 
        excluding files in .hg dir"""
 
        index_paths_ = set()
 
        for path, dirs, files in os.walk(root_dir):
 
            if path.find('.hg') == -1:
 
                for f in files:
 
                    index_paths_.add(jn(path, f))
 
    
 
        return index_paths_
 
    
 
    def add_doc(self, writer, path, repo):
 
        """Adding doc to writer"""
 
        
 
        ext = unicode(path.split('/')[-1].split('.')[-1].lower())
 
        #we just index the content of choosen files
 
        if ext in INDEX_EXTENSIONS:
 
            log.debug('    >> %s [WITH CONTENT]' % path)
 
            fobj = open(path, 'rb')
 
            content = fobj.read()
 
            fobj.close()
 
            try:
 
                u_content = unicode(content)
 
            except UnicodeDecodeError:
 
                #incase we have a decode error just represent as byte string
 
                u_content = unicode(str(content).encode('string_escape'))
 
            u_content = safe_unicode(content)
 
        else:
 
            log.debug('    >> %s' % path)
 
            #just index file name without it's content
 
            u_content = u''
 
        
 
        
 
        
 
        try:
 
            os.stat(path)
 
            writer.add_document(owner=unicode(repo.contact),
 
                            repository=u"%s" % repo.name,
 
                            path=u"%s" % path,
 
                            content=u_content,
 
                            modtime=os.path.getmtime(path),
 
                            extension=ext)             
 
        except OSError, e:
 
            import errno
 
            if e.errno == errno.ENOENT:
 
                log.debug('path %s does not exist or is a broken symlink' % path)
 
            else:
 
                raise e                 
 

	
 
    
 
    def build_index(self):
 
        if os.path.exists(IDX_LOCATION):
 
            log.debug('removing previos index')
 
            rmtree(IDX_LOCATION)
 
            
 
        if not os.path.exists(IDX_LOCATION):
 
            os.mkdir(IDX_LOCATION)
 
        
 
        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
 
        writer = idx.writer()
 
        
 
        for cnt, repo in enumerate(scan_paths(self.repo_location).values()):
 
            log.debug('building index @ %s' % repo.path)
 
        
 
            for idx_path in self.get_paths(repo.path):
 
                self.add_doc(writer, idx_path, repo)
 
        writer.commit(merge=True)
 
                
 
        log.debug('>>> FINISHED BUILDING INDEX <<<')
 
            
 
    
 
    def update_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')
 
            
 
        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
 
        # The set of all paths in the index
 
        indexed_paths = set()
 
        # The set of all paths we need to re-index
 
        to_index = set()
 
        
 
        reader = idx.reader()
 
        writer = idx.writer()
 
    
 
        # Loop over the stored fields in the index
 
        for fields in reader.all_stored_fields():
 
            indexed_path = fields['path']
 
            indexed_paths.add(indexed_path)
 
    
 
            if not os.path.exists(indexed_path):
 
                # This file was deleted since it was indexed
 
                log.debug('removing from index %s' % indexed_path)
 
                writer.delete_by_term('path', indexed_path)
 
    
 
            else:
 
                # Check if this file was changed since it
 
                # was indexed
 
                indexed_time = fields['modtime']
 
                
 
                mtime = os.path.getmtime(indexed_path)
 
    
 
                if mtime > indexed_time:
 
    
 
                    # The file has changed, delete it and add it to the list of
 
                    # files to reindex
 
                    log.debug('adding to reindex list %s' % indexed_path)
 
                    writer.delete_by_term('path', indexed_path)
 
                    to_index.add(indexed_path)
 
                    #writer.commit()
 
    
 
        # Loop over the files in the filesystem
 
        # Assume we have a function that gathers the filenames of the
 
        # documents to be indexed
 
        for repo in scan_paths(self.repo_location).values():
 
            for path in self.get_paths(repo.path):
 
                if path in to_index or path not in indexed_paths:
 
                    # This is either a file that's changed, or a new file
 
                    # that wasn't indexed before. So index it!
 
                    self.add_doc(writer, path, repo)
 
                    log.debug('reindexing %s' % path)
 
    
 
        writer.commit(merge=True)
 
        #idx.optimize()
 
        log.debug('>>> FINISHED <<<')
 
        
 
    def run(self, full_index=False):
 
        """Run daemon"""
 
        if full_index:
 
            self.build_index()
 
        else:
 
            self.update_index()
 
        
 
if __name__ == "__main__":
 
    repo_location = '/home/marcink/hg_repos/*'
 
    full_index = True # False means looking just for changes
 
    try:
 
        l = DaemonLock()
 
        WhooshIndexingDaemon(repo_location=repo_location)\
 
            .run(full_index=full_index)
 
        l.release()
 
    except LockHeld:
 
        sys.exit(1)
 

	
0 comments (0 inline, 0 general)