Changeset - e5157e2a530e
[Not reviewed]
default
0 2 0
Marcin Kuzminski - 15 years ago 2010-09-01 23:38:03
marcin@python-works.com
added safe unicode funtion, and implemented it in whoosh indexer
2 files changed with 18 insertions and 5 deletions:
0 comments (0 inline, 0 general)
pylons_app/lib/helpers.py
Show inline comments
 
@@ -147,192 +147,208 @@ class _ToolTip(object):
 
                        var pos_x = YAHOO.util.Dom.getX(context);
 
                        var pos_y = YAHOO.util.Dom.getY(context);
 

	
 
                        var display_strategy = 'top';
 
                        var xy_pos = [0,0];
 
                        switch (display_strategy){
 
                        
 
                            case 'top':
 
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
 
                                var cur_y = pos_y-tt_h-4;
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                            case 'bottom':
 
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
 
                                var cur_y = pos_y+context_h+4;
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                            case 'left':
 
                                var cur_x = (pos_x-tt_w-4);
 
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                            case 'right':
 
                                var cur_x = (pos_x+context_w+4);
 
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;
 
                             default:
 
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
 
                                var cur_y = pos_y-tt_h-4;
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;                             
 
                                 
 
                        }
 

	
 
                        this.cfg.setProperty("xy",xy_pos);
 

	
 
                  });
 
                  
 
            //Mouse out 
 
            myToolTips.contextMouseOutEvent.subscribe(
 
                function(type, args) {
 
                    var context = args[0];
 
                    
 
                });
 
        });
 
        '''         
 
        return literal(js)
 

	
 
tooltip = _ToolTip()
 

	
 
class _FilesBreadCrumbs(object):
 
    
 
    def __call__(self, repo_name, rev, paths):
 
        url_l = [link_to(repo_name, url('files_home',
 
                                        repo_name=repo_name,
 
                                        revision=rev, f_path=''))]
 
        paths_l = paths.split('/')
 
        
 
        for cnt, p in enumerate(paths_l, 1):
 
            if p != '':
 
                url_l.append(link_to(p, url('files_home',
 
                                            repo_name=repo_name,
 
                                            revision=rev,
 
                                            f_path='/'.join(paths_l[:cnt]))))
 

	
 
        return literal(' / '.join(url_l))
 

	
 
files_breadcrumbs = _FilesBreadCrumbs()
 

	
 
def pygmentize(filenode, **kwargs):
 
    """
 
    pygmentize function using pygments
 
    @param filenode:
 
    """
 
    return literal(code_highlight(filenode.content, 
 
                                  filenode.lexer, HtmlFormatter(**kwargs)))
 

	
 
def pygmentize_annotation(filenode, **kwargs):
 
    """
 
    pygmentize function for annotation
 
    @param filenode:
 
    """
 
    
 
    color_dict = {}
 
    def gen_color():
 
        """generator for getting 10k of evenly distibuted colors using hsv color
 
        and golden ratio.
 
        """        
 
        import colorsys
 
        n = 10000
 
        golden_ratio = 0.618033988749895
 
        h = 0.22717784590367374
 
        #generate 10k nice web friendly colors in the same order
 
        for c in xrange(n):
 
            h +=golden_ratio
 
            h %= 1
 
            HSV_tuple = [h, 0.95, 0.95]
 
            RGB_tuple = colorsys.hsv_to_rgb(*HSV_tuple)
 
            yield map(lambda x:str(int(x*256)),RGB_tuple)           
 

	
 
    cgenerator = gen_color()
 
        
 
    def get_color_string(cs):
 
        if color_dict.has_key(cs):
 
            col = color_dict[cs]
 
        else:
 
            col = color_dict[cs] = cgenerator.next()
 
        return "color: rgb(%s)! important;" % (', '.join(col))
 
        
 
    def url_func(changeset):
 
        tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>"+\
 
        " %s<br/><b>Date:</b> %s</b><br/><b>Message:</b> %s<br/></div>" 
 
        
 
        tooltip_html = tooltip_html % (changeset.author,
 
                                               changeset.date,
 
                                               tooltip(changeset.message))
 
        lnk_format = 'r%-5s:%s' % (changeset.revision,
 
                                 changeset.raw_id)
 
        uri = link_to(
 
                lnk_format,
 
                url('changeset_home', repo_name=changeset.repository.name,
 
                    revision=changeset.raw_id),
 
                style=get_color_string(changeset.raw_id),
 
                class_='tooltip',
 
                tooltip_title=tooltip_html
 
              )
 
        
 
        uri += '\n'
 
        return uri   
 
    return literal(annotate_highlight(filenode, url_func, **kwargs))
 
      
 
def repo_name_slug(value):
 
    """
 
    Return slug of name of repository
 
    """
 
    slug = urlify(value)
 
    for c in """=[]\;'"<>,/~!@#$%^&*()+{}|:""":
 
        slug = slug.replace(c, '-')
 
    slug = recursive_replace(slug, '-')
 
    return slug
 

	
 
flash = _Flash()
 

	
 

	
 
#===============================================================================
 
# MERCURIAL FILTERS available via h.
 
#===============================================================================
 
from mercurial import util
 
from mercurial.templatefilters import age as _age, person as _person
 

	
 
age = lambda  x:_age(x)
 
capitalize = lambda x: x.capitalize()
 
date = lambda x: util.datestr(x)
 
email = util.email
 
email_or_none = lambda x: util.email(x) if util.email(x) != x else None
 
person = lambda x: _person(x)
 
hgdate = lambda  x: "%d %d" % x
 
isodate = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M %1%2')
 
isodatesec = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M:%S %1%2')
 
localdate = lambda  x: (x[0], util.makedate()[1])
 
rfc822date = lambda  x: util.datestr(x, "%a, %d %b %Y %H:%M:%S %1%2")
 
rfc3339date = lambda  x: util.datestr(x, "%Y-%m-%dT%H:%M:%S%1:%2")
 
time_ago = lambda x: util.datestr(_age(x), "%a, %d %b %Y %H:%M:%S %1%2")
 

	
 

	
 
#===============================================================================
 
# PERMS
 
#===============================================================================
 
from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
 
HasRepoPermissionAny, HasRepoPermissionAll
 

	
 
#===============================================================================
 
# GRAVATAR URL
 
#===============================================================================
 
import hashlib
 
import urllib
 
from pylons import request
 

	
 
def gravatar_url(email_address, size=30):
 
    ssl_enabled = 'https' == request.environ.get('HTTP_X_URL_SCHEME')
 
    default = 'identicon'
 
    baseurl_nossl = "http://www.gravatar.com/avatar/"
 
    baseurl_ssl = "https://secure.gravatar.com/avatar/"
 
    baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl
 
        
 
    
 
    # construct the url
 
    gravatar_url = baseurl + hashlib.md5(email_address.lower()).hexdigest() + "?"
 
    gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
 

	
 
    return gravatar_url
 

	
 
def safe_unicode(str):
 
    """safe unicode function. In case of UnicodeDecode error we try to return
 
    unicode with errors replace, if this failes we return unicode with 
 
    string_escape decoding """
 
    
 
    try:
 
        u_str = unicode(str)
 
    except UnicodeDecodeError:
 
        try:
 
            u_str = unicode(str, 'utf-8', 'replace')
 
        except UnicodeDecodeError:
 
            #incase we have a decode error just represent as byte string
 
            u_str = unicode(str(str).encode('string_escape'))
 
        
 
    return u_str
 
\ No newline at end of file
pylons_app/lib/indexers/daemon.py
Show inline comments
 
#!/usr/bin/env python
 
# encoding: utf-8
 
# whoosh indexer daemon for hg-app
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
#
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your opinion) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 
"""
 
Created on Jan 26, 2010
 

	
 
@author: marcink
 
A deamon will read from task table and run tasks
 
"""
 
import sys
 
import os
 
from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
#to get the pylons_app import
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 
from pidlock import LockHeld, DaemonLock
 
import traceback
 
from pylons_app.config.environment import load_environment
 
from pylons_app.model.hg_model import HgModel
 
from pylons_app.lib.helpers import safe_unicode
 
from whoosh.index import create_in, open_dir
 
from shutil import rmtree
 
from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
 
SCHEMA, IDX_NAME
 

	
 
import logging
 
import logging.config
 
logging.config.fileConfig(jn(project_path, 'development.ini'))
 
log = logging.getLogger('whooshIndexer')
 

	
 
def scan_paths(root_location):
 
    return HgModel.repo_scan('/', root_location, None, True)
 

	
 
class WhooshIndexingDaemon(object):
 
    """Deamon for atomic jobs"""
 

	
 
    def __init__(self, indexname='HG_INDEX', repo_location=None):
 
        self.indexname = indexname
 
        self.repo_location = repo_location
 
    
 
    def get_paths(self, root_dir):
 
        """recursive walk in root dir and return a set of all path in that dir
 
        excluding files in .hg dir"""
 
        index_paths_ = set()
 
        for path, dirs, files in os.walk(root_dir):
 
            if path.find('.hg') == -1:
 
                for f in files:
 
                    index_paths_.add(jn(path, f))
 
    
 
        return index_paths_
 
    
 
    def add_doc(self, writer, path, repo):
 
        """Adding doc to writer"""
 
        
 
        ext = unicode(path.split('/')[-1].split('.')[-1].lower())
 
        #we just index the content of choosen files
 
        if ext in INDEX_EXTENSIONS:
 
            log.debug('    >> %s [WITH CONTENT]' % path)
 
            fobj = open(path, 'rb')
 
            content = fobj.read()
 
            fobj.close()
 
            try:
 
                u_content = unicode(content)
 
            except UnicodeDecodeError:
 
                #incase we have a decode error just represent as byte string
 
                u_content = unicode(str(content).encode('string_escape'))
 
            u_content = safe_unicode(content)
 
        else:
 
            log.debug('    >> %s' % path)
 
            #just index file name without it's content
 
            u_content = u''
 
        
 
        
 
        
 
        try:
 
            os.stat(path)
 
            writer.add_document(owner=unicode(repo.contact),
 
                            repository=u"%s" % repo.name,
 
                            path=u"%s" % path,
 
                            content=u_content,
 
                            modtime=os.path.getmtime(path),
 
                            extension=ext)             
 
        except OSError, e:
 
            import errno
 
            if e.errno == errno.ENOENT:
 
                log.debug('path %s does not exist or is a broken symlink' % path)
 
            else:
 
                raise e                 
 

	
 
    
 
    def build_index(self):
 
        if os.path.exists(IDX_LOCATION):
 
            log.debug('removing previos index')
 
            rmtree(IDX_LOCATION)
 
            
 
        if not os.path.exists(IDX_LOCATION):
 
            os.mkdir(IDX_LOCATION)
 
        
 
        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
 
        writer = idx.writer()
 
        
 
        for cnt, repo in enumerate(scan_paths(self.repo_location).values()):
 
            log.debug('building index @ %s' % repo.path)
 
        
 
            for idx_path in self.get_paths(repo.path):
 
                self.add_doc(writer, idx_path, repo)
 
        writer.commit(merge=True)
 
                
 
        log.debug('>>> FINISHED BUILDING INDEX <<<')
 
            
 
    
 
    def update_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')
 
            
 
        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
 
        # The set of all paths in the index
 
        indexed_paths = set()
 
        # The set of all paths we need to re-index
 
        to_index = set()
 
        
 
        reader = idx.reader()
 
        writer = idx.writer()
 
    
 
        # Loop over the stored fields in the index
 
        for fields in reader.all_stored_fields():
 
            indexed_path = fields['path']
 
            indexed_paths.add(indexed_path)
 
    
 
            if not os.path.exists(indexed_path):
 
                # This file was deleted since it was indexed
 
                log.debug('removing from index %s' % indexed_path)
 
                writer.delete_by_term('path', indexed_path)
 
    
 
            else:
 
                # Check if this file was changed since it
 
                # was indexed
 
                indexed_time = fields['modtime']
 
                
 
                mtime = os.path.getmtime(indexed_path)
 
    
 
                if mtime > indexed_time:
 
    
 
                    # The file has changed, delete it and add it to the list of
 
                    # files to reindex
 
                    log.debug('adding to reindex list %s' % indexed_path)
 
                    writer.delete_by_term('path', indexed_path)
 
                    to_index.add(indexed_path)
 
                    #writer.commit()
 
    
 
        # Loop over the files in the filesystem
 
        # Assume we have a function that gathers the filenames of the
 
        # documents to be indexed
 
        for repo in scan_paths(self.repo_location).values():
 
            for path in self.get_paths(repo.path):
 
                if path in to_index or path not in indexed_paths:
 
                    # This is either a file that's changed, or a new file
 
                    # that wasn't indexed before. So index it!
 
                    self.add_doc(writer, path, repo)
 
                    log.debug('reindexing %s' % path)
 
    
 
        writer.commit(merge=True)
 
        #idx.optimize()
 
        log.debug('>>> FINISHED <<<')
 
        
 
    def run(self, full_index=False):
 
        """Run daemon"""
 
        if full_index:
 
            self.build_index()
 
        else:
 
            self.update_index()
 
        
 
if __name__ == "__main__":
 
    repo_location = '/home/marcink/hg_repos/*'
 
    full_index = True # False means looking just for changes
 
    try:
 
        l = DaemonLock()
 
        WhooshIndexingDaemon(repo_location=repo_location)\
 
            .run(full_index=full_index)
 
        l.release()
 
    except LockHeld:
 
        sys.exit(1)
 

	
0 comments (0 inline, 0 general)