Changeset - fb0c3af6031b
[Not reviewed]
celery
1 6 1
Marcin Kuzminski - 15 years ago 2010-09-23 01:08:33
marcin@python-works.com
Implemented locking for task, to prevent for running the same tasks,
moved out pidlock library. Added dirsize display
7 files changed with 71 insertions and 31 deletions:
0 comments (0 inline, 0 general)
pylons_app/lib/celerylib/__init__.py
Show inline comments
 
from pylons_app.lib.pidlock import DaemonLock, LockHeld
 
from vcs.utils.lazy import LazyProperty
 
from decorator import decorator
 
import logging
 
import os
 
import sys
 
import traceback
 

	
 
from hashlib import md5
 
log = logging.getLogger(__name__)
 

	
 
class ResultWrapper(object):
 
    def __init__(self, task):
 
        self.task = task
 
        
 
    @LazyProperty
 
    def result(self):
 
        return self.task
 

	
 
def run_task(task, *args, **kwargs):
 
    try:
 
        t = task.delay(*args, **kwargs)
 
        log.info('running task %s', t.task_id)
 
        return t
 
    except Exception, e:
 
        print e
 
        if e.errno == 111:
 
            log.debug('Unnable to connect. Sync execution')
 
        else:
 
            log.error(traceback.format_exc())
 
        #pure sync version
 
        return ResultWrapper(task(*args, **kwargs))
 

	
 

	
 
class LockTask(object):
 
    """LockTask decorator"""
 
    
 
    def __init__(self, func):
 
        self.func = func
 
        
 
    def __call__(self, func):
 
        return decorator(self.__wrapper, func)
 
    
 
    def __wrapper(self, func, *fargs, **fkwargs):
 
        params = []
 
        params.extend(fargs)
 
        params.extend(fkwargs.values())
 
        lockkey = 'task_%s' % \
 
           md5(str(self.func) + '-' + '-'.join(map(str, params))).hexdigest()
 
        log.info('running task with lockkey %s', lockkey)
 
        try:
 
            l = DaemonLock(lockkey)
 
            return func(*fargs, **fkwargs)
 
            l.release()
 
        except LockHeld:
 
            log.info('LockHeld')
 
            return 'Task with key %s already running' % lockkey   
 

	
 
            
 
            
 

	
 
        
 
        
 
    
 
    
 
    
 
  
pylons_app/lib/celerylib/tasks.py
Show inline comments
 
from celery.decorators import task
 
from celery.task.sets import subtask
 
from celeryconfig import PYLONS_CONFIG as config
 
from pylons.i18n.translation import _
 
from pylons_app.lib.celerylib import run_task
 
from pylons_app.lib.celerylib import run_task, LockTask
 
from pylons_app.lib.helpers import person
 
from pylons_app.lib.smtp_mailer import SmtpMailer
 
from pylons_app.lib.utils import OrderedDict
 
from operator import itemgetter
 
from vcs.backends.hg import MercurialRepository
 
from time import mktime
 
import traceback
 
import json
 

	
 
__all__ = ['whoosh_index', 'get_commits_stats',
 
           'reset_user_password', 'send_email']
 

	
 
def get_session():
 
    from sqlalchemy import engine_from_config
 
    from sqlalchemy.orm import sessionmaker, scoped_session
 
    engine = engine_from_config(dict(config.items('app:main')), 'sqlalchemy.db1.')
 
    sa = scoped_session(sessionmaker(bind=engine))
 
    return sa
 

	
 
def get_hg_settings():
 
    from pylons_app.model.db import HgAppSettings
 
    try:
 
        sa = get_session()
 
        ret = sa.query(HgAppSettings).all()
 
    finally:
 
        sa.remove()
 
        
 
    if not ret:
 
        raise Exception('Could not get application settings !')
 
    settings = {}
 
    for each in ret:
 
        settings['hg_app_' + each.app_settings_name] = each.app_settings_value    
 
    
 
    return settings
 

	
 
def get_hg_ui_settings():
 
    from pylons_app.model.db import HgAppUi
 
    try:
 
        sa = get_session()
 
        ret = sa.query(HgAppUi).all()
 
    finally:
 
        sa.remove()
 
        
 
    if not ret:
 
        raise Exception('Could not get application ui settings !')
 
    settings = {}
 
    for each in ret:
 
        k = each.ui_key
 
        v = each.ui_value
 
        if k == '/':
 
            k = 'root_path'
 
        
 
        if k.find('.') != -1:
 
            k = k.replace('.', '_')
 
        
 
        if each.ui_section == 'hooks':
 
            v = each.ui_active
 
        
 
        settings[each.ui_section + '_' + k] = v  
 
    
 
    return settings   
 

	
 
@task
 
def whoosh_index(repo_location, full_index):
 
    log = whoosh_index.get_logger()
 
    from pylons_app.lib.indexers import DaemonLock
 
    from pylons_app.lib.pidlock import DaemonLock
 
    from pylons_app.lib.indexers.daemon import WhooshIndexingDaemon, LockHeld
 
    try:
 
        l = DaemonLock()
 
        WhooshIndexingDaemon(repo_location=repo_location)\
 
            .run(full_index=full_index)
 
        l.release()
 
        return 'Done'
 
    except LockHeld:
 
        log.info('LockHeld')
 
        return 'LockHeld'    
 

	
 

	
 
@task
 
@LockTask('get_commits_stats')
 
def get_commits_stats(repo_name, ts_min_y, ts_max_y):
 
    author_key_cleaner = lambda k: person(k).replace('"', "") #for js data compatibilty
 
        
 
    from pylons_app.model.db import Statistics, Repository
 
    log = get_commits_stats.get_logger()
 
    commits_by_day_author_aggregate = {}
 
    commits_by_day_aggregate = {}
 
    repos_path = get_hg_ui_settings()['paths_root_path'].replace('*', '')
 
    repo = MercurialRepository(repos_path + repo_name)
 

	
 
    skip_date_limit = True
 
    parse_limit = 500 #limit for single task changeset parsing
 
    parse_limit = 350 #limit for single task changeset parsing
 
    last_rev = 0
 
    last_cs = None
 
    timegetter = itemgetter('time')
 
    
 
    sa = get_session()
 
    
 
    dbrepo = sa.query(Repository)\
 
        .filter(Repository.repo_name == repo_name).scalar()
 
    cur_stats = sa.query(Statistics)\
 
        .filter(Statistics.repository == dbrepo).scalar()
 
    if cur_stats:
 
        last_rev = cur_stats.stat_on_revision
 
    
 
    if last_rev == repo.revisions[-1]:
 
        #pass silently without any work
 
        return True
 
    
 
    if cur_stats:
 
        commits_by_day_aggregate = OrderedDict(
 
                                       json.loads(
 
                                        cur_stats.commit_activity_combined))
 
        commits_by_day_author_aggregate = json.loads(cur_stats.commit_activity)
 
    
 
    for cnt, rev in enumerate(repo.revisions[last_rev:]):
 
        last_cs = cs = repo.get_changeset(rev)
 
        k = '%s-%s-%s' % (cs.date.timetuple()[0], cs.date.timetuple()[1],
 
                          cs.date.timetuple()[2])
 
        timetupple = [int(x) for x in k.split('-')]
 
        timetupple.extend([0 for _ in xrange(6)])
 
        k = mktime(timetupple)
 
        if commits_by_day_author_aggregate.has_key(author_key_cleaner(cs.author)):
 
            try:
 
                l = [timegetter(x) for x in commits_by_day_author_aggregate\
 
                        [author_key_cleaner(cs.author)]['data']]
 
                time_pos = l.index(k)
 
            except ValueError:
 
                time_pos = False
 
                
 
            if time_pos >= 0 and time_pos is not False:
 
                
 
                datadict = commits_by_day_author_aggregate\
 
                    [author_key_cleaner(cs.author)]['data'][time_pos]
 
                
 
                datadict["commits"] += 1
 
                datadict["added"] += len(cs.added)
 
                datadict["changed"] += len(cs.changed)
 
                datadict["removed"] += len(cs.removed)
 
                #print datadict
 
                
 
            else:
 
                #print 'ELSE !!!!'
 
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 
                    
 
                    datadict = {"time":k,
 
                                "commits":1,
 
                                "added":len(cs.added),
 
                                "changed":len(cs.changed),
 
                                "removed":len(cs.removed),
 
                               }
 
                    commits_by_day_author_aggregate\
 
                        [author_key_cleaner(cs.author)]['data'].append(datadict)
 
                                        
 
        else:
 
            #print k, 'nokey ADDING'
 
            if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 
                commits_by_day_author_aggregate[author_key_cleaner(cs.author)] = {
 
                                    "label":author_key_cleaner(cs.author),
 
                                    "data":[{"time":k,
 
                                             "commits":1,
 
                                             "added":len(cs.added),
 
                                             "changed":len(cs.changed),
 
                                             "removed":len(cs.removed),
 
                                             }],
 
                                    "schema":["commits"],
 
                                    }               
 
    
 
#        #gather all data by day
 
        if commits_by_day_aggregate.has_key(k):
 
            commits_by_day_aggregate[k] += 1
 
        else:
 
            commits_by_day_aggregate[k] = 1
 
        
 
        if cnt >= parse_limit:
 
            #don't fetch to much data since we can freeze application
 
            break
 

	
 
    overview_data = []
 
    for k, v in commits_by_day_aggregate.items():
 
        overview_data.append([k, v])
 
    overview_data = sorted(overview_data, key=itemgetter(0))
 
        
 
    if not commits_by_day_author_aggregate:
 
        commits_by_day_author_aggregate[author_key_cleaner(repo.contact)] = {
 
            "label":author_key_cleaner(repo.contact),
 
            "data":[0, 1],
 
            "schema":["commits"],
 
        }
 

	
 
    stats = cur_stats if cur_stats else Statistics()
 
    stats.commit_activity = json.dumps(commits_by_day_author_aggregate)
 
    stats.commit_activity_combined = json.dumps(overview_data)
 
    stats.repository = dbrepo
 
    stats.stat_on_revision = last_cs.revision
 
    stats.languages = json.dumps({'_TOTAL_':0, '':0})
 
    
 
    try:
 
        sa.add(stats)
 
        sa.commit()    
 
    except:
 
        log.error(traceback.format_exc())
 
        sa.rollback()
 
        return False
 
                        
 
    
 
    run_task(get_commits_stats, repo_name, ts_min_y, ts_max_y)
 
                            
 
    return True
 

	
 
@task
 
def reset_user_password(user_email):
 
    log = reset_user_password.get_logger()
 
    from pylons_app.lib import auth
 
    from pylons_app.model.db import User
 
    
 
    try:
 
        try:
 
            sa = get_session()
 
            user = sa.query(User).filter(User.email == user_email).scalar()
 
            new_passwd = auth.PasswordGenerator().gen_password(8,
 
                             auth.PasswordGenerator.ALPHABETS_BIG_SMALL)
 
            if user:
 
                user.password = auth.get_crypt_password(new_passwd)
 
                sa.add(user)
 
                sa.commit()
 
                log.info('change password for %s', user_email)
 
            if new_passwd is None:
 
                raise Exception('unable to generate new password')
 
            
 
        except:
 
            log.error(traceback.format_exc())
 
            sa.rollback()
 
        
 
        run_task(send_email, user_email,
 
                 "Your new hg-app password",
 
                 'Your new hg-app password:%s' % (new_passwd))
 
        log.info('send new password mail to %s', user_email)
 
        
 
        
 
    except:
 
        log.error('Failed to update user password')
 
        log.error(traceback.format_exc())
 
    return True
 

	
 
@task    
 
def send_email(recipients, subject, body):
 
    log = send_email.get_logger()
 
    email_config = dict(config.items('DEFAULT')) 
 
    mail_from = email_config.get('app_email_from')
 
    user = email_config.get('smtp_username')
 
    passwd = email_config.get('smtp_password')
 
    mail_server = email_config.get('smtp_server')
 
    mail_port = email_config.get('smtp_port')
 
    tls = email_config.get('smtp_use_tls')
 
    ssl = False
 
    
 
    try:
 
        m = SmtpMailer(mail_from, user, passwd, mail_server,
 
                       mail_port, ssl, tls)
 
        m.send(recipients, subject, body)  
 
    except:
 
        log.error('Mail sending failed')
 
        log.error(traceback.format_exc())
 
        return False
 
    return True
pylons_app/lib/indexers/__init__.py
Show inline comments
 
from os.path import dirname as dn, join as jn
 
from pidlock import LockHeld, DaemonLock
 
from pylons_app.config.environment import load_environment
 
from pylons_app.model.hg_model import HgModel
 
from shutil import rmtree
 
from webhelpers.html.builder import escape
 
from vcs.utils.lazy import LazyProperty
 

	
 
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 
from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 
from whoosh.index import create_in, open_dir
 
from whoosh.formats import Characters
 
from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter   
 

	
 
import os
 
import sys
 
import traceback
 

	
 
#to get the pylons_app import
 
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
 

	
 

	
 
#LOCATION WE KEEP THE INDEX
 
IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')
 

	
 
#EXTENSIONS WE WANT TO INDEX CONTENT OFF
 
INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
 
                    'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
 
                    'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
 
                    'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
 
                    'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
 
                    'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
 
                    'yaws']
 

	
 
#CUSTOM ANALYZER wordsplit + lowercase filter
 
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 

	
 

	
 
#INDEX SCHEMA DEFINITION
 
SCHEMA = Schema(owner=TEXT(),
 
                repository=TEXT(stored=True),
 
                path=ID(stored=True, unique=True),
 
                content=FieldType(format=Characters(ANALYZER),
 
                             scorable=True, stored=True),
 
                modtime=STORED(), extension=TEXT(stored=True))
 

	
 

	
 
IDX_NAME = 'HG_INDEX'
 
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') 
 
FRAGMENTER = SimpleFragmenter(200)
 
                            
 
class ResultWrapper(object):
 
    def __init__(self, searcher, matcher, highlight_items):
 
        self.searcher = searcher
 
        self.matcher = matcher
 
        self.highlight_items = highlight_items
 
        self.fragment_size = 200 / 2
 
    
 
    @LazyProperty
 
    def doc_ids(self):
 
        docs_id = []
 
        while self.matcher.is_active():
 
            docnum = self.matcher.id()
 
            chunks = [offsets for offsets in self.get_chunks()]
 
            docs_id.append([docnum, chunks])
 
            self.matcher.next()
 
        return docs_id   
 
        
 
    def __str__(self):
 
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
 

	
 
    def __repr__(self):
 
        return self.__str__()
 

	
 
    def __len__(self):
 
        return len(self.doc_ids)
 

	
 
    def __iter__(self):
 
        """
 
        Allows Iteration over results,and lazy generate content
 

	
 
        *Requires* implementation of ``__getitem__`` method.
 
        """
 
        for docid in self.doc_ids:
 
            yield self.get_full_content(docid)
 

	
 
    def __getslice__(self, i, j):
 
        """
 
        Slicing of resultWrapper
 
        """
 
        slice = []
 
        for docid in self.doc_ids[i:j]:
 
            slice.append(self.get_full_content(docid))
 
        return slice   
 
                            
 

	
 
    def get_full_content(self, docid):
 
        res = self.searcher.stored_fields(docid[0])
 
        f_path = res['path'][res['path'].find(res['repository']) \
 
                             + len(res['repository']):].lstrip('/')
 
        
 
        content_short = self.get_short_content(res, docid[1])
 
        res.update({'content_short':content_short,
 
                    'content_short_hl':self.highlight(content_short),
 
                    'f_path':f_path})
 
        
 
        return res        
 
    
 
    def get_short_content(self, res, chunks):
 
        
 
        return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
 
    
 
    def get_chunks(self):
 
        """
 
        Smart function that implements chunking the content
 
        but not overlap chunks so it doesn't highlight the same
 
        close occurences twice.
 
        @param matcher:
 
        @param size:
 
        """
 
        memory = [(0, 0)]
 
        for span in self.matcher.spans():
 
            start = span.startchar or 0
 
            end = span.endchar or 0
 
            start_offseted = max(0, start - self.fragment_size)
 
            end_offseted = end + self.fragment_size
 
            
 
            if start_offseted < memory[-1][1]:
 
                start_offseted = memory[-1][1]
 
            memory.append((start_offseted, end_offseted,))    
 
            yield (start_offseted, end_offseted,)  
 
        
 
    def highlight(self, content, top=5):
 
        hl = highlight(escape(content),
 
                 self.highlight_items,
 
                 analyzer=ANALYZER,
 
                 fragmenter=FRAGMENTER,
 
                 formatter=FORMATTER,
 
                 top=top)
 
        return hl 
pylons_app/lib/indexers/daemon.py
Show inline comments
 
#!/usr/bin/env python
 
# encoding: utf-8
 
# whoosh indexer daemon for hg-app
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
#
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your opinion) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 
"""
 
Created on Jan 26, 2010
 

	
 
@author: marcink
 
A deamon will read from task table and run tasks
 
"""
 
import sys
 
import os
 
from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
#to get the pylons_app import
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 
from pidlock import LockHeld, DaemonLock
 
from pylons_app.lib.pidlock import LockHeld, DaemonLock
 
from pylons_app.model.hg_model import HgModel
 
from pylons_app.lib.helpers import safe_unicode
 
from whoosh.index import create_in, open_dir
 
from shutil import rmtree
 
from pylons_app.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
 

	
 
import logging
 

	
 
log = logging.getLogger('whooshIndexer')
 
# create logger
 
log.setLevel(logging.DEBUG)
 
log.propagate = False
 
# create console handler and set level to debug
 
ch = logging.StreamHandler()
 
ch.setLevel(logging.DEBUG)
 

	
 
# create formatter
 
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 

	
 
# add formatter to ch
 
ch.setFormatter(formatter)
 

	
 
# add ch to logger
 
log.addHandler(ch)
 

	
 
def scan_paths(root_location):
 
    return HgModel.repo_scan('/', root_location, None, True)
 

	
 
class WhooshIndexingDaemon(object):
 
    """Deamon for atomic jobs"""
 

	
 
    def __init__(self, indexname='HG_INDEX', repo_location=None):
 
        self.indexname = indexname
 
        self.repo_location = repo_location
 
        self.initial = False
 
        if not os.path.isdir(IDX_LOCATION):
 
            os.mkdir(IDX_LOCATION)
 
            log.info('Cannot run incremental index since it does not'
 
                     ' yet exist running full build')
 
            self.initial = True
 
    
 
    def get_paths(self, root_dir):
 
        """recursive walk in root dir and return a set of all path in that dir
 
        excluding files in .hg dir"""
 
        index_paths_ = set()
 
        for path, dirs, files in os.walk(root_dir):
 
            if path.find('.hg') == -1:
 
                for f in files:
 
                    index_paths_.add(jn(path, f))
 
    
 
        return index_paths_
 
    
 
    def add_doc(self, writer, path, repo):
 
        """Adding doc to writer"""
 
        
 
        ext = unicode(path.split('/')[-1].split('.')[-1].lower())
 
        #we just index the content of choosen files
 
        if ext in INDEX_EXTENSIONS:
 
            log.debug('    >> %s [WITH CONTENT]' % path)
 
            fobj = open(path, 'rb')
 
            content = fobj.read()
 
            fobj.close()
 
            u_content = safe_unicode(content)
 
        else:
 
            log.debug('    >> %s' % path)
 
            #just index file name without it's content
 
            u_content = u''
 
        
 
        
 
        
 
        try:
 
            os.stat(path)
 
            writer.add_document(owner=unicode(repo.contact),
 
                            repository=u"%s" % repo.name,
 
                            path=u"%s" % path,
 
                            content=u_content,
 
                            modtime=os.path.getmtime(path),
 
                            extension=ext)             
 
        except OSError, e:
 
            import errno
 
            if e.errno == errno.ENOENT:
 
                log.debug('path %s does not exist or is a broken symlink' % path)
 
            else:
 
                raise e                 
 

	
 
    
 
    def build_index(self):
 
        if os.path.exists(IDX_LOCATION):
 
            log.debug('removing previos index')
 
            rmtree(IDX_LOCATION)
 
            
 
        if not os.path.exists(IDX_LOCATION):
 
            os.mkdir(IDX_LOCATION)
 
        
 
        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
 
        writer = idx.writer()
 
        
 
        for cnt, repo in enumerate(scan_paths(self.repo_location).values()):
 
            log.debug('building index @ %s' % repo.path)
 
        
 
            for idx_path in self.get_paths(repo.path):
 
                self.add_doc(writer, idx_path, repo)
 
        writer.commit(merge=True)
 
                
 
        log.debug('>>> FINISHED BUILDING INDEX <<<')
 
            
 
    
 
    def update_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')
 
            
 
        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
 
        # The set of all paths in the index
 
        indexed_paths = set()
 
        # The set of all paths we need to re-index
 
        to_index = set()
 
        
 
        reader = idx.reader()
 
        writer = idx.writer()
 
    
 
        # Loop over the stored fields in the index
 
        for fields in reader.all_stored_fields():
 
            indexed_path = fields['path']
 
            indexed_paths.add(indexed_path)
 
    
 
            if not os.path.exists(indexed_path):
 
                # This file was deleted since it was indexed
 
                log.debug('removing from index %s' % indexed_path)
 
                writer.delete_by_term('path', indexed_path)
 
    
 
            else:
 
                # Check if this file was changed since it
 
                # was indexed
 
                indexed_time = fields['modtime']
 
                
 
                mtime = os.path.getmtime(indexed_path)
 
    
 
                if mtime > indexed_time:
 
    
 
                    # The file has changed, delete it and add it to the list of
 
                    # files to reindex
 
                    log.debug('adding to reindex list %s' % indexed_path)
 
                    writer.delete_by_term('path', indexed_path)
 
                    to_index.add(indexed_path)
 
                    #writer.commit()
 
    
 
        # Loop over the files in the filesystem
 
        # Assume we have a function that gathers the filenames of the
 
        # documents to be indexed
 
        for repo in scan_paths(self.repo_location).values():
 
            for path in self.get_paths(repo.path):
 
                if path in to_index or path not in indexed_paths:
 
                    # This is either a file that's changed, or a new file
 
                    # that wasn't indexed before. So index it!
 
                    self.add_doc(writer, path, repo)
 
                    log.debug('reindexing %s' % path)
 
    
 
        writer.commit(merge=True)
 
        #idx.optimize()
 
        log.debug('>>> FINISHED <<<')
 
        
 
    def run(self, full_index=False):
 
        """Run daemon"""
 
        if full_index or self.initial:
 
            self.build_index()
 
        else:
 
            self.update_index()
 
        
 
if __name__ == "__main__":
 
    arg = sys.argv[1:]
 
    if len(arg) != 2:
 
        sys.stderr.write('Please specify indexing type [full|incremental]' 
 
                         'and path to repositories as script args \n')
 
        sys.exit()
 
    
 
    
 
    if arg[0] == 'full':
 
        full_index = True
 
    elif arg[0] == 'incremental':
 
        # False means looking just for changes
 
        full_index = False
 
    else:
 
        sys.stdout.write('Please use [full|incremental]' 
 
                         ' as script first arg \n')
 
        sys.exit()
 
    
 
    if not os.path.isdir(arg[1]):
 
        sys.stderr.write('%s is not a valid path \n' % arg[1])
 
        sys.exit()
 
    else:
 
        if arg[1].endswith('/'):
 
            repo_location = arg[1] + '*'
 
        else:
 
            repo_location = arg[1] + '/*'
 
    
 
    try:
 
        l = DaemonLock()
 
        WhooshIndexingDaemon(repo_location=repo_location)\
 
            .run(full_index=full_index)
 
        l.release()
 
        reload(logging)
 
    except LockHeld:
 
        sys.exit(1)
 

	
pylons_app/lib/pidlock.py
Show inline comments
 
file renamed from pylons_app/lib/indexers/pidlock.py to pylons_app/lib/pidlock.py
 
import os, time
 
import sys
 
from warnings import warn
 

	
 
class LockHeld(Exception):pass
 

	
 

	
 
class DaemonLock(object):
 
    '''daemon locking
 
    """daemon locking
 
    USAGE:
 
    try:
 
        l = lock()
 
        main()
 
        l.release()
 
    except LockHeld:
 
        sys.exit(1)
 
    '''
 
    """
 

	
 
    def __init__(self, file=None, callbackfn=None,
 
                 desc='daemon lock', debug=False):
 

	
 
        self.pidfile = file if file else os.path.join(os.path.dirname(__file__),
 
                                                      'running.lock')
 
        self.callbackfn = callbackfn
 
        self.desc = desc
 
        self.debug = debug
 
        self.held = False
 
        #run the lock automatically !
 
        self.lock()
 

	
 
    def __del__(self):
 
        if self.held:
 

	
 
#            warn("use lock.release instead of del lock",
 
#                    category = DeprecationWarning,
 
#                    stacklevel = 2)
 

	
 
            # ensure the lock will be removed
 
            self.release()
 

	
 

	
 
    def lock(self):
 
        '''
 
        """
 
        locking function, if lock is present it will raise LockHeld exception
 
        '''
 
        """
 
        lockname = '%s' % (os.getpid())
 

	
 
        self.trylock()
 
        self.makelock(lockname, self.pidfile)
 
        return True
 

	
 
    def trylock(self):
 
        running_pid = False
 
        try:
 
            pidfile = open(self.pidfile, "r")
 
            pidfile.seek(0)
 
            running_pid = pidfile.readline()
 
            if self.debug:
 
                print 'lock file present running_pid: %s, checking for execution'\
 
                % running_pid
 
            # Now we check the PID from lock file matches to the current
 
            # process PID
 
            if running_pid:
 
                if os.path.exists("/proc/%s" % running_pid):
 
                        print "You already have an instance of the program running"
 
                        print "It is running as process %s" % running_pid
 
                        raise LockHeld
 
                else:
 
                        print "Lock File is there but the program is not running"
 
                        print "Removing lock file for the: %s" % running_pid
 
                        self.release()
 
        except IOError, e:
 
            if e.errno != 2:
 
                raise
 

	
 

	
 
    def release(self):
 
        '''
 
        """
 
        releases the pid by removing the pidfile
 
        '''
 
        """
 
        if self.callbackfn:
 
            #execute callback function on release
 
            if self.debug:
 
                print 'executing callback function %s' % self.callbackfn
 
            self.callbackfn()
 
        try:
 
            if self.debug:
 
                print 'removing pidfile %s' % self.pidfile
 
            os.remove(self.pidfile)
 
            self.held = False
 
        except OSError, e:
 
            if self.debug:
 
                print 'removing pidfile failed %s' % e
 
            pass
 

	
 
    def makelock(self, lockname, pidfile):
 
        '''
 
        """
 
        this function will make an actual lock
 
        @param lockname: acctual pid of file
 
        @param pidfile: the file to write the pid in
 
        '''
 
        """
 
        if self.debug:
 
            print 'creating a file %s and pid: %s' % (pidfile, lockname)
 
        pidfile = open(self.pidfile, "wb")
 
        pidfile.write(lockname)
 
        pidfile.close
 
        self.held = True
 

	
 

	
 
def main():
 
    print 'func is running'
 
    cnt = 20
 
    while 1:
 
        print cnt
 
        if cnt == 0:
 
            break
 
        time.sleep(1)
 
        cnt -= 1
 

	
 

	
 
if __name__ == "__main__":
 
    try:
 
        l = DaemonLock(desc='test lock')
 
        main()
 
        l.release()
 
    except LockHeld:
 
        sys.exit(1)
pylons_app/lib/utils.py
Show inline comments
 
#!/usr/bin/env python
 
# encoding: utf-8
 
# Utilities for hg app
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your opinion) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 

	
 
"""
 
Created on April 18, 2010
 
Utilities for hg app
 
@author: marcink
 
"""
 
from beaker.cache import cache_region
 
from mercurial import ui, config, hg
 
from mercurial.error import RepoError
 
from pylons_app.model import meta
 
from pylons_app.model.db import Repository, User, HgAppUi, HgAppSettings
 
from vcs.backends.base import BaseChangeset
 
from vcs.utils.lazy import LazyProperty
 
import logging
 
import os
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def get_repo_slug(request):
 
    return request.environ['pylons.routes_dict'].get('repo_name')
 

	
 
def is_mercurial(environ):
 
    """
 
    Returns True if request's target is mercurial server - header
 
    ``HTTP_ACCEPT`` of such request would start with ``application/mercurial``.
 
    """
 
    http_accept = environ.get('HTTP_ACCEPT')
 
    if http_accept and http_accept.startswith('application/mercurial'):
 
        return True
 
    return False
 

	
 
def check_repo_dir(paths):
 
    repos_path = paths[0][1].split('/')
 
    if repos_path[-1] in ['*', '**']:
 
        repos_path = repos_path[:-1]
 
    if repos_path[0] != '/':
 
        repos_path[0] = '/'
 
    if not os.path.isdir(os.path.join(*repos_path)):
 
        raise Exception('Not a valid repository in %s' % paths[0][1])
 

	
 
def check_repo_fast(repo_name, base_path):
 
    if os.path.isdir(os.path.join(base_path, repo_name)):return False
 
    return True
 

	
 
def check_repo(repo_name, base_path, verify=True):
 

	
 
    repo_path = os.path.join(base_path, repo_name)
 

	
 
    try:
 
        if not check_repo_fast(repo_name, base_path):
 
            return False
 
        r = hg.repository(ui.ui(), repo_path)
 
        if verify:
 
            hg.verify(r)
 
        #here we hnow that repo exists it was verified
 
        log.info('%s repo is already created', repo_name)
 
        return False
 
    except RepoError:
 
        #it means that there is no valid repo there...
 
        log.info('%s repo is free for creation', repo_name)
 
        return True
 

	
 
def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):
 
    while True:
 
        ok = raw_input(prompt)
 
        if ok in ('y', 'ye', 'yes'): return True
 
        if ok in ('n', 'no', 'nop', 'nope'): return False
 
        retries = retries - 1
 
        if retries < 0: raise IOError
 
        print complaint
 
        
 
@cache_region('super_short_term', 'cached_hg_ui')
 
def get_hg_ui_cached():
 
    try:
 
        sa = meta.Session
 
        ret = sa.query(HgAppUi).all()
 
    finally:
 
        meta.Session.remove()
 
    return ret
 

	
 

	
 
def get_hg_settings():
 
    try:
 
        sa = meta.Session
 
        ret = sa.query(HgAppSettings).all()
 
    finally:
 
        meta.Session.remove()
 
        
 
    if not ret:
 
        raise Exception('Could not get application settings !')
 
    settings = {}
 
    for each in ret:
 
        settings['hg_app_' + each.app_settings_name] = each.app_settings_value    
 
    
 
    return settings
 

	
 
def get_hg_ui_settings():
 
    try:
 
        sa = meta.Session
 
        ret = sa.query(HgAppUi).all()
 
    finally:
 
        meta.Session.remove()
 
        
 
    if not ret:
 
        raise Exception('Could not get application ui settings !')
 
    settings = {}
 
    for each in ret:
 
        k = each.ui_key
 
        v = each.ui_value
 
        if k == '/':
 
            k = 'root_path'
 
        
 
        if k.find('.') != -1:
 
            k = k.replace('.', '_')
 
        
 
        if each.ui_section == 'hooks':
 
            v = each.ui_active
 
        
 
        settings[each.ui_section + '_' + k] = v  
 
    
 
    return settings
 

	
 
#propagated from mercurial documentation
 
ui_sections = ['alias', 'auth',
 
                'decode/encode', 'defaults',
 
                'diff', 'email',
 
                'extensions', 'format',
 
                'merge-patterns', 'merge-tools',
 
                'hooks', 'http_proxy',
 
                'smtp', 'patch',
 
                'paths', 'profiling',
 
                'server', 'trusted',
 
                'ui', 'web', ]
 
        
 
def make_ui(read_from='file', path=None, checkpaths=True):        
 
    """
 
    A function that will read python rc files or database
 
    and make an mercurial ui object from read options
 
    
 
    @param path: path to mercurial config file
 
    @param checkpaths: check the path
 
    @param read_from: read from 'file' or 'db'
 
    """
 

	
 
    baseui = ui.ui()
 

	
 
    if read_from == 'file':
 
        if not os.path.isfile(path):
 
            log.warning('Unable to read config file %s' % path)
 
            return False
 
        log.debug('reading hgrc from %s', path)
 
        cfg = config.config()
 
        cfg.read(path)
 
        for section in ui_sections:
 
            for k, v in cfg.items(section):
 
                baseui.setconfig(section, k, v)
 
                log.debug('settings ui from file[%s]%s:%s', section, k, v)
 
        if checkpaths:check_repo_dir(cfg.items('paths'))                
 
              
 
        
 
    elif read_from == 'db':
 
        hg_ui = get_hg_ui_cached()
 
        for ui_ in hg_ui:
 
            if ui_.ui_active:
 
                log.debug('settings ui from db[%s]%s:%s', ui_.ui_section, ui_.ui_key, ui_.ui_value)
 
                baseui.setconfig(ui_.ui_section, ui_.ui_key, ui_.ui_value)
 
        
 
    
 
    return baseui
 

	
 

	
 
def set_hg_app_config(config):
 
    hgsettings = get_hg_settings()
 
    
 
    for k, v in hgsettings.items():
 
        config[k] = v
 

	
 
def invalidate_cache(name, *args):
 
    """Invalidates given name cache"""
 
    
 
    from beaker.cache import region_invalidate
 
    log.info('INVALIDATING CACHE FOR %s', name)
 
    
 
    """propagate our arguments to make sure invalidation works. First
 
    argument has to be the name of cached func name give to cache decorator
 
    without that the invalidation would not work"""
 
    tmp = [name]
 
    tmp.extend(args)
 
    args = tuple(tmp)
 
    
 
    if name == 'cached_repo_list':
 
        from pylons_app.model.hg_model import _get_repos_cached
 
        region_invalidate(_get_repos_cached, None, *args)
 
        
 
    if name == 'full_changelog':
 
        from pylons_app.model.hg_model import _full_changelog_cached
 
        region_invalidate(_full_changelog_cached, None, *args)
 
        
 
class EmptyChangeset(BaseChangeset):
 
    
 
    revision = -1
 
    message = ''
 
    author = ''
 
    
 
    @LazyProperty
 
    def raw_id(self):
 
        """
 
        Returns raw string identifing this changeset, useful for web
 
        representation.
 
        """
 
        return '0' * 12
 

	
 

	
 
def repo2db_mapper(initial_repo_list, remove_obsolete=False):
 
    """
 
    maps all found repositories into db
 
    """
 
    from pylons_app.model.repo_model import RepoModel
 
    
 
    sa = meta.Session
 
    user = sa.query(User).filter(User.admin == True).first()
 
    
 
    rm = RepoModel()
 
    
 
    for name, repo in initial_repo_list.items():
 
        if not sa.query(Repository).filter(Repository.repo_name == name).scalar():
 
            log.info('repository %s not found creating default', name)
 
                
 
            form_data = {
 
                         'repo_name':name,
 
                         'description':repo.description if repo.description != 'unknown' else \
 
                                        'auto description for %s' % name,
 
                         'private':False
 
                         }
 
            rm.create(form_data, user, just_db=True)
 

	
 

	
 
    if remove_obsolete:
 
        #remove from database those repositories that are not in the filesystem
 
        for repo in sa.query(Repository).all():
 
            if repo.repo_name not in initial_repo_list.keys():
 
                sa.delete(repo)
 
                sa.commit()
 

	
 
    
 
    meta.Session.remove()
 

	
 
from UserDict import DictMixin
 

	
 
class OrderedDict(dict, DictMixin):
 

	
 
    def __init__(self, *args, **kwds):
 
        if len(args) > 1:
 
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
 
        try:
 
            self.__end
 
        except AttributeError:
 
            self.clear()
 
        self.update(*args, **kwds)
 

	
 
    def clear(self):
 
        self.__end = end = []
 
        end += [None, end, end]         # sentinel node for doubly linked list
 
        self.__map = {}                 # key --> [key, prev, next]
 
        dict.clear(self)
 

	
 
    def __setitem__(self, key, value):
 
        if key not in self:
 
            end = self.__end
 
            curr = end[1]
 
            curr[2] = end[1] = self.__map[key] = [key, curr, end]
 
        dict.__setitem__(self, key, value)
 

	
 
    def __delitem__(self, key):
 
        dict.__delitem__(self, key)
 
        key, prev, next = self.__map.pop(key)
 
        prev[2] = next
 
        next[1] = prev
 

	
 
    def __iter__(self):
 
        end = self.__end
 
        curr = end[2]
 
        while curr is not end:
 
            yield curr[0]
 
            curr = curr[2]
 

	
 
    def __reversed__(self):
 
        end = self.__end
 
        curr = end[1]
 
        while curr is not end:
 
            yield curr[0]
 
            curr = curr[1]
 

	
 
    def popitem(self, last=True):
 
        if not self:
 
            raise KeyError('dictionary is empty')
 
        if last:
 
            key = reversed(self).next()
 
        else:
 
            key = iter(self).next()
 
        value = self.pop(key)
 
        return key, value
 

	
 
    def __reduce__(self):
 
        items = [[k, self[k]] for k in self]
 
        tmp = self.__map, self.__end
 
        del self.__map, self.__end
 
        inst_dict = vars(self).copy()
 
        self.__map, self.__end = tmp
 
        if inst_dict:
 
            return (self.__class__, (items,), inst_dict)
 
        return self.__class__, (items,)
 

	
 
    def keys(self):
 
        return list(self)
 

	
 
    setdefault = DictMixin.setdefault
 
    update = DictMixin.update
 
    pop = DictMixin.pop
 
    values = DictMixin.values
 
    items = DictMixin.items
 
    iterkeys = DictMixin.iterkeys
 
    itervalues = DictMixin.itervalues
 
    iteritems = DictMixin.iteritems
 

	
 
    def __repr__(self):
 
        if not self:
 
            return '%s()' % (self.__class__.__name__,)
 
        return '%s(%r)' % (self.__class__.__name__, self.items())
 

	
 
    def copy(self):
 
        return self.__class__(self)
 

	
 
    @classmethod
 
    def fromkeys(cls, iterable, value=None):
 
        d = cls()
 
        for key in iterable:
 
            d[key] = value
 
        return d
 

	
 
    def __eq__(self, other):
 
        if isinstance(other, OrderedDict):
 
            return len(self) == len(other) and self.items() == other.items()
 
        return dict.__eq__(self, other)
 

	
 
    def __ne__(self, other):
 
        return not self == other
 

	
 

	
 
#===============================================================================
 
# TEST FUNCTIONS
 
#===============================================================================
 
def create_test_index(repo_location, full_index):
 
    """Makes default test index
 
    @param repo_location:
 
    @param full_index:
 
    """
 
    from pylons_app.lib.indexers import daemon
 
    from pylons_app.lib.indexers.daemon import WhooshIndexingDaemon
 
    from pylons_app.lib.indexers.pidlock import DaemonLock, LockHeld
 
    from pylons_app.lib.pidlock import DaemonLock, LockHeld
 
    from pylons_app.lib.indexers import IDX_LOCATION
 
    import shutil
 
    
 
    if os.path.exists(IDX_LOCATION):
 
        shutil.rmtree(IDX_LOCATION)
 
         
 
    try:
 
        l = DaemonLock()
 
        WhooshIndexingDaemon(repo_location=repo_location)\
 
            .run(full_index=full_index)
 
        l.release()
 
    except LockHeld:
 
        pass    
 
    
 
def create_test_env(repos_test_path, config):
 
    """Makes a fresh database and 
 
    install test repository into tmp dir
 
    """
 
    from pylons_app.lib.db_manage import DbManage
 
    import tarfile
 
    import shutil
 
    from os.path import dirname as dn, join as jn, abspath
 
    
 
    log = logging.getLogger('TestEnvCreator')
 
    # create logger
 
    log.setLevel(logging.DEBUG)
 
    log.propagate = True
 
    # create console handler and set level to debug
 
    ch = logging.StreamHandler()
 
    ch.setLevel(logging.DEBUG)
 
    
 
    # create formatter
 
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 
    
 
    # add formatter to ch
 
    ch.setFormatter(formatter)
 
    
 
    # add ch to logger
 
    log.addHandler(ch)
 
    
 
    #PART ONE create db
 
    log.debug('making test db')
 
    dbname = config['sqlalchemy.db1.url'].split('/')[-1]
 
    dbmanage = DbManage(log_sql=True, dbname=dbname, tests=True)
 
    dbmanage.create_tables(override=True)
 
    dbmanage.config_prompt(repos_test_path)
 
    dbmanage.create_default_user()
 
    dbmanage.admin_prompt()
 
    dbmanage.create_permissions()
 
    dbmanage.populate_default_permissions()
 
    
 
    #PART TWO make test repo
 
    log.debug('making test vcs repo')
 
    if os.path.isdir('/tmp/vcs_test'):
 
        shutil.rmtree('/tmp/vcs_test')
 
        
 
    cur_dir = dn(dn(abspath(__file__)))
 
    tar = tarfile.open(jn(cur_dir, 'tests', "vcs_test.tar.gz"))
 
    tar.extractall('/tmp')
 
    tar.close()
pylons_app/templates/files/files_browser.html
Show inline comments
 
<%def name="file_class(node)">
 
	%if node.is_file():
 
		<%return "browser-file" %>
 
	%else:
 
		<%return "browser-dir"%>
 
	%endif
 
</%def>
 
<div id="body" class="browserblock">
 
	<div class="browser-header">
 
		${h.form(h.url.current())}
 
		<div class="info_box">
 
          <span >${_('view')}@rev</span> 
 
          <a href="${c.url_prev}">&laquo;</a>
 
          ${h.text('at_rev',value=c.rev_nr,size=3)}
 
          <a href="${c.url_next}">&raquo;</a>
 
          ${h.submit('view','view')}
 
	    </div>           
 
		${h.end_form()}
 
	</div>
 
	<div class="browser-body">
 
		<table class="code-browser">
 
		         <thead>
 
		             <tr>
 
		                 <th>${_('Name')}</th>
 
		                 <th>${_('Size')}</th>
 
		                 <th>${_('Mimetype')}</th>
 
		                 <th>${_('Revision')}</th>
 
		                 <th>${_('Last modified')}</th>
 
		                 <th>${_('Last commiter')}</th>
 
		             </tr>
 
		         </thead>
 
		         	<tr class="parity0">
 
		          		<td>
 
		          		% if c.files_list.parent:
 
		          			${h.link_to('..',h.url('files_home',repo_name=c.repo_name,revision=c.cur_rev,f_path=c.files_list.parent.path),class_="browser-dir")}
 
		          		%endif
 
		          		</td>
 
		          		<td></td>
 
		          		<td></td>
 
		          		<td></td>
 
		          		<td></td>
 
		         	</tr>
 

	
 
          		% if c.files_list.parent:
 
         		<tr class="parity0">
 
	          		<td>		          		
 
	          			${h.link_to('..',h.url('files_home',repo_name=c.repo_name,revision=c.cur_rev,f_path=c.files_list.parent.path),class_="browser-dir")}
 
	          		</td>
 
	          		<td></td>
 
	          		<td></td>
 
	          		<td></td>
 
	          		<td></td>
 
	          		<td></td>
 
				</tr>	          		
 
          		%endif
 
		         	
 
		    %for cnt,node in enumerate(c.files_list,1):
 
				<tr class="parity${cnt%2}">
 
		             <td>
 
						${h.link_to(node.name,h.url('files_home',repo_name=c.repo_name,revision=c.cur_rev,f_path=node.path),class_=file_class(node))}
 
		             </td>
 
		             <td>
 
		                %if node.is_file():
 
		             		${h.format_byte_size(node.size,binary=True)}
 
		             	%endif
 
		             	${h.format_byte_size(node.size,binary=True)}
 
		             </td>
 
		             <td>
 
		              %if node.is_file():
 
		                  ${node.mimetype}
 
		              %endif
 
		             </td>
 
		             <td>
 
		             	%if node.is_file():
 
		             		${node.last_changeset.revision}
 
		             	%endif
 
		             </td>
 
		             <td>
 
		             	%if node.is_file():
 
		             		${h.age(node.last_changeset._ctx.date())} - ${node.last_changeset.date}
 
		             	%endif
 
		             </td>
 
		             <td>
 
		             	%if node.is_file():
 
		             		${node.last_changeset.author}
 
		             	%endif                    
 
		             </td>
 
				</tr>
 
			%endfor
 
		</table>
 
	</div>
 
</div>
 
\ No newline at end of file
0 comments (0 inline, 0 general)