Changeset - 373ee7031003
[Not reviewed]
beta
0 4 0
Marcin Kuzminski - 15 years ago 2010-11-06 16:14:49
marcin@python-works.com
fixed annotation bug, added history to annotation.
multiple fixes for raw_id length
removed unneeded function from index daemon.
4 files changed with 15 insertions and 18 deletions:
0 comments (0 inline, 0 general)
rhodecode/controllers/files.py
Show inline comments
 
#!/usr/bin/env python
 
# encoding: utf-8
 
# files controller for pylons
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 

	
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your option) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 
"""
 
Created on April 21, 2010
 
files controller for pylons
 
@author: marcink
 
"""
 
from mercurial import archival
 
from pylons import request, response, session, tmpl_context as c, url
 
from pylons.i18n.translation import _
 
from pylons.controllers.util import redirect
 
from rhodecode.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
 
from rhodecode.lib.base import BaseController, render
 
from rhodecode.lib.utils import EmptyChangeset
 
from rhodecode.model.hg import HgModel
 
from vcs.exceptions import RepositoryError, ChangesetError
 
from vcs.nodes import FileNode
 
from vcs.utils import diffs as differ
 
import logging
 
import rhodecode.lib.helpers as h
 
import tempfile
 

	
 
log = logging.getLogger(__name__)
 

	
 
class FilesController(BaseController):
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator(
        'repository.read',
        'repository.write',
        'repository.admin',
    )
    def __before__(self):
        """Run the base controller setup and publish the display size limit.

        Access is guarded by the decorators above. ``c.file_size_limit`` is
        later compared against node sizes in :meth:`diff`.
        """
        super(FilesController, self).__before__()
        #limit of file size to display
        kilo = 1024
        c.file_size_limit = 250 * kilo
 

	
 
    def index(self, repo_name, revision, f_path):
 
        hg_model = HgModel()
 
        c.repo = repo = hg_model.get_repo(c.repo_name)
 
        c.repo = hg_model.get_repo(c.repo_name)
 
        revision = request.POST.get('at_rev', None) or revision
 

	
 
        def get_next_rev(cur):
 
            max_rev = len(c.repo.revisions) - 1
 
            r = cur + 1
 
            if r > max_rev:
 
                r = max_rev
 
            return r
 

	
 
        def get_prev_rev(cur):
 
            r = cur - 1
 
            return r
 

	
 
        c.f_path = f_path
 

	
 

	
 
        try:
 
            c.changeset = repo.get_changeset(revision)
 
            c.changeset = c.repo.get_changeset(revision)
 
            cur_rev = c.changeset.revision
 
            prev_rev = repo.get_changeset(get_prev_rev(cur_rev)).raw_id
 
            next_rev = repo.get_changeset(get_next_rev(cur_rev)).raw_id
 
            prev_rev = c.repo.get_changeset(get_prev_rev(cur_rev)).raw_id
 
            next_rev = c.repo.get_changeset(get_next_rev(cur_rev)).raw_id
 

	
 
            c.url_prev = url('files_home', repo_name=c.repo_name,
 
                             revision=prev_rev, f_path=f_path)
 
            c.url_next = url('files_home', repo_name=c.repo_name,
 
                         revision=next_rev, f_path=f_path)
 

	
 
            try:
 
                c.files_list = c.changeset.get_node(f_path)
 
                c.file_history = self._get_history(repo, c.files_list, f_path)
 
                c.file_history = self._get_history(c.repo, c.files_list, f_path)
 

	
 
            except RepositoryError, e:
 
                h.flash(str(e), category='warning')
 
                redirect(h.url('files_home', repo_name=repo_name, revision=revision))
 

	
 
        except RepositoryError, e:
 
            h.flash(str(e), category='warning')
 
            redirect(h.url('files_home', repo_name=repo_name, revision='tip'))
 

	
 

	
 

	
 
        return render('files/files.html')
 

	
 
    def rawfile(self, repo_name, revision, f_path):
        """Return the content of ``f_path`` at ``revision`` as a download.

        The response content type is the node's mimetype and the content
        disposition is an attachment named after the last segment of
        ``f_path``.
        """
        c.repo = HgModel().get_repo(c.repo_name)
        changeset = c.repo.get_changeset(revision)
        file_node = changeset.get_node(f_path)

        response.content_type = file_node.mimetype
        download_name = f_path.split('/')[-1]
        response.content_disposition = 'attachment; filename=%s' % download_name
        return file_node.content
 

	
 
    def raw(self, repo_name, revision, f_path):
        """Return the content of ``f_path`` at ``revision`` as plain text."""
        c.repo = HgModel().get_repo(c.repo_name)
        changeset = c.repo.get_changeset(revision)
        file_node = changeset.get_node(f_path)

        response.content_type = 'text/plain'
        return file_node.content
 

	
 
    def annotate(self, repo_name, revision, f_path):
        """Render the annotated view of ``f_path`` at ``revision``.

        The changeset and the file node are looked up once and exposed to
        the template through ``c.cs``, ``c.file``, ``c.file_msg``,
        ``c.cur_rev``, ``c.rev_nr``, ``c.file_history`` and ``c.f_path``.

        :param repo_name: repository name (the lookup itself uses
            ``c.repo_name``)
        :param revision: revision to annotate
        :param f_path: path of the file inside the repository
        """
        hg_model = HgModel()
        c.repo = hg_model.get_repo(c.repo_name)

        #single lookup shared by every attribute derived from the changeset
        c.cs = cs = c.repo.get_changeset(revision)
        c.file = cs.get_node(f_path)
        c.file_msg = cs.get_file_message(f_path)
        c.cur_rev = cs.raw_id
        c.rev_nr = cs.revision
        c.file_history = self._get_history(c.repo, c.file, f_path)

        c.f_path = f_path

        return render('files/files_annotate.html')
 

	
 
    def archivefile(self, repo_name, revision, fileformat):
        """Build an archive of the repository at ``revision`` and stream it.

        :param repo_name: repository to archive
        :param revision: revision handed to ``archival.archive``
        :param fileformat: archive extension, one of ``.tar.bz2``,
            ``.tar.gz`` or ``.zip``

        Returns a plain message for an unknown ``fileformat``, otherwise a
        generator yielding the archive piece by piece. The temporary file
        holding the archive is closed when streaming ends or when building
        the archive fails.
        """
        archive_specs = {
          '.tar.bz2': ('application/x-tar', 'tbz2'),
          '.tar.gz': ('application/x-tar', 'tgz'),
          '.zip': ('application/zip', 'zip'),
        }
        if fileformat not in archive_specs:
            return 'Unknown archive type %s' % fileformat

        def read_in_chunks(file_object, chunk_size=1024 * 40):
            """Lazy function (generator) to read a file piece by piece.
            Default chunk size: 40k."""
            try:
                while True:
                    data = file_object.read(chunk_size)
                    if not data:
                        break
                    yield data
            finally:
                #release the temporary file once streaming ends or is aborted
                file_object.close()

        archive = tempfile.TemporaryFile()
        try:
            repo = HgModel().get_repo(repo_name).repo
            fname = '%s-%s%s' % (repo_name, revision, fileformat)
            archival.archive(repo, archive, revision,
                             archive_specs[fileformat][1],
                             prefix='%s-%s' % (repo_name, revision))
            response.content_type = archive_specs[fileformat][0]
            response.content_disposition = 'attachment; filename=%s' % fname
            archive.seek(0)
        except Exception:
            #do not leave the temporary file open when the archive fails
            archive.close()
            raise
        return read_in_chunks(archive)
 

	
 
    def diff(self, repo_name, f_path):
        """Diff ``f_path`` between the revisions named in the query string.

        Reads ``diff1`` and ``diff2`` (revisions) and ``diff`` (action) from
        ``request.GET``. An empty or all-zero revision is replaced by an
        ``EmptyChangeset`` with an empty file node. The ``download`` and
        ``raw`` actions return the raw diff as plain text; any other action
        renders the html page. A ``RepositoryError`` while loading either
        side redirects to ``files_home``.
        """
        hg_model = HgModel()
        diff1 = request.GET.get('diff1')
        diff2 = request.GET.get('diff2')
        c.action = request.GET.get('diff')
        c.no_changes = diff1 == diff2
        c.f_path = f_path
        c.repo = hg_model.get_repo(c.repo_name)

        #revision values that stand for "no changeset"
        empty_revs = ['', None, 'None', '0' * 12, '0' * 40]

        try:
            if diff1 in empty_revs:
                c.changeset_1 = EmptyChangeset()
                node1 = FileNode('.', '', changeset=c.changeset_1)
            else:
                c.changeset_1 = c.repo.get_changeset(diff1)
                node1 = c.changeset_1.get_node(f_path)

            if diff2 in empty_revs:
                c.changeset_2 = EmptyChangeset()
                node2 = FileNode('.', '', changeset=c.changeset_2)
            else:
                c.changeset_2 = c.repo.get_changeset(diff2)
                node2 = c.changeset_2.get_node(f_path)
        except RepositoryError:
            return redirect(url('files_home',
                                repo_name=c.repo_name, f_path=f_path))

        f_udiff = differ.get_udiff(node1, node2)
        processor = differ.DiffProcessor(f_udiff)

        if c.action == 'download':
            diff_name = '%s_vs_%s.diff' % (diff1, diff2)
            response.content_type = 'text/plain'
            response.content_disposition = 'attachment; filename=%s' \
                                                    % diff_name
            return processor.raw_diff()

        if c.action == 'raw':
            response.content_type = 'text/plain'
            return processor.raw_diff()

        #'diff' and the default option are rendered the same way
        limit = c.file_size_limit
        if node1.size > limit or node2.size > limit:
            c.cur_diff = _('Diff is to big to display')
        else:
            c.cur_diff = processor.as_html()

        if not c.cur_diff:
            c.no_changes = True
        return render('files/file_diff.html')
 

	
 
    def _get_history(self, repo, node, f_path):
        """Return ``(raw_id, 'r<revision>:<short_id>')`` pairs for a file.

        Built from ``node.history``; any node whose kind is not
        ``NodeKind.FILE`` gives an empty list. ``repo`` and ``f_path`` are
        accepted but not used here.
        """
        from vcs.nodes import NodeKind
        if node.kind is not NodeKind.FILE:
            return []
        return [(chs.raw_id, 'r%s:%s' % (chs.revision, chs.short_id))
                for chs in node.history]
rhodecode/lib/celerylib/tasks.py
Show inline comments
 
from celery.decorators import task
 

	
 
from operator import itemgetter
 
from pylons.i18n.translation import _
 
from rhodecode.lib.celerylib import run_task, locked_task
 
from rhodecode.lib.helpers import person
 
from rhodecode.lib.smtp_mailer import SmtpMailer
 
from rhodecode.lib.utils import OrderedDict
 
from time import mktime
 
from vcs.backends.hg import MercurialRepository
 
from vcs.backends.git import GitRepository
 
import os
 
import traceback
 
from vcs.backends import get_repo
 
from vcs.utils.helpers import get_scm
 

	
 
try:
 
    import json
 
except ImportError:
 
    #python 2.5 compatibility
 
    import simplejson as json
 

	
 
try:
 
    from celeryconfig import PYLONS_CONFIG as config
 
    celery_on = True
 
except ImportError:
 
    #if celeryconfig is not present let's just load our pylons
 
    #config instead
 
    from pylons import config
 
    celery_on = False
 

	
 

	
 
__all__ = ['whoosh_index', 'get_commits_stats',
 
           'reset_user_password', 'send_email']
 

	
 
def get_session():
    """Return a database session suited to the current runtime.

    With celery a scoped session is built on an engine configured from the
    ``app:main`` section (``sqlalchemy.db1.`` prefix); otherwise the
    application's own ``Session`` is instantiated.
    """
    if not celery_on:
        #If we don't use celery reuse our current application Session
        from rhodecode.model.meta import Session
        return Session()

    from sqlalchemy import engine_from_config
    from sqlalchemy.orm import sessionmaker, scoped_session
    app_conf = dict(config.items('app:main'))
    engine = engine_from_config(app_conf, 'sqlalchemy.db1.')
    return scoped_session(sessionmaker(bind=engine))
 

	
 
def get_hg_settings():
    """Return all application settings as a dict keyed ``rhodecode_<name>``.

    Raises ``Exception`` when the settings table yields no rows.
    """
    from rhodecode.model.db import RhodeCodeSettings
    rows = get_session().query(RhodeCodeSettings).all()

    if not rows:
        raise Exception('Could not get application settings !')

    return dict(('rhodecode_' + row.app_settings_name, row.app_settings_value)
                for row in rows)
 

	
 
def get_hg_ui_settings():
    """Return the ui settings as a dict keyed ``<section>_<key>``.

    The key ``/`` is stored as ``root_path`` and dots in keys become
    underscores. Entries of the ``hooks`` section carry their active flag
    instead of their value. Raises ``Exception`` when no rows exist.
    """
    from rhodecode.model.db import RhodeCodeUi
    rows = get_session().query(RhodeCodeUi).all()

    if not rows:
        raise Exception('Could not get application ui settings !')

    settings = {}
    for row in rows:
        key = row.ui_key
        value = row.ui_value
        if key == '/':
            key = 'root_path'
        key = key.replace('.', '_')

        if row.ui_section == 'hooks':
            value = row.ui_active

        settings['%s_%s' % (row.ui_section, key)] = value

    return settings
 

	
 
@task
@locked_task
def whoosh_index(repo_location, full_index):
    """Run the whoosh indexing daemon once for ``repo_location``.

    :param repo_location: passed to ``WhooshIndexingDaemon`` as the
        repositories location
    :param full_index: passed to the daemon's ``run()`` as ``full_index``
    """
    log = whoosh_index.get_logger()
    from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
    #NOTE(review): WhooshIndexingDaemon.__init__ raises on a false
    #index_location, so this placeholder has to be replaced with the real
    #index directory - confirm which setting holds it
    index_location = ''
    #the daemon is built and run exactly once, with both locations given
    WhooshIndexingDaemon(index_location=index_location,
                         repo_location=repo_location).run(full_index=full_index)
 

	
 
@task
 
@locked_task
 
def get_commits_stats(repo_name, ts_min_y, ts_max_y):
 
    from rhodecode.model.db import Statistics, Repository
 
    log = get_commits_stats.get_logger()
 
    author_key_cleaner = lambda k: person(k).replace('"', "") #for js data compatibilty
 

	
 
    commits_by_day_author_aggregate = {}
 
    commits_by_day_aggregate = {}
 
    repos_path = get_hg_ui_settings()['paths_root_path']
 
    p = os.path.join(repos_path, repo_name)
 
    repo = get_repo(p)
 

	
 
    skip_date_limit = True
 
    parse_limit = 250 #limit for single task changeset parsing optimal for
 
    last_rev = 0
 
    last_cs = None
 
    timegetter = itemgetter('time')
 

	
 
    sa = get_session()
 

	
 
    dbrepo = sa.query(Repository)\
 
        .filter(Repository.repo_name == repo_name).scalar()
 
    cur_stats = sa.query(Statistics)\
 
        .filter(Statistics.repository == dbrepo).scalar()
 
    if cur_stats:
 
        last_rev = cur_stats.stat_on_revision
 
    if not repo.revisions:
 
        return True
 

	
 
    if last_rev == repo.revisions[-1] and len(repo.revisions) > 1:
 
        #pass silently without any work if we're not on first revision or current
 
        #state of parsing revision(from db marker) is the last revision
 
        return True
 

	
 
    if cur_stats:
 
        commits_by_day_aggregate = OrderedDict(
 
                                       json.loads(
 
                                        cur_stats.commit_activity_combined))
 
        commits_by_day_author_aggregate = json.loads(cur_stats.commit_activity)
 

	
 
    log.debug('starting parsing %s', parse_limit)
 
    lmktime = mktime
 

	
 
    for cnt, rev in enumerate(repo.revisions[last_rev:]):
 
        last_cs = cs = repo.get_changeset(rev)
 
        k = '%s-%s-%s' % (cs.date.timetuple()[0], cs.date.timetuple()[1],
 
                          cs.date.timetuple()[2])
 
        timetupple = [int(x) for x in k.split('-')]
 
        timetupple.extend([0 for _ in xrange(6)])
 
        k = lmktime(timetupple)
 
        if commits_by_day_author_aggregate.has_key(author_key_cleaner(cs.author)):
 
            try:
 
                l = [timegetter(x) for x in commits_by_day_author_aggregate\
 
                        [author_key_cleaner(cs.author)]['data']]
 
                time_pos = l.index(k)
 
            except ValueError:
 
                time_pos = False
 

	
 
            if time_pos >= 0 and time_pos is not False:
 

	
 
                datadict = commits_by_day_author_aggregate\
 
                    [author_key_cleaner(cs.author)]['data'][time_pos]
 

	
 
                datadict["commits"] += 1
 
                datadict["added"] += len(cs.added)
 
                datadict["changed"] += len(cs.changed)
 
                datadict["removed"] += len(cs.removed)
 

	
 
            else:
 
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 

	
 
                    datadict = {"time":k,
 
                                "commits":1,
 
                                "added":len(cs.added),
 
                                "changed":len(cs.changed),
 
                                "removed":len(cs.removed),
 
                               }
 
                    commits_by_day_author_aggregate\
 
                        [author_key_cleaner(cs.author)]['data'].append(datadict)
 

	
 
        else:
 
            if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 
                commits_by_day_author_aggregate[author_key_cleaner(cs.author)] = {
 
                                    "label":author_key_cleaner(cs.author),
 
                                    "data":[{"time":k,
 
                                             "commits":1,
 
                                             "added":len(cs.added),
 
                                             "changed":len(cs.changed),
 
                                             "removed":len(cs.removed),
 
                                             }],
 
                                    "schema":["commits"],
 
                                    }
 

	
 
        #gather all data by day
rhodecode/lib/helpers.py
Show inline comments
 
@@ -178,194 +178,194 @@ class _ToolTip(object):
 
                                xy_pos = [cur_x,cur_y];                                
 
                                break;                             
 
                                 
 
                        }
 

	
 
                        this.cfg.setProperty("xy",xy_pos);
 

	
 
                  });
 
                  
 
            //Mouse out 
 
            myToolTips.contextMouseOutEvent.subscribe(
 
                function(type, args) {
 
                    var context = args[0];
 
                    
 
                });
 
        });
 
        '''
 
        return literal(js)
 

	
 
tooltip = _ToolTip()
 

	
 
class _FilesBreadCrumbs(object):
    """Callable building the breadcrumb links for a path in a repository."""

    def __call__(self, repo_name, rev, paths):
        """Return the ``/``-joined links for ``repo_name`` and each segment.

        :param repo_name: repository name, linked to the empty path
        :param rev: revision used in every link
        :param paths: ``/``-separated path; empty segments are skipped
        """
        def files_link(label, f_path):
            #link to the files view of ``f_path`` at the requested revision
            return link_to(label, url('files_home',
                                      repo_name=repo_name,
                                      revision=rev, f_path=f_path))

        crumbs = [files_link(repo_name, '')]
        segments = paths.split('/')

        for depth, segment in enumerate(segments, 1):
            if segment == '':
                continue
            crumbs.append(files_link(segment, '/'.join(segments[:depth])))

        return literal('/'.join(crumbs))

files_breadcrumbs = _FilesBreadCrumbs()
 
class CodeHtmlFormatter(HtmlFormatter):
    """HtmlFormatter that wraps every source line in its own ``div``."""

    def wrap(self, source, outfile):
        """Wrap ``source`` in per-line divs, then ``pre`` and the outer div."""
        per_line = self._wrap_code(source)
        return self._wrap_div(self._wrap_pre(per_line))

    def _wrap_code(self, source):
        """Yield ``(i, t)`` pairs with ``t`` inside a div numbered from 1."""
        line_no = 0
        for i, t in source:
            line_no += 1
            yield i, '<div id="#S-%s">%s</div>' % (line_no, t)
 
def pygmentize(filenode, **kwargs):
    """
    Highlight the content of ``filenode`` with its own lexer.

    :param filenode: node providing ``content`` and ``lexer``
    :param kwargs: options forwarded to ``CodeHtmlFormatter``
    """
    content = filenode.content
    lexer = filenode.lexer
    formatter = CodeHtmlFormatter(**kwargs)
    return literal(code_highlight(content, lexer, formatter))
 

	
 
def pygmentize_annotation(filenode, **kwargs):
    """
    pygmentize function for annotation

    Every annotated changeset gets a link to ``changeset_home`` whose text
    is the revision number and short id, coloured per changeset and
    carrying a tooltip with author, date and message.

    :param filenode: node handed to ``annotate_highlight``
    :param kwargs: options forwarded to ``annotate_highlight``
    """

    color_dict = {}

    def gen_color():
        """generator for getting 10k of evenly distributed colors using hsv
        color and golden ratio.
        """
        import colorsys
        n = 10000
        golden_ratio = 0.618033988749895
        h = 0.22717784590367374
        #generate 10k nice web friendly colors in the same order
        for _step in xrange(n):
            h += golden_ratio
            h %= 1
            HSV_tuple = [h, 0.95, 0.95]
            RGB_tuple = colorsys.hsv_to_rgb(*HSV_tuple)
            yield [str(int(x * 256)) for x in RGB_tuple]

    cgenerator = gen_color()

    def get_color_string(cs):
        #a changeset keeps the colour it was first given
        if cs not in color_dict:
            color_dict[cs] = cgenerator.next()
        return "color: rgb(%s)! important;" % (', '.join(color_dict[cs]))

    def url_func(changeset):
        tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>" + \
        " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"

        tooltip_html = tooltip_html % (changeset.author,
                                       changeset.date,
                                       tooltip(changeset.message))
        lnk_format = '%5s:%s' % ('r%s' % changeset.revision,
                                 short_id(changeset.raw_id))
        uri = link_to(
                lnk_format,
                url('changeset_home', repo_name=changeset.repository.name,
                    revision=changeset.raw_id),
                style=get_color_string(changeset.raw_id),
                class_='tooltip',
                tooltip_title=tooltip_html
              )

        uri += '\n'
        return uri
    return literal(annotate_highlight(filenode, url_func, **kwargs))
 

	
 
def repo_name_slug(value):
    """Return slug of name of repository

    Formatting and tags are removed, each character of the forbidden set
    becomes ``-`` and the result is passed through ``recursive_replace`` and
    ``collapse``. Called on each creation/modification of a repository to
    prevent bad names in repo.
    """
    forbidden = """=[]\;'"<>,/~!@#$%^&*()+{}|: """
    slug = strip_tags(remove_formatting(value))

    for char in forbidden:
        slug = slug.replace(char, '-')
    return collapse(recursive_replace(slug, '-'), '-')
 

	
 
def get_changeset_safe(repo, rev):
    """Return the changeset ``rev`` of ``repo`` or an ``EmptyChangeset``.

    :param repo: a ``BaseRepository`` instance
    :param rev: revision handed to ``repo.get_changeset``
    :raises Exception: when ``repo`` is not a ``BaseRepository``

    A ``RepositoryError`` raised by the lookup is swallowed and an
    ``EmptyChangeset`` is returned instead.
    """
    from vcs.backends.base import BaseRepository
    from vcs.exceptions import RepositoryError
    if not isinstance(repo, BaseRepository):
        #interpolate the type; it used to be passed as a second exception
        #argument, leaving a literal %s in the message
        raise Exception('You must pass an Repository '
                        'object as first argument got %s' % (type(repo),))

    try:
        cs = repo.get_changeset(rev)
    except RepositoryError:
        from rhodecode.lib.utils import EmptyChangeset
        cs = EmptyChangeset()
    return cs
 

	
 

	
 
flash = _Flash()
 

	
 

	
 
#==============================================================================
 
# MERCURIAL FILTERS available via h.
 
#==============================================================================
 
from mercurial import util
 
from mercurial.templatefilters import person as _person
 

	
 

	
 

	
 
def _age(curdate):
    """turns a datetime into an age string.

    A false ``curdate`` gives ``''``. Otherwise the largest unit that fits
    into the age is found and ``time_ago_in_words`` is called with the next
    finer unit as granularity (``second`` stays ``second``). Nothing is
    returned when the age is below one second.
    """

    if not curdate:
        return ''

    from datetime import datetime

    day_seconds = 3600 * 24
    agescales = [("year", day_seconds * 365),
                 ("month", day_seconds * 30),
                 ("day", day_seconds),
                 ("hour", 3600),
                 ("minute", 60),
                 ("second", 1), ]
    finest = len(agescales) - 1

    elapsed = datetime.now() - curdate
    age_seconds = (elapsed.days * day_seconds) + elapsed.seconds

    for index, (unit, unit_seconds) in enumerate(agescales):
        if unit_seconds <= age_seconds:
            granularity = agescales[min(index + 1, finest)][0]
            return time_ago_in_words(curdate, granularity)
 

	
 
def age(x):
    """Return the age string of datetime ``x`` (see ``_age``)."""
    return _age(x)


def capitalize(x):
    """Return ``x.capitalize()``."""
    return x.capitalize()


email = util.email


def email_or_none(x):
    """Return ``util.email(x)`` unless it equals ``x``; then ``None``."""
    if util.email(x) != x:
        return util.email(x)
    return None


def person(x):
    """Return the result of mercurial's ``person`` template filter."""
    return _person(x)


def short_id(x):
    """Return the first 12 characters of ``x``."""
    return x[:12]
 

	
 

	
 
def action_parser(user_log):
 
    """
 
    This helper will map the specified string action into translated
 
    fancy names with icons and links
 
    
 
    @param action:
 
    """
 
    action = user_log.action
 
    action_params = None
 
    cs_links = ''
 

	
rhodecode/lib/indexers/daemon.py
Show inline comments
 
#!/usr/bin/env python
 
# encoding: utf-8
 
# whoosh indexer daemon for rhodecode
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
#
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your option) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 
"""
 
Created on Jan 26, 2010
 

	
 
@author: marcink
 
A daemon will read from task table and run tasks
 
"""
 
import sys
 
import os
 
from os.path import dirname as dn
 
from os.path import join as jn
 

	
 
#to get the rhodecode import
 
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 
sys.path.append(project_path)
 

	
 

	
 
from rhodecode.model.hg import HgModel
 
from rhodecode.lib.helpers import safe_unicode
 
from whoosh.index import create_in, open_dir
 
from shutil import rmtree
 
from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
 

	
 
from time import mktime
 
from vcs.exceptions import ChangesetError, RepositoryError
 

	
 
import logging
 

	
 
# formatter shared by the console handler
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# console handler emitting everything from debug level up
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# dedicated indexer logger: debug level, not propagated to parent loggers
log = logging.getLogger('whooshIndexer')
log.setLevel(logging.DEBUG)
log.propagate = False
log.addHandler(ch)
 

	
 
def get_repos_location():
    """Return whatever ``HgModel.get_repos_location()`` returns."""
    return HgModel.get_repos_location()
 

	
 

	
 
class WhooshIndexingDaemon(object):
 
    """
 
    Daemon for atomic jobs
 
    """
 

	
 
    def __init__(self, indexname='HG_INDEX', index_location=None,
 
                 repo_location=None):
 
        self.indexname = indexname
 

	
 
        self.index_location = index_location
 
        if not index_location:
 
            raise Exception('You have to provide index location')
 

	
 
        self.repo_location = repo_location
 
        if not repo_location:
 
            raise Exception('You have to provide repositories location')
 

	
 

	
 

	
 
        self.repo_paths = HgModel.repo_scan('/', self.repo_location, None, True)
 
        self.initial = False
 
        if not os.path.isdir(self.index_location):
 
            os.mkdir(self.index_location)
 
            log.info('Cannot run incremental index since it does not'
 
                     ' yet exist running full build')
 
            self.initial = True
 

	
 
    def get_paths(self, repo):
        """
        recursive walk in root dir and return a set of all path in that dir
        based on repository walk function

        :param repo: repository providing ``walk`` and ``path``
        :returns: set of file paths, each joined onto ``repo.path``

        A ``RepositoryError`` raised during the walk ends it; the paths
        collected up to that point are returned.
        """
        index_paths_ = set()
        try:
            #every walk step yields its own files, so one pass over
            #``files`` per step collects every path exactly once
            for topnode, dirs, files in repo.walk('/', 'tip'):
                for f in files:
                    index_paths_.add(jn(repo.path, f.path))
        except RepositoryError:
            #best effort: keep what was collected before the failure
            pass
        return index_paths_
 

	
 
    def get_node(self, repo, path):
        """Return the node for ``path`` from ``repo.get_changeset()``.

        ``path`` is expected to start with ``repo.path``; that prefix and
        the separator following it are cut off before the lookup.
        """
        prefix_len = len(repo.path) + 1
        relative_path = path[prefix_len:]
        changeset = repo.get_changeset()
        return changeset.get_node(relative_path)
 

	
 
    def get_node_mtime(self, node):
        """Return the date of the node's last changeset via ``mktime``."""
        changed_on = node.last_changeset.date
        return mktime(changed_on.timetuple())
 

	
 
    def add_doc(self, writer, path, repo):
        """Adding doc to writer

        The node content is indexed only when its extension is listed in
        ``INDEX_EXTENSIONS``; other files are indexed with empty content.
        """
        node = self.get_node(repo, path)

        if node.extension not in INDEX_EXTENSIONS:
            #just index file name without it's content
            log.debug('    >> %s' % path)
            u_content = u''
        else:
            #we just index the content of chosen files
            log.debug('    >> %s [WITH CONTENT]' % path)
            u_content = node.content

        document = dict(owner=unicode(repo.contact),
                        repository=safe_unicode(repo.name),
                        path=safe_unicode(path),
                        content=u_content,
                        modtime=self.get_node_mtime(node),
                        extension=node.extension)
        writer.add_document(**document)
 

	
 

	
 
    def build_index(self):
        """Rebuild the whole index from scratch.

        An existing index directory is removed and recreated, every path of
        every scanned repository is added through :meth:`add_doc`, and the
        writer is committed with ``merge=True``.
        """
        location = self.index_location
        if os.path.exists(location):
            log.debug('removing previous index')
            rmtree(location)

        if not os.path.exists(location):
            os.mkdir(location)

        writer = create_in(location, SCHEMA, indexname=IDX_NAME).writer()

        for repo in self.repo_paths.values():
            log.debug('building index @ %s' % repo.path)

            for idx_path in self.get_paths(repo):
                self.add_doc(writer, idx_path, repo)

        log.debug('>> COMMITING CHANGES <<')
        writer.commit(merge=True)
        log.debug('>>> FINISHED BUILDING INDEX <<<')
 

	
 

	
0 comments (0 inline, 0 general)