Changeset 95a502d94860 | branch: default | [Not reviewed]
Marcin Kuzminski <marcin@python-works.com>, 2010-10-18 01:14:40
removed soon-to-be-deprecated walk method on repository instance
2 files changed with 4 insertions and 2 deletions
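The change migrates both call sites from the deprecated walk method on the
repository instance to walk on a changeset object. A minimal sketch of the
old and new call styles, using only names that appear in the diff and
assuming a vcs MercurialRepository at a hypothetical path:

    from vcs.backends.hg import MercurialRepository

    repo = MercurialRepository('/srv/repos/example')  # hypothetical path

    # Old, soon-to-be-deprecated style: walk on the repository instance,
    # passing the revision explicitly:
    #
    #     for topnode, dirs, files in repo.walk('/', 'tip'):
    #         ...

    # New style: fetch a changeset first, then walk it.
    tip = repo.get_changeset()  # no argument: defaults to the tip changeset
    for topnode, dirs, files in tip.walk('/'):
        for f in files:
            print(f.path)

Both files below apply exactly this substitution: each gains a
tip = repo.get_changeset() line and swaps the loop header.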
rhodecode/lib/celerylib/tasks.py
from celery.decorators import task

from operator import itemgetter
from pylons.i18n.translation import _
from rhodecode.lib.celerylib import run_task, locked_task
from rhodecode.lib.helpers import person
from rhodecode.lib.smtp_mailer import SmtpMailer
from rhodecode.lib.utils import OrderedDict
from time import mktime
from vcs.backends.hg import MercurialRepository
import traceback

try:
    import json
except ImportError:
    #python 2.5 compatibility
    import simplejson as json

try:
    from celeryconfig import PYLONS_CONFIG as config
    celery_on = True
except ImportError:
    #if celeryconfig is not present let's just load our pylons
    #config instead
    from pylons import config
    celery_on = False


__all__ = ['whoosh_index', 'get_commits_stats',
           'reset_user_password', 'send_email']

def get_session():
    if celery_on:
        from sqlalchemy import engine_from_config
        from sqlalchemy.orm import sessionmaker, scoped_session
        engine = engine_from_config(dict(config.items('app:main')), 'sqlalchemy.db1.')
        sa = scoped_session(sessionmaker(bind=engine))
    else:
        #If we don't use celery reuse our current application Session
        from rhodecode.model.meta import Session
        sa = Session

    return sa

def get_hg_settings():
    from rhodecode.model.db import RhodeCodeSettings
    sa = get_session()
    ret = sa.query(RhodeCodeSettings).all()

    if not ret:
        raise Exception('Could not get application settings !')
    settings = {}
    for each in ret:
        settings['rhodecode_' + each.app_settings_name] = each.app_settings_value

    return settings

def get_hg_ui_settings():
    from rhodecode.model.db import RhodeCodeUi
    sa = get_session()
    ret = sa.query(RhodeCodeUi).all()

    if not ret:
        raise Exception('Could not get application ui settings !')
    settings = {}
    for each in ret:
        k = each.ui_key
        v = each.ui_value
        if k == '/':
            k = 'root_path'

        if k.find('.') != -1:
            k = k.replace('.', '_')

        if each.ui_section == 'hooks':
            v = each.ui_active

        settings[each.ui_section + '_' + k] = v

    return settings

@task
@locked_task
def whoosh_index(repo_location, full_index):
    log = whoosh_index.get_logger()
    from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
    WhooshIndexingDaemon(repo_location=repo_location).run(full_index=full_index)

@task
@locked_task
def get_commits_stats(repo_name, ts_min_y, ts_max_y):
    from rhodecode.model.db import Statistics, Repository
    log = get_commits_stats.get_logger()
    author_key_cleaner = lambda k: person(k).replace('"', "") #for js data compatibility

    commits_by_day_author_aggregate = {}
    commits_by_day_aggregate = {}
    repos_path = get_hg_ui_settings()['paths_root_path'].replace('*', '')
    repo = MercurialRepository(repos_path + repo_name)

    skip_date_limit = True
    parse_limit = 350 #limit for single task changeset parsing optimal for
    last_rev = 0
    last_cs = None
    timegetter = itemgetter('time')

    sa = get_session()

    dbrepo = sa.query(Repository)\
        .filter(Repository.repo_name == repo_name).scalar()
    cur_stats = sa.query(Statistics)\
        .filter(Statistics.repository == dbrepo).scalar()
    if cur_stats:
        last_rev = cur_stats.stat_on_revision
    if not repo.revisions:
        return True

    if last_rev == repo.revisions[-1] and len(repo.revisions) > 1:
        #pass silently without any work if we're not on first revision or current
        #state of parsing revision(from db marker) is the last revision
        return True

    if cur_stats:
        commits_by_day_aggregate = OrderedDict(
                                       json.loads(
                                        cur_stats.commit_activity_combined))
        commits_by_day_author_aggregate = json.loads(cur_stats.commit_activity)

    log.debug('starting parsing %s', parse_limit)
    for cnt, rev in enumerate(repo.revisions[last_rev:]):
        last_cs = cs = repo.get_changeset(rev)
        k = '%s-%s-%s' % (cs.date.timetuple()[0], cs.date.timetuple()[1],
                          cs.date.timetuple()[2])
        timetupple = [int(x) for x in k.split('-')]
        timetupple.extend([0 for _ in xrange(6)])
        k = mktime(timetupple)
        if commits_by_day_author_aggregate.has_key(author_key_cleaner(cs.author)):
            try:
                l = [timegetter(x) for x in commits_by_day_author_aggregate\
                        [author_key_cleaner(cs.author)]['data']]
                time_pos = l.index(k)
            except ValueError:
                time_pos = False

            if time_pos >= 0 and time_pos is not False:

                datadict = commits_by_day_author_aggregate\
                    [author_key_cleaner(cs.author)]['data'][time_pos]

                datadict["commits"] += 1
                datadict["added"] += len(cs.added)
                datadict["changed"] += len(cs.changed)
                datadict["removed"] += len(cs.removed)

            else:
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:

                    datadict = {"time":k,
                                "commits":1,
                                "added":len(cs.added),
                                "changed":len(cs.changed),
                                "removed":len(cs.removed),
                               }
                    commits_by_day_author_aggregate\
                        [author_key_cleaner(cs.author)]['data'].append(datadict)

        else:
            if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                commits_by_day_author_aggregate[author_key_cleaner(cs.author)] = {
                                    "label":author_key_cleaner(cs.author),
                                    "data":[{"time":k,
                                             "commits":1,
                                             "added":len(cs.added),
                                             "changed":len(cs.changed),
                                             "removed":len(cs.removed),
                                             }],
                                    "schema":["commits"],
                                    }

        #gather all data by day
        if commits_by_day_aggregate.has_key(k):
            commits_by_day_aggregate[k] += 1
        else:
            commits_by_day_aggregate[k] = 1

        if cnt >= parse_limit:
            #don't fetch too much data since we can freeze application
            break

    overview_data = []
    for k, v in commits_by_day_aggregate.items():
        overview_data.append([k, v])
    overview_data = sorted(overview_data, key=itemgetter(0))

    if not commits_by_day_author_aggregate:
        commits_by_day_author_aggregate[author_key_cleaner(repo.contact)] = {
            "label":author_key_cleaner(repo.contact),
            "data":[0, 1],
            "schema":["commits"],
        }

    stats = cur_stats if cur_stats else Statistics()
    stats.commit_activity = json.dumps(commits_by_day_author_aggregate)
    stats.commit_activity_combined = json.dumps(overview_data)

    log.debug('last revision %s', last_rev)
    leftovers = len(repo.revisions[last_rev:])
    log.debug('revisions to parse %s', leftovers)

    if last_rev == 0 or leftovers < parse_limit:
        stats.languages = json.dumps(__get_codes_stats(repo_name))

    stats.repository = dbrepo
    stats.stat_on_revision = last_cs.revision

    try:
        sa.add(stats)
        sa.commit()
    except:
        log.error(traceback.format_exc())
        sa.rollback()
        return False
    if len(repo.revisions) > 1:
        run_task(get_commits_stats, repo_name, ts_min_y, ts_max_y)

    return True

@task
def reset_user_password(user_email):
    log = reset_user_password.get_logger()
    from rhodecode.lib import auth
    from rhodecode.model.db import User

    try:
        try:
            sa = get_session()
            user = sa.query(User).filter(User.email == user_email).scalar()
            new_passwd = auth.PasswordGenerator().gen_password(8,
                             auth.PasswordGenerator.ALPHABETS_BIG_SMALL)
            if user:
                user.password = auth.get_crypt_password(new_passwd)
                sa.add(user)
                sa.commit()
                log.info('change password for %s', user_email)
            if new_passwd is None:
                raise Exception('unable to generate new password')

        except:
            log.error(traceback.format_exc())
            sa.rollback()

        run_task(send_email, user_email,
                 "Your new rhodecode password",
                 'Your new rhodecode password:%s' % (new_passwd))
        log.info('send new password mail to %s', user_email)

    except:
        log.error('Failed to update user password')
        log.error(traceback.format_exc())
    return True

@task
def send_email(recipients, subject, body):
    log = send_email.get_logger()
    email_config = dict(config.items('DEFAULT'))
    mail_from = email_config.get('app_email_from')
    user = email_config.get('smtp_username')
    passwd = email_config.get('smtp_password')
    mail_server = email_config.get('smtp_server')
    mail_port = email_config.get('smtp_port')
    tls = email_config.get('smtp_use_tls')
    ssl = False

    try:
        m = SmtpMailer(mail_from, user, passwd, mail_server,
                       mail_port, ssl, tls)
        m.send(recipients, subject, body)
    except:
        log.error('Mail sending failed')
        log.error(traceback.format_exc())
        return False
    return True

@task
def create_repo_fork(form_data, cur_user):
    import os
    from rhodecode.model.repo_model import RepoModel
    sa = get_session()
    rm = RepoModel(sa)

    rm.create(form_data, cur_user, just_db=True, fork=True)

    repos_path = get_hg_ui_settings()['paths_root_path'].replace('*', '')
    repo_path = os.path.join(repos_path, form_data['repo_name'])
    repo_fork_path = os.path.join(repos_path, form_data['fork_name'])

    MercurialRepository(str(repo_fork_path), True, clone_url=str(repo_path))

def __get_codes_stats(repo_name):
    LANGUAGES_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
                    'cfg', 'cfm', 'cpp', 'cs', 'diff', 'do', 'el', 'erl',
                    'h', 'java', 'js', 'jsp', 'jspx', 'lisp',
                    'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
                    'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh',
                    'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
                    'yaws']
    repos_path = get_hg_ui_settings()['paths_root_path'].replace('*', '')
    repo = MercurialRepository(repos_path + repo_name)
+    tip = repo.get_changeset()

    code_stats = {}
-    for topnode, dirs, files in repo.walk('/', 'tip'):
+    for topnode, dirs, files in tip.walk('/'):
        for f in files:
            k = f.mimetype
            if f.extension in LANGUAGES_EXTENSIONS:
                if code_stats.has_key(k):
                    code_stats[k] += 1
                else:
                    code_stats[k] = 1

    return code_stats or {}
rhodecode/lib/indexers/daemon.py

#!/usr/bin/env python
# encoding: utf-8
# whoosh indexer daemon for rhodecode
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your opinion) any later version of the license.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.
"""
Created on Jan 26, 2010

@author: marcink
A daemon will read from task table and run tasks
"""
import sys
import os
from os.path import dirname as dn
from os.path import join as jn

#to get the rhodecode import
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
sys.path.append(project_path)

from rhodecode.lib.pidlock import LockHeld, DaemonLock
from rhodecode.model.hg_model import HgModel
from rhodecode.lib.helpers import safe_unicode
from whoosh.index import create_in, open_dir
from shutil import rmtree
from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME

from time import mktime
from vcs.exceptions import ChangesetError, RepositoryError

import logging

log = logging.getLogger('whooshIndexer')
# create logger
log.setLevel(logging.DEBUG)
log.propagate = False
# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)

# create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# add formatter to ch
ch.setFormatter(formatter)

# add ch to logger
log.addHandler(ch)

def scan_paths(root_location):
    return HgModel.repo_scan('/', root_location, None, True)

class WhooshIndexingDaemon(object):
    """
    Daemon for atomic jobs
    """

    def __init__(self, indexname='HG_INDEX', repo_location=None):
        self.indexname = indexname
        self.repo_location = repo_location
        self.repo_paths = scan_paths(self.repo_location)
        self.initial = False
        if not os.path.isdir(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)
            log.info('Cannot run incremental index since it does not'
                     ' yet exist running full build')
            self.initial = True

    def get_paths(self, repo):
        """
        recursive walk in root dir and return a set of all paths in that dir
        based on repository walk function
        """
        index_paths_ = set()
+        tip = repo.get_changeset()
        try:
-            for topnode, dirs, files in repo.walk('/', 'tip'):
+            for topnode, dirs, files in tip.walk('/'):
                for f in files:
                    index_paths_.add(jn(repo.path, f.path))
                for dir in dirs:
                    for f in files:
                        index_paths_.add(jn(repo.path, f.path))

        except RepositoryError:
            pass
        return index_paths_

    def get_node(self, repo, path):
        n_path = path[len(repo.path) + 1:]
        node = repo.get_changeset().get_node(n_path)
        return node

    def get_node_mtime(self, node):
        return mktime(node.last_changeset.date.timetuple())

    def add_doc(self, writer, path, repo):
        """Adding doc to writer"""
        node = self.get_node(repo, path)

        #we just index the content of chosen files
        if node.extension in INDEX_EXTENSIONS:
            log.debug('    >> %s [WITH CONTENT]' % path)
            u_content = node.content
        else:
            log.debug('    >> %s' % path)
            #just index file name without its content
            u_content = u''

        writer.add_document(owner=unicode(repo.contact),
                        repository=safe_unicode(repo.name),
                        path=safe_unicode(path),
                        content=u_content,
                        modtime=self.get_node_mtime(node),
                        extension=node.extension)

    def build_index(self):
        if os.path.exists(IDX_LOCATION):
            log.debug('removing previous index')
            rmtree(IDX_LOCATION)

        if not os.path.exists(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)

        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
        writer = idx.writer()

        for cnt, repo in enumerate(self.repo_paths.values()):
            log.debug('building index @ %s' % repo.path)

            for idx_path in self.get_paths(repo):
                self.add_doc(writer, idx_path, repo)

        log.debug('>> COMMITTING CHANGES <<')
        writer.commit(merge=True)
        log.debug('>>> FINISHED BUILDING INDEX <<<')

    def update_index(self):
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')

        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
        # The set of all paths in the index
        indexed_paths = set()
        # The set of all paths we need to re-index
        to_index = set()

        reader = idx.reader()
        writer = idx.writer()

        # Loop over the stored fields in the index
        for fields in reader.all_stored_fields():
            indexed_path = fields['path']
            indexed_paths.add(indexed_path)

            repo = self.repo_paths[fields['repository']]

            try:
                node = self.get_node(repo, indexed_path)
            except ChangesetError:
                # This file was deleted since it was indexed
                log.debug('removing from index %s' % indexed_path)
                writer.delete_by_term('path', indexed_path)

            else:
                # Check if this file was changed since it was indexed
                indexed_time = fields['modtime']
                mtime = self.get_node_mtime(node)
                if mtime > indexed_time:
                    # The file has changed, delete it and add it to the list of
                    # files to reindex
                    log.debug('adding to reindex list %s' % indexed_path)
                    writer.delete_by_term('path', indexed_path)
                    to_index.add(indexed_path)

        # Loop over the files in the filesystem
        # Assume we have a function that gathers the filenames of the
        # documents to be indexed
        for repo in self.repo_paths.values():
            for path in self.get_paths(repo):
                if path in to_index or path not in indexed_paths:
                    # This is either a file that's changed, or a new file
                    # that wasn't indexed before. So index it!
                    self.add_doc(writer, path, repo)
                    log.debug('re indexing %s' % path)

        log.debug('>> COMMITTING CHANGES <<')
        writer.commit(merge=True)
        log.debug('>>> FINISHED REBUILDING INDEX <<<')

    def run(self, full_index=False):
        """Run daemon"""
        if full_index or self.initial:
            self.build_index()
        else:
            self.update_index()

if __name__ == "__main__":
    arg = sys.argv[1:]
    if len(arg) != 2:
        sys.stderr.write('Please specify indexing type [full|incremental] '
                         'and path to repositories as script args \n')
        sys.exit()

    if arg[0] == 'full':
        full_index = True
    elif arg[0] == 'incremental':
        # False means looking just for changes
        full_index = False
    else:
        sys.stdout.write('Please use [full|incremental]'
                         ' as script first arg \n')
        sys.exit()

    if not os.path.isdir(arg[1]):
        sys.stderr.write('%s is not a valid path \n' % arg[1])
        sys.exit()
    else:
        if arg[1].endswith('/'):
            repo_location = arg[1] + '*'
        else:
            repo_location = arg[1] + '/*'

    try:
        l = DaemonLock()
        WhooshIndexingDaemon(repo_location=repo_location)\
            .run(full_index=full_index)
        l.release()
        reload(logging)
    except LockHeld:
        sys.exit(1)