Changeset - db71228a4c23
[Not reviewed]
beta
Marcin Kuzminski - 15 years ago 2011-02-24 23:44:21
marcin@python-works.com
moved statistics parse_limit into the .ini files
decreased celery max tasks per child to 1 for better memory usage
stats task now uses the new slice option of repository
4 files changed with 16 insertions and 11 deletions:
0 comments (0 inline, 0 general)
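
Editor's note: the parse limit that was hard-coded in the stats task is now read from the new commit_parse_limit option in the [app:main] section of each .ini file (25 in development.ini, 250 in production.ini, 50 in the deployment template). A minimal sketch of that lookup, assuming a Pylons-style config object and adding a hypothetical fallback to the old hard-coded 250 for configs that predate the option (the committed code below calls int() on the value directly):

# Editor's sketch, not part of the changeset: read commit_parse_limit from
# the loaded .ini config. The default of 250 (the old hard-coded limit) is
# an assumption added for robustness; the committed code assumes the
# option is always present.
from pylons import config

def get_parse_limit(default=250):
    raw = config['app_conf'].get('commit_parse_limit')
    return int(raw) if raw is not None else default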
development.ini
@@ -24,83 +24,86 @@ debug = true
 #smtp_use_tls = false
 #smtp_use_ssl = true
 
 [server:main]
 ##nr of threads to spawn
 threadpool_workers = 5
 
 ##max request before thread respawn
 threadpool_max_requests = 6
 
 ##option to use threads of process
 use_threadpool = false
 
 use = egg:Paste#http
 host = 0.0.0.0
 port = 5000
 
 [app:main]
 use = egg:rhodecode
 full_stack = true
 static_files = true
 lang=en
 cache_dir = %(here)s/data
 index_dir = %(here)s/data/index
 app_instance_uuid = develop
 cut_off_limit = 256000
 force_https = false
+commit_parse_limit = 25
 
 ####################################
 ###        CELERY CONFIG        ####
 ####################################
 use_celery = false
 broker.host = localhost
 broker.vhost = rabbitmqhost
 broker.port = 5672
 broker.user = rabbitmq
 broker.password = qweqwe
 
 celery.imports = rhodecode.lib.celerylib.tasks
 
 celery.result.backend = amqp
 celery.result.dburi = amqp://
 celery.result.serialier = json
 
 #celery.send.task.error.emails = true
 #celery.amqp.task.result.expires = 18000
 
 celeryd.concurrency = 2
 #celeryd.log.file = celeryd.log
 celeryd.log.level = debug
-celeryd.max.tasks.per.child = 3
+celeryd.max.tasks.per.child = 1
 
 #tasks will never be sent to the queue, but executed locally instead.
 celery.always.eager = false
 
 ####################################
 ###         BEAKER CACHE        ####
 ####################################
-beaker.cache.data_dir=/%(here)s/data/cache/data
-beaker.cache.lock_dir=/%(here)s/data/cache/lock
+beaker.cache.data_dir=%(here)s/data/cache/data
+beaker.cache.lock_dir=%(here)s/data/cache/lock
 
 beaker.cache.regions=super_short_term,short_term,long_term,sql_cache_short,sql_cache_med,sql_cache_long
 
 beaker.cache.super_short_term.type=memory
 beaker.cache.super_short_term.expire=10
 
 beaker.cache.short_term.type=memory
 beaker.cache.short_term.expire=60
 
 beaker.cache.long_term.type=memory
 beaker.cache.long_term.expire=36000
 
 beaker.cache.sql_cache_short.type=memory
 beaker.cache.sql_cache_short.expire=10
 
 beaker.cache.sql_cache_med.type=memory
 beaker.cache.sql_cache_med.expire=360
 
 beaker.cache.sql_cache_long.type=file
 beaker.cache.sql_cache_long.expire=3600
 
 ####################################
 ###       BEAKER SESSION        ####
 ####################################
 ## Type of storage used for the session, current types are
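
Editor's note on the celery setting above: celeryd.max.tasks.per.child = 1 makes each worker process exit and be respawned after a single task, so memory allocated while parsing a large repository is returned to the OS instead of accumulating in a long-lived worker, at the cost of one process spawn per task. A rough sketch of how dotted .ini keys of this kind are conventionally mapped to celery 2.x setting names; the helper below is illustrative, not RhodeCode's actual config loader:

# Editor's sketch: translate dotted .ini keys such as
# "celeryd.max.tasks.per.child" into celery 2.x setting names such as
# CELERYD_MAX_TASKS_PER_CHILD. Illustrative only.
from ConfigParser import ConfigParser  # Python 2, matching the era of this changeset

def celery_settings(ini_path, section='app:main'):
    parser = ConfigParser()
    parser.read(ini_path)
    settings = {}
    # raw=True keeps %(here)s placeholders from raising interpolation errors
    for key, value in parser.items(section, raw=True):
        if key.startswith(('celery.', 'celeryd.', 'broker.')):
            settings[key.replace('.', '_').upper()] = value
    return settings

# With this changeset applied:
#   celery_settings('development.ini')['CELERYD_MAX_TASKS_PER_CHILD'] == '1'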
production.ini
@@ -26,72 +26,73 @@ debug = true
 
 [server:main]
 ##nr of threads to spawn
 threadpool_workers = 5
 
 ##max request before thread respawn
 threadpool_max_requests = 2
 
 ##option to use threads of process
 use_threadpool = true
 
 use = egg:Paste#http
 host = 127.0.0.1
 port = 8001
 
 [app:main]
 use = egg:rhodecode
 full_stack = true
 static_files = false
 lang=en
 cache_dir = %(here)s/data
 index_dir = %(here)s/data/index
 cut_off_limit = 256000
 force_https = false
+commit_parse_limit = 250
 
 ####################################
 ###        CELERY CONFIG        ####
 ####################################
 use_celery = false
 broker.host = localhost
 broker.vhost = rabbitmqhost
 broker.port = 5672
 broker.user = rabbitmq
 broker.password = qweqwe
 
 celery.imports = rhodecode.lib.celerylib.tasks
 
 celery.result.backend = amqp
 celery.result.dburi = amqp://
 celery.result.serialier = json
 
 #celery.send.task.error.emails = true
 #celery.amqp.task.result.expires = 18000
 
 celeryd.concurrency = 2
 #celeryd.log.file = celeryd.log
 celeryd.log.level = debug
-celeryd.max.tasks.per.child = 3
+celeryd.max.tasks.per.child = 1
 
 #tasks will never be sent to the queue, but executed locally instead.
 celery.always.eager = false
 
 ####################################
 ###         BEAKER CACHE        ####
 ####################################
 beaker.cache.data_dir=%(here)s/data/cache/data
 beaker.cache.lock_dir=%(here)s/data/cache/lock
 
 beaker.cache.regions=super_short_term,short_term,long_term,sql_cache_short,sql_cache_med,sql_cache_long
 
 beaker.cache.super_short_term.type=memory
 beaker.cache.super_short_term.expire=10
 
 beaker.cache.short_term.type=memory
 beaker.cache.short_term.expire=60
 
 beaker.cache.long_term.type=memory
 beaker.cache.long_term.expire=36000
 
 
 beaker.cache.sql_cache_short.type=memory
 beaker.cache.sql_cache_short.expire=10
rhodecode/config/deployment.ini_tmpl
@@ -27,72 +27,73 @@ debug = true
 [server:main]
 ##nr of threads to spawn
 threadpool_workers = 5
 
 ##max request before thread respawn
 threadpool_max_requests = 10
 
 ##option to use threads of process
 use_threadpool = true
 
 use = egg:Paste#http
 host = 127.0.0.1
 port = 5000
 
 [app:main]
 use = egg:rhodecode
 full_stack = true
 static_files = true
 lang=en
 cache_dir = %(here)s/data
 index_dir = %(here)s/data/index
 app_instance_uuid = ${app_instance_uuid}
 cut_off_limit = 256000
 force_https = false
+commit_parse_limit = 50
 
 ####################################
 ###        CELERY CONFIG        ####
 ####################################
 use_celery = false
 broker.host = localhost
 broker.vhost = rabbitmqhost
 broker.port = 5672
 broker.user = rabbitmq
 broker.password = qweqwe
 
 celery.imports = rhodecode.lib.celerylib.tasks
 
 celery.result.backend = amqp
 celery.result.dburi = amqp://
 celery.result.serialier = json
 
 #celery.send.task.error.emails = true
 #celery.amqp.task.result.expires = 18000
 
 celeryd.concurrency = 2
 #celeryd.log.file = celeryd.log
 celeryd.log.level = debug
-celeryd.max.tasks.per.child = 3
+celeryd.max.tasks.per.child = 1
 
 #tasks will never be sent to the queue, but executed locally instead.
 celery.always.eager = false
 
 ####################################
 ###         BEAKER CACHE        ####
 ####################################
 beaker.cache.data_dir=%(here)s/data/cache/data
 beaker.cache.lock_dir=%(here)s/data/cache/lock
 
 beaker.cache.regions=super_short_term,short_term,long_term,sql_cache_short,sql_cache_med,sql_cache_long
 
 beaker.cache.super_short_term.type=memory
 beaker.cache.super_short_term.expire=10
 
 beaker.cache.short_term.type=memory
 beaker.cache.short_term.expire=60
 
 beaker.cache.long_term.type=memory
 beaker.cache.long_term.expire=36000
 
 beaker.cache.sql_cache_short.type=memory
 beaker.cache.sql_cache_short.expire=10
Show inline comments
 
@@ -83,81 +83,82 @@ def whoosh_index(repo_location, full_ind
 
    WhooshIndexingDaemon(index_location=index_location,
 
                         repo_location=repo_location, sa=get_session())\
 
                         .run(full_index=full_index)
 

	
 
@task(ignore_result=True)
 
@locked_task
 
def get_commits_stats(repo_name, ts_min_y, ts_max_y):
 
    try:
 
        log = get_commits_stats.get_logger()
 
    except:
 
        log = logging.getLogger(__name__)
 

	
 
    from rhodecode.model.db import Statistics, Repository
 

	
 
    #for js data compatibilty
 
    author_key_cleaner = lambda k: person(k).replace('"', "")
 

	
 
    commits_by_day_author_aggregate = {}
 
    commits_by_day_aggregate = {}
 
    repos_path = get_repos_path()
 
    p = os.path.join(repos_path, repo_name)
 
    repo = get_repo(p)
 

	
 
    skip_date_limit = True
 
    parse_limit = 250 #limit for single task changeset parsing optimal for
 
    parse_limit = int(config['app_conf'].get('commit_parse_limit'))
 
    last_rev = 0
 
    last_cs = None
 
    timegetter = itemgetter('time')
 

	
 
    sa = get_session()
 

	
 
    dbrepo = sa.query(Repository)\
 
        .filter(Repository.repo_name == repo_name).scalar()
 
    cur_stats = sa.query(Statistics)\
 
        .filter(Statistics.repository == dbrepo).scalar()
 
    if cur_stats:
 
        last_rev = cur_stats.stat_on_revision
 
    if not repo.revisions:
 
        return True
 

	
 
    if last_rev == repo.revisions[-1] and len(repo.revisions) > 1:
 
        #pass silently without any work if we're not on first revision or 
 
        #current state of parsing revision(from db marker) is the last revision
 
        return True
 

	
 
    if cur_stats:
 
        commits_by_day_aggregate = OrderedDict(
 
                                       json.loads(
 
                                        cur_stats.commit_activity_combined))
 
        commits_by_day_author_aggregate = json.loads(cur_stats.commit_activity)
 

	
 
    log.debug('starting parsing %s', parse_limit)
 
    lmktime = mktime
 

	
 
    last_rev = last_rev + 1 if last_rev > 0 else last_rev
 
    for rev in repo.revisions[last_rev:last_rev + parse_limit]:
 
        last_cs = cs = repo.get_changeset(rev)
 

	
 
    for cs in repo[last_rev:last_rev + parse_limit]:
 
        last_cs = cs #remember last parsed changeset
 
        k = lmktime([cs.date.timetuple()[0], cs.date.timetuple()[1],
 
                      cs.date.timetuple()[2], 0, 0, 0, 0, 0, 0])
 

	
 
        if commits_by_day_author_aggregate.has_key(author_key_cleaner(cs.author)):
 
            try:
 
                l = [timegetter(x) for x in commits_by_day_author_aggregate\
 
                        [author_key_cleaner(cs.author)]['data']]
 
                time_pos = l.index(k)
 
            except ValueError:
 
                time_pos = False
 

	
 
            if time_pos >= 0 and time_pos is not False:
 

	
 
                datadict = commits_by_day_author_aggregate\
 
                    [author_key_cleaner(cs.author)]['data'][time_pos]
 

	
 
                datadict["commits"] += 1
 
                datadict["added"] += len(cs.added)
 
                datadict["changed"] += len(cs.changed)
 
                datadict["removed"] += len(cs.removed)
 

	
 
            else:
 
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 

	
 
@@ -188,52 +189,51 @@ def get_commits_stats(repo_name, ts_min_
 
            commits_by_day_aggregate[k] += 1
 
        else:
 
            commits_by_day_aggregate[k] = 1
 

	
 
    overview_data = sorted(commits_by_day_aggregate.items(), key=itemgetter(0))
 
    if not commits_by_day_author_aggregate:
 
        commits_by_day_author_aggregate[author_key_cleaner(repo.contact)] = {
 
            "label":author_key_cleaner(repo.contact),
 
            "data":[0, 1],
 
            "schema":["commits"],
 
        }
 

	
 
    stats = cur_stats if cur_stats else Statistics()
 
    stats.commit_activity = json.dumps(commits_by_day_author_aggregate)
 
    stats.commit_activity_combined = json.dumps(overview_data)
 

	
 
    log.debug('last revison %s', last_rev)
 
    leftovers = len(repo.revisions[last_rev:])
 
    log.debug('revisions to parse %s', leftovers)
 

	
 
    if last_rev == 0 or leftovers < parse_limit:
 
        log.debug('getting code trending stats')
 
        stats.languages = json.dumps(__get_codes_stats(repo_name))
 

	
 
    try:
 
    stats.repository = dbrepo
 
    stats.stat_on_revision = last_cs.revision
 

	
 
    try:
 
        stats.stat_on_revision = last_cs.revision if last_cs else 0
 
        sa.add(stats)
 
        sa.commit()
 
    except:
 
        log.error(traceback.format_exc())
 
        sa.rollback()
 
        return False
 
    if len(repo.revisions) > 1:
 
        run_task(get_commits_stats, repo_name, ts_min_y, ts_max_y)
 

	
 
    return True
 

	
 
@task(ignore_result=True)
 
def reset_user_password(user_email):
 
    try:
 
        log = reset_user_password.get_logger()
 
    except:
 
        log = logging.getLogger(__name__)
 

	
 
    from rhodecode.lib import auth
 
    from rhodecode.model.db import User
 

	
 
    try:
 
        try:
 
            sa = get_session()
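
Editor's note: the rewritten loop above depends on the repository object's new slicing support: repo[i:j] yields changeset objects directly, replacing a get_changeset() lookup per revision number, and the "if last_cs else 0" guard covers an empty slice where the loop body never runs. A schematic contrast of the two styles; MockRepo is a stand-in for the vcs backend repository, not the real class:

# Editor's sketch contrasting the old and new iteration styles.
class MockRepo(object):
    def __init__(self, changesets):
        self.revisions = range(len(changesets))  # revision numbers
        self._changesets = changesets

    def get_changeset(self, rev):
        # old style: one lookup call per revision number
        return self._changesets[rev]

    def __getitem__(self, key):
        # new style: the repository itself supports slicing
        if isinstance(key, slice):
            return iter(self._changesets[key])
        return self._changesets[key]

repo = MockRepo(['cs0', 'cs1', 'cs2', 'cs3'])
last_rev, parse_limit = 1, 2

old = [repo.get_changeset(rev)
       for rev in repo.revisions[last_rev:last_rev + parse_limit]]
new = list(repo[last_rev:last_rev + parse_limit])
assert old == new == ['cs1', 'cs2']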