Changeset - 186bf5fee0a1
[Not reviewed]
default
0 3 0
Mads Kiilerich - 9 years ago 2016-07-28 16:28:34
madski@unity3d.com
repo-scan: rewrite get_filesystem_repos to use os.walk instead of stupid recursion

I think this is more readable. It is also faster, perhaps because the more
readable implementation makes it easier to optimize.
3 files changed with 37 insertions and 26 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/paster_commands/repo_scan.py
Show inline comments
 
@@ -55,13 +55,15 @@ class Command(BasePasterCommand):
 
        rm_obsolete = self.options.delete_obsolete
 
        print 'Now scanning root location for new repos ...'
 
        added, removed = repo2db_mapper(ScmModel().repo_scan(),
 
                                        remove_obsolete=rm_obsolete)
 
        added = ', '.join(added) or '-'
 
        removed = ', '.join(removed) or '-'
 
        print 'Scan completed added: %s removed: %s' % (added, removed)
 
        print 'Scan completed.'
 
        print 'Added: %s' % added
 
        print 'Removed: %s' % removed
 

	
 
    def update_parser(self):
 
        self.parser.add_option(
 
            '--delete-obsolete',
 
            action='store_true',
 
            help="Use this flag do delete repositories that are "
kallithea/lib/utils.py
Show inline comments
 
@@ -201,56 +201,65 @@ def action_logger(user, action, repo, ip
 
    log.info('Logging action:%s on %s by user:%s ip:%s',
 
             action, safe_unicode(repo), user_obj, ipaddr)
 
    if commit:
 
        sa.commit()
 

	
 

	
 
def get_filesystem_repos(path, recursive=False, skip_removed_repos=True):
 
def get_filesystem_repos(path):
 
    """
 
    Scans given path for repos and return (name,(type,path)) tuple
 

	
 
    :param path: path to scan for repositories
 
    :param recursive: recursive search and return names with subdirs in front
 
    """
 

	
 
    # remove ending slash for better results
 
    path = safe_str(path.rstrip(os.sep))
 
    log.debug('now scanning in %s location recursive:%s...', path, recursive)
 
    log.debug('now scanning in %s', path)
 

	
 
    def isdir(*n):
 
        return os.path.isdir(os.path.join(*n))
 

	
 
    def _get_repos(p):
 
        if not os.access(p, os.R_OK) or not os.access(p, os.X_OK):
 
            log.warning('ignoring repo path without access: %s', p)
 
            return
 
        if not os.access(p, os.W_OK):
 
            log.warning('repo path without write access: %s', p)
 
        for dirpath in os.listdir(p):
 
            if os.path.isfile(os.path.join(p, dirpath)):
 
                continue
 
            cur_path = os.path.join(p, dirpath)
 

	
 
    for root, dirs, _files in os.walk(path):
 
        recurse_dirs = []
 
        for subdir in dirs:
 
            # skip removed repos
 
            if skip_removed_repos and REMOVED_REPO_PAT.match(dirpath):
 
            if REMOVED_REPO_PAT.match(subdir):
 
                continue
 

	
 
            #skip .<something> dirs TODO: rly? then we should prevent creating them ...
 
            if dirpath.startswith('.'):
 
            if subdir.startswith('.'):
 
                continue
 

	
 
            cur_path = os.path.join(root, subdir)
 
            if (isdir(cur_path, '.hg') or
 
                isdir(cur_path, '.git') or
 
                isdir(cur_path, '.svn') or
 
                isdir(cur_path, 'objects') and (isdir(cur_path, 'refs') or isfile(cur_path, 'packed-refs'))):
 

	
 
                if not os.access(cur_path, os.R_OK) or not os.access(cur_path, os.X_OK):
 
                    log.warning('ignoring repo path without access: %s', cur_path)
 
                    continue
 

	
 
                if not os.access(cur_path, os.W_OK):
 
                    log.warning('repo path without write access: %s', cur_path)
 

	
 
            try:
 
                scm_info = get_scm(cur_path)
 
                yield scm_info[1].split(path, 1)[-1].lstrip(os.sep), scm_info
 
                    assert cur_path.startswith(path)
 
                    repo_path = cur_path[len(path) + 1:]
 
                    yield repo_path, scm_info
 
                    continue # no recursion
 
            except VCSError:
 
                if not recursive:
 
                    continue
 
                #check if this dir contains other repos for recursive scan
 
                rec_path = os.path.join(p, dirpath)
 
                if not os.path.islink(rec_path) and os.path.isdir(rec_path):
 
                    for inner_scm in _get_repos(rec_path):
 
                        yield inner_scm
 
                    # We should perhaps ignore such broken repos, but especially
 
                    # the bare git detection is unreliable so we dive into it
 
                    pass
 

	
 
    return _get_repos(path)
 
            recurse_dirs.append(subdir)
 

	
 
        dirs[:] = recurse_dirs
 

	
 

	
 
def is_valid_repo(repo_name, base_path, scm=None):
 
    """
 
    Returns True if given path is a valid repository False otherwise.
 
    If scm param is given also compare if given scm is the same as expected
kallithea/model/scm.py
Show inline comments
 
@@ -188,13 +188,13 @@ class ScmModel(BaseModel):
 

	
 
        log.info('scanning for repositories in %s', repos_path)
 

	
 
        baseui = make_ui('db')
 
        repos = {}
 

	
 
        for name, path in get_filesystem_repos(repos_path, recursive=True):
 
        for name, path in get_filesystem_repos(repos_path):
 
            # name need to be decomposed and put back together using the /
 
            # since this is internal storage separator for kallithea
 
            name = Repository.normalize_repo_name(name)
 

	
 
            try:
 
                if name in repos:
0 comments (0 inline, 0 general)