Changeset - 82f818616265
[Not reviewed]
default
0 1 0
Søren Løvborg - 10 years ago 2016-03-08 12:28:06
sorenl@unity3d.com
db: cache SCM instance short-term (tied to SQLAlchemy session lifetime)

Repeatedly checking whether SCM instances are invalidated is slow, and
we don't actually _want_ SCM instances to invalidate half-way through
a request either.

Therefore cache them in on the db.Repository object, the lifetime
of which is directly tied to the lifetime of the SQLAlchemy session,
the lifetime of which is tied directly to the individual HTTP request.
This way, we only check for invalidation the first time the SCM instance
is accessed in a request.

This will improve performance in cases where we have (by definition) badly
written code that retrieves repo objects several times.
1 file changed with 5 insertions and 1 deletions:
0 comments (0 inline, 0 general)
kallithea/model/db.py
Show inline comments
 
@@ -1248,387 +1248,391 @@ class Repository(Base, BaseModel):
 

	
 
        return data
 

	
 
    @classmethod
 
    def lock(cls, repo, user_id, lock_time=None):
 
        if lock_time is not None:
 
            lock_time = time.time()
 
        repo.locked = [user_id, lock_time]
 
        Session().add(repo)
 
        Session().commit()
 

	
 
    @classmethod
 
    def unlock(cls, repo):
 
        repo.locked = None
 
        Session().add(repo)
 
        Session().commit()
 

	
 
    @classmethod
 
    def getlock(cls, repo):
 
        return repo.locked
 

	
 
    @property
 
    def last_db_change(self):
 
        return self.updated_on
 

	
 
    @property
 
    def clone_uri_hidden(self):
 
        clone_uri = self.clone_uri
 
        if clone_uri:
 
            import urlobject
 
            url_obj = urlobject.URLObject(self.clone_uri)
 
            if url_obj.password:
 
                clone_uri = url_obj.with_password('*****')
 
        return clone_uri
 

	
 
    def clone_url(self, **override):
 
        import kallithea.lib.helpers as h
 
        qualified_home_url = h.canonical_url('home')
 

	
 
        uri_tmpl = None
 
        if 'with_id' in override:
 
            uri_tmpl = self.DEFAULT_CLONE_URI_ID
 
            del override['with_id']
 

	
 
        if 'uri_tmpl' in override:
 
            uri_tmpl = override['uri_tmpl']
 
            del override['uri_tmpl']
 

	
 
        # we didn't override our tmpl from **overrides
 
        if not uri_tmpl:
 
            uri_tmpl = self.DEFAULT_CLONE_URI
 
            try:
 
                from pylons import tmpl_context as c
 
                uri_tmpl = c.clone_uri_tmpl
 
            except AttributeError:
 
                # in any case if we call this outside of request context,
 
                # ie, not having tmpl_context set up
 
                pass
 

	
 
        return get_clone_url(uri_tmpl=uri_tmpl,
 
                             qualified_home_url=qualified_home_url,
 
                             repo_name=self.repo_name,
 
                             repo_id=self.repo_id, **override)
 

	
 
    def set_state(self, state):
 
        self.repo_state = state
 
        Session().add(self)
 
    #==========================================================================
 
    # SCM PROPERTIES
 
    #==========================================================================
 

	
 
    def get_changeset(self, rev=None):
 
        return get_changeset_safe(self.scm_instance, rev)
 

	
 
    def get_landing_changeset(self):
 
        """
 
        Returns landing changeset, or if that doesn't exist returns the tip
 
        """
 
        _rev_type, _rev = self.landing_rev
 
        cs = self.get_changeset(_rev)
 
        if isinstance(cs, EmptyChangeset):
 
            return self.get_changeset()
 
        return cs
 

	
 
    def update_changeset_cache(self, cs_cache=None):
 
        """
 
        Update cache of last changeset for repository, keys should be::
 

	
 
            short_id
 
            raw_id
 
            revision
 
            message
 
            date
 
            author
 

	
 
        :param cs_cache:
 
        """
 
        from kallithea.lib.vcs.backends.base import BaseChangeset
 
        if cs_cache is None:
 
            cs_cache = EmptyChangeset()
 
            # use no-cache version here
 
            scm_repo = self.scm_instance_no_cache()
 
            if scm_repo:
 
                cs_cache = scm_repo.get_changeset()
 

	
 
        if isinstance(cs_cache, BaseChangeset):
 
            cs_cache = cs_cache.__json__()
 

	
 
        if (not self.changeset_cache or cs_cache['raw_id'] != self.changeset_cache['raw_id']):
 
            _default = datetime.datetime.fromtimestamp(0)
 
            last_change = cs_cache.get('date') or _default
 
            log.debug('updated repo %s with new cs cache %s',
 
                      self.repo_name, cs_cache)
 
            self.updated_on = last_change
 
            self.changeset_cache = cs_cache
 
            Session().add(self)
 
            Session().commit()
 
        else:
 
            log.debug('changeset_cache for %s already up to date with %s',
 
                      self.repo_name, cs_cache['raw_id'])
 

	
 
    @property
 
    def tip(self):
 
        return self.get_changeset('tip')
 

	
 
    @property
 
    def author(self):
 
        return self.tip.author
 

	
 
    @property
 
    def last_change(self):
 
        return self.scm_instance.last_change
 

	
 
    def get_comments(self, revisions=None):
 
        """
 
        Returns comments for this repository grouped by revisions
 

	
 
        :param revisions: filter query by revisions only
 
        """
 
        cmts = ChangesetComment.query() \
 
            .filter(ChangesetComment.repo == self)
 
        if revisions is not None:
 
            if not revisions:
 
                return {} # don't use sql 'in' on empty set
 
            cmts = cmts.filter(ChangesetComment.revision.in_(revisions))
 
        grouped = collections.defaultdict(list)
 
        for cmt in cmts.all():
 
            grouped[cmt.revision].append(cmt)
 
        return grouped
 

	
 
    def statuses(self, revisions):
 
        """
 
        Returns statuses for this repository.
 
        PRs without any votes do _not_ show up as unreviewed.
 

	
 
        :param revisions: list of revisions to get statuses for
 
        """
 
        if not revisions:
 
            return {}
 

	
 
        statuses = ChangesetStatus.query() \
 
            .filter(ChangesetStatus.repo == self) \
 
            .filter(ChangesetStatus.version == 0) \
 
            .filter(ChangesetStatus.revision.in_(revisions))
 

	
 
        grouped = {}
 
        for stat in statuses.all():
 
            pr_id = pr_nice_id = pr_repo = None
 
            if stat.pull_request:
 
                pr_id = stat.pull_request.pull_request_id
 
                pr_nice_id = PullRequest.make_nice_id(pr_id)
 
                pr_repo = stat.pull_request.other_repo.repo_name
 
            grouped[stat.revision] = [str(stat.status), stat.status_lbl,
 
                                      pr_id, pr_repo, pr_nice_id,
 
                                      stat.author]
 
        return grouped
 

	
 
    def _repo_size(self):
 
        from kallithea.lib import helpers as h
 
        log.debug('calculating repository size...')
 
        return h.format_byte_size(self.scm_instance.size)
 

	
 
    #==========================================================================
 
    # SCM CACHE INSTANCE
 
    #==========================================================================
 

	
 
    def set_invalidate(self):
 
        """
 
        Mark caches of this repo as invalid.
 
        """
 
        CacheInvalidation.set_invalidate(self.repo_name)
 

	
 
    _scm_instance = None
 

	
 
    @property
 
    def scm_instance(self):
 
        return self.scm_instance_cached()
 
        if self._scm_instance is None:
 
            self._scm_instance = self.scm_instance_cached()
 
        return self._scm_instance
 

	
 
    def scm_instance_cached(self, valid_cache_keys=None):
 
        @cache_region('long_term', 'scm_instance_cached')
 
        def _c(repo_name): # repo_name is just for the cache key
 
            log.debug('Creating new %s scm_instance and populating cache', repo_name)
 
            return self.scm_instance_no_cache()
 
        rn = self.repo_name
 

	
 
        valid = CacheInvalidation.test_and_set_valid(rn, None, valid_cache_keys=valid_cache_keys)
 
        if not valid:
 
            log.debug('Cache for %s invalidated, getting new object', rn)
 
            region_invalidate(_c, None, 'scm_instance_cached', rn)
 
        else:
 
            log.debug('Trying to get scm_instance of %s from cache', rn)
 
        return _c(rn)
 

	
 
    def scm_instance_no_cache(self):
 
        repo_full_path = safe_str(self.repo_full_path)
 
        alias = get_scm(repo_full_path)[0]
 
        log.debug('Creating instance of %s repository from %s',
 
                  alias, self.repo_full_path)
 
        backend = get_backend(alias)
 

	
 
        if alias == 'hg':
 
            repo = backend(repo_full_path, create=False,
 
                           baseui=self._ui)
 
        else:
 
            repo = backend(repo_full_path, create=False)
 

	
 
        return repo
 

	
 
    def __json__(self):
 
        return dict(landing_rev = self.landing_rev)
 

	
 
class RepoGroup(Base, BaseModel):
 
    __tablename__ = 'groups'
 
    __table_args__ = (
 
        UniqueConstraint('group_name', 'group_parent_id'),
 
        CheckConstraint('group_id != group_parent_id'),
 
        _table_args_default_dict,
 
    )
 
    __mapper_args__ = {'order_by': 'group_name'}
 

	
 
    SEP = ' » '
 

	
 
    group_id = Column(Integer(), unique=True, primary_key=True)
 
    group_name = Column(Unicode(255), nullable=False, unique=True)
 
    group_parent_id = Column(Integer(), ForeignKey('groups.group_id'), nullable=True)
 
    group_description = Column(Unicode(10000), nullable=False)
 
    enable_locking = Column(Boolean(), nullable=False, default=False)
 
    user_id = Column(Integer(), ForeignKey('users.user_id'), nullable=False)
 
    created_on = Column(DateTime(timezone=False), nullable=False, default=datetime.datetime.now)
 

	
 
    repo_group_to_perm = relationship('UserRepoGroupToPerm', cascade='all', order_by='UserRepoGroupToPerm.group_to_perm_id')
 
    users_group_to_perm = relationship('UserGroupRepoGroupToPerm', cascade='all')
 
    parent_group = relationship('RepoGroup', remote_side=group_id)
 
    user = relationship('User')
 

	
 
    def __init__(self, group_name='', parent_group=None):
 
        self.group_name = group_name
 
        self.parent_group = parent_group
 

	
 
    def __unicode__(self):
 
        return u"<%s('id:%s:%s')>" % (self.__class__.__name__, self.group_id,
 
                                      self.group_name)
 

	
 
    @classmethod
 
    def _generate_choice(cls, repo_group):
 
        """Return tuple with group_id and name as html literal"""
 
        from webhelpers.html import literal
 
        if repo_group is None:
 
            return (-1, u'-- %s --' % _('top level'))
 
        return repo_group.group_id, literal(cls.SEP.join(repo_group.full_path_splitted))
 

	
 
    @classmethod
 
    def groups_choices(cls, groups):
 
        """Return tuples with group_id and name as html literal."""
 
        return sorted((cls._generate_choice(g) for g in groups),
 
                      key=lambda c: c[1].split(cls.SEP))
 

	
 
    @classmethod
 
    def url_sep(cls):
 
        return URL_SEP
 

	
 
    @classmethod
 
    def get_by_group_name(cls, group_name, cache=False, case_insensitive=False):
 
        if case_insensitive:
 
            gr = cls.query() \
 
                .filter(func.lower(cls.group_name) == func.lower(group_name))
 
        else:
 
            gr = cls.query() \
 
                .filter(cls.group_name == group_name)
 
        if cache:
 
            gr = gr.options(FromCache(
 
                            "sql_cache_short",
 
                            "get_group_%s" % _hash_key(group_name)
 
                            )
 
            )
 
        return gr.scalar()
 

	
 
    @property
 
    def parents(self):
 
        parents_recursion_limit = 10
 
        groups = []
 
        if self.parent_group is None:
 
            return groups
 
        cur_gr = self.parent_group
 
        groups.insert(0, cur_gr)
 
        cnt = 0
 
        while 1:
 
            cnt += 1
 
            gr = getattr(cur_gr, 'parent_group', None)
 
            cur_gr = cur_gr.parent_group
 
            if gr is None:
 
                break
 
            if cnt == parents_recursion_limit:
 
                # this will prevent accidental infinite loops
 
                log.error(('more than %s parents found for group %s, stopping '
 
                           'recursive parent fetching' % (parents_recursion_limit, self)))
 
                break
 

	
 
            groups.insert(0, gr)
 
        return groups
 

	
 
    @property
 
    def children(self):
 
        return RepoGroup.query().filter(RepoGroup.parent_group == self)
 

	
 
    @property
 
    def name(self):
 
        return self.group_name.split(RepoGroup.url_sep())[-1]
 

	
 
    @property
 
    def full_path(self):
 
        return self.group_name
 

	
 
    @property
 
    def full_path_splitted(self):
 
        return self.group_name.split(RepoGroup.url_sep())
 

	
 
    @property
 
    def repositories(self):
 
        return Repository.query() \
 
                .filter(Repository.group == self) \
 
                .order_by(Repository.repo_name)
 

	
 
    @property
 
    def repositories_recursive_count(self):
 
        cnt = self.repositories.count()
 

	
 
        def children_count(group):
 
            cnt = 0
 
            for child in group.children:
 
                cnt += child.repositories.count()
 
                cnt += children_count(child)
 
            return cnt
 

	
 
        return cnt + children_count(self)
 

	
 
    def _recursive_objects(self, include_repos=True):
 
        all_ = []
 

	
 
        def _get_members(root_gr):
 
            if include_repos:
 
                for r in root_gr.repositories:
 
                    all_.append(r)
 
            childs = root_gr.children.all()
 
            if childs:
 
                for gr in childs:
 
                    all_.append(gr)
 
                    _get_members(gr)
 

	
 
        _get_members(self)
 
        return [self] + all_
 

	
 
    def recursive_groups_and_repos(self):
 
        """
 
        Recursive return all groups, with repositories in those groups
 
        """
 
        return self._recursive_objects()
 

	
 
    def recursive_groups(self):
 
        """
 
        Returns all children groups for this group including children of children
 
        """
 
        return self._recursive_objects(include_repos=False)
 

	
 
    def get_new_name(self, group_name):
 
        """
 
        returns new full group name based on parent and new name
 

	
 
        :param group_name:
0 comments (0 inline, 0 general)