Changeset - 184159c21be2
[Not reviewed]
default
0 2 0
Mads Kiilerich - 11 years ago 2014-08-01 20:28:42
madski@unity3d.com
vcs: replace recursive implementation of prev&next with iterative

It was recursing over the repo and could give "maximum recursion depth
exceeded".

The implementation is still inefficient.
2 files changed with 28 insertions and 44 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/vcs/backends/git/changeset.py
Show inline comments
 
@@ -108,234 +108,226 @@ class GitChangeset(BaseChangeset):
 
        # FIXME: Please, spare a couple of minutes and make those codes cleaner;
 
        if not path in self._paths:
 
            path = path.strip('/')
 
            # set root tree
 
            tree = self.repository._repo[self._tree_id]
 
            if path == '':
 
                self._paths[''] = tree.id
 
                return tree.id
 
            splitted = path.split('/')
 
            dirs, name = splitted[:-1], splitted[-1]
 
            curdir = ''
 

	
 
            # initially extract things from root dir
 
            for item, stat, id in tree.iteritems():
 
                if curdir:
 
                    name = '/'.join((curdir, item))
 
                else:
 
                    name = item
 
                self._paths[name] = id
 
                self._stat_modes[name] = stat
 

	
 
            for dir in dirs:
 
                if curdir:
 
                    curdir = '/'.join((curdir, dir))
 
                else:
 
                    curdir = dir
 
                dir_id = None
 
                for item, stat, id in tree.iteritems():
 
                    if dir == item:
 
                        dir_id = id
 
                if dir_id:
 
                    # Update tree
 
                    tree = self.repository._repo[dir_id]
 
                    if not isinstance(tree, objects.Tree):
 
                        raise ChangesetError('%s is not a directory' % curdir)
 
                else:
 
                    raise ChangesetError('%s have not been found' % curdir)
 

	
 
                # cache all items from the given traversed tree
 
                for item, stat, id in tree.iteritems():
 
                    if curdir:
 
                        name = '/'.join((curdir, item))
 
                    else:
 
                        name = item
 
                    self._paths[name] = id
 
                    self._stat_modes[name] = stat
 
            if not path in self._paths:
 
                raise NodeDoesNotExistError("There is no file nor directory "
 
                    "at the given path '%s' at revision %s"
 
                    % (path, safe_str(self.short_id)))
 
        return self._paths[path]
 

	
 
    def _get_kind(self, path):
 
        obj = self.repository._repo[self._get_id_for_path(path)]
 
        if isinstance(obj, objects.Blob):
 
            return NodeKind.FILE
 
        elif isinstance(obj, objects.Tree):
 
            return NodeKind.DIR
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %s at "
 
                " '%s'" % (self.raw_id, path))
 
        return path
 

	
 
    def _get_file_nodes(self):
 
        return chain(*(t[2] for t in self.walk()))
 

	
 
    @LazyProperty
 
    def parents(self):
 
        """
 
        Returns list of parents changesets.
 
        """
 
        return [self.repository.get_changeset(parent)
 
                for parent in self._commit.parents]
 

	
 
    @LazyProperty
 
    def children(self):
 
        """
 
        Returns list of children changesets.
 
        """
 
        rev_filter = _git_path = settings.GIT_REV_FILTER
 
        so, se = self.repository.run_git_command(
 
            "rev-list %s --children" % (rev_filter)
 
        )
 

	
 
        children = []
 
        pat = re.compile(r'^%s' % self.raw_id)
 
        for l in so.splitlines():
 
            if pat.match(l):
 
                childs = l.split(' ')[1:]
 
                children.extend(childs)
 
        return [self.repository.get_changeset(cs) for cs in children]
 

	
 
    def next(self, branch=None):
 

	
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        def _next(changeset, branch):
 
        cs = self
 
        while True:
 
            try:
 
                next_ = changeset.revision + 1
 
                next_rev = changeset.repository.revisions[next_]
 
                next_ = cs.revision + 1
 
                next_rev = cs.repository.revisions[next_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = changeset.repository.get_changeset(next_rev)
 
            cs = cs.repository.get_changeset(next_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _next(cs, branch)
 

	
 
            return cs
 

	
 
        return _next(self, branch)
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def prev(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        def _prev(changeset, branch):
 
        cs = self
 
        while True:
 
            try:
 
                prev_ = changeset.revision - 1
 
                prev_ = cs.revision - 1
 
                if prev_ < 0:
 
                    raise IndexError
 
                prev_rev = changeset.repository.revisions[prev_]
 
                prev_rev = cs.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 

	
 
            cs = changeset.repository.get_changeset(prev_rev)
 
            cs = cs.repository.get_changeset(prev_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _prev(cs, branch)
 

	
 
            return cs
 

	
 
        return _prev(self, branch)
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
 
        rev2 = self
 
        return ''.join(self.repository.get_diff(rev1, rev2,
 
                                    ignore_whitespace=ignore_whitespace,
 
                                    context=context))
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        # ensure path is traversed
 
        path = safe_str(path)
 
        self._get_id_for_path(path)
 
        return self._stat_modes[path]
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.as_pretty_string()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.raw_length()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        TODO: This function now uses os underlying 'git' and 'grep' commands
 
        which is generally not good. Should be replaced with algorithm
 
        iterating commits.
 
        """
 
        self._get_filectx(path)
 
        cs_id = safe_str(self.id)
 
        f_path = safe_str(path)
 

	
 
        if limit:
 
            cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % (
 
                      safe_int(limit, 0), cs_id, f_path)
 

	
 
        else:
 
            cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % (
 
                      cs_id, f_path)
 
        so, se = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(sha) for sha in ids]
 

	
 
    def get_file_history_2(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        """
 
        self._get_filectx(path)
 
        from dulwich.walk import Walker
 
        include = [self.id]
 
        walker = Walker(self.repository._repo.object_store, include,
 
                        paths=[path], max_entries=1)
 
        return [self.repository.get_changeset(sha)
 
                for sha in (x.commit.id for x in walker)]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 

	
 
        TODO: This function now uses os underlying 'git' command which is
 
        generally not good. Should be replaced with algorithm iterating
 
        commits.
 
        """
 
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for bounderies
 
        # -r sha ==> blames for the given revision
 
        so, se = self.repository.run_git_command(cmd)
 

	
 
        for i, blame_line in enumerate(so.split('\n')[:-1]):
 
            ln_no = i + 1
 
            sha, line = re.split(r' ', blame_line, 1)
 
            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), line)
kallithea/lib/vcs/backends/hg/changeset.py
Show inline comments
 
@@ -22,234 +22,226 @@ class MercurialChangeset(BaseChangeset):
 
    Represents state of the repository at the single revision.
 
    """
 

	
 
    def __init__(self, repository, revision):
 
        self.repository = repository
 
        assert isinstance(revision, basestring), repr(revision)
 
        self.raw_id = revision
 
        self._ctx = repository._repo[revision]
 
        self.revision = self._ctx._rev
 
        self.nodes = {}
 

	
 
    @LazyProperty
 
    def tags(self):
 
        return map(safe_unicode, self._ctx.tags())
 

	
 
    @LazyProperty
 
    def branch(self):
 
        return  safe_unicode(self._ctx.branch())
 

	
 
    @LazyProperty
 
    def closesbranch(self):
 
        return  self._ctx.closesbranch()
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        return map(safe_unicode, self._ctx.bookmarks())
 

	
 
    @LazyProperty
 
    def message(self):
 
        return safe_unicode(self._ctx.description())
 

	
 
    @LazyProperty
 
    def committer(self):
 
        return safe_unicode(self.author)
 

	
 
    @LazyProperty
 
    def author(self):
 
        return safe_unicode(self._ctx.user())
 

	
 
    @LazyProperty
 
    def date(self):
 
        return date_fromtimestamp(*self._ctx.date())
 

	
 
    @LazyProperty
 
    def _timestamp(self):
 
        return self._ctx.date()[0]
 

	
 
    @LazyProperty
 
    def status(self):
 
        """
 
        Returns modified, added, removed, deleted files for current changeset
 
        """
 
        return self.repository._repo.status(self._ctx.p1().node(),
 
                                            self._ctx.node())
 

	
 
    @LazyProperty
 
    def _file_paths(self):
 
        return list(self._ctx)
 

	
 
    @LazyProperty
 
    def _dir_paths(self):
 
        p = list(set(get_dirs_for_path(*self._file_paths)))
 
        p.insert(0, '')
 
        return p
 

	
 
    @LazyProperty
 
    def _paths(self):
 
        return self._dir_paths + self._file_paths
 

	
 
    @LazyProperty
 
    def id(self):
 
        if self.last:
 
            return u'tip'
 
        return self.short_id
 

	
 
    @LazyProperty
 
    def short_id(self):
 
        return self.raw_id[:12]
 

	
 
    @LazyProperty
 
    def parents(self):
 
        """
 
        Returns list of parents changesets.
 
        """
 
        return [self.repository.get_changeset(parent.rev())
 
                for parent in self._ctx.parents() if parent.rev() >= 0]
 

	
 
    @LazyProperty
 
    def children(self):
 
        """
 
        Returns list of children changesets.
 
        """
 
        return [self.repository.get_changeset(child.rev())
 
                for child in self._ctx.children() if child.rev() >= 0]
 

	
 
    def next(self, branch=None):
 

	
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        def _next(changeset, branch):
 
        cs = self
 
        while True:
 
            try:
 
                next_ = changeset.revision + 1
 
                next_rev = changeset.repository.revisions[next_]
 
                next_ = cs.revision + 1
 
                next_rev = cs.repository.revisions[next_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = changeset.repository.get_changeset(next_rev)
 
            cs = cs.repository.get_changeset(next_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _next(cs, branch)
 

	
 
            return cs
 

	
 
        return _next(self, branch)
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def prev(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        def _prev(changeset, branch):
 
        cs = self
 
        while True:
 
            try:
 
                prev_ = changeset.revision - 1
 
                prev_ = cs.revision - 1
 
                if prev_ < 0:
 
                    raise IndexError
 
                prev_rev = changeset.repository.revisions[prev_]
 
                prev_rev = cs.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 

	
 
            cs = changeset.repository.get_changeset(prev_rev)
 
            cs = cs.repository.get_changeset(prev_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _prev(cs, branch)
 

	
 
            return cs
 

	
 
        return _prev(self, branch)
 
            if not branch or branch == cs.branch:
 
                return cs
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        return ''.join(self._ctx.diff(git=True,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=context))
 

	
 
    def _fix_path(self, path):
 
        """
 
        Paths are stored without trailing slash so we need to get rid off it if
 
        needed. Also mercurial keeps filenodes as str so we need to decode
 
        from unicode to str
 
        """
 
        if path.endswith('/'):
 
            path = path.rstrip('/')
 

	
 
        return safe_str(path)
 

	
 
    def _get_kind(self, path):
 
        path = self._fix_path(path)
 
        if path in self._file_paths:
 
            return NodeKind.FILE
 
        elif path in self._dir_paths:
 
            return NodeKind.DIR
 
        else:
 
            raise ChangesetError("Node does not exist at the given path '%s'"
 
                % (path))
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %s at "
 
                " '%s'" % (self.raw_id, path))
 
        return self._ctx.filectx(path)
 

	
 
    def _extract_submodules(self):
 
        """
 
        returns a dictionary with submodule information from substate file
 
        of hg repository
 
        """
 
        return self._ctx.substate
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        if 'x' in fctx.flags():
 
            return 0100755
 
        else:
 
            return 0100644
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.data()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.size()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 
        """
 
        fctx = self._get_filectx(path)
 
        hist = []
 
        cnt = 0
 
        for cs in reversed([x for x in fctx.filelog()]):
 
            cnt += 1
 
            hist.append(hex(fctx.filectx(cs).node()))
 
            if limit and cnt == limit:
 
                break
 

	
 
        return [self.repository.get_changeset(node) for node in hist]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 
        """
 

	
 
        fctx = self._get_filectx(path)
 
        for i, annotate_data in enumerate(fctx.annotate()):
 
            ln_no = i + 1
0 comments (0 inline, 0 general)