Changeset - e64c64e2b8aa
[Not reviewed]
beta
0 1 0
Marcin Kuzminski - 13 years ago 2012-08-30 00:57:51
marcin@python-works.com
re implemented affected_files function for git using dulwich
it speeds up things like 70%
1 file changed with 15 insertions and 2 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/vcs/backends/git/changeset.py
Show inline comments
 
@@ -317,156 +317,169 @@ class GitChangeset(BaseChangeset):
 
        if kind == 'tgz':
 
            cmd += ' | gzip -9'
 
        elif kind == 'tbz2':
 
            cmd += ' | bzip2 -9'
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
 
            cwd=self.repository.path)
 

	
 
        buffer_size = 1024 * 8
 
        chunk = popen.stdout.read(buffer_size)
 
        while chunk:
 
            stream.write(chunk)
 
            chunk = popen.stdout.read(buffer_size)
 
        # Make sure all descriptors would be read
 
        popen.communicate()
 

	
 
    def get_nodes(self, path):
 
        if self._get_kind(path) != NodeKind.DIR:
 
            raise ChangesetError("Directory does not exist for revision %r at "
 
                " %r" % (self.revision, path))
 
        path = self._fix_path(path)
 
        id = self._get_id_for_path(path)
 
        tree = self.repository._repo[id]
 
        dirnodes = []
 
        filenodes = []
 
        als = self.repository.alias
 
        for name, stat, id in tree.iteritems():
 
            if objects.S_ISGITLINK(stat):
 
                dirnodes.append(SubModuleNode(name, url=None, changeset=id,
 
                                              alias=als))
 
                continue
 

	
 
            obj = self.repository._repo.get_object(id)
 
            if path != '':
 
                obj_path = '/'.join((path, name))
 
            else:
 
                obj_path = name
 
            if obj_path not in self._stat_modes:
 
                self._stat_modes[obj_path] = stat
 
            if isinstance(obj, objects.Tree):
 
                dirnodes.append(DirNode(obj_path, changeset=self))
 
            elif isinstance(obj, objects.Blob):
 
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
 
            else:
 
                raise ChangesetError("Requested object should be Tree "
 
                                     "or Blob, is %r" % type(obj))
 
        nodes = dirnodes + filenodes
 
        for node in nodes:
 
            if not node.path in self.nodes:
 
                self.nodes[node.path] = node
 
        nodes.sort()
 
        return nodes
 

	
 
    def get_node(self, path):
 
        if isinstance(path, unicode):
 
            path = path.encode('utf-8')
 
        path = self._fix_path(path)
 
        if not path in self.nodes:
 
            try:
 
                id_ = self._get_id_for_path(path)
 
            except ChangesetError:
 
                raise NodeDoesNotExistError("Cannot find one of parents' "
 
                    "directories for a given path: %s" % path)
 

	
 
            _GL = lambda m: m and objects.S_ISGITLINK(m)
 
            if _GL(self._stat_modes.get(path)):
 
                node = SubModuleNode(path, url=None, changeset=id_,
 
                                     alias=self.repository.alias)
 
            else:
 
                obj = self.repository._repo.get_object(id_)
 

	
 
                if isinstance(obj, objects.Tree):
 
                    if path == '':
 
                        node = RootNode(changeset=self)
 
                    else:
 
                        node = DirNode(path, changeset=self)
 
                    node._tree = obj
 
                elif isinstance(obj, objects.Blob):
 
                    node = FileNode(path, changeset=self)
 
                    node._blob = obj
 
                else:
 
                    raise NodeDoesNotExistError("There is no file nor directory "
 
                        "at the given path %r at revision %r"
 
                        % (path, self.short_id))
 
            # cache node
 
            self.nodes[path] = node
 
        return self.nodes[path]
 

	
 
    @LazyProperty
 
    def affected_files(self):
 
        """
 
        Get's a fast accessible file changes for given changeset
 
        """
 
        #OLD SOLUTION
 
        #files = set()
 
        #for f in (self.added + self.changed + self.removed):
 
        #    files.add(f.path)
 
        #files = list(files) 
 

	
 
        return self.added + self.changed
 
        _r = self.repository._repo
 
        files = set()
 
        for parent in self.parents:
 
            changes = _r.object_store.tree_changes(_r[parent.raw_id].tree,
 
                                                   _r[self.raw_id].tree)
 
            for (oldpath, newpath), (_, _), (_, _) in changes:
 
                files.add(newpath or oldpath)
 
        return list(files)
 

	
 
    @LazyProperty
 
    def _diff_name_status(self):
 
        output = []
 
        for parent in self.parents:
 
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id, self.raw_id)
 
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
 
                                                                self.raw_id)
 
            so, se = self.repository.run_git_command(cmd)
 
            output.append(so.strip())
 
        return '\n'.join(output)
 

	
 
    def _get_paths_for_status(self, status):
 
        """
 
        Returns sorted list of paths for given ``status``.
 

	
 
        :param status: one of: *added*, *modified* or *deleted*
 
        """
 
        paths = set()
 
        char = status[0].upper()
 
        for line in self._diff_name_status.splitlines():
 
            if not line:
 
                continue
 

	
 
            if line.startswith(char):
 
                splitted = line.split(char, 1)
 
                if not len(splitted) == 2:
 
                    raise VCSError("Couldn't parse diff result:\n%s\n\n and "
 
                        "particularly that line: %s" % (self._diff_name_status,
 
                        line))
 
                _path = splitted[1].strip()
 
                paths.add(_path)
 
        return sorted(paths)
 

	
 
    @LazyProperty
 
    def added(self):
 
        """
 
        Returns list of added ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return list(self._get_file_nodes())
 
        return [self.get_node(path) for path in self._get_paths_for_status('added')]
 

	
 
    @LazyProperty
 
    def changed(self):
 
        """
 
        Returns list of modified ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return []
 
        return [self.get_node(path) for path in self._get_paths_for_status('modified')]
 

	
 
    @LazyProperty
 
    def removed(self):
 
        """
 
        Returns list of removed ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return []
 
        return [RemovedFileNode(path) for path in self._get_paths_for_status('deleted')]
0 comments (0 inline, 0 general)