Changeset - c57a37430dc9
[Not reviewed]
beta
0 3 0
Marcin Kuzminski - 13 years ago 2012-11-29 19:59:47
marcin@python-works.com
fixes #652 switch to generator approach when doing file annotation to prevent huge memory consumption when executed on large files. Thanks to ALexey Larikov for patch.
- added pure dulwich method for getting file history.
3 files changed with 33 insertions and 20 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/annotate.py
Show inline comments
 
@@ -49,142 +49,143 @@ class AnnotateHtmlFormatter(HtmlFormatte
 
            order=None, **options):
 
        """
 
        If ``annotate_from_changeset_func`` is passed it should be a function
 
        which returns string from the given changeset. For example, we may pass
 
        following function as ``annotate_from_changeset_func``::
 

	
 
            def changeset_to_anchor(changeset):
 
                return '<a href="/changesets/%s/">%s</a>\n' %\
 
                       (changeset.id, changeset.id)
 

	
 
        :param annotate_from_changeset_func: see above
 
        :param order: (default: ``['ls', 'annotate', 'code']``); order of
 
          columns;
 
        :param options: standard pygment's HtmlFormatter options, there is
 
          extra option tough, ``headers``. For instance we can pass::
 

	
 
             formatter = AnnotateHtmlFormatter(filenode, headers={
 
                'ls': '#',
 
                'annotate': 'Annotate',
 
                'code': 'Code',
 
             })
 

	
 
        """
 
        super(AnnotateHtmlFormatter, self).__init__(**options)
 
        self.annotate_from_changeset_func = annotate_from_changeset_func
 
        self.order = order or ('ls', 'annotate', 'code')
 
        headers = options.pop('headers', None)
 
        if headers and not ('ls' in headers and 'annotate' in headers and
 
            'code' in headers):
 
            raise ValueError("If headers option dict is specified it must "
 
                "all 'ls', 'annotate' and 'code' keys")
 
        self.headers = headers
 
        if isinstance(filenode, FileNode):
 
            self.filenode = filenode
 
        else:
 
            raise VCSError("This formatter expect FileNode parameter, not %r"
 
                % type(filenode))
 

	
 
    def annotate_from_changeset(self, changeset):
 
        """
 
        Returns full html line for single changeset per annotated line.
 
        """
 
        if self.annotate_from_changeset_func:
 
            return self.annotate_from_changeset_func(changeset)
 
        else:
 
            return ''.join((changeset.id, '\n'))
 

	
 
    def _wrap_tablelinenos(self, inner):
 
        dummyoutfile = StringIO.StringIO()
 
        lncount = 0
 
        for t, line in inner:
 
            if t:
 
                lncount += 1
 
            dummyoutfile.write(line)
 

	
 
        fl = self.linenostart
 
        mw = len(str(lncount + fl - 1))
 
        sp = self.linenospecial
 
        st = self.linenostep
 
        la = self.lineanchors
 
        aln = self.anchorlinenos
 
        if sp:
 
            lines = []
 

	
 
            for i in range(fl, fl + lncount):
 
                if i % st == 0:
 
                    if i % sp == 0:
 
                        if aln:
 
                            lines.append('<a href="#%s-%d" class="special">'
 
                                         '%*d</a>' %
 
                                         (la, i, mw, i))
 
                        else:
 
                            lines.append('<span class="special">'
 
                                         '%*d</span>' % (mw, i))
 
                    else:
 
                        if aln:
 
                            lines.append('<a href="#%s-%d">'
 
                                         '%*d</a>' % (la, i, mw, i))
 
                        else:
 
                            lines.append('%*d' % (mw, i))
 
                else:
 
                    lines.append('')
 
            ls = '\n'.join(lines)
 
        else:
 
            lines = []
 
            for i in range(fl, fl + lncount):
 
                if i % st == 0:
 
                    if aln:
 
                        lines.append('<a href="#%s-%d">%*d</a>' \
 
                                     % (la, i, mw, i))
 
                    else:
 
                        lines.append('%*d' % (mw, i))
 
                else:
 
                    lines.append('')
 
            ls = '\n'.join(lines)
 

	
 
        annotate_changesets = [tup[1] for tup in self.filenode.annotate]
 
        # If pygments cropped last lines break we need do that too
 
        ln_cs = len(annotate_changesets)
 
        ln_ = len(ls.splitlines())
 
        if  ln_cs > ln_:
 
            annotate_changesets = annotate_changesets[:ln_ - ln_cs]
 
        annotate = ''.join((self.annotate_from_changeset(changeset)
 
            for changeset in annotate_changesets))
 
#        annotate_changesets = [tup[1] for tup in self.filenode.annotate]
 
##        TODO: not sure what that fixes
 
#        # If pygments cropped last lines break we need do that too
 
#        ln_cs = len(annotate_changesets)
 
#        ln_ = len(ls.splitlines())
 
#        if  ln_cs > ln_:
 
#            annotate_changesets = annotate_changesets[:ln_ - ln_cs]
 
        annotate = ''.join((self.annotate_from_changeset(el[2]())
 
                            for el in self.filenode.annotate))
 
        # in case you wonder about the seemingly redundant <div> here:
 
        # since the content in the other cell also is wrapped in a div,
 
        # some browsers in some configurations seem to mess up the formatting.
 
        '''
 
        yield 0, ('<table class="%stable">' % self.cssclass +
 
                  '<tr><td class="linenos"><div class="linenodiv"><pre>' +
 
                  ls + '</pre></div></td>' +
 
                  '<td class="code">')
 
        yield 0, dummyoutfile.getvalue()
 
        yield 0, '</td></tr></table>'
 

	
 
        '''
 
        headers_row = []
 
        if self.headers:
 
            headers_row = ['<tr class="annotate-header">']
 
            for key in self.order:
 
                td = ''.join(('<td>', self.headers[key], '</td>'))
 
                headers_row.append(td)
 
            headers_row.append('</tr>')
 

	
 
        body_row_start = ['<tr>']
 
        for key in self.order:
 
            if key == 'ls':
 
                body_row_start.append(
 
                    '<td class="linenos"><div class="linenodiv"><pre>' +
 
                    ls + '</pre></div></td>')
 
            elif key == 'annotate':
 
                body_row_start.append(
 
                    '<td class="annotate"><div class="annotatediv"><pre>' +
 
                    annotate + '</pre></div></td>')
 
            elif key == 'code':
 
                body_row_start.append('<td class="code">')
 
        yield 0, ('<table class="%stable">' % self.cssclass +
 
                  ''.join(headers_row) +
 
                  ''.join(body_row_start)
 
                  )
 
        yield 0, dummyoutfile.getvalue()
 
        yield 0, '</td></tr></table>'
rhodecode/lib/vcs/backends/git/changeset.py
Show inline comments
 
@@ -179,219 +179,233 @@ class GitChangeset(BaseChangeset):
 
        """
 
        return [self.repository.get_changeset(parent)
 
                for parent in self._commit.parents]
 

	
 
    def next(self, branch=None):
 

	
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        def _next(changeset, branch):
 
            try:
 
                next_ = changeset.revision + 1
 
                next_rev = changeset.repository.revisions[next_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 
            cs = changeset.repository.get_changeset(next_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _next(cs, branch)
 

	
 
            return cs
 

	
 
        return _next(self, branch)
 

	
 
    def prev(self, branch=None):
 
        if branch and self.branch != branch:
 
            raise VCSError('Branch option used on changeset not belonging '
 
                           'to that branch')
 

	
 
        def _prev(changeset, branch):
 
            try:
 
                prev_ = changeset.revision - 1
 
                if prev_ < 0:
 
                    raise IndexError
 
                prev_rev = changeset.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 

	
 
            cs = changeset.repository.get_changeset(prev_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _prev(cs, branch)
 

	
 
            return cs
 

	
 
        return _prev(self, branch)
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
 
        rev2 = self
 
        return ''.join(self.repository.get_diff(rev1, rev2,
 
                                    ignore_whitespace=ignore_whitespace,
 
                                    context=context))
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        # ensure path is traversed
 
        self._get_id_for_path(path)
 
        return self._stat_modes[path]
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.as_pretty_string()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.raw_length()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        node = self.get_node(path)
 
        return node.history[0]
 

	
 
    def get_file_history(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        TODO: This function now uses os underlying 'git' and 'grep' commands
 
        which is generally not good. Should be replaced with algorithm
 
        iterating commits.
 
        """
 
        self._get_filectx(path)
 

	
 
        cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
 
                  self.id, path
 
               )
 
        so, se = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(id) for id in ids]
 

	
 
    def get_file_history_2(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        """
 
        self._get_filectx(path)
 
        from dulwich.walk import Walker
 
        include = [self.id]
 
        walker = Walker(self.repository._repo.object_store, include,
 
                        paths=[path], max_entries=1)
 
        return [self.repository.get_changeset(sha) 
 
                for sha in (x.commit.id for x in walker)]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a list of three element tuples with lineno,changeset and line
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 

	
 
        TODO: This function now uses os underlying 'git' command which is
 
        generally not good. Should be replaced with algorithm iterating
 
        commits.
 
        """
 
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for bounderies
 
        # -r sha ==> blames for the given revision
 
        so, se = self.repository.run_git_command(cmd)
 

	
 
        annotate = []
 
        for i, blame_line in enumerate(so.split('\n')[:-1]):
 
            ln_no = i + 1
 
            id, line = re.split(r' ', blame_line, 1)
 
            annotate.append((ln_no, self.repository.get_changeset(id), line))
 
        return annotate
 
            sha, line = re.split(r' ', blame_line, 1)
 
            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), line)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
 
            (``repo-tip.<KIND>``).
 
        :param subrepos: include subrepos in this archive.
 

	
 
        :raise ImproperArchiveTypeError: If given kind is wrong.
 
        :raise VcsError: If given stream is None
 

	
 
        """
 
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
 
        if kind not in allowed_kinds:
 
            raise ImproperArchiveTypeError('Archive kind not supported use one'
 
                'of %s', allowed_kinds)
 

	
 
        if prefix is None:
 
            prefix = '%s-%s' % (self.repository.name, self.short_id)
 
        elif prefix.startswith('/'):
 
            raise VCSError("Prefix cannot start with leading slash")
 
        elif prefix.strip() == '':
 
            raise VCSError("Prefix cannot be empty")
 

	
 
        if kind == 'zip':
 
            frmt = 'zip'
 
        else:
 
            frmt = 'tar'
 
        cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
 
            self.raw_id)
 
        if kind == 'tgz':
 
            cmd += ' | gzip -9'
 
        elif kind == 'tbz2':
 
            cmd += ' | bzip2 -9'
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 
        popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
 
            cwd=self.repository.path)
 

	
 
        buffer_size = 1024 * 8
 
        chunk = popen.stdout.read(buffer_size)
 
        while chunk:
 
            stream.write(chunk)
 
            chunk = popen.stdout.read(buffer_size)
 
        # Make sure all descriptors would be read
 
        popen.communicate()
 

	
 
    def get_nodes(self, path):
 
        if self._get_kind(path) != NodeKind.DIR:
 
            raise ChangesetError("Directory does not exist for revision %r at "
 
                " %r" % (self.revision, path))
 
        path = self._fix_path(path)
 
        id = self._get_id_for_path(path)
 
        tree = self.repository._repo[id]
 
        dirnodes = []
 
        filenodes = []
 
        als = self.repository.alias
 
        for name, stat, id in tree.iteritems():
 
            if objects.S_ISGITLINK(stat):
 
                dirnodes.append(SubModuleNode(name, url=None, changeset=id,
 
                                              alias=als))
 
                continue
 

	
 
            obj = self.repository._repo.get_object(id)
 
            if path != '':
 
                obj_path = '/'.join((path, name))
 
            else:
 
                obj_path = name
 
            if obj_path not in self._stat_modes:
 
                self._stat_modes[obj_path] = stat
 
            if isinstance(obj, objects.Tree):
 
                dirnodes.append(DirNode(obj_path, changeset=self))
 
            elif isinstance(obj, objects.Blob):
 
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
 
            else:
 
                raise ChangesetError("Requested object should be Tree "
 
                                     "or Blob, is %r" % type(obj))
 
        nodes = dirnodes + filenodes
 
        for node in nodes:
 
            if not node.path in self.nodes:
 
                self.nodes[node.path] = node
 
        nodes.sort()
 
        return nodes
 

	
 
    def get_node(self, path):
 
        if isinstance(path, unicode):
 
            path = path.encode('utf-8')
 
        path = self._fix_path(path)
rhodecode/lib/vcs/backends/hg/changeset.py
Show inline comments
 
@@ -142,203 +142,201 @@ class MercurialChangeset(BaseChangeset):
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 

	
 
            cs = changeset.repository.get_changeset(prev_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _prev(cs, branch)
 

	
 
            return cs
 

	
 
        return _prev(self, branch)
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        return ''.join(self._ctx.diff(git=True,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=context))
 

	
 
    def _fix_path(self, path):
 
        """
 
        Paths are stored without trailing slash so we need to get rid off it if
 
        needed. Also mercurial keeps filenodes as str so we need to decode
 
        from unicode to str
 
        """
 
        if path.endswith('/'):
 
            path = path.rstrip('/')
 

	
 
        return safe_str(path)
 

	
 
    def _get_kind(self, path):
 
        path = self._fix_path(path)
 
        if path in self._file_paths:
 
            return NodeKind.FILE
 
        elif path in self._dir_paths:
 
            return NodeKind.DIR
 
        else:
 
            raise ChangesetError("Node does not exist at the given path %r"
 
                % (path))
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %r at "
 
                " %r" % (self.raw_id, path))
 
        return self._ctx.filectx(path)
 

	
 
    def _extract_submodules(self):
 
        """
 
        returns a dictionary with submodule information from substate file
 
        of hg repository
 
        """
 
        return self._ctx.substate
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        if 'x' in fctx.flags():
 
            return 0100755
 
        else:
 
            return 0100644
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.data()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.size()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        node = self.get_node(path)
 
        return node.history[0]
 

	
 
    def get_file_history(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 
        """
 
        fctx = self._get_filectx(path)
 
        nodes = [fctx.filectx(x).node() for x in fctx.filelog()]
 
        changesets = [self.repository.get_changeset(hex(node))
 
            for node in reversed(nodes)]
 
        return changesets
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a list of three element tuples with lineno,changeset and line
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 
        """
 

	
 
        fctx = self._get_filectx(path)
 
        annotate = []
 
        for i, annotate_data in enumerate(fctx.annotate()):
 
            ln_no = i + 1
 
            annotate.append((ln_no, self.repository\
 
                             .get_changeset(hex(annotate_data[0].node())),
 
                             annotate_data[1],))
 

	
 
        return annotate
 
            sha = hex(annotate_data[0].node())
 
            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), annotate_data[1],)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
 
            (``repo-tip.<KIND>``).
 
        :param subrepos: include subrepos in this archive.
 

	
 
        :raise ImproperArchiveTypeError: If given kind is wrong.
 
        :raise VcsError: If given stream is None
 
        """
 

	
 
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
 
        if kind not in allowed_kinds:
 
            raise ImproperArchiveTypeError('Archive kind not supported use one'
 
                'of %s', allowed_kinds)
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 

	
 
        if prefix is None:
 
            prefix = '%s-%s' % (self.repository.name, self.short_id)
 
        elif prefix.startswith('/'):
 
            raise VCSError("Prefix cannot start with leading slash")
 
        elif prefix.strip() == '':
 
            raise VCSError("Prefix cannot be empty")
 

	
 
        archival.archive(self.repository._repo, stream, self.raw_id,
 
                         kind, prefix=prefix, subrepos=subrepos)
 

	
 
        if stream.closed and hasattr(stream, 'name'):
 
            stream = open(stream.name, 'rb')
 
        elif hasattr(stream, 'mode') and 'r' not in stream.mode:
 
            stream = open(stream.name, 'rb')
 
        else:
 
            stream.seek(0)
 

	
 
    def get_nodes(self, path):
 
        """
 
        Returns combined ``DirNode`` and ``FileNode`` objects list representing
 
        state of changeset at the given ``path``. If node at the given ``path``
 
        is not instance of ``DirNode``, ChangesetError would be raised.
 
        """
 

	
 
        if self._get_kind(path) != NodeKind.DIR:
 
            raise ChangesetError("Directory does not exist for revision %r at "
 
                " %r" % (self.revision, path))
 
        path = self._fix_path(path)
 

	
 
        filenodes = [FileNode(f, changeset=self) for f in self._file_paths
 
            if os.path.dirname(f) == path]
 
        dirs = path == '' and '' or [d for d in self._dir_paths
 
            if d and posixpath.dirname(d) == path]
 
        dirnodes = [DirNode(d, changeset=self) for d in dirs
 
            if os.path.dirname(d) == path]
 

	
 
        als = self.repository.alias
 
        for k, vals in self._extract_submodules().iteritems():
 
            #vals = url,rev,type
 
            loc = vals[0]
 
            cs = vals[1]
 
            dirnodes.append(SubModuleNode(k, url=loc, changeset=cs,
 
                                          alias=als))
 
        nodes = dirnodes + filenodes
 
        # cache nodes
 
        for node in nodes:
 
            self.nodes[node.path] = node
 
        nodes.sort()
 

	
 
        return nodes
 

	
 
    def get_node(self, path):
 
        """
 
        Returns ``Node`` object from the given ``path``. If there is no node at
 
        the given ``path``, ``ChangesetError`` would be raised.
 
        """
 

	
 
        path = self._fix_path(path)
 

	
 
        if not path in self.nodes:
 
            if path in self._file_paths:
 
                node = FileNode(path, changeset=self)
 
            elif path in self._dir_paths or path in self._dir_paths:
 
                if path == '':
 
                    node = RootNode(changeset=self)
 
                else:
 
                    node = DirNode(path, changeset=self)
 
            else:
 
                raise NodeDoesNotExistError("There is no file nor directory "
0 comments (0 inline, 0 general)