kallithea Changeset - c57a37430dc9

Changeset - c57a37430dc9

Parent rev.

Child rev.

[Not reviewed]

beta

0 3 0

Marcin Kuzminski - 13 years ago 2012-11-29 19:59:47
marcin@python-works.com

Fixes #652: switch to a generator approach when doing file annotation, to prevent huge memory consumption when it is executed on large files. Thanks to Alexey Larikov for the patch.
- Added a pure dulwich method for getting file history.

3 files changed with 33 insertions and 20 deletions:

rhodecode/lib/annotate.py

rhodecode/lib/vcs/backends/git/changeset.py

rhodecode/lib/vcs/backends/hg/changeset.py

0 comments (0 inline, 0 general)

rhodecode/lib/annotate.py

➞

Show inline comments

@@ @@ -97,94 +97,95 @@ class AnnotateHtmlFormatter(HtmlFormatte @@
         dummyoutfile = StringIO.StringIO()
         lncount = 0
         for t, line in inner:
             if t:
                 lncount += 1
             dummyoutfile.write(line)
         fl = self.linenostart
         mw = len(str(lncount + fl - 1))
         sp = self.linenospecial
         st = self.linenostep
         la = self.lineanchors
         aln = self.anchorlinenos
         if sp:
             lines = []
             for i in range(fl, fl + lncount):
                 if i % st == 0:
                     if i % sp == 0:
                         if aln:
                             lines.append('<a href="#%s-%d" class="special">'
                                          '%*d</a>' %
                                          (la, i, mw, i))
                         else:
                             lines.append('<span class="special">'
                                          '%*d</span>' % (mw, i))
                     else:
                         if aln:
                             lines.append('<a href="#%s-%d">'
                                          '%*d</a>' % (la, i, mw, i))
                         else:
                             lines.append('%*d' % (mw, i))
                 else:
                     lines.append('')
             ls = '\n'.join(lines)
         else:
             lines = []
             for i in range(fl, fl + lncount):
                 if i % st == 0:
                     if aln:
                         lines.append('<a href="#%s-%d">%*d</a>' \
                                      % (la, i, mw, i))
                     else:
                         lines.append('%*d' % (mw, i))
                 else:
                     lines.append('')
             ls = '\n'.join(lines)
         annotate_changesets = [tup[1] for tup in self.filenode.annotate]
         # If pygments cropped last lines break we need do that too
         ln_cs = len(annotate_changesets)
         ln_ = len(ls.splitlines())
         if  ln_cs > ln_:
             annotate_changesets = annotate_changesets[:ln_ - ln_cs]
         annotate = ''.join((self.annotate_from_changeset(changeset)
             for changeset in annotate_changesets))
 #        annotate_changesets = [tup[1] for tup in self.filenode.annotate]
 ##        TODO: not sure what that fixes
 #        # If pygments cropped last lines break we need do that too
 #        ln_cs = len(annotate_changesets)
 #        ln_ = len(ls.splitlines())
 #        if  ln_cs > ln_:
 #            annotate_changesets = annotate_changesets[:ln_ - ln_cs]
         annotate = ''.join((self.annotate_from_changeset(el[2]())
                             for el in self.filenode.annotate))
         # in case you wonder about the seemingly redundant <div> here:
         # since the content in the other cell also is wrapped in a div,
         # some browsers in some configurations seem to mess up the formatting.
         '''
         yield 0, ('<table class="%stable">' % self.cssclass +
                   '<tr><td class="linenos"><div class="linenodiv"><pre>' +
                   ls + '</pre></div></td>' +
                   '<td class="code">')
         yield 0, dummyoutfile.getvalue()
         yield 0, '</td></tr></table>'
         '''
         headers_row = []
         if self.headers:
             headers_row = ['<tr class="annotate-header">']
             for key in self.order:
                 td = ''.join(('<td>', self.headers[key], '</td>'))
                 headers_row.append(td)
             headers_row.append('</tr>')
         body_row_start = ['<tr>']
         for key in self.order:
             if key == 'ls':
                 body_row_start.append(
                     '<td class="linenos"><div class="linenodiv"><pre>' +
                     ls + '</pre></div></td>')
             elif key == 'annotate':
                 body_row_start.append(
                     '<td class="annotate"><div class="annotatediv"><pre>' +
                     annotate + '</pre></div></td>')
             elif key == 'code':
                 body_row_start.append('<td class="code">')
         yield 0, ('<table class="%stable">' % self.cssclass +
                   ''.join(headers_row) +
                   ''.join(body_row_start)
+                  )
         yield 0, dummyoutfile.getvalue()
         yield 0, '</td></tr></table>'

rhodecode/lib/vcs/backends/git/changeset.py

➞

Show inline comments

@@ @@ -227,123 +227,137 @@ class GitChangeset(BaseChangeset): @@
     def diff(self, ignore_whitespace=True, context=3):
         """
         Returns the diff between this changeset and its first parent as a
         single string.

         When the changeset has no parents, the repository's
         ``EMPTY_CHANGESET`` is used as the base revision instead.

         :param ignore_whitespace: forwarded to ``repository.get_diff``.
         :param context: forwarded to ``repository.get_diff``.
         """
         if self.parents:
             base_rev = self.parents[0]
         else:
             base_rev = self.repository.EMPTY_CHANGESET
         diff_chunks = self.repository.get_diff(
             base_rev, self,
             ignore_whitespace=ignore_whitespace,
             context=context)
         # get_diff hands back pieces of text; glue them into one string.
         return ''.join(diff_chunks)
     def get_file_mode(self, path):
         """
         Returns stat mode of the file at the given ``path``.

         :param path: path of the file inside this changeset.
         """
         # Resolve the path first so that it has been traversed before the
         # mode table is consulted; the returned id itself is not needed.
         self._get_id_for_path(path)
         known_modes = self._stat_modes
         return known_modes[path]
     def get_file_content(self, path):
         """
         Returns content of the file at given ``path``.

         :param path: path of the file inside this changeset.
         """
         blob_id = self._get_id_for_path(path)
         # The underlying repo object is indexed by object id.
         return self.repository._repo[blob_id].as_pretty_string()
     def get_file_size(self, path):
         """
         Returns size of the file at given ``path``.

         :param path: path of the file inside this changeset.
         """
         blob_id = self._get_id_for_path(path)
         # The underlying repo object is indexed by object id.
         return self.repository._repo[blob_id].raw_length()
     def get_file_changeset(self, path):
         """
         Returns last commit of the file at the given ``path``.

         :param path: path of the file inside this changeset.
         """
         # The first entry of the node's history is used as the last commit.
         return self.get_node(path).history[0]
     def get_file_history(self, path):
         """
         Returns history of file as reversed list of ``Changeset`` objects for
         which file at given ``path`` has been modified.

         TODO: This function now uses os underlying 'git' and 'grep' commands
         which is generally not good. Should be replaced with algorithm
         iterating commits.

         :param path: path of the file inside this changeset.
         """
         # Called only for its side effect; presumably it raises when ``path``
         # is not a file in this changeset -- confirm in ``_get_filectx``.
         self._get_filectx(path)
         # NOTE(review): ``path`` is interpolated into a command string, so a
         # path containing a double quote would break the quoting.
         cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (self.id, path)
         so, _stderr = self.repository.run_git_command(cmd)
         # Every run of 40 hex digits in the output is taken as a commit id.
         shas = re.findall(r'[0-9a-fA-F]{40}', so)
         return [self.repository.get_changeset(sha) for sha in shas]
     def get_file_history_2(self, path):
         """
         Returns history of file as reversed list of ``Changeset`` objects for
         which file at given ``path`` has been modified.

         Uses dulwich's ``Walker`` rather than running a git command.

         NOTE(review): the walker is created with ``max_entries=1``, which
         looks like it caps the result at a single changeset -- confirm
         whether that limit is intended.

         :param path: path of the file inside this changeset.
         """
         self._get_filectx(path)
         from dulwich.walk import Walker
         start_points = [self.id]
         entries = Walker(self.repository._repo.object_store, start_points,
                         paths=[path], max_entries=1)
         return [self.repository.get_changeset(entry.commit.id)
                 for entry in entries]
     def get_file_annotate(self, path):
         """
         Returns a generator of four element tuples with
             lineno, sha, changeset lazy loader and line

         The lazy loader is a callable that needs no arguments and returns
         ``self.repository.get_changeset(sha)`` for that line's ``sha``.

         TODO: This function now uses os underlying 'git' command which is
         generally not good. Should be replaced with algorithm iterating
         commits.

         :param path: path of the file inside this changeset.
         """
         cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
         # -l     ==> outputs long shas (and we need all 40 characters)
         # --root ==> doesn't put '^' character for boundaries
         # -r sha ==> blames for the given revision
         so, _stderr = self.repository.run_git_command(cmd)
         # The piece after the final newline is dropped by the [:-1] slice.
         for i, blame_line in enumerate(so.split('\n')[:-1]):
             ln_no = i + 1
             # Everything up to the first space is the sha, the rest the line.
             sha, line = blame_line.split(' ', 1)
             # ``sha=sha`` freezes the current value: without it every loader
             # created by this loop would resolve the sha of whichever line
             # the generator reached last.
             yield (ln_no, sha,
                    lambda sha=sha: self.repository.get_changeset(sha),
                    line)
     def fill_archive(self, stream=None, kind='tgz', prefix=None,
                      subrepos=False):
         """
         Fills up given stream.
         :param stream: file like object.
         :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
             Default: ``tgz``.
         :param prefix: name of root directory in archive.
             Default is repository name and changeset's raw_id joined with dash
             (``repo-tip.<KIND>``).
         :param subrepos: include subrepos in this archive.
         :raise ImproperArchiveTypeError: If given kind is wrong.
         :raise VcsError: If given stream is None
         """
         allowed_kinds = settings.ARCHIVE_SPECS.keys()
         if kind not in allowed_kinds:
             raise ImproperArchiveTypeError('Archive kind not supported use one'
                 'of %s', allowed_kinds)
         if prefix is None:
             prefix = '%s-%s' % (self.repository.name, self.short_id)
         elif prefix.startswith('/'):
             raise VCSError("Prefix cannot start with leading slash")
         elif prefix.strip() == '':
             raise VCSError("Prefix cannot be empty")
         if kind == 'zip':
             frmt = 'zip'
         else:
             frmt = 'tar'
         cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
             self.raw_id)
         if kind == 'tgz':
             cmd += ' | gzip -9'
         elif kind == 'tbz2':
             cmd += ' | bzip2 -9'
         if stream is None:
             raise VCSError('You need to pass in a valid stream for filling'
                            ' with archival data')
         popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
             cwd=self.repository.path)
         buffer_size = 1024 * 8

rhodecode/lib/vcs/backends/hg/changeset.py

➞

Show inline comments

@@ @@ -190,107 +190,105 @@ class MercurialChangeset(BaseChangeset): @@
         of hg repository
         """
         return self._ctx.substate
     def get_file_mode(self, path):
         """
         Returns stat mode of the file at the given ``path``.

         The result is one of two regular-file modes: octal 100755 when the
         file context's flags contain ``'x'``, octal 100644 otherwise.

         :param path: path of the file inside this changeset.
         """
         # ``stat`` constants are used instead of ``0100755``-style literals,
         # which only parse on Python 2.
         import stat
         fctx = self._get_filectx(path)
         # Regular file, rw-r--r--  (octal 100644)
         mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                 stat.S_IRGRP | stat.S_IROTH)
         if 'x' in fctx.flags():
             # Add execute for user, group and other  (octal 100755)
             mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
         return mode
     def get_file_content(self, path):
         """
         Returns content of the file at given ``path``.

         :param path: path of the file inside this changeset.
         """
         return self._get_filectx(path).data()
     def get_file_size(self, path):
         """
         Returns size of the file at given ``path``.

         :param path: path of the file inside this changeset.
         """
         return self._get_filectx(path).size()
     def get_file_changeset(self, path):
         """
         Returns last commit of the file at the given ``path``.

         :param path: path of the file inside this changeset.
         """
         # The first entry of the node's history is used as the last commit.
         return self.get_node(path).history[0]
     def get_file_history(self, path):
         """
         Returns history of file as reversed list of ``Changeset`` objects for
         which file at given ``path`` has been modified.

         :param path: path of the file inside this changeset.
         """
         file_ctx = self._get_filectx(path)
         # Collect the node of every file revision in filelog order first ...
         node_ids = []
         for file_rev in file_ctx.filelog():
             node_ids.append(file_ctx.filectx(file_rev).node())
         # ... then look the changesets up going backwards through that list.
         node_ids.reverse()
         return [self.repository.get_changeset(hex(node_id))
                 for node_id in node_ids]
     def get_file_annotate(self, path):
         """
         Returns a generator of four element tuples with
             lineno, sha, changeset lazy loader and line

         The lazy loader is a callable that needs no arguments and returns
         ``self.repository.get_changeset(sha)`` for that line's ``sha``.

         :param path: path of the file inside this changeset.
         """
         fctx = self._get_filectx(path)
         for i, annotate_data in enumerate(fctx.annotate()):
             ln_no = i + 1
             # annotate_data[0] supplies the node, annotate_data[1] the line.
             sha = hex(annotate_data[0].node())
             # ``sha=sha`` freezes the current value: without it every loader
             # created by this loop would resolve the sha of whichever line
             # the generator reached last.
             yield (ln_no, sha,
                    lambda sha=sha: self.repository.get_changeset(sha),
                    annotate_data[1],)
     def fill_archive(self, stream=None, kind='tgz', prefix=None,
                      subrepos=False):
         """
         Fills up given stream.

         :param stream: file like object.
         :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
             Default: ``tgz``.
         :param prefix: name of root directory in archive.
             Default is repository name and changeset's short id joined with
             dash.
         :param subrepos: include subrepos in this archive.

         :raise ImproperArchiveTypeError: If given kind is wrong.
         :raise VCSError: If given stream is None, or if ``prefix`` starts
             with a slash or is blank.
         """
         allowed_kinds = settings.ARCHIVE_SPECS.keys()
         if kind not in allowed_kinds:
             # Format the kinds into the message; previously they were passed
             # as a second exception argument and the text read "use oneof".
             raise ImproperArchiveTypeError(
                 'Archive kind not supported use one of %s'
                 % (list(allowed_kinds),))
         if stream is None:
             raise VCSError('You need to pass in a valid stream for filling'
                            ' with archival data')
         if prefix is None:
             prefix = '%s-%s' % (self.repository.name, self.short_id)
         elif prefix.startswith('/'):
             raise VCSError("Prefix cannot start with leading slash")
         elif prefix.strip() == '':
             raise VCSError("Prefix cannot be empty")
         archival.archive(self.repository._repo, stream, self.raw_id,
                          kind, prefix=prefix, subrepos=subrepos)
         # Rewind the caller's stream unless it is a closed named stream or
         # was opened without 'r' in its mode. Those two cases used to
         # re-open ``stream.name`` into a local that was thrown away at once,
         # which had no effect for the caller, so nothing is done for them.
         closed_named = stream.closed and hasattr(stream, 'name')
         not_readable = hasattr(stream, 'mode') and 'r' not in stream.mode
         if not (closed_named or not_readable):
             stream.seek(0)
     def get_nodes(self, path):
         """
         Returns combined ``DirNode`` and ``FileNode`` objects list representing
         state of changeset at the given ``path``. If node at the given ``path``

0 comments (0 inline, 0 general)