kallithea Changeset - c57a37430dc9

Changeset - c57a37430dc9

Parent rev.

Child rev.

[Not reviewed]

beta

0 3 0

Marcin Kuzminski - 13 years ago 2012-11-29 19:59:47
marcin@python-works.com

Fixes #652: switch to a generator approach when doing file annotation to prevent huge memory consumption when executed on large files. Thanks to Alexey Larikov for the patch.
- added pure dulwich method for getting file history.

3 files changed with 33 insertions and 20 deletions:

rhodecode/lib/annotate.py

rhodecode/lib/vcs/backends/git/changeset.py

rhodecode/lib/vcs/backends/hg/changeset.py

0 comments (0 inline, 0 general)

rhodecode/lib/annotate.py

➞

Show inline comments

@@ @@ -49,142 +49,143 @@ class AnnotateHtmlFormatter(HtmlFormatte @@
             order=None, **options):
         """
         If ``annotate_from_changeset_func`` is passed it should be a function
         which returns string from the given changeset. For example, we may pass
         following function as ``annotate_from_changeset_func``::
             def changeset_to_anchor(changeset):
                 return '<a href="/changesets/%s/">%s</a>\n' %\
                        (changeset.id, changeset.id)
         :param annotate_from_changeset_func: see above
         :param order: (default: ``['ls', 'annotate', 'code']``); order of
           columns;
         :param options: standard pygment's HtmlFormatter options, there is
           extra option tough, ``headers``. For instance we can pass::
              formatter = AnnotateHtmlFormatter(filenode, headers={
                 'ls': '#',
                 'annotate': 'Annotate',
                 'code': 'Code',
              })
         """
         super(AnnotateHtmlFormatter, self).__init__(**options)
         self.annotate_from_changeset_func = annotate_from_changeset_func
         self.order = order or ('ls', 'annotate', 'code')
         headers = options.pop('headers', None)
         if headers and not ('ls' in headers and 'annotate' in headers and
             'code' in headers):
             raise ValueError("If headers option dict is specified it must "
                 "all 'ls', 'annotate' and 'code' keys")
         self.headers = headers
         if isinstance(filenode, FileNode):
             self.filenode = filenode
         else:
             raise VCSError("This formatter expect FileNode parameter, not %r"
                 % type(filenode))
     def annotate_from_changeset(self, changeset):
         """
         Builds the annotate-column text for one annotated line.

         Delegates to ``annotate_from_changeset_func`` when one was supplied,
         otherwise falls back to the changeset id followed by a newline.
         """
         renderer = self.annotate_from_changeset_func
         if not renderer:
             return ''.join((changeset.id, '\n'))
         return renderer(changeset)
     def _wrap_tablelinenos(self, inner):
         dummyoutfile = StringIO.StringIO()
         lncount = 0
         for t, line in inner:
             if t:
                 lncount += 1
             dummyoutfile.write(line)
         fl = self.linenostart
         mw = len(str(lncount + fl - 1))
         sp = self.linenospecial
         st = self.linenostep
         la = self.lineanchors
         aln = self.anchorlinenos
         if sp:
             lines = []
             for i in range(fl, fl + lncount):
                 if i % st == 0:
                     if i % sp == 0:
                         if aln:
                             lines.append('<a href="#%s-%d" class="special">'
                                          '%*d</a>' %
                                          (la, i, mw, i))
                         else:
                             lines.append('<span class="special">'
                                          '%*d</span>' % (mw, i))
                     else:
                         if aln:
                             lines.append('<a href="#%s-%d">'
                                          '%*d</a>' % (la, i, mw, i))
                         else:
                             lines.append('%*d' % (mw, i))
                 else:
                     lines.append('')
             ls = '\n'.join(lines)
         else:
             lines = []
             for i in range(fl, fl + lncount):
                 if i % st == 0:
                     if aln:
                         lines.append('<a href="#%s-%d">%*d</a>' \
                                      % (la, i, mw, i))
                     else:
                         lines.append('%*d' % (mw, i))
                 else:
                     lines.append('')
             ls = '\n'.join(lines)
         annotate_changesets = [tup[1] for tup in self.filenode.annotate]
         # If pygments cropped last lines break we need do that too
         ln_cs = len(annotate_changesets)
         ln_ = len(ls.splitlines())
         if  ln_cs > ln_:
             annotate_changesets = annotate_changesets[:ln_ - ln_cs]
         annotate = ''.join((self.annotate_from_changeset(changeset)
             for changeset in annotate_changesets))
 #        annotate_changesets = [tup[1] for tup in self.filenode.annotate]
 ##        TODO: not sure what that fixes
 #        # If pygments cropped last lines break we need do that too
 #        ln_cs = len(annotate_changesets)
 #        ln_ = len(ls.splitlines())
 #        if  ln_cs > ln_:
 #            annotate_changesets = annotate_changesets[:ln_ - ln_cs]
         annotate = ''.join((self.annotate_from_changeset(el[2]())
                             for el in self.filenode.annotate))
         # in case you wonder about the seemingly redundant <div> here:
         # since the content in the other cell also is wrapped in a div,
         # some browsers in some configurations seem to mess up the formatting.
         '''
         yield 0, ('<table class="%stable">' % self.cssclass +
                   '<tr><td class="linenos"><div class="linenodiv"><pre>' +
                   ls + '</pre></div></td>' +
                   '<td class="code">')
         yield 0, dummyoutfile.getvalue()
         yield 0, '</td></tr></table>'
         '''
         headers_row = []
         if self.headers:
             headers_row = ['<tr class="annotate-header">']
             for key in self.order:
                 td = ''.join(('<td>', self.headers[key], '</td>'))
                 headers_row.append(td)
             headers_row.append('</tr>')
         body_row_start = ['<tr>']
         for key in self.order:
             if key == 'ls':
                 body_row_start.append(
                     '<td class="linenos"><div class="linenodiv"><pre>' +
                     ls + '</pre></div></td>')
             elif key == 'annotate':
                 body_row_start.append(
                     '<td class="annotate"><div class="annotatediv"><pre>' +
                     annotate + '</pre></div></td>')
             elif key == 'code':
                 body_row_start.append('<td class="code">')
         yield 0, ('<table class="%stable">' % self.cssclass +
                   ''.join(headers_row) +
                   ''.join(body_row_start)
+                  )
         yield 0, dummyoutfile.getvalue()
         yield 0, '</td></tr></table>'

rhodecode/lib/vcs/backends/git/changeset.py

➞

Show inline comments

@@ @@ -179,219 +179,233 @@ class GitChangeset(BaseChangeset): @@
         """
         return [self.repository.get_changeset(parent)
                 for parent in self._commit.parents]
     def next(self, branch=None):
         """
         Returns the changeset following this one in the repository's
         revision list.

         :param branch: if given, changesets are skipped until one whose
           ``branch`` equals this value is found.
         :raise VCSError: if ``branch`` is given and differs from this
           changeset's branch.
         :raise ChangesetDoesNotExistError: if the revision list is exhausted
           before a matching changeset is found.
         """
         if branch and self.branch != branch:
             raise VCSError('Branch option used on changeset not belonging '
                            'to that branch')

         # Walk forward iteratively; recursing once per skipped changeset
         # could exceed the interpreter recursion limit on long runs of
         # changesets that belong to other branches.
         cs = self
         while True:
             try:
                 next_rev = cs.repository.revisions[cs.revision + 1]
             except IndexError:
                 raise ChangesetDoesNotExistError
             cs = cs.repository.get_changeset(next_rev)
             if not branch or branch == cs.branch:
                 return cs
     def prev(self, branch=None):
         """
         Returns the changeset preceding this one in the repository's
         revision list.

         :param branch: if given, changesets are skipped until one whose
           ``branch`` equals this value is found.
         :raise VCSError: if ``branch`` is given and differs from this
           changeset's branch.
         :raise ChangesetDoesNotExistError: if the start of the revision list
           is reached before a matching changeset is found.
         """
         if branch and self.branch != branch:
             raise VCSError('Branch option used on changeset not belonging '
                            'to that branch')

         # Walk backwards iteratively; recursing once per skipped changeset
         # could exceed the interpreter recursion limit on long runs of
         # changesets that belong to other branches.
         cs = self
         while True:
             prev_ = cs.revision - 1
             # a negative index would silently wrap around to the end
             if prev_ < 0:
                 raise ChangesetDoesNotExistError
             try:
                 prev_rev = cs.repository.revisions[prev_]
             except IndexError:
                 raise ChangesetDoesNotExistError
             cs = cs.repository.get_changeset(prev_rev)
             if not branch or branch == cs.branch:
                 return cs
     def diff(self, ignore_whitespace=True, context=3):
         """
         Returns the diff between this changeset and its first parent (or the
         repository's ``EMPTY_CHANGESET`` when there are no parents) as a
         single string.
         """
         if self.parents:
             base = self.parents[0]
         else:
             base = self.repository.EMPTY_CHANGESET
         chunks = self.repository.get_diff(base, self,
                                           ignore_whitespace=ignore_whitespace,
                                           context=context)
         return ''.join(chunks)
     def get_file_mode(self, path):
         """
         Returns stat mode of the file at the given ``path``.
         """
         # resolving the id first ensures the path has been traversed before
         # the mode is looked up
         self._get_id_for_path(path)
         stat_modes = self._stat_modes
         return stat_modes[path]
     def get_file_content(self, path):
         """
         Returns content of the file at given ``path``.
         """
         blob_id = self._get_id_for_path(path)
         return self.repository._repo[blob_id].as_pretty_string()
     def get_file_size(self, path):
         """
         Returns size of the file at given ``path``.
         """
         blob_id = self._get_id_for_path(path)
         return self.repository._repo[blob_id].raw_length()
     def get_file_changeset(self, path):
         """
         Returns last commit of the file at the given ``path``, i.e. the
         first entry of the node's ``history``.
         """
         return self.get_node(path).history[0]
     def get_file_history(self, path):
         """
         Returns history of file as reversed list of ``Changeset`` objects for
         which file at given ``path`` has been modified.

         TODO: This function now uses os underlying 'git' command which is
         generally not good. Should be replaced with algorithm iterating
         commits.
         """
         # called only for its validation of ``path``; the result is unused
         self._get_filectx(path)
         # NOTE(review): ``path`` is interpolated between double quotes;
         # whether run_git_command passes this through a shell is not visible
         # here - confirm paths containing '"' are handled safely.
         cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (self.id, path)
         so, se = self.repository.run_git_command(cmd)
         # every 40 character hex string in the output is taken as a commit id
         shas = re.findall(r'[0-9a-fA-F]{40}', so)
         return [self.repository.get_changeset(sha) for sha in shas]
     def get_file_history_2(self, path):
         """
         Returns history of file as reversed list of ``Changeset`` objects for
         which file at given ``path`` has been modified.

         Pure dulwich variant: walks the object store starting from this
         changeset instead of running a git command.
         """
         # called only for its validation of ``path``; the result is unused
         self._get_filectx(path)
         from dulwich.walk import Walker
         # NOTE(review): max_entries=1 presumably limits the walk to a single
         # entry - confirm whether the full history was intended here.
         walker = Walker(self.repository._repo.object_store, [self.id],
                         paths=[path], max_entries=1)
         return [self.repository.get_changeset(entry.commit.id)
                 for entry in walker]
     def get_file_annotate(self, path):
         """
         Returns a generator of four element tuples with
             lineno, sha, changeset lazy loader and line

         The lazy loader is a zero-argument callable returning the changeset
         for that line; nothing is fetched until it is called.

         TODO: This function now uses os underlying 'git' command which is
         generally not good. Should be replaced with algorithm iterating
         commits.
         """
         cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
         # -l     ==> outputs long shas (and we need all 40 characters)
         # --root ==> doesn't put '^' character for boundaries
         # -r sha ==> blames for the given revision
         so, se = self.repository.run_git_command(cmd)
         # the output ends with a newline, so the last split element is dropped
         for i, blame_line in enumerate(so.split('\n')[:-1]):
             ln_no = i + 1
             sha, line = blame_line.split(' ', 1)
             # ``sha`` is bound as a default argument: a plain closure would
             # look the name up when called, so loaders kept past the current
             # iteration would all resolve to the last line's changeset.
             yield (ln_no, sha,
                    lambda sha=sha: self.repository.get_changeset(sha), line)
     def fill_archive(self, stream=None, kind='tgz', prefix=None,
                      subrepos=False):
         """
         Fills up given stream.

         :param stream: file like object.
         :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
             Default: ``tgz``.
         :param prefix: name of root directory in archive.
             Default is repository name and changeset's raw_id joined with dash
             (``repo-tip.<KIND>``).
         :param subrepos: include subrepos in this archive.

         :raise ImproperArchiveTypeError: If given kind is wrong.
         :raise VcsError: If given stream is None, or if prefix is empty or
             starts with a slash.
         """
         allowed_kinds = settings.ARCHIVE_SPECS.keys()
         if kind not in allowed_kinds:
             # interpolate the kinds into the message instead of passing them
             # as a second exception argument
             raise ImproperArchiveTypeError('Archive kind not supported use one '
                 'of %s' % ', '.join(allowed_kinds))

         if prefix is None:
             prefix = '%s-%s' % (self.repository.name, self.short_id)
         elif prefix.startswith('/'):
             raise VCSError("Prefix cannot start with leading slash")
         elif prefix.strip() == '':
             raise VCSError("Prefix cannot be empty")

         # git can only produce zip or tar; compression is piped afterwards
         frmt = 'zip' if kind == 'zip' else 'tar'
         # NOTE(review): the command runs with shell=True and ``prefix`` is
         # interpolated unquoted - callers must not pass untrusted prefixes
         # until this is quoted or rewritten without a shell.
         cmd = 'git archive --format=%s --prefix=%s/ %s' % (frmt, prefix,
             self.raw_id)
         if kind == 'tgz':
             cmd += ' | gzip -9'
         elif kind == 'tbz2':
             cmd += ' | bzip2 -9'

         if stream is None:
             raise VCSError('You need to pass in a valid stream for filling'
                            ' with archival data')
         popen = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,
             cwd=self.repository.path)

         buffer_size = 1024 * 8
         chunk = popen.stdout.read(buffer_size)
         while chunk:
             stream.write(chunk)
             chunk = popen.stdout.read(buffer_size)
         # Make sure all descriptors would be read
         popen.communicate()
     def get_nodes(self, path):
         """
         Returns sorted list of nodes found directly under the directory at
         the given ``path``: submodule and directory nodes plus file nodes.

         Stat modes of the visited entries are recorded in ``_stat_modes``
         and nodes not seen before are stored in ``self.nodes``.

         :raise ChangesetError: if ``path`` is not a directory, or an entry
           is neither a tree nor a blob.
         """
         if self._get_kind(path) != NodeKind.DIR:
             raise ChangesetError("Directory does not exist for revision %r at "
                 " %r" % (self.revision, path))
         path = self._fix_path(path)
         tree = self.repository._repo[self._get_id_for_path(path)]
         alias = self.repository.alias
         dirnodes, filenodes = [], []
         for name, stat, obj_id in tree.iteritems():
             if objects.S_ISGITLINK(stat):
                 # submodules are listed together with the directories
                 dirnodes.append(SubModuleNode(name, url=None,
                                               changeset=obj_id, alias=alias))
                 continue

             obj = self.repository._repo.get_object(obj_id)
             obj_path = name if path == '' else '/'.join((path, name))
             # keep an already recorded mode, only fill in missing ones
             self._stat_modes.setdefault(obj_path, stat)
             if isinstance(obj, objects.Tree):
                 dirnodes.append(DirNode(obj_path, changeset=self))
             elif isinstance(obj, objects.Blob):
                 filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
             else:
                 raise ChangesetError("Requested object should be Tree "
                                      "or Blob, is %r" % type(obj))

         nodes = dirnodes + filenodes
         for node in nodes:
             if node.path not in self.nodes:
                 self.nodes[node.path] = node
         nodes.sort()
         return nodes
     def get_node(self, path):
         if isinstance(path, unicode):
             path = path.encode('utf-8')
         path = self._fix_path(path)

rhodecode/lib/vcs/backends/hg/changeset.py

➞

Show inline comments

@@ @@ -142,203 +142,201 @@ class MercurialChangeset(BaseChangeset): @@
             except IndexError:
                 raise ChangesetDoesNotExistError
             cs = changeset.repository.get_changeset(prev_rev)
             if branch and branch != cs.branch:
                 return _prev(cs, branch)
             return cs
         return _prev(self, branch)
     def diff(self, ignore_whitespace=True, context=3):
         """
         Returns the git-style diff produced by the underlying context,
         joined into a single string.
         """
         chunks = self._ctx.diff(git=True,
                                 ignore_whitespace=ignore_whitespace,
                                 context=context)
         return ''.join(chunks)
     def _fix_path(self, path):
         """
         Paths are stored without a trailing slash, so any trailing slashes
         are removed. Mercurial also keeps file nodes as ``str``, so the
         result is passed through ``safe_str``.
         """
         # rstrip leaves paths without a trailing slash untouched
         return safe_str(path.rstrip('/'))
     def _get_kind(self, path):
         """
         Returns ``NodeKind.FILE`` or ``NodeKind.DIR`` for the given ``path``.

         :raise ChangesetError: if the path is neither a known file nor a
           known directory of this changeset.
         """
         path = self._fix_path(path)
         if path in self._file_paths:
             return NodeKind.FILE
         if path in self._dir_paths:
             return NodeKind.DIR
         raise ChangesetError("Node does not exist at the given path %r"
             % (path))
     def _get_filectx(self, path):
         """
         Returns the file context of the underlying changeset context for
         the given ``path``.

         :raise ChangesetError: if the path does not point to a file.
         """
         fixed_path = self._fix_path(path)
         kind = self._get_kind(fixed_path)
         if kind != NodeKind.FILE:
             raise ChangesetError("File does not exist for revision %r at "
                 " %r" % (self.raw_id, fixed_path))
         return self._ctx.filectx(fixed_path)
     def _extract_submodules(self):
         """
         returns a dictionary with submodule information from substate file
         of hg repository
         """
         return self._ctx.substate
     def get_file_mode(self, path):
         """
         Returns stat mode of the file at the given ``path``.

         The mode is ``0o100755`` when the file context's flags contain
         ``'x'`` and ``0o100644`` otherwise.
         """
         fctx = self._get_filectx(path)
         # ``0o`` literals have the same value as the old ``0100755`` style
         # and are accepted by Python 2.6+ as well as Python 3
         if 'x' in fctx.flags():
             return 0o100755
         return 0o100644
     def get_file_content(self, path):
         """
         Returns content of the file at given ``path``, as provided by the
         file context's ``data()``.
         """
         return self._get_filectx(path).data()
     def get_file_size(self, path):
         """
         Returns size of the file at given ``path``, as reported by the file
         context's ``size()``.
         """
         return self._get_filectx(path).size()
     def get_file_changeset(self, path):
         """
         Returns last commit of the file at the given ``path``, i.e. the
         first entry of the node's ``history``.
         """
         return self.get_node(path).history[0]
     def get_file_history(self, path):
         """
         Returns history of file as reversed list of ``Changeset`` objects for
         which file at given ``path`` has been modified.
         """
         fctx = self._get_filectx(path)
         # collect the nodes in filelog order first, then resolve them to
         # changesets starting from the last one
         file_nodes = []
         for filerev in fctx.filelog():
             file_nodes.append(fctx.filectx(filerev).node())
         file_nodes.reverse()
         return [self.repository.get_changeset(hex(file_node))
                 for file_node in file_nodes]
     def get_file_annotate(self, path):
         """
         Returns a generator of four element tuples with
             lineno, sha, changeset lazy loader and line

         The lazy loader is a zero-argument callable returning the changeset
         for that line; nothing is fetched until it is called.
         """
         fctx = self._get_filectx(path)
         for ln_no, annotate_data in enumerate(fctx.annotate(), 1):
             sha = hex(annotate_data[0].node())
             # ``sha`` is bound as a default argument: a plain closure would
             # look the name up when called, so loaders kept past the current
             # iteration would all resolve to the last line's changeset.
             yield (ln_no, sha,
                    lambda sha=sha: self.repository.get_changeset(sha),
                    annotate_data[1],)
     def fill_archive(self, stream=None, kind='tgz', prefix=None,
                      subrepos=False):
         """
         Fills up given stream.

         :param stream: file like object.
         :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
             Default: ``tgz``.
         :param prefix: name of root directory in archive.
             Default is repository name and changeset's raw_id joined with dash
             (``repo-tip.<KIND>``).
         :param subrepos: include subrepos in this archive.

         :raise ImproperArchiveTypeError: If given kind is wrong.
         :raise VcsError: If given stream is None, or if prefix is empty or
             starts with a slash.
         """
         allowed_kinds = settings.ARCHIVE_SPECS.keys()
         if kind not in allowed_kinds:
             # interpolate the kinds into the message instead of passing them
             # as a second exception argument
             raise ImproperArchiveTypeError('Archive kind not supported use one '
                 'of %s' % ', '.join(allowed_kinds))

         if stream is None:
             raise VCSError('You need to pass in a valid stream for filling'
                            ' with archival data')

         if prefix is None:
             prefix = '%s-%s' % (self.repository.name, self.short_id)
         elif prefix.startswith('/'):
             raise VCSError("Prefix cannot start with leading slash")
         elif prefix.strip() == '':
             raise VCSError("Prefix cannot be empty")

         archival.archive(self.repository._repo, stream, self.raw_id,
                          kind, prefix=prefix, subrepos=subrepos)

         # Rewind the stream only when it is still usable for reading.
         # Re-opening a closed or write-only file here would only bind a new
         # handle to the local name: the caller never sees it and nothing
         # closes it, so no handle is opened in those cases.
         closed_file = stream.closed and hasattr(stream, 'name')
         write_only = hasattr(stream, 'mode') and 'r' not in stream.mode
         if not (closed_file or write_only):
             stream.seek(0)
     def get_nodes(self, path):
         """
         Returns combined ``DirNode`` and ``FileNode`` objects list representing
         state of changeset at the given ``path``. If node at the given ``path``
         is not instance of ``DirNode``, ChangesetError would be raised.

         Submodules found in the substate are appended to the directory nodes
         and every returned node is cached in ``self.nodes``.
         """
         if self._get_kind(path) != NodeKind.DIR:
             raise ChangesetError("Directory does not exist for revision %r at "
                 " %r" % (self.revision, path))
         path = self._fix_path(path)

         filenodes = [FileNode(f, changeset=self) for f in self._file_paths
             if os.path.dirname(f) == path]
         # The previous ``path == '' and '' or [...]`` form always fell through
         # to the list, because its first alternative ('') is falsy.
         dirs = [d for d in self._dir_paths
             if d and posixpath.dirname(d) == path]
         dirnodes = [DirNode(d, changeset=self) for d in dirs
             if os.path.dirname(d) == path]

         als = self.repository.alias
         for k, vals in self._extract_submodules().iteritems():
             # vals = url,rev,type
             loc = vals[0]
             cs = vals[1]
             dirnodes.append(SubModuleNode(k, url=loc, changeset=cs,
                                           alias=als))
         nodes = dirnodes + filenodes
         # cache nodes
         for node in nodes:
             self.nodes[node.path] = node
         nodes.sort()
         return nodes
     def get_node(self, path):
         """
         Returns ``Node`` object from the given ``path``. If there is no node at
         the given ``path``, ``ChangesetError`` would be raised.
         """
         path = self._fix_path(path)
         if not path in self.nodes:
             if path in self._file_paths:
                 node = FileNode(path, changeset=self)
             elif path in self._dir_paths or path in self._dir_paths:
                 if path == '':
                     node = RootNode(changeset=self)
                 else:
                     node = DirNode(path, changeset=self)
             else:
                 raise NodeDoesNotExistError("There is no file nor directory "

0 comments (0 inline, 0 general)