Changeset - 58905069da21
[Not reviewed]
beta
0 4 0
Marcin Kuzminski - 13 years ago 2013-03-07 13:46:24
marcin@python-works.com
Speed up of last_changeset extraction in VCS, in edge cases for git we can get 10x speed improvement by limiting the history extraction if we only need last changeset
4 files changed with 28 insertions and 18 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/vcs/backends/base.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.base
 
    ~~~~~~~~~~~~~~~~~
 

	
 
    Base for all available scm backends
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 

	
 
from itertools import chain
 
from rhodecode.lib.vcs.utils import author_name, author_email
 
from rhodecode.lib.vcs.utils.lazy import LazyProperty
 
from rhodecode.lib.vcs.utils.helpers import get_dict_for_attrs
 
from rhodecode.lib.vcs.conf import settings
 

	
 
from rhodecode.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, \
 
    NodeAlreadyAddedError, NodeAlreadyChangedError, NodeAlreadyExistsError, \
 
    NodeAlreadyRemovedError, NodeDoesNotExistError, NodeNotChangedError, \
 
    RepositoryError
 
import datetime
 

	
 

	
 
class BaseRepository(object):
 
    """
 
    Base Repository for final backends
 

	
 
    **Attributes**
 

	
 
        ``DEFAULT_BRANCH_NAME``
 
            name of default branch (i.e. "trunk" for svn, "master" for git etc.
 

	
 
        ``scm``
 
            alias of scm, i.e. *git* or *hg*
 

	
 
        ``repo``
 
            object from external api
 

	
 
        ``revisions``
 
            list of all available revisions' ids, in ascending order
 

	
 
        ``changesets``
 
            storage dict caching returned changesets
 

	
 
        ``path``
 
            absolute path to the repository
 

	
 
        ``branches``
 
            branches as list of changesets
 

	
 
        ``tags``
 
            tags as list of changesets
 
    """
 
    scm = None
 
    DEFAULT_BRANCH_NAME = None
 
    EMPTY_CHANGESET = '0' * 40
 

	
 
    def __init__(self, repo_path, create=False, **kwargs):
 
        """
 
        Initializes repository. Raises RepositoryError if repository could
 
        not be find at the given ``repo_path`` or directory at ``repo_path``
 
        exists and ``create`` is set to True.
 

	
 
        :param repo_path: local path of the repository
 
        :param create=False: if set to True, would try to craete repository.
 
        :param src_url=None: if set, should be proper url from which repository
 
          would be cloned; requires ``create`` parameter to be set to True -
 
          raises RepositoryError if src_url is set and create evaluates to
 
          False
 
@@ -935,84 +936,84 @@ class BaseInMemoryChangeset(object):
 
        if self.removed and not parents:
 
            raise NodeDoesNotExistError("Cannot remove node at %s as there "
 
                "were no parents specified" % self.removed[0].path)
 
        really_removed = set()
 
        for p in parents:
 
            for node in self.removed:
 
                try:
 
                    p.get_node(node.path)
 
                    really_removed.add(node)
 
                except ChangesetError:
 
                    pass
 
        not_removed = set(self.removed) - really_removed
 
        if not_removed:
 
            raise NodeDoesNotExistError("Cannot remove node at %s from "
 
                "following parents: %s" % (not_removed[0], parents))
 

	
 
    def commit(self, message, author, parents=None, branch=None, date=None,
 
            **kwargs):
 
        """
 
        Performs in-memory commit (doesn't check workdir in any way) and
 
        returns newly created ``Changeset``. Updates repository's
 
        ``revisions``.
 

	
 
        .. note::
 
            While overriding this method each backend's should call
 
            ``self.check_integrity(parents)`` in the first place.
 

	
 
        :param message: message of the commit
 
        :param author: full username, i.e. "Joe Doe <joe.doe@example.com>"
 
        :param parents: single parent or sequence of parents from which commit
 
          would be derieved
 
        :param date: ``datetime.datetime`` instance. Defaults to
 
          ``datetime.datetime.now()``.
 
        :param branch: branch name, as string. If none given, default backend's
 
          branch would be used.
 

	
 
        :raises ``CommitError``: if any error occurs while committing
 
        """
 
        raise NotImplementedError
 

	
 

	
 
class EmptyChangeset(BaseChangeset):
 
    """
 
    An dummy empty changeset. It's possible to pass hash when creating
 
    an EmptyChangeset
 
    """
 

	
 
    def __init__(self, cs='0' * 40, repo=None, requested_revision=None,
 
                 alias=None, revision=-1, message='', author='', date=''):
 
                 alias=None, revision=-1, message='', author='', date=None):
 
        self._empty_cs = cs
 
        self.revision = revision
 
        self.message = message
 
        self.author = author
 
        self.date = date
 
        self.date = date or datetime.datetime.fromtimestamp(0)
 
        self.repository = repo
 
        self.requested_revision = requested_revision
 
        self.alias = alias
 

	
 
    @LazyProperty
 
    def raw_id(self):
 
        """
 
        Returns raw string identifying this changeset, useful for web
 
        representation.
 
        """
 

	
 
        return self._empty_cs
 

	
 
    @LazyProperty
 
    def branch(self):
 
        from rhodecode.lib.vcs.backends import get_backend
 
        return get_backend(self.alias).DEFAULT_BRANCH_NAME
 

	
 
    @LazyProperty
 
    def short_id(self):
 
        return self.raw_id[:12]
 

	
 
    def get_file_changeset(self, path):
 
        return self
 

	
 
    def get_file_content(self, path):
 
        return u''
 

	
 
    def get_file_size(self, path):
 
        return 0
rhodecode/lib/vcs/backends/git/changeset.py
Show inline comments
 
import re
 
from itertools import chain
 
from dulwich import objects
 
from subprocess import Popen, PIPE
 
import rhodecode
 
from rhodecode.lib.vcs.conf import settings
 
from rhodecode.lib.vcs.exceptions import RepositoryError
 
from rhodecode.lib.vcs.exceptions import ChangesetError
 
from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import VCSError
 
from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import ImproperArchiveTypeError
 
from rhodecode.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 
from rhodecode.lib.vcs.nodes import FileNode, DirNode, NodeKind, RootNode, \
 
    RemovedFileNode, SubModuleNode, ChangedFileNodesGenerator,\
 
    AddedFileNodesGenerator, RemovedFileNodesGenerator
 
from rhodecode.lib.vcs.utils import safe_unicode
 
from rhodecode.lib.vcs.utils import date_fromtimestamp
 
from rhodecode.lib.vcs.utils.lazy import LazyProperty
 
from rhodecode.lib.utils2 import safe_int
 

	
 

	
 
class GitChangeset(BaseChangeset):
 
    """
 
    Represents state of the repository at single revision.
 
    """
 

	
 
    def __init__(self, repository, revision):
 
        self._stat_modes = {}
 
        self.repository = repository
 

	
 
        try:
 
            commit = self.repository._repo.get_object(revision)
 
            if isinstance(commit, objects.Tag):
 
                revision = commit.object[1]
 
                commit = self.repository._repo.get_object(commit.object[1])
 
        except KeyError:
 
            raise RepositoryError("Cannot get object with id %s" % revision)
 
        self.raw_id = revision
 
        self.id = self.raw_id
 
        self.short_id = self.raw_id[:12]
 
        self._commit = commit
 

	
 
        self._tree_id = commit.tree
 
        self._committer_property = 'committer'
 
        self._author_property = 'author'
 
        self._date_property = 'commit_time'
 
        self._date_tz_property = 'commit_timezone'
 
        self.revision = repository.revisions.index(revision)
 

	
 
        self.message = safe_unicode(commit.message)
 

	
 
        self.nodes = {}
 
        self._paths = {}
 

	
 
    @LazyProperty
 
    def committer(self):
 
        return safe_unicode(getattr(self._commit, self._committer_property))
 

	
 
    @LazyProperty
 
    def author(self):
 
        return safe_unicode(getattr(self._commit, self._author_property))
 

	
 
    @LazyProperty
 
    def date(self):
 
        return date_fromtimestamp(getattr(self._commit, self._date_property),
 
                                  getattr(self._commit, self._date_tz_property))
 

	
 
@@ -230,113 +231,117 @@ class GitChangeset(BaseChangeset):
 
                prev_rev = changeset.repository.revisions[prev_]
 
            except IndexError:
 
                raise ChangesetDoesNotExistError
 

	
 
            cs = changeset.repository.get_changeset(prev_rev)
 

	
 
            if branch and branch != cs.branch:
 
                return _prev(cs, branch)
 

	
 
            return cs
 

	
 
        return _prev(self, branch)
 

	
 
    def diff(self, ignore_whitespace=True, context=3):
 
        rev1 = self.parents[0] if self.parents else self.repository.EMPTY_CHANGESET
 
        rev2 = self
 
        return ''.join(self.repository.get_diff(rev1, rev2,
 
                                    ignore_whitespace=ignore_whitespace,
 
                                    context=context))
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        # ensure path is traversed
 
        self._get_id_for_path(path)
 
        return self._stat_modes[path]
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.as_pretty_string()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        id = self._get_id_for_path(path)
 
        blob = self.repository._repo[id]
 
        return blob.raw_length()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        node = self.get_node(path)
 
        return node.history[0]
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path):
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        TODO: This function now uses os underlying 'git' and 'grep' commands
 
        which is generally not good. Should be replaced with algorithm
 
        iterating commits.
 
        """
 

	
 
        self._get_filectx(path)
 

	
 
        cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
 
                  self.id, path
 
               )
 
        if limit:
 
            cmd = 'log -n %s --pretty="format: %%H" -s -p %s -- "%s"' % (
 
                      safe_int(limit, 0), self.id, path
 
                   )
 
        else:
 
            cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
 
                      self.id, path
 
                   )
 
        so, se = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(id) for id in ids]
 

	
 
    def get_file_history_2(self, path):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 

	
 
        """
 
        self._get_filectx(path)
 
        from dulwich.walk import Walker
 
        include = [self.id]
 
        walker = Walker(self.repository._repo.object_store, include,
 
                        paths=[path], max_entries=1)
 
        return [self.repository.get_changeset(sha)
 
                for sha in (x.commit.id for x in walker)]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 

	
 
        TODO: This function now uses os underlying 'git' command which is
 
        generally not good. Should be replaced with algorithm iterating
 
        commits.
 
        """
 
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for bounderies
 
        # -r sha ==> blames for the given revision
 
        so, se = self.repository.run_git_command(cmd)
 

	
 
        for i, blame_line in enumerate(so.split('\n')[:-1]):
 
            ln_no = i + 1
 
            sha, line = re.split(r' ', blame_line, 1)
 
            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), line)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
rhodecode/lib/vcs/backends/hg/changeset.py
Show inline comments
 
@@ -174,109 +174,113 @@ class MercurialChangeset(BaseChangeset):
 
        elif path in self._dir_paths:
 
            return NodeKind.DIR
 
        else:
 
            raise ChangesetError("Node does not exist at the given path %r"
 
                % (path))
 

	
 
    def _get_filectx(self, path):
 
        path = self._fix_path(path)
 
        if self._get_kind(path) != NodeKind.FILE:
 
            raise ChangesetError("File does not exist for revision %r at "
 
                " %r" % (self.raw_id, path))
 
        return self._ctx.filectx(path)
 

	
 
    def _extract_submodules(self):
 
        """
 
        returns a dictionary with submodule information from substate file
 
        of hg repository
 
        """
 
        return self._ctx.substate
 

	
 
    def get_file_mode(self, path):
 
        """
 
        Returns stat mode of the file at the given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        if 'x' in fctx.flags():
 
            return 0100755
 
        else:
 
            return 0100644
 

	
 
    def get_file_content(self, path):
 
        """
 
        Returns content of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.data()
 

	
 
    def get_file_size(self, path):
 
        """
 
        Returns size of the file at given ``path``.
 
        """
 
        fctx = self._get_filectx(path)
 
        return fctx.size()
 

	
 
    def get_file_changeset(self, path):
 
        """
 
        Returns last commit of the file at the given ``path``.
 
        """
 
        node = self.get_node(path)
 
        return node.history[0]
 
        return self.get_file_history(path, limit=1)[0]
 

	
 
    def get_file_history(self, path):
 
    def get_file_history(self, path, limit=None):
 
        """
 
        Returns history of file as reversed list of ``Changeset`` objects for
 
        which file at given ``path`` has been modified.
 
        """
 
        fctx = self._get_filectx(path)
 
        nodes = [fctx.filectx(x).node() for x in fctx.filelog()]
 
        changesets = [self.repository.get_changeset(hex(node))
 
            for node in reversed(nodes)]
 
        return changesets
 
        hist = []
 
        cnt = 0
 
        for cs in reversed([x for x in fctx.filelog()]):
 
            cnt += 1
 
            hist.append(hex(fctx.filectx(cs).node()))
 
            if limit and cnt == limit:
 
                break
 

	
 
        return [self.repository.get_changeset(node) for node in hist]
 

	
 
    def get_file_annotate(self, path):
 
        """
 
        Returns a generator of four element tuples with
 
            lineno, sha, changeset lazy loader and line
 
        """
 

	
 
        fctx = self._get_filectx(path)
 
        for i, annotate_data in enumerate(fctx.annotate()):
 
            ln_no = i + 1
 
            sha = hex(annotate_data[0].node())
 
            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), annotate_data[1],)
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None,
 
                     subrepos=False):
 
        """
 
        Fills up given stream.
 

	
 
        :param stream: file like object.
 
        :param kind: one of following: ``zip``, ``tgz`` or ``tbz2``.
 
            Default: ``tgz``.
 
        :param prefix: name of root directory in archive.
 
            Default is repository name and changeset's raw_id joined with dash
 
            (``repo-tip.<KIND>``).
 
        :param subrepos: include subrepos in this archive.
 

	
 
        :raise ImproperArchiveTypeError: If given kind is wrong.
 
        :raise VcsError: If given stream is None
 
        """
 

	
 
        allowed_kinds = settings.ARCHIVE_SPECS.keys()
 
        if kind not in allowed_kinds:
 
            raise ImproperArchiveTypeError('Archive kind not supported use one'
 
                'of %s', allowed_kinds)
 

	
 
        if stream is None:
 
            raise VCSError('You need to pass in a valid stream for filling'
 
                           ' with archival data')
 

	
 
        if prefix is None:
 
            prefix = '%s-%s' % (self.repository.name, self.short_id)
 
        elif prefix.startswith('/'):
 
            raise VCSError("Prefix cannot start with leading slash")
 
        elif prefix.strip() == '':
 
            raise VCSError("Prefix cannot be empty")
 

	
 
        archival.archive(self.repository._repo, stream, self.raw_id,
 
                         kind, prefix=prefix, subrepos=subrepos)
rhodecode/templates/files/files_browser.html
Show inline comments
 
@@ -44,73 +44,73 @@
 
        <table class="code-browser">
 
            <thead>
 
                <tr>
 
                    <th>${_('Name')}</th>
 
                    <th>${_('Size')}</th>
 
                    <th>${_('Mimetype')}</th>
 
                    <th>${_('Last Revision')}</th>
 
                    <th>${_('Last modified')}</th>
 
                    <th>${_('Last committer')}</th>
 
                </tr>
 
            </thead>
 

	
 
            <tbody id="tbody">
 
                %if c.file.parent:
 
                <tr class="parity0">
 
                    <td>
 
                        ${h.link_to('..',h.url('files_home',repo_name=c.repo_name,revision=c.changeset.raw_id,f_path=c.file.parent.path),class_="browser-dir ypjax-link")}
 
                    </td>
 
                    <td></td>
 
                    <td></td>
 
                    <td></td>
 
                    <td></td>
 
                    <td></td>
 
                </tr>
 
                %endif
 

	
 
            %for cnt,node in enumerate(c.file):
 
                <tr class="parity${cnt%2}">
 
                     <td>
 
                        %if node.is_submodule():
 
                           ${h.link_to(node.name,node.url or '#',class_="submodule-dir ypjax-link")}
 
                        %else:
 
                          ${h.link_to(node.name, h.url('files_home',repo_name=c.repo_name,revision=c.changeset.raw_id,f_path=h.safe_unicode(node.path)),class_=file_class(node)+" ypjax-link")}
 
                        %endif:
 
                     </td>
 
                     <td>
 
                     %if node.is_file():
 
                         ${h.format_byte_size(node.size,binary=True)}
 
                     %endif
 
                     </td>
 
                     <td>
 
                      %if node.is_file():
 
                          ${node.mimetype}
 
                      %endif
 
                     </td>
 
                     <td>
 
                         %if node.is_file():
 
                             <div class="tooltip" title="${h.tooltip(node.last_changeset.message)}">
 
                             <pre>${'r%s:%s' % (node.last_changeset.revision,node.last_changeset.short_id)}</pre>
 
                            </div>
 
                              <pre>${'r%s:%s' % (node.last_changeset.revision,node.last_changeset.short_id)}</pre>
 
                             </div>
 
                         %endif
 
                     </td>
 
                     <td>
 
                         %if node.is_file():
 
                             <span class="tooltip" title="${h.tooltip(h.fmt_date(node.last_changeset.date))}">
 
                            ${h.age(node.last_changeset.date)}</span>
 
                         %endif
 
                     </td>
 
                     <td>
 
                         %if node.is_file():
 
                             <span title="${node.last_changeset.author}">
 
                            ${h.person(node.last_changeset.author)}
 
                            </span>
 
                         %endif
 
                     </td>
 
                </tr>
 
            %endfor
 
            </tbody>
 
            <tbody id="tbody_filtered" style="display:none">
 
            </tbody>
 
        </table>
 
    </div>
 
</div>
0 comments (0 inline, 0 general)