Changeset - 07fce1930417
[Not reviewed]
beta
0 4 0
Marcin Kuzminski - 14 years ago 2012-05-04 00:14:58
marcin@python-works.com
fixed issues with gitsubmodule diffs
4 files changed with 48 insertions and 24 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/diffs.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    rhodecode.lib.diffs
 
    ~~~~~~~~~~~~~~~~~~~
 

	
 
    Set of diffing helpers, previously part of vcs
 

	
 

	
 
    :created_on: Dec 4, 2011
 
    :author: marcink
 
    :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
 
    :original copyright: 2007-2008 by Armin Ronacher
 
    :license: GPLv3, see COPYING for more details.
 
"""
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
import re
 
import difflib
 
import markupsafe
 
from itertools import tee, imap
 

	
 
from pylons.i18n.translation import _
 

	
 
from rhodecode.lib.vcs.exceptions import VCSError
 
from rhodecode.lib.vcs.nodes import FileNode
 

	
 
from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
 
from rhodecode.lib.helpers import escape
 
from rhodecode.lib.utils import EmptyChangeset
 

	
 

	
 
def wrap_to_table(str_):
 
    return '''<table class="code-difftable">
 
                <tr class="line no-comment">
 
                <td class="lineno new"></td>
 
                <td class="code no-comment"><pre>%s</pre></td>
 
                </tr>
 
              </table>''' % str_
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
 
                ignore_whitespace=True, line_context=3,
 
                enable_comments=False):
 
    """
 
    returns a wrapped diff into a table, checks for cut_off_limit and presents
 
    proper message
 
    """
 

	
 
    if filenode_old is None:
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        diff = wrap_to_table(_('binary file'))
 
        stats = (0, 0)
 
        size = 0
 

	
 
    elif cut_off_limit != -1 and (cut_off_limit is None or
 
    (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):
 

	
 
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
 

	
 
        diff = diff_processor.as_html(enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 
        size = len(diff or '')
 
    else:
 
        diff = wrap_to_table(_('Changeset was to big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 
        size = 0
 

	
 
    if not diff:
 
        diff = wrap_to_table(_('No changes detected'))
 
        submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                            [filenode_new, filenode_old])
 
        if submodules:
 
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
 
        else:
 
            diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
    return size, cs1, cs2, diff, stats
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 

	
 
    :param ignore_whitespace: ignore whitespaces in diff
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                        [filenode_new, filenode_old])
 
    if submodules:
 
        return ''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
 
                                 ignore_whitespace, context)
 

	
 
    return vcs_gitdiff
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 

	
 
    def __init__(self, diff, differ='diff', format='udiff'):
 
        """
 
        :param diff:   a text in diff format or generator
 
        :param format: format of diff passed, `udiff` or `gitdiff`
 
        """
 
        if isinstance(diff, basestring):
 
            diff = [diff]
 

	
 
        self.__udiff = diff
 
        self.__format = format
 
        self.adds = 0
 
        self.removes = 0
 

	
 
        if isinstance(self.__udiff, basestring):
 
            self.lines = iter(self.__udiff.splitlines(1))
 

	
 
        elif self.__format == 'gitdiff':
 
            udiff_copy = self.copy_iterator()
 
            self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
 
        else:
 
            udiff_copy = self.copy_iterator()
 
            self.lines = imap(self.escaper, udiff_copy)
 

	
 
        # Select a differ.
 
        if differ == 'difflib':
 
            self.differ = self._highlight_line_difflib
 
        else:
 
            self.differ = self._highlight_line_udiff
 

	
 
    def escaper(self, string):
 
        return markupsafe.escape(string)
 

	
 
    def copy_iterator(self):
 
        """
 
        make a fresh copy of generator, we should not iterate thru
 
        an original as it's needed for repeating operations on
 
        this instance of DiffProcessor
rhodecode/lib/vcs/backends/git/changeset.py
Show inline comments
 
@@ -319,136 +319,142 @@ class GitChangeset(BaseChangeset):
 
            stream.write(chunk)
 
            chunk = popen.stdout.read(buffer_size)
 
        # Make sure all descriptors would be read
 
        popen.communicate()
 

	
 
    def get_nodes(self, path):
 
        if self._get_kind(path) != NodeKind.DIR:
 
            raise ChangesetError("Directory does not exist for revision %r at "
 
                " %r" % (self.revision, path))
 
        path = self._fix_path(path)
 
        id = self._get_id_for_path(path)
 
        tree = self.repository._repo[id]
 
        dirnodes = []
 
        filenodes = []
 
        als = self.repository.alias
 
        for name, stat, id in tree.iteritems():
 
            if objects.S_ISGITLINK(stat):
 
                dirnodes.append(SubModuleNode(name, url=None, changeset=id,
 
                                              alias=als))
 
                continue
 

	
 
            obj = self.repository._repo.get_object(id)
 
            if path != '':
 
                obj_path = '/'.join((path, name))
 
            else:
 
                obj_path = name
 
            if obj_path not in self._stat_modes:
 
                self._stat_modes[obj_path] = stat
 
            if isinstance(obj, objects.Tree):
 
                dirnodes.append(DirNode(obj_path, changeset=self))
 
            elif isinstance(obj, objects.Blob):
 
                filenodes.append(FileNode(obj_path, changeset=self, mode=stat))
 
            else:
 
                raise ChangesetError("Requested object should be Tree "
 
                                     "or Blob, is %r" % type(obj))
 
        nodes = dirnodes + filenodes
 
        for node in nodes:
 
            if not node.path in self.nodes:
 
                self.nodes[node.path] = node
 
        nodes.sort()
 
        return nodes
 

	
 
    def get_node(self, path):
 
        if isinstance(path, unicode):
 
            path = path.encode('utf-8')
 
        path = self._fix_path(path)
 
        if not path in self.nodes:
 
            try:
 
                id = self._get_id_for_path(path)
 
                id_ = self._get_id_for_path(path)
 
            except ChangesetError:
 
                raise NodeDoesNotExistError("Cannot find one of parents' "
 
                    "directories for a given path: %s" % path)
 
            obj = self.repository._repo.get_object(id)
 
            if isinstance(obj, objects.Tree):
 
                if path == '':
 
                    node = RootNode(changeset=self)
 

	
 
            als = self.repository.alias
 
            _GL = lambda m: m and objects.S_ISGITLINK(m)
 
            if _GL(self._stat_modes.get(path)):
 
                node = SubModuleNode(path, url=None, changeset=id_, alias=als)
 
            else:
 
                obj = self.repository._repo.get_object(id_)
 

	
 
                if isinstance(obj, objects.Tree):
 
                    if path == '':
 
                        node = RootNode(changeset=self)
 
                    else:
 
                        node = DirNode(path, changeset=self)
 
                    node._tree = obj
 
                elif isinstance(obj, objects.Blob):
 
                    node = FileNode(path, changeset=self)
 
                    node._blob = obj
 
                else:
 
                    node = DirNode(path, changeset=self)
 
                node._tree = obj
 
            elif isinstance(obj, objects.Blob):
 
                node = FileNode(path, changeset=self)
 
                node._blob = obj
 
            else:
 
                raise NodeDoesNotExistError("There is no file nor directory "
 
                    "at the given path %r at revision %r"
 
                    % (path, self.short_id))
 
                    raise NodeDoesNotExistError("There is no file nor directory "
 
                        "at the given path %r at revision %r"
 
                        % (path, self.short_id))
 
            # cache node
 
            self.nodes[path] = node
 
        return self.nodes[path]
 

	
 
    @LazyProperty
 
    def affected_files(self):
 
        """
 
        Get's a fast accessible file changes for given changeset
 
        """
 

	
 
        return self.added + self.changed
 

	
 
    @LazyProperty
 
    def _diff_name_status(self):
 
        output = []
 
        for parent in self.parents:
 
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id, self.raw_id)
 
            so, se = self.repository.run_git_command(cmd)
 
            output.append(so.strip())
 
        return '\n'.join(output)
 

	
 
    def _get_paths_for_status(self, status):
 
        """
 
        Returns sorted list of paths for given ``status``.
 

	
 
        :param status: one of: *added*, *modified* or *deleted*
 
        """
 
        paths = set()
 
        char = status[0].upper()
 
        for line in self._diff_name_status.splitlines():
 
            if not line:
 
                continue
 

	
 
            if line.startswith(char):
 
                splitted = line.split(char, 1)
 
                if not len(splitted) == 2:
 
                    raise VCSError("Couldn't parse diff result:\n%s\n\n and "
 
                        "particularly that line: %s" % (self._diff_name_status,
 
                        line))
 
                _path = splitted[1].strip()
 
                paths.add(_path)
 

	
 
        return sorted(paths)
 

	
 
    @LazyProperty
 
    def added(self):
 
        """
 
        Returns list of added ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return list(self._get_file_nodes())
 
        return [self.get_node(path) for path in self._get_paths_for_status('added')]
 

	
 
    @LazyProperty
 
    def changed(self):
 
        """
 
        Returns list of modified ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return []
 
        return [self.get_node(path) for path in self._get_paths_for_status('modified')]
 

	
 
    @LazyProperty
 
    def removed(self):
 
        """
 
        Returns list of removed ``FileNode`` objects.
 
        """
 
        if not self.parents:
 
            return []
 
        return [RemovedFileNode(path) for path in self._get_paths_for_status('deleted')]
rhodecode/lib/vcs/backends/git/repository.py
Show inline comments
 
@@ -366,97 +366,97 @@ class GitRepository(BaseRepository):
 
                                          % branch_name)
 
        # %H at format means (full) commit hash, initial hashes are retrieved
 
        # in ascending date order
 
        cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
 
        cmd_params = {}
 
        if start_date:
 
            cmd_template += ' --since "$since"'
 
            cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
 
        if end_date:
 
            cmd_template += ' --until "$until"'
 
            cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
 
        if branch_name:
 
            cmd_template += ' $branch_name'
 
            cmd_params['branch_name'] = branch_name
 
        else:
 
            cmd_template += ' --all'
 

	
 
        cmd = Template(cmd_template).safe_substitute(**cmd_params)
 
        revs = self.run_git_command(cmd)[0].splitlines()
 
        start_pos = 0
 
        end_pos = len(revs)
 
        if start:
 
            _start = self._get_revision(start)
 
            try:
 
                start_pos = revs.index(_start)
 
            except ValueError:
 
                pass
 

	
 
        if end is not None:
 
            _end = self._get_revision(end)
 
            try:
 
                end_pos = revs.index(_end)
 
            except ValueError:
 
                pass
 

	
 
        if None not in [start, end] and start_pos > end_pos:
 
            raise RepositoryError('start cannot be after end')
 

	
 
        if end_pos is not None:
 
            end_pos += 1
 

	
 
        revs = revs[start_pos:end_pos]
 
        if reverse:
 
            revs = reversed(revs)
 
        for rev in revs:
 
            yield self.get_changeset(rev)
 

	
 
    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
 
            context=3):
 
                 context=3):
 
        """
 
        Returns (git like) *diff*, as plain text. Shows changes introduced by
 
        ``rev2`` since ``rev1``.
 

	
 
        :param rev1: Entry point from which diff is shown. Can be
 
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
 
          the changes since empty state of the repository until ``rev2``
 
        :param rev2: Until which revision changes should be shown.
 
        :param ignore_whitespace: If set to ``True``, would not show whitespace
 
          changes. Defaults to ``False``.
 
        :param context: How many lines before/after changed lines should be
 
          shown. Defaults to ``3``.
 
        """
 
        flags = ['-U%s' % context]
 
        if ignore_whitespace:
 
            flags.append('-w')
 

	
 
        if rev1 == self.EMPTY_CHANGESET:
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['show'] + flags + [rev2])
 
        else:
 
            rev1 = self.get_changeset(rev1).raw_id
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
 

	
 
        if path:
 
            cmd += ' -- "%s"' % path
 
        stdout, stderr = self.run_git_command(cmd)
 
        # If we used 'show' command, strip first few lines (until actual diff
 
        # starts)
 
        if rev1 == self.EMPTY_CHANGESET:
 
            lines = stdout.splitlines()
 
            x = 0
 
            for line in lines:
 
                if line.startswith('diff'):
 
                    break
 
                x += 1
 
            # Append new line just like 'diff' command do
 
            stdout = '\n'.join(lines[x:]) + '\n'
 
        return stdout
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        """
 
        Returns ``GitInMemoryChangeset`` object for this repository.
 
        """
 
        return GitInMemoryChangeset(self)
 

	
rhodecode/lib/vcs/nodes.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.nodes
 
    ~~~~~~~~~
 

	
 
    Module holding everything related to vcs nodes.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 
import os
 
import stat
 
import posixpath
 
import mimetypes
 

	
 
from pygments import lexers
 

	
 
from rhodecode.lib.vcs.utils.lazy import LazyProperty
 
from rhodecode.lib.vcs.utils import safe_unicode, safe_str
 
from rhodecode.lib.vcs.exceptions import NodeError
 
from rhodecode.lib.vcs.exceptions import RemovedFileNodeError
 
from rhodecode.lib.utils import EmptyChangeset
 

	
 

	
 
class NodeKind:
 
    SUBMODULE = -1
 
    DIR = 1
 
    FILE = 2
 

	
 

	
 
class NodeState:
 
    ADDED = u'added'
 
    CHANGED = u'changed'
 
    NOT_CHANGED = u'not changed'
 
    REMOVED = u'removed'
 

	
 

	
 
class NodeGeneratorBase(object):
 
    """
 
    Base class for removed added and changed filenodes, it's a lazy generator
 
    class that will create filenodes only on iteration or call
 

	
 
    The len method doesn't need to create filenodes at all
 
    """
 

	
 
    def __init__(self, current_paths, cs):
 
        self.cs = cs
 
        self.current_paths = current_paths
 

	
 
    def __call__(self):
 
        return [n for n in self]
 

	
 
    def __getslice__(self, i, j):
 
        for p in self.current_paths[i:j]:
 
            yield self.cs.get_node(p)
 

	
 
    def __len__(self):
 
        return len(self.current_paths)
 

	
 
    def __iter__(self):
 
        for p in self.current_paths:
 
            yield self.cs.get_node(p)
 

	
 

	
 
class AddedFileNodesGenerator(NodeGeneratorBase):
 
    """
 
    Class holding Added files for current changeset
 
    """
 
    pass
 

	
 
@@ -531,70 +532,80 @@ class DirNode(Node):
 
                return self._nodes_dict[path]
 
            elif len(paths) > 1:
 
                if self.changeset is None:
 
                    raise NodeError("Cannot access deeper "
 
                                    "nodes without changeset")
 
                else:
 
                    path1, path2 = paths[0], '/'.join(paths[1:])
 
                    return self.get_node(path1).get_node(path2)
 
            else:
 
                raise KeyError
 
        except KeyError:
 
            raise NodeError("Node does not exist at %s" % path)
 

	
 
    @LazyProperty
 
    def state(self):
 
        raise NodeError("Cannot access state of DirNode")
 

	
 
    @LazyProperty
 
    def size(self):
 
        size = 0
 
        for root, dirs, files in self.changeset.walk(self.path):
 
            for f in files:
 
                size += f.size
 

	
 
        return size
 

	
 
    def __repr__(self):
 
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
 
                                 self.changeset.short_id)
 

	
 

	
 
class RootNode(DirNode):
 
    """
 
    DirNode being the root node of the repository.
 
    """
 

	
 
    def __init__(self, nodes=(), changeset=None):
 
        super(RootNode, self).__init__(path='', nodes=nodes,
 
            changeset=changeset)
 

	
 
    def __repr__(self):
 
        return '<%s>' % self.__class__.__name__
 

	
 

	
 
class SubModuleNode(Node):
 
    """
 
    represents a SubModule of Git or SubRepo of Mercurial
 
    """
 
    is_binary = False
 
    size = 0
 

	
 
    def __init__(self, name, url=None, changeset=None, alias=None):
 
        self.path = name
 
        self.kind = NodeKind.SUBMODULE
 
        self.alias = alias
 
        # changeset MUST be STR !! since it can point to non-valid SCM
 
        self.changeset = str(changeset)
 
        # we have to use emptyChangeset here since this can point to svn/git/hg
 
        # submodules we cannot get from repository
 
        self.changeset = EmptyChangeset(str(changeset), alias=alias)
 
        self.url = url or self._extract_submodule_url()
 

	
 
    def __repr__(self):
 
        return '<%s %r @ %s>' % (self.__class__.__name__, self.path,
 
                                 self.changeset.short_id)
 

	
 
    def _extract_submodule_url(self):
 
        if self.alias == 'git':
 
            #TODO: find a way to parse gits submodule file and extract the
 
            # linking URL
 
            return self.path
 
        if self.alias == 'hg':
 
            return self.path
 

	
 
    @LazyProperty
 
    def name(self):
 
        """
 
        Returns name of the node so if its path
 
        then only last part is returned.
 
        """
 
        org = safe_unicode(self.path.rstrip('/').split('/')[-1])
 
        return u'%s @ %s' % (org, self.changeset[:12])
 
        return u'%s @ %s' % (org, self.changeset.short_id)
0 comments (0 inline, 0 general)