Changeset - 2654edfb1700
[Not reviewed]
beta
0 2 0
Marcin Kuzminski - 13 years ago 2012-10-03 20:20:16
marcin@python-works.com
Fixes #591: the git backend was causing encoding errors when handling binary files
- added a test case for VCS lib tests
2 files changed with 9 insertions and 2 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/vcs/backends/git/inmemory.py
Show inline comments
 
import time
 
import datetime
 
import posixpath
 
from dulwich import objects
 
from dulwich.repo import Repo
 
from rhodecode.lib.vcs.backends.base import BaseInMemoryChangeset
 
from rhodecode.lib.vcs.exceptions import RepositoryError
 
from rhodecode.lib.vcs.utils import safe_str
 

	
 

	
 
class GitInMemoryChangeset(BaseInMemoryChangeset):
 

	
 
    def commit(self, message, author, parents=None, branch=None, date=None,
 
               **kwargs):
 
        """
 
        Performs in-memory commit (doesn't check workdir in any way) and
 
        returns newly created ``Changeset``. Updates repository's
 
        ``revisions``.
 

	
 
        :param message: message of the commit
 
        :param author: full username, i.e. "Joe Doe <joe.doe@example.com>"
 
        :param parents: single parent or sequence of parents from which commit
 
          would be derieved
 
        :param date: ``datetime.datetime`` instance. Defaults to
 
          ``datetime.datetime.now()``.
 
        :param branch: branch name, as string. If none given, default backend's
 
          branch would be used.
 

	
 
        :raises ``CommitError``: if any error occurs while committing
 
        """
 
        self.check_integrity(parents)
 

	
 
        from .repository import GitRepository
 
        if branch is None:
 
            branch = GitRepository.DEFAULT_BRANCH_NAME
 

	
 
        repo = self.repository._repo
 
        object_store = repo.object_store
 

	
 
        ENCODING = "UTF-8"
 
        DIRMOD = 040000
 

	
 
        # Create tree and populates it with blobs
 
        commit_tree = self.parents[0] and repo[self.parents[0]._commit.tree] or\
 
            objects.Tree()
 
        for node in self.added + self.changed:
 
            # Compute subdirs if needed
 
            dirpath, nodename = posixpath.split(node.path)
 
            dirnames = dirpath and dirpath.split('/') or []
 
            parent = commit_tree
 
            ancestors = [('', parent)]
 

	
 
            # Tries to dig for the deepest existing tree
 
            while dirnames:
 
                curdir = dirnames.pop(0)
 
                try:
 
                    dir_id = parent[curdir][1]
 
                except KeyError:
 
                    # put curdir back into dirnames and stops
 
                    dirnames.insert(0, curdir)
 
                    break
 
                else:
 
                    # If found, updates parent
 
                    parent = self.repository._repo[dir_id]
 
                    ancestors.append((curdir, parent))
 
            # Now parent is deepest exising tree and we need to create subtrees
 
            # Now parent is deepest existing tree and we need to create subtrees
 
            # for dirnames (in reverse order) [this only applies for nodes from added]
 
            new_trees = []
 
            blob = objects.Blob.from_string(node.content.encode(ENCODING))
 

	
 
            if not node.is_binary:
 
                content = node.content.encode(ENCODING)
 
            else:
 
                content = node.content
 
            blob = objects.Blob.from_string(content)
 

	
 
            node_path = node.name.encode(ENCODING)
 
            if dirnames:
 
                # If there are trees which should be created we need to build
 
                # them now (in reverse order)
 
                reversed_dirnames = list(reversed(dirnames))
 
                curtree = objects.Tree()
 
                curtree[node_path] = node.mode, blob.id
 
                new_trees.append(curtree)
 
                for dirname in reversed_dirnames[:-1]:
 
                    newtree = objects.Tree()
 
                    #newtree.add(DIRMOD, dirname, curtree.id)
 
                    newtree[dirname] = DIRMOD, curtree.id
 
                    new_trees.append(newtree)
 
                    curtree = newtree
 
                parent[reversed_dirnames[-1]] = DIRMOD, curtree.id
 
            else:
 
                parent.add(name=node_path, mode=node.mode, hexsha=blob.id)
 

	
 
            new_trees.append(parent)
 
            # Update ancestors
 
            for parent, tree, path in reversed([(a[1], b[1], b[0]) for a, b in
 
                zip(ancestors, ancestors[1:])]):
 
                parent[path] = DIRMOD, tree.id
 
                object_store.add_object(tree)
 

	
 
            object_store.add_object(blob)
 
            for tree in new_trees:
 
                object_store.add_object(tree)
 
        for node in self.removed:
 
            paths = node.path.split('/')
 
            tree = commit_tree
 
            trees = [tree]
 
            # Traverse deep into the forest...
 
            for path in paths:
 
                try:
 
                    obj = self.repository._repo[tree[path][1]]
 
                    if isinstance(obj, objects.Tree):
 
                        trees.append(obj)
 
                        tree = obj
 
                except KeyError:
 
                    break
 
            # Cut down the blob and all rotten trees on the way back...
 
            for path, tree in reversed(zip(paths, trees)):
 
                del tree[path]
 
                if tree:
 
                    # This tree still has elements - don't remove it or any
 
                    # of it's parents
 
                    break
 

	
 
        object_store.add_object(commit_tree)
 

	
 
        # Create commit
 
        commit = objects.Commit()
 
        commit.tree = commit_tree.id
 
        commit.parents = [p._commit.id for p in self.parents if p]
 
        commit.author = commit.committer = safe_str(author)
 
        commit.encoding = ENCODING
 
        commit.message = safe_str(message)
 

	
 
        # Compute date
 
        if date is None:
 
            date = time.time()
 
        elif isinstance(date, datetime.datetime):
 
            date = time.mktime(date.timetuple())
 

	
 
        author_time = kwargs.pop('author_time', date)
 
        commit.commit_time = int(date)
 
        commit.author_time = int(author_time)
 
        tz = time.timezone
 
        author_tz = kwargs.pop('author_timezone', tz)
 
        commit.commit_timezone = tz
 
        commit.author_timezone = author_tz
 

	
 
        object_store.add_object(commit)
 

	
 
        ref = 'refs/heads/%s' % branch
 
        repo.refs[ref] = commit.id
 
        repo.refs.set_symbolic_ref('HEAD', ref)
 

	
 
        # Update vcs repository object & recreate dulwich repo
 
        self.repository.revisions.append(commit.id)
 
        self.repository._repo = Repo(self.repository.path)
 
        # invalidate parsed refs after commit
 
        self.repository._parsed_refs = self.repository._get_parsed_refs()
 
        tip = self.repository.get_changeset()
 
        self.reset()
 
        return tip
 

	
 
    def _get_missing_trees(self, path, root_tree):
 
        """
 
        Creates missing ``Tree`` objects for the given path.
 

	
 
        :param path: path given as a string. It may be a path to a file node
 
          (i.e. ``foo/bar/baz.txt``) or directory path - in that case it must
 
          end with slash (i.e. ``foo/bar/``).
 
        :param root_tree: ``dulwich.objects.Tree`` object from which we start
 
          traversing (should be commit's root tree)
 
        """
 
        dirpath = posixpath.split(path)[0]
 
        dirs = dirpath.split('/')
 
        if not dirs or dirs == ['']:
 
            return []
 

	
 
        def get_tree_for_dir(tree, dirname):
 
            for name, mode, id in tree.iteritems():
 
                if name == dirname:
 
                    obj = self.repository._repo[id]
 
                    if isinstance(obj, objects.Tree):
 
                        return obj
 
                    else:
 
                        raise RepositoryError("Cannot create directory %s "
 
                        "at tree %s as path is occupied and is not a "
 
                        "Tree" % (dirname, tree))
 
            return None
 

	
 
        trees = []
 
        parent = root_tree
 
        for dirname in dirs:
 
            tree = get_tree_for_dir(parent, dirname)
 
            if tree is None:
 
                tree = objects.Tree()
 
                dirmode = 040000
 
                parent.add(dirmode, dirname, tree.id)
 
                parent = tree
 
            # Always append tree
 
            trees.append(tree)
 
        return trees
rhodecode/tests/vcs/test_inmemchangesets.py
Show inline comments
 
"""
 
Tests so called "in memory changesets" commit API of vcs.
 
"""
 
from __future__ import with_statement
 

	
 
from rhodecode.lib import vcs
 
import time
 
import datetime
 
from conf import SCM_TESTS, get_new_dir
 
from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
 
from rhodecode.lib.vcs.exceptions import NodeAlreadyAddedError
 
from rhodecode.lib.vcs.exceptions import NodeAlreadyExistsError
 
from rhodecode.lib.vcs.exceptions import NodeAlreadyRemovedError
 
from rhodecode.lib.vcs.exceptions import NodeAlreadyChangedError
 
from rhodecode.lib.vcs.exceptions import NodeDoesNotExistError
 
from rhodecode.lib.vcs.exceptions import NodeNotChangedError
 
from rhodecode.lib.vcs.nodes import DirNode
 
from rhodecode.lib.vcs.nodes import FileNode
 
from rhodecode.lib.vcs.utils.compat import unittest
 

	
 

	
 
class InMemoryChangesetTestMixin(object):
 
    """
 
    This is a backend independent test case class which should be created
 
    with ``type`` method.
 

	
 
    It is required to set following attributes at subclass:
 

	
 
    - ``backend_alias``: alias of used backend (see ``vcs.BACKENDS``)
 
    - ``repo_path``: path to the repository which would be created for set of
 
      tests
 
    """
 

	
 
    def get_backend(self):
        # Hand back whatever ``vcs.get_backend`` returns for the alias set on
        # the concrete test class.
        backend_cls = vcs.get_backend(self.backend_alias)
        return backend_cls
 

	
 
    def setUp(self):
        # Create a new repository in a directory obtained from ``get_new_dir``
        # (keyed by the current timestamp) and prepare the fixture nodes.
        backend_cls = self.get_backend()
        self.repo_path = get_new_dir(str(time.time()))
        self.repo = backend_cls(self.repo_path, create=True)
        self.imc = self.repo.in_memory_changeset

        # (path, content) pairs; the last one carries raw binary content
        fixture_specs = (
            ('foobar', 'Foo & bar'),
            ('foobar2', 'Foo & bar, doubled!'),
            ('foo bar with spaces', ''),
            ('foo/bar/baz', 'Inside'),
            ('foo/bar/file.bin', '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00;\x00\x03\x00\xfe\xff\t\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x18\x00\x00\x00\x01\x00\x00\x00\xfe\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff'),
        )
        self.nodes = [FileNode(node_path, content=node_content)
                      for node_path, node_content in fixture_specs]
 

	
 
    def test_add(self):
        # Add copies of all fixture nodes one at a time, commit them and
        # check that the new tip reflects the commit.
        revisions_before = len(self.repo.revisions)
        fresh_nodes = []
        for fixture in self.nodes:
            fresh_nodes.append(FileNode(fixture.path, content=fixture.content))
        for fresh in fresh_nodes:
            self.imc.add(fresh)
        fixture_paths = [fixture.path for fixture in self.nodes]
        message = u'Added: %s' % ', '.join(fixture_paths)
        author = unicode(self.__class__)
        changeset = self.imc.commit(message=message, author=author)

        newtip = self.repo.get_changeset()
        self.assertEqual(changeset, newtip)
        self.assertEqual(revisions_before + 1, len(self.repo.revisions))
        self.assertEqual(newtip.message, message)
        self.assertEqual(newtip.author, author)
        # After a commit nothing may be left pending
        pending = (self.imc.added, self.imc.changed, self.imc.removed)
        self.assertTrue(not any(pending))
        for fresh in fresh_nodes:
            stored = newtip.get_node(fresh.path)
            self.assertEqual(stored.content, fresh.content)
 

	
 
    def test_add_in_bulk(self):
        # Same as adding one by one, but all nodes are passed to a single
        # ``add`` call.
        revisions_before = len(self.repo.revisions)
        fresh_nodes = []
        for fixture in self.nodes:
            fresh_nodes.append(FileNode(fixture.path, content=fixture.content))
        self.imc.add(*fresh_nodes)
        fixture_paths = [fixture.path for fixture in self.nodes]
        message = u'Added: %s' % ', '.join(fixture_paths)
        author = unicode(self.__class__)
        changeset = self.imc.commit(message=message, author=author)

        newtip = self.repo.get_changeset()
        self.assertEqual(changeset, newtip)
        self.assertEqual(revisions_before + 1, len(self.repo.revisions))
        self.assertEqual(newtip.message, message)
        self.assertEqual(newtip.author, author)
        # After a commit nothing may be left pending
        pending = (self.imc.added, self.imc.changed, self.imc.removed)
        self.assertTrue(not any(pending))
        for fresh in fresh_nodes:
            stored = newtip.get_node(fresh.path)
            self.assertEqual(stored.content, fresh.content)
 

	
 
    def test_add_actually_adds_all_nodes_at_second_commit_too(self):
        # First commit: two files, one of them nested two directories deep
        first_batch = (
            ('foo/bar/image.png', '\0'),
            ('foo/README.txt', 'readme!'),
        )
        for file_path, file_content in first_batch:
            self.imc.add(FileNode(file_path, content=file_content))
        changeset = self.imc.commit(u'Initial', u'joe.doe@example.com')
        for dir_path in ('foo', 'foo/bar'):
            self.assertTrue(isinstance(changeset.get_node(dir_path), DirNode))
        for file_path, file_content in first_batch:
            self.assertEqual(changeset.get_node(file_path).content,
                             file_content)

        # commit some more files again
        second_batch = (
            'foo/bar/foobaz/bar',
            'foo/bar/another/bar',
            'foo/baz.txt',
            'foobar/foobaz/file',
            'foobar/barbaz',
        )
        more_nodes = [FileNode(file_path, content='foo')
                      for file_path in second_batch]
        self.imc.add(*more_nodes)
        changeset = self.imc.commit(u'Another', u'joe.doe@example.com')
        for file_path in second_batch:
            self.assertEqual(changeset.get_node(file_path).content, 'foo')
 

	
 
    def test_add_raise_already_added(self):
        # Adding the very same node twice before committing must fail
        duplicate = FileNode('foobar', content='baz')
        self.imc.add(duplicate)
        add_again = self.imc.add
        self.assertRaises(NodeAlreadyAddedError, add_again, duplicate)
 

	
 
    def test_check_integrity_raise_already_exist(self):
        # Committing a node that was already committed must be rejected
        existing = FileNode('foobar', content='baz')
        self.imc.add(existing)
        self.imc.commit(message=u'Added foobar', author=unicode(self))
        self.imc.add(existing)
        commit_kwargs = dict(message='new message', author=str(self))
        self.assertRaises(NodeAlreadyExistsError, self.imc.commit,
                          **commit_kwargs)
 

	
 
    def test_change(self):
        # Commit two files, then change one of them in a second commit
        committer = u'joe.doe@example.com'
        self.imc.add(FileNode('foo/bar/baz', content='foo'))
        self.imc.add(FileNode('foo/fbar', content='foobar'))
        tip = self.imc.commit(u'Initial', committer)

        # Change node's content
        changed = FileNode('foo/bar/baz', content='My **changed** content')
        self.imc.change(changed)
        self.imc.commit(u'Changed %s' % changed.path, committer)

        newtip = self.repo.get_changeset()
        self.assertNotEqual(tip, newtip)
        self.assertNotEqual(tip.id, newtip.id)
        stored = newtip.get_node('foo/bar/baz')
        self.assertEqual(stored.content, 'My **changed** content')
 

	
 
    def test_change_raise_empty_repository(self):
        # Nothing has been committed yet, so marking a node as changed fails
        orphan = FileNode('foobar')
        change = self.imc.change
        self.assertRaises(EmptyRepositoryError, change, orphan)
 

	
 
    def test_check_integrity_change_raise_node_does_not_exist(self):
        # Commit one file, then try to commit a change to a different,
        # never-committed path
        committed = FileNode('foobar', content='baz')
        self.imc.add(committed)
        self.imc.commit(message=u'Added foobar', author=unicode(self))
        missing = FileNode('not-foobar', content='')
        self.imc.change(missing)
        commit_kwargs = dict(message='Changed not existing node',
                             author=str(self))
        self.assertRaises(NodeDoesNotExistError, self.imc.commit,
                          **commit_kwargs)
 

	
 
    def test_change_raise_node_already_changed(self):
        # Marking the same node as changed twice must fail
        original = FileNode('foobar', content='baz')
        self.imc.add(original)
        self.imc.commit(message=u'Added foobar', author=unicode(self))
        modified = FileNode('foobar', content='more baz')
        self.imc.change(modified)
        change = self.imc.change
        self.assertRaises(NodeAlreadyChangedError, change, modified)
 

	
 
    def test_check_integrity_change_raise_node_not_changed(self):
        self.test_add()  # Performs first commit

        # Same path and same content as the committed node: nothing changed
        first = self.nodes[0]
        untouched = FileNode(first.path, content=first.content)
        self.imc.change(untouched)
        commit_kwargs = dict(
            message=u'Trying to mark node as changed without touching it',
            author=unicode(self))
        self.assertRaises(NodeNotChangedError, self.imc.commit,
                          **commit_kwargs)
 

	
 
    def test_change_raise_node_already_removed(self):
        # A node already marked for removal cannot be marked as changed
        committed = FileNode('foobar', content='baz')
        self.imc.add(committed)
        self.imc.commit(message=u'Added foobar', author=unicode(self))
        self.imc.remove(FileNode('foobar'))
        change = self.imc.change
        self.assertRaises(NodeAlreadyRemovedError, change, committed)
 

	
 
    def test_remove(self):
        self.test_add()  # Performs first commit

        tip = self.repo.get_changeset()
        victim = self.nodes[0]
        # The node is present in the current tip before it gets removed
        self.assertEqual(victim.content, tip.get_node(victim.path).content)
        self.imc.remove(victim)
        removal_message = u'Removed %s' % victim.path
        self.imc.commit(message=removal_message, author=unicode(self))

        newtip = self.repo.get_changeset()
        self.assertNotEqual(tip, newtip)
        self.assertNotEqual(tip.id, newtip.id)
        lookup = newtip.get_node
        self.assertRaises(NodeDoesNotExistError, lookup, victim.path)
 

	
 
    def test_remove_last_file_from_directory(self):
        # The only file of a deeply nested directory is added and then removed
        committer = u'joe doe'
        lonely = FileNode('omg/qwe/foo/bar', content='foobar')
        self.imc.add(lonely)
        self.imc.commit(u'added', committer)

        self.imc.remove(lonely)
        tip = self.imc.commit(u'removed', committer)
        lookup = tip.get_node
        self.assertRaises(NodeDoesNotExistError, lookup, 'omg/qwe/foo/bar')
 

	
 
    def test_remove_raise_node_does_not_exist(self):
        # Nothing was committed, so the removal cannot be committed either
        first = self.nodes[0]
        self.imc.remove(first)
        commit_kwargs = dict(
            message='Trying to remove node at empty repository',
            author=str(self))
        self.assertRaises(NodeDoesNotExistError, self.imc.commit,
                          **commit_kwargs)
 

	
 
    def test_check_integrity_remove_raise_node_does_not_exist(self):
        self.test_add()  # Performs first commit

        # This path was never committed
        ghost = FileNode('no-such-file')
        self.imc.remove(ghost)
        commit_kwargs = dict(
            message=u'Trying to remove not existing node',
            author=unicode(self))
        self.assertRaises(NodeDoesNotExistError, self.imc.commit,
                          **commit_kwargs)
 

	
 
    def test_remove_raise_node_already_removed(self):
        self.test_add()  # Performs first commit

        # Marking the same node for removal twice must fail
        first = self.nodes[0]
        doomed = FileNode(first.path)
        self.imc.remove(doomed)
        remove = self.imc.remove
        self.assertRaises(NodeAlreadyRemovedError, remove, doomed)
 

	
 
    def test_remove_raise_node_already_changed(self):
        self.test_add()  # Performs first commit

        # A node already marked as changed cannot be marked for removal
        first = self.nodes[0]
        edited = FileNode(first.path, content='Bending time')
        self.imc.change(edited)
        remove = self.imc.remove
        self.assertRaises(NodeAlreadyChangedError, remove, edited)
 

	
 
    def test_reset(self):
 
        self.imc.add(FileNode('foo', content='bar'))
 
        #self.imc.change(FileNode('baz', content='new'))
 
        #self.imc.remove(FileNode('qwe'))
 
        self.imc.reset()
 
        self.assertTrue(not any((self.imc.added, self.imc.changed,
0 comments (0 inline, 0 general)