# -*- coding: utf-8 -*-
"""
    vcs.backends.base
    ~~~~~~~~~~~~~~~~~

    Base for all available scm backends

    :created_on: Apr 8, 2010
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
"""


from itertools import chain
from rhodecode.lib.vcs.utils import author_name, author_email
from rhodecode.lib.vcs.utils.lazy import LazyProperty
from rhodecode.lib.vcs.utils.helpers import get_dict_for_attrs
from rhodecode.lib.vcs.conf import settings

from rhodecode.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, \
    NodeAlreadyAddedError, NodeAlreadyChangedError, NodeAlreadyExistsError, \
    NodeAlreadyRemovedError, NodeDoesNotExistError, NodeNotChangedError, \
    RepositoryError


class BaseRepository(object):
    """
    Base Repository for final backends

    **Attributes**

        ``DEFAULT_BRANCH_NAME``
            name of default branch (i.e. "trunk" for svn, "master" for git
            etc.)

        ``scm``
            alias of scm, i.e. *git* or *hg*

        ``repo``
            object from external api

        ``revisions``
            list of all available revisions' ids, in ascending order

        ``changesets``
            storage dict caching returned changesets

        ``path``
            absolute path to the repository

        ``branches``
            branches as list of changesets

        ``tags``
            tags as list of changesets
    """
    scm = None
    DEFAULT_BRANCH_NAME = None
    EMPTY_CHANGESET = '0' * 40

    def __init__(self, repo_path, create=False, **kwargs):
        """
        Initializes repository. Raises RepositoryError if repository could
        not be found at the given ``repo_path`` or directory at ``repo_path``
        exists and ``create`` is set to True.

        :param repo_path: local path of the repository
        :param create=False: if set to True, would try to create repository.
        :param src_url=None: if set, should be proper url from which repository
          would be cloned; requires ``create`` parameter to be set to True -
          raises RepositoryError if src_url is set and create evaluates to
          False
        """
        raise NotImplementedError

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, self.path)

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return self.count()

    @LazyProperty
    def alias(self):
        """
        Returns the key of ``settings.BACKENDS`` whose dotted value ends with
        this class' name. Returns ``None`` if no entry matches.
        """
        for k, v in settings.BACKENDS.items():
            if v.split('.')[-1] == str(self.__class__.__name__):
                return k

    @LazyProperty
    def name(self):
        raise NotImplementedError

    @LazyProperty
    def owner(self):
        raise NotImplementedError

    @LazyProperty
    def description(self):
        raise NotImplementedError

    @LazyProperty
    def size(self):
        """
        Returns combined size in bytes for all repository files, as seen at
        the most recent changeset. Returns ``0`` if ``RepositoryError`` is
        raised while walking (best effort).
        """
        size = 0
        try:
            tip = self.get_changeset()
            # ``walk`` already descends into every directory, so each file is
            # yielded exactly once; summing ``files`` per step is sufficient.
            for topnode, dirs, files in tip.walk('/'):
                for f in files:
                    size += tip.get_file_size(f.path)
        except RepositoryError:
            pass
        return size

    def is_valid(self):
        """
        Validates repository.
        """
        raise NotImplementedError

    def get_last_change(self):
        """
        Calls ``get_changesets()`` and discards the result (returns ``None``).

        NOTE(review): the name suggests a value should be returned; the
        intended contract is not visible here - confirm against callers.
        """
        self.get_changesets()

    #==========================================================================
    # CHANGESETS
    #==========================================================================

    def get_changeset(self, revision=None):
        """
        Returns instance of ``Changeset`` class. If ``revision`` is None, most
        recent changeset is returned.

        :raises ``EmptyRepositoryError``: if there are no revisions
        """
        raise NotImplementedError

    def __iter__(self):
        """
        Allows Repository objects to be iterated.

        Yields one changeset per entry of ``revisions``; *requires*
        implementation of the ``get_changeset`` method.
        """
        for revision in self.revisions:
            yield self.get_changeset(revision)

    def get_changesets(self, start=None, end=None, start_date=None,
                       end_date=None, branch_name=None, reverse=False):
        """
        Returns iterator of ``Changeset`` objects from start to end. This
        should behave just like a list, i.e. end is not inclusive.

        :param start: None or str
        :param end: None or str
        :param start_date:
        :param end_date:
        :param branch_name:
        :param reverse:
        """
        raise NotImplementedError

    def __getslice__(self, i, j):
        """
        Returns an iterator of sliced repository (Python 2 simple slices).
        """
        for rev in self.revisions[i:j]:
            yield self.get_changeset(rev)

    def __getitem__(self, key):
        """
        Returns changeset for the given ``key``. If ``key`` is a ``slice``
        object, returns a generator of changesets for that slice of
        ``revisions``.
        """
        # Slice objects arrive here for extended slices and on interpreters
        # that never call ``__getslice__``.
        if isinstance(key, slice):
            return (self.get_changeset(rev) for rev in self.revisions[key])
        return self.get_changeset(key)

    def count(self):
        return len(self.revisions)

    def tag(self, name, user, revision=None, message=None, date=None, **opts):
        """
        Creates and returns a tag for the given ``revision``.

        :param name: name for new tag
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param revision: changeset id for which new tag would be created
        :param message: message of the tag's commit
        :param date: date of tag's commit

        :raises TagAlreadyExistError: if tag with same name already exists
        """
        raise NotImplementedError

    def remove_tag(self, name, user, message=None, date=None):
        """
        Removes tag with the given ``name``.

        :param name: name of the tag to be removed
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param message: message of the tag's removal commit
        :param date: date of tag's removal commit

        :raises TagDoesNotExistError: if tag with given name does not exists
        """
        raise NotImplementedError

    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
                 context=3):
        """
        Returns (git like) *diff*, as plain text. Shows changes introduced by
        ``rev2`` since ``rev1``.

        :param rev1: Entry point from which diff is shown. Can be
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
          the changes since empty state of the repository until ``rev2``
        :param rev2: Until which revision changes should be shown.
        :param ignore_whitespace: If set to ``True``, would not show whitespace
          changes. Defaults to ``False``.
        :param context: How many lines before/after changed lines should be
          shown. Defaults to ``3``.
        """
        raise NotImplementedError

    # ========== #
    # COMMIT API #
    # ========== #

    @LazyProperty
    def in_memory_changeset(self):
        """
        Returns ``InMemoryChangeset`` object for this repository.
        """
        raise NotImplementedError

    def add(self, filenode, **kwargs):
        """
        Commit api function that will add given ``FileNode`` into this
        repository.

        :raises ``NodeAlreadyExistsError``: if there is a file with same path
          already in repository
        :raises ``NodeAlreadyAddedError``: if given node is already marked as
          *added*
        """
        raise NotImplementedError

    def remove(self, filenode, **kwargs):
        """
        Commit api function that will remove given ``FileNode`` from this
        repository.

        :raises ``EmptyRepositoryError``: if there are no changesets yet
        :raises ``NodeDoesNotExistError``: if there is no file with given path
        """
        raise NotImplementedError

    def commit(self, message, **kwargs):
        """
        Persists current changes made on this repository and returns newly
        created changeset.

        :raises ``NothingChangedError``: if no changes has been made
        """
        raise NotImplementedError

    def get_state(self):
        """
        Returns dictionary with ``added``, ``changed`` and ``removed`` lists
        containing ``FileNode`` objects.
        """
        raise NotImplementedError

    def get_config_value(self, section, name, config_file=None):
        """
        Returns configuration value for a given [``section``] and ``name``.

        :param section: Section we want to retrieve value from
        :param name: Name of configuration we want to retrieve
        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        raise NotImplementedError

    def get_user_name(self, config_file=None):
        """
        Returns user's name from global configuration file.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        raise NotImplementedError

    def get_user_email(self, config_file=None):
        """
        Returns user's email from global configuration file.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        raise NotImplementedError

    # =========== #
    # WORKDIR API #
    # =========== #

    @LazyProperty
    def workdir(self):
        """
        Returns ``Workdir`` instance for this repository.
        """
        raise NotImplementedError


class BaseChangeset(object):
    """
    Each backend should implement its changeset representation.

    **Attributes**

        ``repository``
            repository object within which changeset exists

        ``id``
            may be ``raw_id`` or i.e. for mercurial's tip just ``tip``

        ``raw_id``
            raw changeset representation (i.e. full 40 length sha for git
            backend)

        ``short_id``
            shortened (if apply) version of ``raw_id``; it would be simple
            shortcut for ``raw_id[:12]`` for git/mercurial backends or same
            as ``raw_id`` for subversion

        ``revision``
            revision number as integer

        ``files``
            list of ``FileNode`` (``Node`` with NodeKind.FILE) objects

        ``dirs``
            list of ``DirNode`` (``Node`` with NodeKind.DIR) objects

        ``nodes``
            combined list of ``Node`` objects

        ``author``
            author of the changeset, as unicode

        ``message``
            message of the changeset, as unicode

        ``parents``
            list of parent changesets

        ``last``
            ``True`` if this is last changeset in repository, ``False``
            otherwise; trying to access this attribute while there is no
            changesets would raise ``EmptyRepositoryError``
    """
    def __str__(self):
        return '<%s at %s:%s>' % (self.__class__.__name__, self.revision,
            self.short_id)

    def __repr__(self):
        return self.__str__()

    def __unicode__(self):
        return u'%s:%s' % (self.revision, self.short_id)

    def __eq__(self, other):
        # Two changesets are equal when their ``raw_id`` values match;
        # ``other`` is expected to expose ``raw_id``.
        return self.raw_id == other.raw_id

    @LazyProperty
    def last(self):
        """
        Returns ``True`` if ``raw_id`` equals the last entry of the
        repository's ``revisions``.

        :raises ``ChangesetError``: if ``repository`` is ``None``
        """
        if self.repository is None:
            raise ChangesetError("Cannot check if it's most recent revision")
        return self.raw_id == self.repository.revisions[-1]

    @LazyProperty
    def parents(self):
        """
        Returns list of parents changesets.
        """
        raise NotImplementedError

    @LazyProperty
    def id(self):
        """
        Returns string identifying this changeset.
        """
        raise NotImplementedError

    @LazyProperty
    def raw_id(self):
        """
        Returns raw string identifying this changeset.
        """
        raise NotImplementedError

    @LazyProperty
    def short_id(self):
        """
        Returns shortened version of ``raw_id`` attribute, as string,
        identifying this changeset, useful for web representation.
        """
        raise NotImplementedError

    @LazyProperty
    def revision(self):
        """
        Returns integer identifying this changeset.

        """
        raise NotImplementedError

    @LazyProperty
    def author(self):
        """
        Returns Author for given commit
        """

        raise NotImplementedError

    @LazyProperty
    def author_name(self):
        """
        Returns Author name for given commit
        """

        return author_name(self.author)

    @LazyProperty
    def author_email(self):
        """
        Returns Author email address for given commit
        """

        return author_email(self.author)

    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.
        """
        raise NotImplementedError

    def get_file_content(self, path):
        """
        Returns content of the file at the given ``path``.
        """
        raise NotImplementedError

    def get_file_size(self, path):
        """
        Returns size of the file at the given ``path``.
        """
        raise NotImplementedError

    def get_file_changeset(self, path):
        """
        Returns last commit of the file at the given ``path``.
        """
        raise NotImplementedError

    def get_file_history(self, path):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.
        """
        raise NotImplementedError

    def get_nodes(self, path):
        """
        Returns combined ``DirNode`` and ``FileNode`` objects list representing
        state of changeset at the given ``path``.

        :raises ``ChangesetError``: if node at the given ``path`` is not
          instance of ``DirNode``
        """
        raise NotImplementedError

    def get_node(self, path):
        """
        Returns ``Node`` object from the given ``path``.

        :raises ``NodeDoesNotExistError``: if there is no node at the given
          ``path``
        """
        raise NotImplementedError

    def fill_archive(self, stream=None, kind='tgz', prefix=None):
        """
        Fills up given stream.

        :param stream: file like object.
        :param kind: one of following: ``zip``, ``tar``, ``tgz``
            or ``tbz2``. Default: ``tgz``.
        :param prefix: name of root directory in archive.
            Default is repository name and changeset's raw_id joined with
            dash, e.g. ``repo-tip``.
        """

        raise NotImplementedError

    def get_chunked_archive(self, **kwargs):
        """
        Returns iterable archive. Tiny wrapper around ``fill_archive`` method.

        All keyword arguments except ``chunk_size`` are passed on to
        ``fill_archive``.

        :param stream: file like object handed to ``fill_archive`` and then
            read back in chunks; it has to be given.
        :param chunk_size: extra parameter which controls size of returned
            chunks. Default:8k.

        NOTE(review): reading starts at whatever position ``fill_archive``
        leaves the stream at - confirm backends leave it ready for reading.
        """

        chunk_size = kwargs.pop('chunk_size', 8192)
        stream = kwargs.get('stream')
        self.fill_archive(**kwargs)
        while True:
            data = stream.read(chunk_size)
            if not data:
                break
            yield data

    @LazyProperty
    def root(self):
        """
        Returns ``RootNode`` object for this changeset.
        """
        return self.get_node('')

    def next(self, branch=None):
        """
        Returns next changeset from current, if branch is given it will return
        next changeset belonging to this branch

        :param branch: show changesets within the given named branch
        """
        raise NotImplementedError

    def prev(self, branch=None):
        """
        Returns previous changeset from current, if branch is given it will
        return previous changeset belonging to this branch

        :param branch: show changesets within the given named branch
        """
        raise NotImplementedError

    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.
        """
        raise NotImplementedError

    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.
        """
        raise NotImplementedError

    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.
        """
        raise NotImplementedError

    @LazyProperty
    def size(self):
        """
        Returns total number of bytes from contents of all filenodes.
        """
        return sum((node.size for node in self.get_filenodes_generator()))

    def walk(self, topurl=''):
        """
        Similar to os.walk method. Instead of filesystem it walks through
        changeset starting at given ``topurl``.  Returns generator of tuples
        (topnode, dirnodes, filenodes).
        """
        topnode = self.get_node(topurl)
        yield (topnode, topnode.dirs, topnode.files)
        for dirnode in topnode.dirs:
            for tup in self.walk(dirnode.path):
                yield tup

    def get_filenodes_generator(self):
        """
        Returns generator that yields *all* file nodes.
        """
        for topnode, dirs, files in self.walk():
            for node in files:
                yield node

    def as_dict(self):
        """
        Returns dictionary with changeset's attributes and their values.
        """
        data = get_dict_for_attrs(self, ['id', 'raw_id', 'short_id',
            'revision', 'date', 'message'])
        data['author'] = {'name': self.author_name, 'email': self.author_email}
        data['added'] = [node.path for node in self.added]
        data['changed'] = [node.path for node in self.changed]
        data['removed'] = [node.path for node in self.removed]
        return data


class BaseWorkdir(object):
    """
    Working directory representation of single repository.

    :attribute: repository: repository object of working directory
    """

    def __init__(self, repository):
        self.repository = repository

    def get_branch(self):
        """
        Returns name of current branch.
        """
        raise NotImplementedError

    def get_changeset(self):
        """
        Returns current changeset.
        """
        raise NotImplementedError

    def get_added(self):
        """
        Returns list of ``FileNode`` objects marked as *new* in working
        directory.
        """
        raise NotImplementedError

    def get_changed(self):
        """
        Returns list of ``FileNode`` objects *changed* in working directory.
        """
        raise NotImplementedError

    def get_removed(self):
        """
        Returns list of ``RemovedFileNode`` objects marked as *removed* in
        working directory.
        """
        raise NotImplementedError

    def get_untracked(self):
        """
        Returns list of ``FileNode`` objects which are present within working
        directory however are not tracked by repository.
        """
        raise NotImplementedError

    def get_status(self):
        """
        Returns dict with ``added``, ``changed``, ``removed`` and ``untracked``
        lists.
        """
        raise NotImplementedError

    def commit(self, message, **kwargs):
        """
        Commits local (from working directory) changes and returns newly
        created
        ``Changeset``. Updates repository's ``revisions`` list.

        :raises ``CommitError``: if any error occurs while committing
        """
        raise NotImplementedError

    def update(self, revision=None):
        """
        Fetches content of the given revision and populates it within working
        directory.
        """
        raise NotImplementedError

    def checkout_branch(self, branch=None):
        """
        Checks out ``branch`` or the backend's default branch.

        Raises ``BranchDoesNotExistError`` if the branch does not exist.
        """
        raise NotImplementedError


class BaseInMemoryChangeset(object):
    """
    Represents differences between repository's state (most recent head) and
    changes made *in place*.

    **Attributes**

        ``repository``
            repository object for this in-memory-changeset

        ``added``
            list of ``FileNode`` objects marked as *added*

        ``changed``
            list of ``FileNode`` objects marked as *changed*

        ``removed``
            list of ``FileNode`` or ``RemovedFileNode`` objects marked to be
            *removed*

        ``parents``
            list of ``Changeset`` representing parents of in-memory changeset.
            Should always be 2-element sequence.

    """

    def __init__(self, repository):
        self.repository = repository
        self.added = []
        self.changed = []
        self.removed = []
        self.parents = []

    def add(self, *filenodes):
        """
        Marks given ``FileNode`` objects as *to be committed*.

        :raises ``NodeAlreadyExistsError``: if node with same path exists at
          latest changeset
        :raises ``NodeAlreadyAddedError``: if node with same path is already
          marked as *added*
        """
        # Check if not already marked as *added* first
        for node in filenodes:
            if node.path in (n.path for n in self.added):
                raise NodeAlreadyAddedError("Such FileNode %s is already "
                    "marked for addition" % node.path)
        for node in filenodes:
            self.added.append(node)

    def change(self, *filenodes):
        """
        Marks given ``FileNode`` objects to be *changed* in next commit.

        :raises ``EmptyRepositoryError``: if there are no changesets yet
        :raises ``NodeAlreadyChangedError``: if node with same path is already
          marked to be *changed*
        :raises ``NodeAlreadyRemovedError``: if node with same path is already
          marked to be *removed*
        :raises ``NodeDoesNotExistError``: if node doesn't exist in latest
          changeset
        :raises ``NodeNotChangedError``: if node hasn't really be changed
        """
        for node in filenodes:
            if node.path in (n.path for n in self.removed):
                raise NodeAlreadyRemovedError("Node at %s is already marked "
                    "as removed" % node.path)
        try:
            self.repository.get_changeset()
        except EmptyRepositoryError:
            raise EmptyRepositoryError("Nothing to change - try to *add* new "
                "nodes rather than changing them")
        for node in filenodes:
            if node.path in (n.path for n in self.changed):
                raise NodeAlreadyChangedError("Node at '%s' is already "
                    "marked as changed" % node.path)
            self.changed.append(node)

    def remove(self, *filenodes):
        """
        Marks given ``FileNode`` (or ``RemovedFileNode``) objects to be
        *removed* in next commit.

        :raises ``NodeAlreadyRemovedError``: if node has been already marked to
          be *removed*
        :raises ``NodeAlreadyChangedError``: if node has been already marked to
          be *changed*
        """
        for node in filenodes:
            if node.path in (n.path for n in self.removed):
                raise NodeAlreadyRemovedError("Node is already marked to "
                    "for removal at %s" % node.path)
            if node.path in (n.path for n in self.changed):
                raise NodeAlreadyChangedError("Node is already marked to "
                    "be changed at %s" % node.path)
            # We only mark node as *removed* - real removal is done by
            # commit method
            self.removed.append(node)

    def reset(self):
        """
        Resets this instance to initial state (cleans ``added``, ``changed``,
        ``removed`` and ``parents`` lists).
        """
        self.added = []
        self.changed = []
        self.removed = []
        self.parents = []

    def get_ipaths(self):
        """
        Returns generator of paths from nodes marked as added, changed or
        removed.
        """
        for node in chain(self.added, self.changed, self.removed):
            yield node.path

    def get_paths(self):
        """
        Returns list of paths from nodes marked as added, changed or removed.
        """
        return list(self.get_ipaths())

    def check_integrity(self, parents=None):
        """
        Checks in-memory changeset's integrity. Also, sets parents if not
        already set.

        :param parents: sequence of at most two parent changesets; used only
          when ``self.parents`` is empty. It is padded with ``None`` to two
          elements; if empty or not given, the repository's most recent
          changeset is used.

        :raises ``NodeAlreadyExistsError``: if node marked as *added* exists
          in any parent
        :raises ``NodeDoesNotExistError``: if node marked as *changed* or
          *removed* cannot be found in any parent (or there are no parents)
        :raises ``NodeNotChangedError``: if content of node marked as
          *changed* does not differ from its content in any parent
        """
        if not self.parents:
            parents = parents or []
            if len(parents) == 0:
                try:
                    parents = [self.repository.get_changeset(), None]
                except EmptyRepositoryError:
                    parents = [None, None]
            elif len(parents) == 1:
                # Build a new list rather than extending in place, so the
                # caller's sequence is left untouched (and tuples work too).
                parents = list(parents) + [None]
            self.parents = parents

        # Local parents, only if not None
        parents = [p for p in self.parents if p]

        # Check nodes marked as added
        for p in parents:
            for node in self.added:
                try:
                    p.get_node(node.path)
                except NodeDoesNotExistError:
                    pass
                else:
                    raise NodeAlreadyExistsError("Node at %s already exists "
                        "at %s" % (node.path, p))

        # Check nodes marked as changed
        if self.changed and not parents:
            raise NodeDoesNotExistError(str(self.changed[0].path))
        # Paths not found in any parent / paths whose content equals the
        # content in every parent where they exist.
        missing = set(node.path for node in self.changed)
        not_changed = set(node.path for node in self.changed)
        for p in parents:
            for node in self.changed:
                try:
                    old = p.get_node(node.path)
                except NodeDoesNotExistError:
                    continue
                # ``discard``: the same node may be found in both parents
                missing.discard(node.path)
                if old.content != node.content:
                    not_changed.discard(node.path)
        # Report the first offending node, in the order nodes were marked
        for node in self.changed:
            if node.path in missing:
                raise NodeDoesNotExistError("Node at %s is missing "
                    "(parents: %s)" % (node.path, parents))
        for node in self.changed:
            if node.path in not_changed:
                raise NodeNotChangedError("Node at %s wasn't actually changed "
                    "since parents' changesets: %s" % (node.path, parents))

        # Check nodes marked as removed
        if self.removed and not parents:
            raise NodeDoesNotExistError("Cannot remove node at %s as there "
                "were no parents specified" % self.removed[0].path)
        really_removed = set()
        for p in parents:
            for node in self.removed:
                try:
                    p.get_node(node.path)
                except (ChangesetError, NodeDoesNotExistError):
                    # ``get_node`` is documented to raise
                    # ``NodeDoesNotExistError``; ``ChangesetError`` is kept
                    # for backends relying on the previous behaviour.
                    continue
                really_removed.add(node.path)
        for node in self.removed:
            if node.path not in really_removed:
                raise NodeDoesNotExistError("Cannot remove node at %s from "
                    "following parents: %s" % (node.path, parents))

    def commit(self, message, author, parents=None, branch=None, date=None,
            **kwargs):
        """
        Performs in-memory commit (doesn't check workdir in any way) and
        returns newly created ``Changeset``. Updates repository's
        ``revisions``.

        .. note::
            While overriding this method each backend's should call
            ``self.check_integrity(parents)`` in the first place.

        :param message: message of the commit
        :param author: full username, i.e. "Joe Doe <joe.doe@example.com>"
        :param parents: single parent or sequence of parents from which commit
          would be derived
        :param date: ``datetime.datetime`` instance. Defaults to
          ``datetime.datetime.now()``.
        :param branch: branch name, as string. If none given, default backend's
          branch would be used.

        :raises ``CommitError``: if any error occurs while committing
        """
        raise NotImplementedError