kallithea Changeset - 1f4d4b8d72f5

Changeset - 1f4d4b8d72f5

Parent rev.

Child rev.

[Not reviewed]

beta

0 2 0

Marcin Kuzminski - 13 years ago 2012-07-30 22:45:43
marcin@python-works.com

switched git_command to subprocessio for non-blocking Popen.

2 files changed with 25 insertions and 13 deletions:

rhodecode/lib/subprocessio.py

rhodecode/lib/vcs/backends/git/repository.py

0 comments (0 inline, 0 general)

rhodecode/lib/subprocessio.py

➞

Show inline comments

@@ @@ -245,161 +245,164 @@ class BufferedGenerator(): @@
         '''
         return self.worker.EOF.is_set()
     @property
     def length(self):
         '''
         Return the number of chunks currently queued, as an int.

         This is the length of the queue of chunks, not the length of the
         combined contents of those chunks.

         __len__() is deliberately not implemented: this reader is flying
         through content of unknown total size and can only know the length
         of what it has already seen. If __len__() returns a value, a WSGI
         server following PEP 3333 will set the response's length to it, so
         to avoid confusing such servers no __len__ is provided at all.
         '''
         queued_chunks = len(self.data)
         return queued_chunks
     def prepend(self, x):
         '''Insert ``x`` at the left (front) end of the chunk queue.'''
         queue = self.data
         queue.appendleft(x)
     def append(self, x):
         '''Add ``x`` at the right (back) end of the chunk queue.'''
         queue = self.data
         queue.append(x)
     def extend(self, o):
         '''Add every item of the iterable ``o`` to the back of the chunk queue.'''
         queue = self.data
         queue.extend(o)
     def __getitem__(self, i):
         return self.data[i]
 class SubprocessIOChunker(object):
     '''
     Processor class wrapping handling of subprocess IO.

     In a way, this is a "communicate()" replacement with a twist.

     - We are multithreaded. Writing in and reading out, err are all
       separate threads.
     - We support concurrent (in and out) stream processing.
     - The output is not a stream. It's a queue of read string (bytes, not
       unicode) chunks. The object behaves as an iterable. You can
       "for chunk in obj:" us.
     - We are non-blocking in more respects than communicate()
       (reading from subprocess out pauses when internal buffer is full, but
        does not block the parent calling code. On the flip side, reading
        from slow-yielding subprocess may block the iteration until data
        shows up. This does not block the parallel inpipe reading occurring
        in a parallel thread.)

     The purpose of the object is to allow us to wrap subprocess
     interactions into an iterable that can be passed to a WSGI server as
     the application's return value. Because of stream-processing-ability,
     WSGI does not have to read ALL of the subprocess's output and buffer
     it, before handing it to WSGI server for HTTP response. Instead, the
     class initializer reads just a bit of the stream to figure out if an
     error occurred or is likely to occur and if not, just hands the further
     iteration over subprocess output to the server for completion of HTTP
     response.

     The real or perceived subprocess error is trapped and raised as one of
     EnvironmentError family of exceptions.

     Example usage:
     #    try:
     #        answer = SubprocessIOChunker(
     #            cmd,
     #            input,
     #            buffer_size = 65536,
     #            chunk_size = 4096
     #            )
     #    except (EnvironmentError) as e:
     #        print str(e)
     #        raise e
     #
     #    return answer
     '''

     def __init__(self, cmd, inputstream=None, buffer_size=65536,
                  chunk_size=4096, starting_values=None, **kwargs):
         '''
         Initializes SubprocessIOChunker

         :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
         :param inputstream: (Default: None) A file-like, string, or file pointer.
         :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
         :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
         :param starting_values: (Default: None, meaning an empty list) An array
             of strings to put in front of output queue.
         :param kwargs: extra keyword arguments handed to subprocess.Popen.
         :raises EnvironmentError: when the process already exited with a
             non-zero code, or is still running but wrote to stderr.
         '''
         # a fresh list per call; a shared mutable default could leak
         # values between instances
         if starting_values is None:
             starting_values = []
         if inputstream:
             input_streamer = StreamFeeder(inputstream)
             input_streamer.start()
             inputstream = input_streamer.output
         if isinstance(cmd, (list, tuple)):
             # NOTE(review): arguments are joined unquoted and run with
             # shell=True below; callers must not pass untrusted values.
             cmd = ' '.join(cmd)
         _p = subprocess.Popen(cmd,
             bufsize=-1,
             shell=True,
             stdin=inputstream,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             **kwargs
             )
         bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size, starting_values)
         bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
         while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
             # doing this until we reach either end of file, or end of buffer.
             bg_out.data_added_event.wait(1)
             bg_out.data_added_event.clear()
         # at this point it's still ambiguous if we are done reading or just full buffer.
         # Either way, if error (returned by ended process, or implied based on
         # presence of stuff in stderr output) we error out.
         # Else, we are happy.
         _returncode = _p.poll()
         if _returncode or (_returncode is None and bg_err.length):
             try:
                 _p.terminate()
             except Exception:
                 # best effort only: the process may already be gone
                 pass
             bg_out.stop()
             bg_err.stop()
             err = '%s' % ''.join(bg_err)
             raise EnvironmentError("Subprocess exited due to an error:\n" + err)
         self.process = _p
         self.output = bg_out
         self.error = bg_err

     def __iter__(self):
         '''Return self; chunks are pulled through next().'''
         return self

     def next(self):
         '''
         Return the next output chunk.

         :raises EnvironmentError: when the subprocess has exited with a
             non-zero return code; the message carries the collected stderr.
         '''
         if self.process.poll():
             err = '%s' % ''.join(self.error)
             raise EnvironmentError("Subprocess exited due to an error:\n" + err)
         return self.output.next()

     def throw(self, type, value=None, traceback=None):
         '''
         Raise ``type(value)`` if output is still queued or still being read;
         otherwise do nothing. ``traceback`` is accepted but not used.
         '''
         if self.output.length or not self.output.done_reading:
             raise type(value)

     def close(self):
         '''
         Terminate the process and close both stream readers.

         Every step is best effort: a failure in one does not prevent the
         others from running and is never propagated to the caller.
         '''
         try:
             self.process.terminate()
         except Exception:
             pass
         try:
             self.output.close()
         except Exception:
             pass
         try:
             self.error.close()
         except Exception:
             pass

     def __del__(self):
         '''Release the process and readers when the object is collected.'''
         self.close()

rhodecode/lib/vcs/backends/git/repository.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 """
     vcs.backends.git
     ~~~~~~~~~~~~~~~~
     Git backend implementation.
     :created_on: Apr 8, 2010
     :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 """
 import os
 import re
 import time
 import posixpath
 import logging
 import traceback
 from dulwich.repo import Repo, NotGitRepository
 #from dulwich.config import ConfigFile
 from string import Template
 from subprocess import Popen, PIPE
 from rhodecode.lib.vcs.backends.base import BaseRepository
 from rhodecode.lib.vcs.exceptions import BranchDoesNotExistError
 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
 from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
 from rhodecode.lib.vcs.exceptions import RepositoryError
 from rhodecode.lib.vcs.exceptions import TagAlreadyExistError
 from rhodecode.lib.vcs.exceptions import TagDoesNotExistError
 from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 from rhodecode.lib.vcs.utils.lazy import LazyProperty
 from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
 from rhodecode.lib.vcs.utils.paths import abspath
 from rhodecode.lib.vcs.utils.paths import get_user_home
 from .workdir import GitWorkdir
 from .changeset import GitChangeset
 from .inmemory import GitInMemoryChangeset
 from .config import ConfigFile
 from rhodecode.lib import subprocessio
 log = logging.getLogger(__name__)
 class GitRepository(BaseRepository):
     """
     Git repository backend.
     """
     DEFAULT_BRANCH_NAME = 'master'
     scm = 'git'
     def __init__(self, repo_path, create=False, src_url=None,
                  update_after_clone=False, bare=False):
         """
         Opens (or creates/clones) the git repository at ``repo_path``.

         :param repo_path: location of the repository; stored as an absolute
             path in ``self.path``
         :param create: passed to ``_get_repo``; create the repository
         :param src_url: passed to ``_get_repo``; source to clone from
         :param update_after_clone: passed to ``_get_repo``
         :param bare: passed to ``_get_repo``; also selects which config file
             path is recorded in ``self._config_files``
         """
         self.path = abspath(repo_path)
         self._repo = self._get_repo(create, src_url, update_after_clone, bare)
         # the ui object is created here for now; later it is meant to be
         # moved to a constructor argument
         from mercurial.ui import ui
         baseui = ui()
         # patch the instance of GitRepo with a "FAKE" ui object to add
         # compatibility layer with Mercurial
         self._repo.ui = baseui
         try:
             self.head = self._repo.head()
         except KeyError:
             self.head = None
         # explicit branch instead of ``bare and a or b``, which would fall
         # through to the non-bare path if the first value were ever falsy
         if bare:
             repo_config = abspath(self.path, 'config')
         else:
             repo_config = abspath(self.path, '.git', 'config')
         self._config_files = [
             repo_config,
             abspath(get_user_home(), '.gitconfig'),
         ]
         self.bare = self._repo.bare
     @LazyProperty
     def revisions(self):
         """
         List of revision ids as produced by ``_get_all_revisions``, in
         ascending order.

         Kept as a lazy attribute so that external tools can inject shas
         from a cache.
         """
         all_ids = self._get_all_revisions()
         return all_ids
     def run_git_command(self, cmd):
         """
         Runs given ``cmd`` as git command and returns tuple
         (stdout, stderr).

         The command is run through ``subprocessio.SubprocessIOChunker`` and
         its whole output is joined into a single string. ``stderr`` in the
         returned tuple is always ``None``; failures are reported by raising
         instead.

         .. note::
            This method exists only until log/blame functionality is implemented
            at Dulwich (see https://bugs.launchpad.net/bugs/645142). Parsing
            os command's output is road to hell...

         :param cmd: git command to be executed, as a string or a list of
             arguments (without the leading ``git``)
         :raises RepositoryError: if the command cannot be started or the
             subprocess reports an error
         """
         _copts = ['-c', 'core.quotepath=false', ]
         _str_cmd = False
         if isinstance(cmd, basestring):
             cmd = [cmd]
             _str_cmd = True
         # work on a copy: dropping GIT_DIR must only affect this command,
         # not the environment of the whole process
         gitenv = dict(os.environ)
         gitenv.pop('GIT_DIR', None)
         gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
         cmd = ['git'] + _copts + cmd
         if _str_cmd:
             cmd = ' '.join(cmd)
         try:
             # shell/stdout/stderr are set by SubprocessIOChunker itself
             opts = dict(
                 env=gitenv,
             )
             if os.path.isdir(self.path):
                 opts['cwd'] = self.path
             p = subprocessio.SubprocessIOChunker(cmd, **opts)
             # iterating may still raise EnvironmentError if the process
             # fails after start-up, so it is consumed inside the try
             so = ''.join(p)
         except (EnvironmentError, OSError) as err:
             log.error(traceback.format_exc())
             raise RepositoryError("Couldn't run git command (%s).\n"
                 "Original error was:%s" % (cmd, err))
         se = None
         return so, se
     def _check_url(self, url):
         """
         Functon will check given url and try to verify if it's a valid
         link. Sometimes it may happened that mercurial will issue basic
         auth request that can cause whole API to hang when used from python
         or other external calls.
         On failures it'll raise urllib2.HTTPError
         """
         #TODO: implement this
         pass
     def _get_repo(self, create, src_url=None, update_after_clone=False,
             bare=False):
         """
         Returns the dulwich ``Repo`` for ``self.path``, creating or cloning
         it first when requested.

         :param create: create the repository; the path must not exist yet
         :param src_url: when given (requires ``create``), clone from it
         :param update_after_clone: passed to ``self.clone``
         :param bare: initialize as / clone into a bare repository
         :raises RepositoryError: on invalid argument combinations, when the
             location already exists, or when dulwich/the OS reports an error
         """
         if create and os.path.exists(self.path):
             raise RepositoryError("Location already exist")
         if src_url and not create:
             raise RepositoryError("Create should be set to True if src_url is "
                                   "given (clone operation creates repository)")
         try:
             if not create:
                 # plain open of an existing repository
                 return Repo(self.path)
             if src_url:
                 self._check_url(src_url)
                 self.clone(src_url, update_after_clone, bare)
                 return Repo(self.path)
             os.mkdir(self.path)
             initializer = Repo.init_bare if bare else Repo.init
             return initializer(self.path)
         except (NotGitRepository, OSError) as err:
             raise RepositoryError(err)
     def _get_all_revisions(self):
         # we must check if this repo is not empty, since later command
         # fails if it is. And it's cheaper to ask than throw the subprocess
         # errors
         try:
             self._repo.head()
         except KeyError:
             return []
         cmd = 'rev-list --all --reverse --date-order'
         try:
             so, se = self.run_git_command(cmd)
         except RepositoryError:
             # Can be raised for empty repositories
             return []
         return so.splitlines()
     def _get_all_revisions2(self):
         #alternate implementation using dulwich
         includes = [x[1][0] for x in self._parsed_refs.iteritems()
                     if x[1][1] != 'T']
         return [c.commit.id for c in self._repo.get_walker(include=includes)]
     def _get_revision(self, revision):
         """
         Resolves ``revision`` to a changeset id taken from
         ``self.revisions`` or from the parsed refs/tags.

         Accepted forms: ``None``, ``''``, ``'tip'``, ``'HEAD'``, ``'head'``
         or ``-1`` (last revision); an integer or short digit string (index
         into ``self.revisions``); a branch/tag name; a tag sha; or a
         changeset id already present in ``self.revisions``.

         :raises EmptyRepositoryError: if the repository has no revisions
         :raises ChangesetDoesNotExistError: if ``revision`` cannot be
             resolved
         """
         # exactly 12 or exactly 40 hex digits; brackets are not grouping in
         # a regex, so the alternation needs a real (non-capturing) group
         pattern = re.compile(r'^(?:[0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')
         is_bstr = lambda o: isinstance(o, (str, unicode))
         # "null" id: a string consisting only of zeros
         is_null = lambda o: len(o) == revision.count('0')
         if len(self.revisions) == 0:
             raise EmptyRepositoryError("There are no changesets yet")
         if revision in (None, '', 'tip', 'HEAD', 'head', -1):
             revision = self.revisions[-1]
         if ((is_bstr(revision) and revision.isdigit() and len(revision) < 12)
             or isinstance(revision, int) or is_null(revision)):
             # numeric form: treat as an index into the revision list
             try:
                 revision = self.revisions[int(revision)]
             except (IndexError, ValueError, TypeError):
                 raise ChangesetDoesNotExistError("Revision %r does not exist "
                     "for this repository %s" % (revision, self))
         elif is_bstr(revision):
             # get by branch/tag name
             _ref_revision = self._parsed_refs.get(revision)
             _tags_shas = self.tags.values()
             if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
                 return _ref_revision[0]
             # maybe it's a tag ? we don't have them in self.revisions
             elif revision in _tags_shas:
                 return _tags_shas[_tags_shas.index(revision)]
             elif not pattern.match(revision) or revision not in self.revisions:
                 raise ChangesetDoesNotExistError("Revision %r does not exist "
                     "for this repository %s" % (revision, self))
         # Ensure we return full id
         if not pattern.match(str(revision)):
             raise ChangesetDoesNotExistError("Given revision %r not recognized"
                 % revision)
         return revision
     def _get_archives(self, archive_name='tip'):
         for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
                 yield {"type": i[0], "extension": i[1], "node": archive_name}
     def _get_url(self, url):
         """
         Returns normalized url. If schema is not given, would fall to
         filesystem (``file:///``) schema.
         """
         url = str(url)
         if url != 'default' and not '://' in url:
             url = ':///'.join(('file', url))
         return url
     @LazyProperty
     def name(self):
         """Last path component of ``self.path``."""
         _parent, repo_name = os.path.split(self.path)
         return repo_name
     @LazyProperty
     def last_change(self):
         """
         Returns last change made on this repository as datetime object,
         built from ``_get_mtime()`` and the second element of ``makedate()``.
         """
         mtime = self._get_mtime()
         date_extra = makedate()[1]
         return date_fromtimestamp(mtime, date_extra)
     def _get_mtime(self):
         try:
             return time.mktime(self.get_changeset().date.timetuple())
         except RepositoryError:
             idx_loc = '' if self.bare else '.git'
             # fallback to filesystem
             in_path = os.path.join(self.path, idx_loc, "index")
             he_path = os.path.join(self.path, idx_loc, "HEAD")
             if os.path.exists(in_path):
                 return os.stat(in_path).st_mtime
             else:
                 return os.stat(he_path).st_mtime

0 comments (0 inline, 0 general)