kallithea Changeset - 64c194492aad

Changeset - 64c194492aad

Parent rev.

Child rev.

[Not reviewed]

beta

0 3 0

Marcin Kuzminski - 13 years ago 2013-02-25 17:16:45
marcin@python-works.com

--version command should be safe, and bare no modifications
- improved subprocess calls error detection
- fixed I/O read on closed file errors

3 files changed with 28 insertions and 7 deletions:

rhodecode/lib/subprocessio.py

rhodecode/lib/utils.py

rhodecode/lib/vcs/backends/git/repository.py

0 comments (0 inline, 0 general)

rhodecode/lib/subprocessio.py

➞

Show inline comments

 '''
 Module provides a class allowing to wrap communication over subprocess.Popen
 input, output, error streams into a meaningful, non-blocking, concurrent
 stream processor exposing the output data as an iterator fitting to be a
 return value passed by a WSGI application to a WSGI server per PEP 3333.
 Copyright (c) 2011  Daniel Dotsenko <dotsa@hotmail.com>
 This file is part of git_http_backend.py Project.
 git_http_backend.py Project is free software: you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public License as
 published by the Free Software Foundation, either version 2.1 of the License,
 or (at your option) any later version.
 git_http_backend.py Project is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Lesser General Public License for more details.
 You should have received a copy of the GNU Lesser General Public License
 along with git_http_backend.py Project.
 If not, see <http://www.gnu.org/licenses/>.
 '''
 import os
 import subprocess
 from rhodecode.lib.compat import deque, Event, Thread, _bytes, _bytearray
 class StreamFeeder(Thread):
     """
     Feed data from a source into an OS pipe on a background thread.

     Normal writing into pipe-like is blocking once the buffer is filled.
     This thread seeps data from a string or a file-like into a pipe without
     blocking the main thread. The write end of the pipe is closed once the
     end of the source is reached, or as soon as reading/writing fails, so
     whoever reads from ``output`` always gets an EOF.

     :param source: a string-like object, an integer file descriptor, or an
         object exposing ``read``.
     :raises TypeError: if ``source`` is none of the above; an empty string
         is rejected as well because there is nothing to feed.
     """

     def __init__(self, source):
         super(StreamFeeder, self).__init__()
         self.daemon = True
         filelike = False
         self.bytes = _bytes()
         if type(source) in (type(''), _bytes, _bytearray):  # string-like
             self.bytes = _bytes(source)
         else:  # can be either file pointer or file-like
             if type(source) in (int, long):  # file pointer it is
                 ## converting file descriptor (int) stdin into file-like
                 try:
                     source = os.fdopen(source, 'rb', 16384)
                 except Exception:
                     pass
             # let's see if source is file-like by now
             try:
                 filelike = source.read
             except Exception:
                 pass
         if not filelike and not self.bytes:
             raise TypeError("StreamFeeder's source object must be a readable "
                             "file-like, a file descriptor, or a string-like.")
         self.source = source
         self.readiface, self.writeiface = os.pipe()

     def run(self):
         """Copy the whole source into the pipe, then close its write end."""
         t = self.writeiface
         try:
             if self.bytes:
                 os.write(t, self.bytes)
             else:
                 s = self.source
                 b = s.read(4096)
                 while b:
                     os.write(t, b)
                     b = s.read(4096)
         finally:
             # always close the write end: if it stayed open after a failed
             # read/write, the consumer of ``output`` would wait for EOF
             # forever
             os.close(t)

     @property
     def output(self):
         """File descriptor of the read end of the pipe."""
         return self.readiface
 class InputStreamChunker(Thread):
     """
     Background reader moving chunks from ``source`` into ``target``.

     :param source: object exposing ``read(size)`` and ``close()``.
     :param target: container supporting ``append`` and ``len``; read chunks
         are appended to it.
     :param buffer_size: total amount of data to keep buffered.
     :param chunk_size: size passed to every ``source.read`` call.

     Events exposed to the consumer:

     - ``data_added``: set after every appended chunk and once more when
       reading is over.
     - ``keep_reading``: cleared by the reader when ``target`` holds more
       than ``chunk_count_max`` chunks; the consumer sets it to resume.
     - ``EOF``: set when the reader is done (or was stopped).
     - ``go``: cleared by ``stop()`` to end the read loop.
     """

     def __init__(self, source, target, buffer_size, chunk_size):
         super(InputStreamChunker, self).__init__()
         self.daemon = True  # die die die.
         self.source = source
         self.target = target
         self.chunk_count_max = int(buffer_size / chunk_size) + 1
         self.chunk_size = chunk_size
         self.data_added = Event()
         self.data_added.clear()
         self.keep_reading = Event()
         self.keep_reading.set()
         self.EOF = Event()
         self.EOF.clear()
         self.go = Event()
         self.go.set()

     def stop(self):
         """Ask the reader to finish and mark the stream as ended."""
         self.go.clear()
         self.EOF.set()
         try:
             # this is not proper, but is done to force the reader thread let
             # go of the input because, if successful, .close() will send EOF
             # down the pipe.
             self.source.close()
         except Exception:
             pass

     def _read_chunk(self):
         """Read one chunk; a source closed under us counts as end of data."""
         try:
             return self.source.read(self.chunk_size)
         except ValueError:
             # "I/O operation on closed file": stop() closed the source
             return ''

     def run(self):
         """
         Read chunks until the source is exhausted or ``stop()`` is called.

         :raises IOError: when the consumer does not drain ``target`` and the
             buffer keeps overflowing.
         """
         t = self.target
         ccm = self.chunk_count_max
         kr = self.keep_reading
         da = self.data_added
         go = self.go
         # every chunk goes through the guarded read exactly once; an extra
         # unguarded read here would silently drop the first chunk
         b = self._read_chunk()
         while b and go.is_set():
             if len(t) > ccm:
                 kr.clear()
                 kr.wait(2)
                 # Event.wait() reports a timeout through its return value
                 # only on 2.7.x and up, so instead of
                 #     if not kr.wait(10): raise ...
                 # we check how far the buffer has overflowed
                 if len(t) > ccm + 3:
                     raise IOError("Timed out while waiting for input from subprocess.")
             t.append(b)
             da.set()
             b = self._read_chunk()
         self.EOF.set()
         da.set()  # for cases when done but there was no input.
 class BufferedGenerator():
     '''
     Class behaves as a non-blocking, buffered pipe reader.
     Reads chunks of data (through a thread)
     from a blocking pipe, and attaches these to an array (Deque) of chunks.
     Reading is halted in the thread when max chunks is internally buffered.
     The .next() may operate in blocking or non-blocking fashion by yielding
     '' if no data is ready
     to be sent or by not returning until there is some data to send
     When we get EOF from underlying source pipe we raise the marker to raise
     StopIteration after the last chunk of data is yielded.

     NOTE(review): the .next() implemented below only has the blocking
     flavour - it waits until a chunk is buffered or the worker reports EOF.
     '''
     def __init__(self, source, buffer_size=65536, chunk_size=4096,
                  starting_values=[], bottomless=False):
         '''
         Creates the chunk queue and immediately starts the reader thread.

         :param source: readable object handed over to InputStreamChunker
         :param buffer_size: total buffer size, handed over to the worker
         :param chunk_size: size of a single read, handed over to the worker
         :param starting_values: chunks placed into the queue before anything
             is read; only read here (copied into the deque), never mutated
         :param bottomless: when True the queue gets a maximum length of
             ``int(buffer_size / chunk_size)``, otherwise it is unbounded
         '''
         if bottomless:
             maxlen = int(buffer_size / chunk_size)
         else:
             maxlen = None
         self.data = deque(starting_values, maxlen)
         self.worker = InputStreamChunker(source, self.data, buffer_size,
                                          chunk_size)
         if starting_values:
             # there is already something to serve, let waiters know
             self.worker.data_added.set()
         self.worker.start()
     ####################
     # Generator's methods
     ####################
     def __iter__(self):
         return self
     def next(self):
         '''
         Returns the oldest buffered chunk, waiting for one if needed.
         Raises StopIteration once the queue is empty and the worker has
         flagged EOF.
         '''
         # poll in 0.2s steps until there is a chunk or the worker is done
         while not len(self.data) and not self.worker.EOF.is_set():
             self.worker.data_added.clear()
             self.worker.data_added.wait(0.2)
         if len(self.data):
             # a slot is about to be freed, let a paused worker resume
             self.worker.keep_reading.set()
             return _bytes(self.data.popleft())
         elif self.worker.EOF.is_set():
             raise StopIteration
     def throw(self, type, value=None, traceback=None):
         '''
         Raises ``type(value)`` unless the worker already reached EOF.
         The ``traceback`` argument is accepted but not used.
         '''
         if not self.worker.EOF.is_set():
             raise type(value)
     def start(self):
         # NOTE(review): __init__ already starts the worker thread; starting
         # a thread a second time normally raises RuntimeError - confirm this
         # method is still needed
         self.worker.start()
     def stop(self):
         self.worker.stop()
     def close(self):
         '''
         Stops the worker; the GeneratorExit thrown into ourselves is
         swallowed right away.
         '''
         try:
             self.worker.stop()
             self.throw(GeneratorExit)
         except (GeneratorExit, StopIteration):
             pass
     def __del__(self):
         self.close()
     ####################
     # Threaded reader's infrastructure.
     ####################
     @property
     def input(self):
         # NOTE(review): the InputStreamChunker defined in this module sets no
         # ``w`` attribute, so this lookup looks like it would raise
         # AttributeError - confirm what was meant to be exposed here
         return self.worker.w
     @property
     def data_added_event(self):
         return self.worker.data_added
     @property
     def data_added(self):
         return self.worker.data_added.is_set()
     @property
     def reading_paused(self):
         return not self.worker.keep_reading.is_set()
     @property
     def done_reading_event(self):
         '''
         Done_reading does not mean that the iterator's buffer is empty.
         Iterator might have done reading from underlying source, but the read
         chunks might still be available for serving through .next() method.
         @return An Event class instance.
         '''
         return self.worker.EOF
     @property
     def done_reading(self):
         '''
         Done_reading does not mean that the iterator's buffer is empty.
         Iterator might have done reading from underlying source, but the read
         chunks might still be available for serving through .next() method.
         @return A Bool value.
         '''
         return self.worker.EOF.is_set()
     @property
     def length(self):
         '''
         returns int.
         This is the length of the queue of chunks, not the length of
         the combined contents in those chunks.
         __len__() cannot be meaningfully implemented because this
         reader is just flying through a bottomless pit content and
         can only know the length of what it already saw.
         If __len__() on WSGI server per PEP 3333 returns a value,
         the response's length will be set to that. In order not to
         confuse WSGI PEP3333 servers, we will not implement __len__
         at all.
         '''
         return len(self.data)
     def prepend(self, x):
         # put a chunk in front of everything buffered so far
         self.data.appendleft(x)
     def append(self, x):
         # put a chunk behind everything buffered so far
         self.data.append(x)
     def extend(self, o):
         # add all chunks from the iterable ``o`` at the end of the queue
         self.data.extend(o)
     def __getitem__(self, i):
         return self.data[i]
 class SubprocessIOChunker(object):
     '''
     Processor class wrapping handling of subprocess IO.
     In a way, this is a "communicate()" replacement with a twist.
     - We are multithreaded. Writing in and reading out, err are all sep threads.
     - We support concurrent (in and out) stream processing.
     - The output is not a stream. It's a queue of read string (bytes, not unicode)
       chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
     - We are non-blocking in more respects than communicate()
       (reading from subprocess out pauses when internal buffer is full, but
        does not block the parent calling code. On the flip side, reading from
        slow-yielding subprocess may block the iteration until data shows up. This
        does not block the parallel inpipe reading occurring parallel thread.)
     The purpose of the object is to allow us to wrap subprocess interactions into
     and interable that can be passed to a WSGI server as the application's return
     value. Because of stream-processing-ability, WSGI does not have to read ALL
     of the subprocess's output and buffer it, before handing it to WSGI server for
     HTTP response. Instead, the class initializer reads just a bit of the stream
     to figure out if error ocurred or likely to occur and if not, just hands the
     further iteration over subprocess output to the server for completion of HTTP
     response.
     The real or perceived subprocess error is trapped and raised as one of
     EnvironmentError family of exceptions
     Example usage:
     #    try:
     #        answer = SubprocessIOChunker(
     #            cmd,
     #            input,
     #            buffer_size = 65536,
     #            chunk_size = 4096
     #            )
     #    except (EnvironmentError) as e:
     #        print str(e)
     #        raise e
+    #
     #    return answer
     '''
     def __init__(self, cmd, inputstream=None, buffer_size=65536,
                  chunk_size=4096, starting_values=[], **kwargs):
         '''
         Initializes SubprocessIOChunker
         :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
         :param inputstream: (Default: None) A file-like, string, or file pointer.
         :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
         :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
         :param starting_values: (Default: []) An array of strings to put in front of output que.
         '''
         if inputstream:
             input_streamer = StreamFeeder(inputstream)
             input_streamer.start()
             inputstream = input_streamer.output
         if isinstance(cmd, (list, tuple)):
             cmd = ' '.join(cmd)
         _shell = kwargs.get('shell') or True
         kwargs['shell'] = _shell
         _p = subprocess.Popen(cmd,
             bufsize=-1,
             stdin=inputstream,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             **kwargs
+            )
         bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size, starting_values)
         bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
         while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
             # doing this until we reach either end of file, or end of buffer.
             bg_out.data_added_event.wait(1)
             bg_out.data_added_event.clear()
         # at this point it's still ambiguous if we are done reading or just full buffer.
         # Either way, if error (returned by ended process, or implied based on
         # presence of stuff in stderr output) we error out.
         # Else, we are happy.
         _returncode = _p.poll()
         if _returncode or (_returncode == None and bg_err.length):
             try:
                 _p.terminate()
             except:
                 pass
             bg_out.stop()
             bg_err.stop()
             err = '%s' % ''.join(bg_err)
             raise EnvironmentError("Subprocess exited due to an error:\n" + err)
             if err:
                 raise EnvironmentError("Subprocess exited due to an error:\n" + err)
             raise EnvironmentError("Subprocess exited with non 0 ret code:%s" % _returncode)
         self.process = _p
         self.output = bg_out
         self.error = bg_err
     def __iter__(self):
         return self
     def next(self):
         if self.process.poll():
             err = '%s' % ''.join(self.error)
             raise EnvironmentError("Subprocess exited due to an error:\n" + err)
         return self.output.next()
     def throw(self, type, value=None, traceback=None):
         if self.output.length or not self.output.done_reading:
             raise type(value)
     def close(self):
         try:
             self.process.terminate()
         except:
             pass
         try:
             self.output.close()
         except:
             pass
         try:
             self.error.close()
         except:
             pass
     def __del__(self):
         self.close()

rhodecode/lib/utils.py

➞

Show inline comments

@@ @@ -367,434 +367,435 @@ def invalidate_cache(cache_key, *args): @@
     from rhodecode.model.scm import ScmModel
     if cache_key.startswith('get_repo_cached_'):
         name = cache_key.split('get_repo_cached_')[-1]
         ScmModel().mark_for_invalidation(name)
 def map_groups(path):
     """
     Walk the group part of a repository path and make sure that every
     nested group exists in the database. Missing groups are created as
     children of the previous level and get default permissions.

     :param path: full path to repository
     :returns: the group looked up or created last, or None when the path
         has no group part
     """
     session = meta.Session()
     # the last element is the repository itself, everything before it is
     # the chain of groups the repository lives in
     segments = path.split(Repository.url_sep())[:-1]
     group_model = ReposGroupModel(session)
     parent = group = None
     for lvl, _segment in enumerate(segments):
         full_name = '/'.join(segments[:lvl + 1])
         group = RepoGroup.get_by_group_name(full_name)
         # skip folders that are now removed repos
         if REMOVED_REPO_PAT.match(full_name):
             break
         if group is None:
             log.debug('creating group level: %s group_name: %s' % (lvl,
                                                                    full_name))
             group = RepoGroup(full_name, parent)
             group.group_description = '%s group' % full_name
             session.add(group)
             group_model._create_default_perms(group)
             session.flush()
         parent = group
     return group
 def repo2db_mapper(initial_repo_list, remove_obsolete=False,
                    install_git_hook=False):
     """
     maps all repos given in initial_repo_list, non existing repositories
     are created, if remove_obsolete is True it also check for db entries
     that are not in initial_repo_list and removes them.

     :param initial_repo_list: mapping of repository name to repository
         object, as found by scanning methods
     :param remove_obsolete: check for obsolete entries in database
     :param install_git_hook: if this is True, also check and install githook
         for a repo if missing
     :returns: tuple ``(added, removed)`` with the names of the repositories
         created in and deleted from the database
     :raises Exception: when there is no admin user to own new repositories
     """
     from rhodecode.model.repo import RepoModel
     from rhodecode.model.scm import ScmModel
     sa = meta.Session()
     rm = RepoModel()
     # `== True` is on purpose: this builds a SQL expression, not a boolean
     user = sa.query(User).filter(User.admin == True).first()
     if user is None:
         raise Exception('Missing administrative account!')
     added = []
 #    # clear cache keys
 #    log.debug("Clearing cache keys now...")
 #    CacheInvalidation.clear_cache()
 #    sa.commit()
     ##creation defaults
     defs = RhodeCodeSetting.get_default_repo_settings(strip_prefix=True)
     enable_statistics = defs.get('repo_enable_statistics')
     enable_locking = defs.get('repo_enable_locking')
     enable_downloads = defs.get('repo_enable_downloads')
     private = defs.get('repo_private')
     for name, repo in initial_repo_list.items():
         group = map_groups(name)
         db_repo = rm.get_by_repo_name(name)
         # found repo that is on filesystem not in RhodeCode database
         if not db_repo:
             log.info('repository %s not found, creating now' % name)
             added.append(name)
             desc = (repo.description
                     if repo.description != 'unknown'
                     else '%s repository' % name)
             new_repo = rm.create_repo(
                 repo_name=name,
                 repo_type=repo.alias,
                 description=desc,
                 repos_group=getattr(group, 'group_id', None),
                 owner=user,
                 just_db=True,
                 enable_locking=enable_locking,
                 enable_downloads=enable_downloads,
                 enable_statistics=enable_statistics,
                 private=private
             )
             # we added that repo just now, and make sure it has githook
             # installed
             if new_repo.repo_type == 'git':
                 ScmModel().install_git_hook(new_repo.scm_instance)
             new_repo.update_changeset_cache()
         elif install_git_hook:
             if db_repo.repo_type == 'git':
                 ScmModel().install_git_hook(db_repo.scm_instance)
         # during starting install all cache keys for all repositories in the
         # system, this will register all repos and multiple instances
         key, _prefix, _org_key = CacheInvalidation._get_key(name)
         CacheInvalidation.invalidate(name)
         log.debug("Creating a cache key for %s, instance_id %s"
                   % (name, _prefix or 'unknown'))
     sa.commit()
     removed = []
     if remove_obsolete:
         # remove from database those repositories that are not in the filesystem
         for repo in sa.query(Repository).all():
             if repo.repo_name not in initial_repo_list:
                 log.debug("Removing non-existing repository found in db `%s`" %
                           repo.repo_name)
                 try:
                     sa.delete(repo)
                     sa.commit()
                     removed.append(repo.repo_name)
                 except Exception:
                     #don't hold further removals on error
                     log.error(traceback.format_exc())
                     sa.rollback()
     return added, removed
 # set cache regions for beaker so celery can utilise it
 def add_cache(settings):
     """
     Register beaker cache regions described by ``settings``.

     Every key starting with ``beaker.cache.`` or ``cache.`` is collected
     (prefix removed, key and value stripped). For each name in the comma
     separated ``regions`` entry a settings dict is built out of the
     ``<region>.<option>`` keys and stored in ``beaker.cache.cache_regions``.

     :param settings: mapping of configuration keys to string values
     """
     cache_settings = {'regions': None}
     for key in settings.keys():
         for prefix in ['beaker.cache.', 'cache.']:
             if key.startswith(prefix):
                 name = key.split(prefix)[1].strip()
                 cache_settings[name] = settings[key].strip()
     if not cache_settings['regions']:
         # no regions configured, nothing to register
         return
     for region in cache_settings['regions'].split(','):
         region = region.strip()
         region_settings = {}
         for key, value in cache_settings.items():
             if key.startswith(region):
                 region_settings[key.split('.')[1]] = value
         # a region without its own `expire` used to end up in int(None),
         # i.e. a TypeError, because the fallback value was missing.
         # NOTE(review): 60 is assumed to be the intended default - confirm
         region_settings['expire'] = int(region_settings.get('expire', 60))
         region_settings.setdefault('lock_dir',
                                    cache_settings.get('lock_dir'))
         region_settings.setdefault('data_dir',
                                    cache_settings.get('data_dir'))
         if 'type' not in region_settings:
             region_settings['type'] = cache_settings.get('type',
                                                          'memory')
         beaker.cache.cache_regions[region] = region_settings
 def load_rcextensions(root_path):
     """
     Load the optional ``rcextensions`` package found under ``root_path``
     and publish it as ``rhodecode.EXTENSIONS``. Nothing happens when
     ``<root_path>/rcextensions/__init__.py`` does not exist.

     :param root_path: directory that may contain the rcextensions package
     """
     import rhodecode
     from rhodecode.config import conf
     init_file = os.path.join(root_path, 'rcextensions', '__init__.py')
     if not os.path.isfile(init_file):
         return
     rcext = create_module('rc', init_file)
     rhodecode.EXTENSIONS = rcext
     log.debug('Found rcextensions now loading %s...' % rcext)
     # Additional mappings that are not present in the pygments lexers
     extra_mappings = getattr(rcext, 'EXTRA_MAPPINGS', {})
     conf.LANGUAGES_EXTENSIONS_MAP.update(extra_mappings)
     #OVERRIDE OUR EXTENSIONS FROM RC-EXTENSIONS (if present)
     custom_index = getattr(rcext, 'INDEX_EXTENSIONS', [])
     if custom_index != []:
         log.debug('settings custom INDEX_EXTENSIONS')
         conf.INDEX_EXTENSIONS = custom_index
     #ADDITIONAL MAPPINGS
     log.debug('adding extra into INDEX_EXTENSIONS')
     extra_index = getattr(rcext, 'EXTRA_INDEX_EXTENSIONS', [])
     conf.INDEX_EXTENSIONS.extend(extra_index)
     # auto check if the module is not missing any data, set to default if is
     # this will help autoupdate new feature of rcext module
     from rhodecode.config import rcextensions
     for attr in dir(rcextensions):
         if attr.startswith('_') or hasattr(rcext, attr):
             continue
         setattr(rcext, attr, getattr(rcextensions, attr))
 def get_custom_lexer(extension):
     """
     Look up ``extension`` in the EXTRA_LEXERS mapping of the loaded
     rcextensions module and return the matching pygments lexer; None is
     returned when no extensions are loaded or the extension is not mapped.
     """
     import rhodecode
     from pygments import lexers
     #check if we didn't define this extension as other lexer
     loaded = rhodecode.EXTENSIONS
     if not loaded or extension not in loaded.EXTRA_LEXERS:
         return None
     return lexers.get_lexer_by_name(loaded.EXTRA_LEXERS[extension])
 #==============================================================================
 # TEST FUNCTIONS AND CREATORS
 #==============================================================================
 def create_test_index(repo_location, config, full_index):
     """
     Makes default test index

     The index directory is created when missing. Indexing is skipped
     silently when the index lock is already held.

     :param repo_location: location of repositories, handed over to the
         indexing daemon
     :param config: test config
     :param full_index: handed over to the indexing daemon's ``run``
     """
     from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
     from rhodecode.lib.pidlock import DaemonLock, LockHeld
     index_location = os.path.join(config['app_conf']['index_dir'])
     if not os.path.exists(index_location):
         os.makedirs(index_location)
     try:
         l = DaemonLock(file_=jn(dn(index_location), 'make_index.lock'))
         try:
             WhooshIndexingDaemon(index_location=index_location,
                                  repo_location=repo_location)\
                 .run(full_index=full_index)
         finally:
             # give the lock back even if indexing blew up, so the next run
             # is not locked out
             l.release()
     except LockHeld:
         pass
 def create_test_env(repos_test_path, config):
     """
     Prepare everything the test suite needs: a freshly created database
     and the hg/git test repositories unpacked into the tmp dir.

     :param repos_test_path: directory for test repositories, created when
         it does not exist yet
     :param config: test config
     """
     from rhodecode.lib.db_manage import DbManage
     from rhodecode.tests import HG_REPO, GIT_REPO, TESTS_TMP_PATH
     # PART ONE create db
     dbconf = config['sqlalchemy.db1.url']
     log.debug('making test db %s' % dbconf)
     # create test dir if it doesn't exist
     if not os.path.isdir(repos_test_path):
         log.debug('Creating testdir %s' % repos_test_path)
         os.makedirs(repos_test_path)
     dbmanage = DbManage(log_sql=True, dbconf=dbconf, root=config['here'],
                         tests=True)
     dbmanage.create_tables(override=True)
     prompted_settings = dbmanage.config_prompt(repos_test_path)
     dbmanage.create_settings(prompted_settings)
     dbmanage.create_default_user()
     dbmanage.admin_prompt()
     dbmanage.create_permissions()
     dbmanage.populate_default_permissions()
     Session().commit()
     # PART TWO make test repo
     log.debug('making test vcs repositories')
     app_conf = config['app_conf']
     idx_path = app_conf['index_dir']
     data_path = app_conf['cache_dir']
     #clean index and data
     for stale_path in (idx_path, data_path):
         if stale_path and os.path.exists(stale_path):
             log.debug('remove %s' % stale_path)
             shutil.rmtree(stale_path)
     #CREATE DEFAULT TEST REPOS
     cur_dir = dn(dn(abspath(__file__)))
     archives = (
         ("vcs_test_hg.tar.gz", HG_REPO),
         ("vcs_test_git.tar.gz", GIT_REPO),
     )
     for archive_name, repo_name in archives:
         tar = tarfile.open(jn(cur_dir, 'tests', archive_name))
         tar.extractall(jn(TESTS_TMP_PATH, repo_name))
         tar.close()
     #LOAD VCS test stuff
     from rhodecode.tests.vcs import setup_package
     setup_package()
 #==============================================================================
 # PASTER COMMANDS
 #==============================================================================
 class BasePasterCommand(Command):
     """
     Abstract Base Class for paster commands.
     The celery commands are somewhat aggressive about loading
     celery.conf, and since our module sets the `CELERY_LOADER`
     environment variable to our loader, we have to bootstrap a bit and
     make sure we've had a chance to load the pylons config off of the
     command line, otherwise everything fails.
     """
     min_args = 1
     min_args_error = "Please provide a paster config file as an argument."
     takes_config_file = 1
     requires_config_file = True

     def notify_msg(self, msg, log=False):
         """Hand the message over to the given logger, if any

         :param msg: message to log
         :param log: a ``logging.Logger`` (the message is logged with
             ``info``) or any callable taking the message; nothing happens
             for a false value
         """
         if not log:
             return
         # `logging` itself is a module and cannot be used with isinstance
         # (that raised TypeError for every logger passed in)
         if isinstance(log, logging.Logger):
             log.info(msg)
         elif callable(log):
             log(msg)

     def run(self, args):
         """
         Overrides Command.run
         Checks for a config file argument and loads it.

         :param args: command line arguments, the first one being the path
             to the config file
         :raises BadCommand: when fewer than ``min_args`` arguments are given
         """
         if len(args) < self.min_args:
             raise BadCommand(
                 self.min_args_error % {'min_args': self.min_args,
                                        'actual_args': len(args)})
         # Decrement because we're going to lob off the first argument.
         # @@ This is hacky
         self.min_args -= 1
         self.bootstrap_config(args[0])
         self.update_parser()
         return super(BasePasterCommand, self).run(args[1:])

     def update_parser(self):
         """
         Abstract method.  Allows for the class's parser to be updated
         before the superclass's `run` method is called.  Necessary to
         allow options/arguments to be passed through to the underlying
         celery command.
         """
         raise NotImplementedError("Abstract Method.")

     def bootstrap_config(self, conf):
         """
         Loads the pylons configuration.

         :param conf: path to the ini file; its real path is remembered in
             ``self.path_to_ini_file``
         """
         from pylons import config as pylonsconfig
         self.path_to_ini_file = os.path.realpath(conf)
         conf = paste.deploy.appconfig('config:' + self.path_to_ini_file)
         pylonsconfig.init_app(conf.global_conf, conf.local_conf)

     def _init_session(self):
         """
         Inits SqlAlchemy Session

         Relies on ``self.path_to_ini_file``, which is set by
         ``bootstrap_config``.
         """
         logging.config.fileConfig(self.path_to_ini_file)
         from pylons import config
         from rhodecode.model import init_model
         from rhodecode.lib.utils2 import engine_from_config
         #get to remove repos !!
         add_cache(config)
         engine = engine_from_config(config, 'sqlalchemy.db1.')
         init_model(engine)
 def check_git_version():
     """
     Checks what version of git is installed in system, and issues a warning
     if it's too old for RhodeCode to properly work.

     Warnings are only logged when 'git' is one of the enabled BACKENDS.

     :returns: the detected version as ``StrictVersion``; ``0.0.0`` when it
         could not be detected or parsed
     """
     from rhodecode import BACKENDS
     from rhodecode.lib.vcs.backends.git.repository import GitRepository
     from distutils.version import StrictVersion
     # git is called exactly once, with the _bare and _safe flags; an extra
     # plain call here would run it a second time without them
     stdout, stderr = GitRepository._run_git_command('--version', _bare=True,
                                                     _safe=True)
     ver = (stdout.split(' ')[-1] or '').strip() or '0.0.0'
     if len(ver.split('.')) > 3:
         #StrictVersion needs to be only 3 element type
         ver = '.'.join(ver.split('.')[:3])
     try:
         _ver = StrictVersion(ver)
     except Exception:
         # unparsable version string, report it through the stderr channel
         _ver = StrictVersion('0.0.0')
         stderr = traceback.format_exc()
     req_ver = '1.7.4'
     to_old_git = _ver < StrictVersion(req_ver)
     if 'git' in BACKENDS:
         log.debug('GIT version detected: %s' % stdout)
         if stderr:
             log.warning('Unable to detect git version org error was:%r' % stderr)
         elif to_old_git:
             log.warning('RhodeCode detected git version %s, which is too old '
                         'for the system to function properly. Make sure '
                         'its version is at least %s' % (ver, req_ver))
     return _ver
 @decorator.decorator
 def jsonify(func, *args, **kwargs):
     """Action decorator serializing the wrapped action's result as JSON

     The response content-type is set to 'application/json' (utf-8) before
     the action is called and whatever it returns is dumped as JSON. A
     warning is issued and logged when the result is a list or a tuple.
     """
     from pylons.decorators.util import get_pylons
     from rhodecode.lib.ext_json import json
     response = get_pylons(args).response
     response.headers['Content-Type'] = 'application/json; charset=utf-8'
     result = func(*args, **kwargs)
     if isinstance(result, (list, tuple)):
         msg = ("JSON responses with Array envelopes are susceptible to "
                "cross-site data leak attacks, see "
                "http://wiki.pylonshq.com/display/pylonsfaq/Warnings")
         warnings.warn(msg, Warning, 2)
         log.warning(msg)
     log.debug("Returning JSON wrapped action output")
     return json.dumps(result, encoding='utf-8')

rhodecode/lib/vcs/backends/git/repository.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 """
     vcs.backends.git
     ~~~~~~~~~~~~~~~~
     Git backend implementation.
     :created_on: Apr 8, 2010
     :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 """
 import os
 import re
 import time
 import posixpath
 import logging
 import traceback
 import urllib
 import urllib2
 from dulwich.repo import Repo, NotGitRepository
 from dulwich.objects import Tag
 from string import Template
 import rhodecode
 from rhodecode.lib.vcs.backends.base import BaseRepository
 from rhodecode.lib.vcs.exceptions import BranchDoesNotExistError
 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
 from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
 from rhodecode.lib.vcs.exceptions import RepositoryError
 from rhodecode.lib.vcs.exceptions import TagAlreadyExistError
 from rhodecode.lib.vcs.exceptions import TagDoesNotExistError
 from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
 from rhodecode.lib.vcs.utils.lazy import LazyProperty, ThreadLocalLazyProperty
 from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
 from rhodecode.lib.vcs.utils.paths import abspath
 from rhodecode.lib.vcs.utils.paths import get_user_home
 from .workdir import GitWorkdir
 from .changeset import GitChangeset
 from .inmemory import GitInMemoryChangeset
 from .config import ConfigFile
 from rhodecode.lib import subprocessio
 log = logging.getLogger(__name__)
 class GitRepository(BaseRepository):
     """
     Git repository backend.
     """
     DEFAULT_BRANCH_NAME = 'master'
     scm = 'git'
     def __init__(self, repo_path, create=False, src_url=None,
                  update_after_clone=False, bare=False):
         """
         Opens, creates or clones the git repository located at ``repo_path``.

         :param repo_path: filesystem location of the repository
         :param create: passed to ``_get_repo``; create a new repository
         :param src_url: passed to ``_get_repo``; url to clone from
         :param update_after_clone: passed to ``_get_repo``
         :param bare: passed to ``_get_repo``; create/clone as bare
         :raises RepositoryError: propagated from ``_get_repo``
         """
         self.path = abspath(repo_path)
         repo = self._get_repo(create, src_url, update_after_clone, bare)
         self.bare = repo.bare
         # choose the config location from what the repository actually is
         # (``self.bare``) and not from the ``bare`` argument: the argument
         # stays at its default when an existing bare repository is opened
         if self.bare:
             repo_config = abspath(self.path, 'config')
         else:
             repo_config = abspath(self.path, '.git', 'config')
         self._config_files = [
             repo_config,
             abspath(get_user_home(), '.gitconfig'),
         ]
     @ThreadLocalLazyProperty
     def _repo(self):
         """
         Builds the dulwich ``Repo`` for ``self.path`` and attaches a
         Mercurial ``ui`` instance to it under the ``ui`` attribute.
         """
         dulwich_repo = Repo(self.path)
         # temporary: a new ui is always created here, later on it is meant
         # to be moved to the constructor
         from mercurial.ui import ui
         # the "FAKE" ui object patched onto the repo instance adds a
         # compatibility layer with Mercurial
         dulwich_repo.ui = ui()
         return dulwich_repo
     @property
     def head(self):
         """
         Result of the underlying repo's ``head()`` call, or ``None`` when
         that call raises ``KeyError``.
         """
         try:
             head_id = self._repo.head()
         except KeyError:
             head_id = None
         return head_id
     @LazyProperty
     def revisions(self):
         """
         List of revision ids in ascending order, as produced by
         ``_get_all_revisions``.  The attribute is lazy, which lets
         external tools inject shas (e.g. from a cache).
         """
         all_revisions = self._get_all_revisions()
         return all_revisions
     @classmethod
     def _run_git_command(cls, cmd, **opts):
         """
         Runs given ``cmd`` as git command and returns tuple
         (stdout, stderr).

         :param cmd: git command to be executed, a string or a list of
             arguments
         :param opts: env options to pass into Subprocess command. Two
             special keys are consumed here and not passed on (only their
             presence matters, not their value):
             ``_bare`` - do not add the default ``-c core.quotepath=false``
             options to the call;
             ``_safe`` - do not raise when the command cannot be run,
             return ``('', error)`` instead
         :raises RepositoryError: if the command could not be run and
             ``_safe`` was not given
         """
         if '_bare' in opts:
             del opts['_bare']
             _copts = []
         else:
             _copts = ['-c', 'core.quotepath=false', ]
         safe_call = '_safe' in opts
         if safe_call:
             del opts['_safe']
         _str_cmd = isinstance(cmd, basestring)
         if _str_cmd:
             cmd = [cmd]
         # work on a copy, so the environment of the running process itself
         # is left untouched
         gitenv = dict(os.environ)
         # need to clean fix GIT_DIR !
         gitenv.pop('GIT_DIR', None)
         gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
         _git_path = rhodecode.CONFIG.get('git_path', 'git')
         cmd = [_git_path] + _copts + cmd
         if _str_cmd:
             cmd = ' '.join(cmd)
         try:
             _opts = dict(
                 env=gitenv,
                 shell=False,
             )
             _opts.update(opts)
             p = subprocessio.SubprocessIOChunker(cmd, **_opts)
         except (EnvironmentError, OSError) as err:
             log.error(traceback.format_exc())
             tb_err = ("Couldn't run git command (%s).\n"
                       "Original error was:%s\n" % (cmd, err))
             log.error(tb_err)
             if safe_call:
                 # no exc on failure, hand the error back as "stderr"
                 return '', err
             else:
                 raise RepositoryError(tb_err)
         return ''.join(p.output), ''.join(p.error)
     def run_git_command(self, cmd):
         """
         Runs ``cmd`` through ``_run_git_command``; when ``self.path`` is an
         existing directory it is passed along as ``cwd``.

         :param cmd: git command to be executed
         """
         call_opts = dict(cwd=self.path) if os.path.isdir(self.path) else {}
         return self._run_git_command(cmd, **call_opts)
     @classmethod
     def _check_url(cls, url):
         """
         Function will check given url and try to verify if it's a valid
         link. Sometimes it may happen that mercurial will issue basic
         auth request that can cause whole API to hang when used from python
         or other external calls.

         :param url: url (or local path) of the repository to check
         :returns: ``True`` for local directories and ``file:`` urls,
             otherwise whether the ``info/refs`` request was answered with
             code 200
         :raises urllib2.URLError: when the request fails for any reason
         """
         from mercurial.util import url as Url
         # those authhandlers are patched for python 2.6.5 bug and
         # infinite looping when given invalid resources
         from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
         # check first if it's not a local url
         if os.path.isdir(url) or url.startswith('file:'):
             return True
         # drop a "<prefix>+" standing in front of the scheme. Only the
         # scheme part is inspected: ``find`` gives -1 when there is no
         # '://', and slicing with -1 would look for '+' (and cut) inside
         # the url body
         scheme_end = url.find('://')
         if scheme_end != -1 and '+' in url[:scheme_end]:
             url = url[url.find('+') + 1:]
         handlers = []
         test_uri, authinfo = Url(url).authinfo()
         if not test_uri.endswith('info/refs'):
             test_uri = test_uri.rstrip('/') + '/info/refs'
         if authinfo:
             # create a password manager
             passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
             passmgr.add_password(*authinfo)
             handlers.extend((httpbasicauthhandler(passmgr),
                              httpdigestauthhandler(passmgr)))
         o = urllib2.build_opener(*handlers)
         o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
         q = {"service": 'git-upload-pack'}
         qs = '?%s' % urllib.urlencode(q)
         cu = "%s%s" % (test_uri, qs)
         req = urllib2.Request(cu, None, {})
         try:
             resp = o.open(req)
             return resp.code == 200
         except Exception as e:
             # means it cannot be cloned
             raise urllib2.URLError("[%s] %s" % (url, e))
     def _get_repo(self, create, src_url=None, update_after_clone=False,
                   bare=False):
         """
         Returns the dulwich repo for ``self.path``: cloned from ``src_url``,
         freshly initialized, or the already existing one (``self._repo``).

         :param create: create the repository instead of opening it
         :param src_url: when given (together with ``create``), clone from it
         :param update_after_clone: passed on to ``clone``
         :param bare: initialize/clone the repository as bare
         :raises RepositoryError: if the location already exists while
             ``create`` is set, if ``src_url`` is given without ``create``,
             or when ``NotGitRepository``/``OSError`` is raised underneath
         """
         if create and os.path.exists(self.path):
             raise RepositoryError("Location already exist")
         if src_url and not create:
             raise RepositoryError("Create should be set to True if src_url is "
                                   "given (clone operation creates repository)")
         try:
             if not create:
                 return self._repo
             if src_url:
                 GitRepository._check_url(src_url)
                 self.clone(src_url, update_after_clone, bare)
                 return Repo(self.path)
             os.mkdir(self.path)
             make_repo = Repo.init_bare if bare else Repo.init
             return make_repo(self.path)
         except (NotGitRepository, OSError) as err:
             raise RepositoryError(err)
     def _get_all_revisions(self):
         """
         Lists revision ids with ``git rev-list``, one per output line.  An
         empty list is returned when the repo has no head (``KeyError``) or
         when running the command raises ``RepositoryError``.
         """
         # asking for the head first is cheaper than spawning a subprocess
         # which fails on an empty repository anyway
         try:
             self._repo.head()
         except KeyError:
             return []
         try:
             stdout, _stderr = self.run_git_command(
                 'rev-list --all --reverse --date-order')
         except RepositoryError:
             # Can be raised for empty repositories
             return []
         return stdout.splitlines()
     def _get_all_revisions2(self):
         """
         Alternate implementation using dulwich: walks the repository
         starting from every parsed ref that is not a tag (type ``'T'``) and
         collects the commit ids.
         """
         non_tag_shas = [ref[0] for ref in self._parsed_refs.itervalues()
                         if ref[1] != 'T']
         walker = self._repo.get_walker(include=non_tag_shas)
         return [entry.commit.id for entry in walker]
     def _get_revision(self, revision):
         """
         Resolves ``revision`` to a changeset id of this repository.

         :param revision: one of ``None``, ``''``, ``'tip'``, ``'HEAD'``,
             ``'head'`` or ``-1`` (last entry of ``self.revisions``); an
             integer, a digit string shorter than 12 characters or a string
             of zeros (index into ``self.revisions``); a branch/tag name; or
             a changeset id
         :raises EmptyRepositoryError: if there are no revisions at all
         :raises ChangesetDoesNotExistError: if ``revision`` cannot be
             resolved
         """
         # short (12) or full (40) hexadecimal id; the alternatives have to
         # be grouped with parentheses - square brackets would build
         # character classes and accept anything starting with 12 hex chars
         pattern = re.compile(r'^([0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')
         is_bstr = lambda o: isinstance(o, (str, unicode))
         # value made of '0' characters only
         is_null = lambda o: len(o) == o.count('0')
         if not self.revisions:
             raise EmptyRepositoryError("There are no changesets yet")
         if revision in (None, '', 'tip', 'HEAD', 'head', -1):
             revision = self.revisions[-1]
         if ((is_bstr(revision) and revision.isdigit() and len(revision) < 12)
             or isinstance(revision, int) or is_null(revision)):
             # numeric revision - treat it as an index
             try:
                 revision = self.revisions[int(revision)]
             except (IndexError, ValueError):
                 raise ChangesetDoesNotExistError("Revision %r does not exist "
                     "for this repository %s" % (revision, self))
         elif is_bstr(revision):
             # get by branch/tag name
             _ref_revision = self._parsed_refs.get(revision)
             _tags_shas = self.tags.values()
             if _ref_revision:  # and _ref_revision[1] in ['H', 'RH', 'T']:
                 return _ref_revision[0]
             # maybe it's a tag ? we don't have them in self.revisions
             elif revision in _tags_shas:
                 return _tags_shas[_tags_shas.index(revision)]
             elif not pattern.match(revision) or revision not in self.revisions:
                 raise ChangesetDoesNotExistError("Revision %r does not exist "
                     "for this repository %s" % (revision, self))
         # Ensure we return full id
         if not pattern.match(str(revision)):
             raise ChangesetDoesNotExistError("Given revision %r not recognized"
                 % revision)
         return revision
     def _get_archives(self, archive_name='tip'):
         for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
                 yield {"type": i[0], "extension": i[1], "node": archive_name}
     def _get_url(self, url):
         """
         Returns normalized url. If schema is not given, would fall to
         filesystem (``file:///``) schema.
         """
         url = str(url)
         if url != 'default' and not '://' in url:
             url = ':///'.join(('file', url))
         return url
     @LazyProperty
     def name(self):
         """Base name (``os.path``) of ``self.path``."""
         return os.path.split(self.path)[1]
     @LazyProperty
     def last_change(self):
         """
         Returns last change made on this repository, built by
         ``date_fromtimestamp`` from ``_get_mtime()`` and the second item of
         ``makedate()``.
         """
         mtime = self._get_mtime()
         makedate_extra = makedate()[1]
         return date_fromtimestamp(mtime, makedate_extra)
     def _get_mtime(self):
         """
         Timestamp (``time.mktime``) of the date of the changeset returned by
         ``get_changeset()``.  When that raises ``RepositoryError`` the
         modification time of the ``index`` file is used, or of the ``HEAD``
         file if there is no ``index``.
         """
         try:
             changeset_date = self.get_changeset().date
             return time.mktime(changeset_date.timetuple())
         except RepositoryError:
             # fallback to filesystem; a bare repository keeps those files
             # directly under its path
             git_dir = '' if self.bare else '.git'
             index_file = os.path.join(self.path, git_dir, "index")
             head_file = os.path.join(self.path, git_dir, "HEAD")
             if os.path.exists(index_file):
                 return os.stat(index_file).st_mtime
             return os.stat(head_file).st_mtime
     @LazyProperty
     def description(self):
         """
         Content of the repository's ``description`` file passed through
         ``safe_unicode``, or ``u'unknown'`` when there is no such file.
         """
         idx_loc = '' if self.bare else '.git'
         undefined_description = u'unknown'
         description_path = os.path.join(self.path, idx_loc, 'description')
         if not os.path.isfile(description_path):
             return undefined_description
         # close the file explicitly instead of leaving the open handle to
         # the garbage collector
         description_file = open(description_path)
         try:
             return safe_unicode(description_file.read())
         finally:
             description_file.close()
     @LazyProperty
     def contact(self):
         """Always the placeholder value ``u'Unknown'``."""
         return u'Unknown'
     @property
     def branches(self):
         """
         ``OrderedDict`` of branch name -> sha built from the parsed refs of
         type ``'H'``, sorted by name.  An empty dict is returned when there
         are no revisions.
         """
         if not self.revisions:
             return {}
         head_refs = [(ref_name, ref_data[0])
                      for ref_name, ref_data in self._parsed_refs.iteritems()
                      if ref_data[1] == 'H']
         head_refs.sort(key=lambda pair: pair[0])
         return OrderedDict(head_refs)
     @LazyProperty
     def tags(self):
         """Tags of this repository, as computed by ``_get_tags``."""
         tag_map = self._get_tags()
         return tag_map
     def _get_tags(self):
         """
         ``OrderedDict`` of tag name -> sha built from the parsed refs of
         type ``'T'``, sorted by name in descending order.  An empty dict is
         returned when there are no revisions.
         """
         if not self.revisions:
             return {}
         tag_refs = [(ref_name, ref_data[0])
                     for ref_name, ref_data in self._parsed_refs.iteritems()
                     if ref_data[1] == 'T']
         tag_refs.sort(key=lambda pair: pair[0], reverse=True)
         return OrderedDict(tag_refs)
     def tag(self, name, user, revision=None, message=None, date=None,
             **kwargs):
         """
         Creates a tag ref for the given ``revision`` and returns the tagged
         changeset.

         :param name: name for new tag
         :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
         :param revision: changeset id for which new tag would be created
         :param message: message of the tag's commit
         :param date: date of tag's commit

         :raises TagAlreadyExistError: if tag with same name already exists
         """
         if name in self.tags:
             raise TagAlreadyExistError("Tag %s already exists" % name)
         changeset = self.get_changeset(revision)
         if not message:
             # NOTE: the message is prepared, but nothing below uses it
             message = "Added tag %s for commit %s" % (name,
                                                       changeset.raw_id)
         tag_ref = "refs/tags/%s" % name
         self._repo.refs[tag_ref] = changeset._commit.id
         # refresh the cached refs and tags so they include the new tag
         self._parsed_refs = self._get_parsed_refs()
         self.tags = self._get_tags()
         return changeset
     def remove_tag(self, name, user, message=None, date=None):
         """
         Removes tag with the given ``name`` by deleting its ref file, then
         refreshes the cached refs and tags.

         :param name: name of the tag to be removed
         :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
         :param message: message of the tag's removal commit
         :param date: date of tag's removal commit

         :raises TagDoesNotExistError: if tag with given name does not exists
         :raises RepositoryError: when ``OSError`` is raised while removing
         """
         if name not in self.tags:
             raise TagDoesNotExistError("Tag %s does not exist" % name)
         refs_root = self._repo.refs.path
         tag_file = posixpath.join(refs_root, 'refs', 'tags', name)
         try:
             os.remove(tag_file)
             self._parsed_refs = self._get_parsed_refs()
             self.tags = self._get_tags()
         except OSError as os_err:
             raise RepositoryError(os_err.strerror)
     @LazyProperty
     def _parsed_refs(self):
         """Parsed refs of this repository, see ``_get_parsed_refs``."""
         parsed = self._get_parsed_refs()
         return parsed
     def _get_parsed_refs(self):
         """
         Maps short ref names (the part after the matched prefix) to
         ``[sha, type]`` lists, where type is ``'H'`` for ``refs/heads/``,
         ``'RH'`` for ``refs/remotes/origin/`` and ``'T'`` for ``refs/tags/``.
         Refs under any other prefix are left out.
         """
         refs = self._repo.get_refs()
         keys = [('refs/heads/', 'H'),
                 ('refs/remotes/origin/', 'RH'),
                 ('refs/tags/', 'T')]
         _refs = {}
         for ref, sha in refs.iteritems():
             for k, type_ in keys:
                 if not ref.startswith(k):
                     continue
                 if type_ == 'T':
                     obj = self._repo.get_object(sha)
                     if isinstance(obj, Tag):
                         # the ref points at a dulwich ``Tag`` object: store
                         # the sha held in its ``object`` pair, reusing the
                         # object fetched above instead of fetching it again
                         sha = obj.object[1]
                 _refs[ref[len(k):]] = [sha, type_]
                 break
         return _refs
     def _heads(self, reverse=False):
         """
         Collects refs under ``refs/heads/`` and ``refs/remotes/origin/``,
         leaving out the ones named ``HEAD``.

         :param reverse: when true the mapping is name -> sha, otherwise
             (default) it is sha -> name
         """
         prefixes = ('refs/heads/', 'refs/remotes/origin/')
         name_to_sha = {}
         for ref, sha in self._repo.get_refs().items():
             for prefix in prefixes:
                 if not ref.startswith(prefix):
                     continue
                 short_name = ref[len(prefix):]
                 if short_name != 'HEAD':
                     name_to_sha[short_name] = sha
         if reverse:
             return name_to_sha
         return dict((sha, short_name)
                     for short_name, sha in name_to_sha.iteritems())
     def get_changeset(self, revision=None):
         """
         Returns ``GitChangeset`` object representing commit from git repository
         at the given revision or head (most recent commit) if None given.
         A ``GitChangeset`` passed in as ``revision`` is handed back as is.
         """
         if isinstance(revision, GitChangeset):
             return revision
         return GitChangeset(repository=self,
                             revision=self._get_revision(revision))
     def get_changesets(self, start=None, end=None, start_date=None,
            end_date=None, branch_name=None, reverse=False):
         """
         Returns iterator of ``GitChangeset`` objects from start to end (both
         are inclusive), in ascending date order (unless ``reverse`` is set).
         :param start: changeset ID, as str; first returned changeset
         :param end: changeset ID, as str; last returned changeset
         :param start_date: if specified, changesets with commit date less than
           ``start_date`` would be filtered out from returned set
         :param end_date: if specified, changesets with commit date greater than
           ``end_date`` would be filtered out from returned set
         :param branch_name: if specified, changesets not reachable from given
           branch would be filtered out from returned set
         :param reverse: if ``True``, returned generator would be reversed
           (meaning that returned changesets would have descending date order)
         :raise BranchDoesNotExistError: If given ``branch_name`` does not
             exist.
         :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
           ``end`` could not be found.
         """
         if branch_name and branch_name not in self.branches:
             raise BranchDoesNotExistError("Branch '%s' not found" \
                                           % branch_name)
         # %H at format means (full) commit hash, initial hashes are retrieved
         # in ascending date order
         cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
         cmd_params = {}
         if start_date:
             cmd_template += ' --since "$since"'
             cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
         if end_date:
             cmd_template += ' --until "$until"'
             cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
         if branch_name:
             cmd_template += ' $branch_name'
             cmd_params['branch_name'] = branch_name
         else:
             cmd_template += ' --all'
         cmd = Template(cmd_template).safe_substitute(**cmd_params)
         revs = self.run_git_command(cmd)[0].splitlines()
         start_pos = 0
         end_pos = len(revs)
         if start:
             _start = self._get_revision(start)
             try:
                 start_pos = revs.index(_start)
             except ValueError:
                 pass
         if end is not None:
             _end = self._get_revision(end)
             try:
                 end_pos = revs.index(_end)
             except ValueError:
                 pass
         if None not in [start, end] and start_pos > end_pos:
             raise RepositoryError('start cannot be after end')
         if end_pos is not None:
             end_pos += 1
         revs = revs[start_pos:end_pos]
         if reverse:
             revs = reversed(revs)

0 comments (0 inline, 0 general)