Changeset - 0e2d450feb03
[Not reviewed]
default
0 8 0
Mads Kiilerich - 10 years ago 2015-06-09 22:53:24
madski@unity3d.com
git: run external commands as list of strings so we really get correct quoting (Issue #135)

a6dfd14d4b20 from
https://bitbucket.org/conservancy/kallithea/pull-request/17/add-quotes-to-repo-urls-for-git-backend
fixed that issue but did not make it "safe". The vcs git backend still used
command strings but tried to quote them correctly ... but that approach is
almost impossible to get right.

Instead, pass a string list all the way to the subprocess module and let it do
the quoting. This also makes some of the code more simple.
8 files changed with 58 insertions and 92 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/compare.py
Show inline comments
 
@@ -127,19 +127,19 @@ class CompareController(BaseRepoControll
 
                else:
 
                    # no changesets from other repo, ancestor is the other_rev
 
                    ancestor = other_rev
 

	
 
            else:
 
                so, se = org_repo.run_git_command(
 
                    'log --reverse --pretty="format: %%H" -s %s..%s'
 
                        % (org_rev, other_rev)
 
                    ['log', '--reverse', '--pretty=format:%H',
 
                     '-s', '%s..%s' % (org_rev, other_rev)]
 
                )
 
                other_changesets = [org_repo.get_changeset(cs)
 
                              for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
 
                so, se = org_repo.run_git_command(
 
                    'merge-base %s %s' % (org_rev, other_rev)
 
                    ['merge-base', org_rev, other_rev]
 
                )
 
                ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0]
 
            org_changesets = []
 

	
 
        else:
 
            raise Exception('Bad alias only git and hg is allowed')
kallithea/lib/hooks.py
Show inline comments
 
@@ -444,27 +444,27 @@ def handle_git_receive(repo_path, revs, 
 
                if push_ref['old_rev'] == EmptyChangeset().raw_id:
 
                    # update the symbolic ref if we push new repo
 
                    if repo.is_empty():
 
                        repo._repo.refs.set_symbolic_ref('HEAD',
 
                                            'refs/heads/%s' % push_ref['name'])
 

	
 
                    cmd = "for-each-ref --format='%(refname)' 'refs/heads/*'"
 
                    cmd = ['for-each-ref', '--format=%(refname)','refs/heads/*']
 
                    heads = repo.run_git_command(cmd)[0]
 
                    cmd = ['log', push_ref['new_rev'],
 
                           '--reverse', '--pretty=format:%H', '--not']
 
                    heads = heads.replace(push_ref['ref'], '')
 
                    heads = ' '.join(map(lambda c: c.strip('\n').strip(),
 
                                         heads.splitlines()))
 
                    cmd = (('log %(new_rev)s' % push_ref) +
 
                           ' --reverse --pretty=format:"%H" --not ' + heads)
 
                    for l in heads.splitlines():
 
                        cmd.append(l.strip())
 
                    git_revs += repo.run_git_command(cmd)[0].splitlines()
 

	
 
                elif push_ref['new_rev'] == EmptyChangeset().raw_id:
 
                    #delete branch case
 
                    git_revs += ['delete_branch=>%s' % push_ref['name']]
 
                else:
 
                    cmd = (('log %(old_rev)s..%(new_rev)s' % push_ref) +
 
                           ' --reverse --pretty=format:"%H"')
 
                    cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref,
 
                           '--reverse', '--pretty=format:%H']
 
                    git_revs += repo.run_git_command(cmd)[0].splitlines()
 

	
 
            elif _type == 'tags':
 
                git_revs += ['tag=>%s' % push_ref['name']]
 

	
 
        log_push_action(baseui, repo, _git_revs=git_revs)
kallithea/lib/middleware/pygrack.py
Show inline comments
 
@@ -79,19 +79,18 @@ class GitRepository(object):
 
        # blows up if you sprinkle "flush" (0000) as "0001\n".
 
        # It reads binary, per number of bytes specified.
 
        # if you do add '\n' as part of data, count it.
 
        server_advert = '# service=%s' % git_command
 
        packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
 
        _git_path = kallithea.CONFIG.get('git_path', 'git')
 
        cmd = [_git_path, git_command[4:],
 
               '--stateless-rpc', '--advertise-refs', self.content_path]
 
        log.debug('handling cmd %s', cmd)
 
        try:
 
            out = subprocessio.SubprocessIOChunker(
 
                r'%s %s --stateless-rpc --advertise-refs "%s"' % (
 
                    _git_path, git_command[4:], self.content_path),
 
                starting_values=[
 
                    packet_len + server_advert + '0000'
 
                ]
 
            out = subprocessio.SubprocessIOChunker(cmd,
 
                starting_values=[packet_len + server_advert + '0000']
 
            )
 
        except EnvironmentError, e:
 
            log.error(traceback.format_exc())
 
            raise exc.HTTPExpectationFailed()
 
        resp = Response()
 
        resp.content_type = 'application/x-%s-advertisement' % str(git_command)
 
@@ -115,27 +114,23 @@ class GitRepository(object):
 
        if 'CONTENT_LENGTH' in environ:
 
            inputstream = FileWrapper(environ['wsgi.input'],
 
                                      request.content_length)
 
        else:
 
            inputstream = environ['wsgi.input']
 

	
 
        gitenv = dict(os.environ)
 
        # forget all configs
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 
        cmd = [_git_path, git_command[4:], '--stateless-rpc', self.content_path]
 
        log.debug('handling cmd %s', cmd)
 
        try:
 
            gitenv = os.environ
 
            # forget all configs
 
            gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 
            opts = dict(
 
                env=gitenv,
 
                cwd=self.content_path,
 
            )
 
            cmd = r'%s %s --stateless-rpc "%s"' % (_git_path, git_command[4:],
 
                                                   self.content_path),
 
            log.debug('handling cmd %s' % cmd)
 
            out = subprocessio.SubprocessIOChunker(
 
                cmd,
 
                inputstream=inputstream,
 
                **opts
 
                env=gitenv,
 
                cwd=self.content_path,
 
            )
 
        except EnvironmentError, e:
 
            log.error(traceback.format_exc())
 
            raise exc.HTTPExpectationFailed()
 

	
 
        if git_command in [u'git-receive-pack']:
kallithea/lib/utils.py
Show inline comments
 
@@ -796,13 +796,13 @@ def check_git_version():
 
    from kallithea.lib.vcs.conf import settings
 
    from distutils.version import StrictVersion
 

	
 
    if 'git' not in BACKENDS:
 
        return None
 

	
 
    stdout, stderr = GitRepository._run_git_command('--version', _bare=True,
 
    stdout, stderr = GitRepository._run_git_command(['--version'], _bare=True,
 
                                                    _safe=True)
 

	
 
    m = re.search("\d+.\d+.\d+", stdout)
 
    if m:
 
        ver = StrictVersion(m.group(0))
 
    else:
kallithea/lib/vcs/backends/git/changeset.py
Show inline comments
 
@@ -186,13 +186,13 @@ class GitChangeset(BaseChangeset):
 
    def children(self):
 
        """
 
        Returns list of children changesets.
 
        """
 
        rev_filter = settings.GIT_REV_FILTER
 
        so, se = self.repository.run_git_command(
 
            "rev-list %s --children" % (rev_filter)
 
            ['rev-list', rev_filter, '--children']
 
        )
 

	
 
        children = []
 
        pat = re.compile(r'^%s' % self.raw_id)
 
        for l in so.splitlines():
 
            if pat.match(l):
 
@@ -285,18 +285,18 @@ class GitChangeset(BaseChangeset):
 
        """
 
        self._get_filectx(path)
 
        cs_id = safe_str(self.id)
 
        f_path = safe_str(path)
 

	
 
        if limit:
 
            cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % (
 
                      safe_int(limit, 0), cs_id, f_path)
 
            cmd = ['log', '-n', str(safe_int(limit, 0)),
 
                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 

	
 
        else:
 
            cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % (
 
                      cs_id, f_path)
 
            cmd = ['log',
 
                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 
        so, se = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(sha) for sha in ids]
 

	
 
    def get_file_history_2(self, path):
 
        """
 
@@ -318,13 +318,13 @@ class GitChangeset(BaseChangeset):
 
            lineno, sha, changeset lazy loader and line
 

	
 
        TODO: This function now uses os underlying 'git' command which is
 
        generally not good. Should be replaced with algorithm iterating
 
        commits.
 
        """
 
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
 
        cmd = ['blame', '-l', '--root', '-r', self.id, '--', path]
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for boundaries
 
        # -r sha ==> blames for the given revision
 
        so, se = self.repository.run_git_command(cmd)
 

	
 
        for i, blame_line in enumerate(so.split('\n')[:-1]):
 
@@ -468,14 +468,14 @@ class GitChangeset(BaseChangeset):
 
        return list(added.union(modified).union(deleted))
 

	
 
    @LazyProperty
 
    def _diff_name_status(self):
 
        output = []
 
        for parent in self.parents:
 
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
 
                                                                self.raw_id)
 
            cmd = ['diff', '--name-status', parent.raw_id, self.raw_id,
 
                   '--encoding=utf8']
 
            so, se = self.repository.run_git_command(cmd)
 
            output.append(so.strip())
 
        return '\n'.join(output)
 

	
 
    @LazyProperty
 
    def _changes_cache(self):
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
@@ -15,24 +15,12 @@ import time
 
import errno
 
import urllib
 
import urllib2
 
import logging
 
import posixpath
 
import string
 
import sys
 
if sys.platform == "win32":
 
    from subprocess import list2cmdline
 
    def quote(s):
 
        return list2cmdline([s])
 
else:
 
    try:
 
        # Python <=2.7
 
        from pipes import quote
 
    except ImportError:
 
        # Python 3.3+
 
        from shlex import quote
 

	
 
from dulwich.objects import Tag
 
from dulwich.repo import Repo, NotGitRepository
 
from dulwich.config import ConfigFile
 

	
 
from kallithea.lib.vcs import subprocessio
 
@@ -132,32 +120,27 @@ class GitRepository(BaseRepository):
 
        safe_call = False
 
        if '_safe' in opts:
 
            #no exc on failure
 
            del opts['_safe']
 
            safe_call = True
 

	
 
        _str_cmd = False
 
        if isinstance(cmd, basestring):
 
            cmd = [cmd]
 
            _str_cmd = True
 
        assert isinstance(cmd, list), cmd
 

	
 
        gitenv = os.environ
 
        # need to clean fix GIT_DIR !
 
        if 'GIT_DIR' in gitenv:
 
            del gitenv['GIT_DIR']
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 

	
 
        _git_path = settings.GIT_EXECUTABLE_PATH
 
        cmd = [_git_path] + _copts + cmd
 
        if _str_cmd:
 
            cmd = ' '.join(cmd)
 

	
 
        try:
 
            _opts = dict(
 
                env=gitenv,
 
                shell=True,
 
                shell=False,
 
            )
 
            _opts.update(opts)
 
            p = subprocessio.SubprocessIOChunker(cmd, **_opts)
 
        except (EnvironmentError, OSError), err:
 
            tb_err = ("Couldn't run git command (%s).\n"
 
                      "Original error was:%s\n" % (cmd, err))
 
@@ -265,13 +248,13 @@ class GitRepository(BaseRepository):
 
        try:
 
            self._repo.head()
 
        except KeyError:
 
            return []
 

	
 
        rev_filter = settings.GIT_REV_FILTER
 
        cmd = 'rev-list %s --reverse --date-order' % (rev_filter)
 
        cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
 
        try:
 
            so, se = self.run_git_command(cmd)
 
        except RepositoryError:
 
            # Can be raised for empty repositories
 
            return []
 
        return so.splitlines()
 
@@ -548,28 +531,22 @@ class GitRepository(BaseRepository):
 
        # subprocess commands
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        # %H at format means (full) commit hash, initial hashes are retrieved
 
        # in ascending date order
 
        cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
 
        cmd_params = {}
 
        cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
 
        if start_date:
 
            cmd_template += ' --since "$since"'
 
            cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
 
            cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
 
        if end_date:
 
            cmd_template += ' --until "$until"'
 
            cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
 
            cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
 
        if branch_name:
 
            cmd_template += ' $branch_name'
 
            cmd_params['branch_name'] = branch_name
 
            cmd.append(branch_name)
 
        else:
 
            rev_filter = settings.GIT_REV_FILTER
 
            cmd_template += ' %s' % (rev_filter)
 
            cmd.append(settings.GIT_REV_FILTER)
 

	
 
        cmd = string.Template(cmd_template).safe_substitute(**cmd_params)
 
        revs = self.run_git_command(cmd)[0].splitlines()
 
        start_pos = 0
 
        end_pos = len(revs)
 
        if start:
 
            _start = self._get_revision(start)
 
            try:
 
@@ -619,20 +596,20 @@ class GitRepository(BaseRepository):
 

	
 
        if hasattr(rev2, 'raw_id'):
 
            rev2 = getattr(rev2, 'raw_id')
 

	
 
        if rev1 == self.EMPTY_CHANGESET:
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['show'] + flags + [rev2])
 
            cmd = ['show'] + flags + [rev2]
 
        else:
 
            rev1 = self.get_changeset(rev1).raw_id
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
 
            cmd = ['diff'] + flags + [rev1, rev2]
 

	
 
        if path:
 
            cmd += ' -- "%s"' % path
 
            cmd += ['--', path]
 

	
 
        stdout, stderr = self.run_git_command(cmd)
 
        # TODO: don't ignore stderr
 
        # If we used 'show' command, strip first few lines (until actual diff
 
        # starts)
 
        if rev1 == self.EMPTY_CHANGESET:
 
@@ -660,39 +637,35 @@ class GitRepository(BaseRepository):
 
        url = self._get_url(url)
 
        cmd = ['clone', '-q']
 
        if bare:
 
            cmd.append('--bare')
 
        elif not update_after_clone:
 
            cmd.append('--no-checkout')
 
        cmd += ['--', quote(url), quote(self.path)]
 
        cmd = ' '.join(cmd)
 
        cmd += ['--', url, self.path]
 
        # If error occurs run_git_command raises RepositoryError already
 
        self.run_git_command(cmd)
 

	
 
    def pull(self, url):
 
        """
 
        Tries to pull changes from external location.
 
        """
 
        url = self._get_url(url)
 
        cmd = ['pull', "--ff-only", quote(url)]
 
        cmd = ' '.join(cmd)
 
        cmd = ['pull', '--ff-only', url]
 
        # If error occurs run_git_command raises RepositoryError already
 
        self.run_git_command(cmd)
 

	
 
    def fetch(self, url):
 
        """
 
        Tries to pull changes from external location.
 
        """
 
        url = self._get_url(url)
 
        so, se = self.run_git_command('ls-remote -h %s' % quote(url))
 
        refs = []
 
        so, se = self.run_git_command(['ls-remote', '-h', url])
 
        cmd = ['fetch', url, '--']
 
        for line in (x for x in so.splitlines()):
 
            sha, ref = line.split('\t')
 
            refs.append(ref)
 
        refs = ' '.join(('+%s:%s' % (r, r) for r in refs))
 
        cmd = '''fetch %s -- %s''' % (quote(url), refs)
 
            cmd.append('+%s:%s' % (ref, ref))
 
        self.run_git_command(cmd)
 

	
 
    def _update_server_info(self):
 
        """
 
        runs gits update-server-info command in this repo instance
 
        """
kallithea/lib/vcs/subprocessio.py
Show inline comments
 
@@ -339,17 +339,15 @@ class SubprocessIOChunker(object):
 

	
 
        if inputstream:
 
            input_streamer = StreamFeeder(inputstream)
 
            input_streamer.start()
 
            inputstream = input_streamer.output
 

	
 
        _shell = kwargs.get('shell', True)
 
        if isinstance(cmd, (list, tuple)):
 
            cmd = ' '.join(cmd)
 
        # Note: fragile cmd mangling has been removed for use in Kallithea
 
        assert isinstance(cmd, list), cmd
 

	
 
        kwargs['shell'] = _shell
 
        _p = subprocess.Popen(cmd, bufsize=-1,
 
                              stdin=inputstream,
 
                              stdout=subprocess.PIPE,
 
                              stderr=subprocess.PIPE,
 
                              **kwargs)
 

	
kallithea/tests/vcs/test_git.py
Show inline comments
 
@@ -679,40 +679,40 @@ class GitSpecificWithRepoTest(_BackendTe
 
    def test_paths_fast_traversing(self):
 
        cs = self.repo.get_changeset()
 
        self.assertEqual(cs.get_node('foobar/static/js/admin/base.js').content,
 
            'base')
 

	
 
    def test_workdir_get_branch(self):
 
        self.repo.run_git_command('checkout -b production')
 
        self.repo.run_git_command(['checkout', '-b', 'production'])
 
        # Regression test: one of following would fail if we don't check
 
        # .git/HEAD file
 
        self.repo.run_git_command('checkout production')
 
        self.repo.run_git_command(['checkout', 'production'])
 
        self.assertEqual(self.repo.workdir.get_branch(), 'production')
 
        self.repo.run_git_command('checkout master')
 
        self.repo.run_git_command(['checkout', 'master'])
 
        self.assertEqual(self.repo.workdir.get_branch(), 'master')
 

	
 
    def test_get_diff_runs_git_command_with_hashes(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(0, 1)
 
        self.repo.run_git_command.assert_called_once_with(
 
          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s' %
 
            (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(0), self.repo._get_revision(1)])
 

	
 
    def test_get_diff_runs_git_command_with_str_hashes(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1)
 
        self.repo.run_git_command.assert_called_once_with(
 
            'show -U%s --full-index --binary -p -M --abbrev=40 %s' %
 
            (3, self.repo._get_revision(1)))
 
            ['show', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(1)])
 

	
 
    def test_get_diff_runs_git_command_with_path_if_its_given(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(0, 1, 'foo')
 
        self.repo.run_git_command.assert_called_once_with(
 
          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s -- "foo"'
 
            % (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(0), self.repo._get_revision(1), '--', 'foo'])
 

	
 

	
 
class GitRegressionTest(_BackendTestMixin, unittest.TestCase):
 
    backend_alias = 'git'
 

	
 
    @classmethod
0 comments (0 inline, 0 general)