Changeset - 0e2d450feb03
[Not reviewed]
default
0 8 0
Mads Kiilerich - 10 years ago 2015-06-09 22:53:24
madski@unity3d.com
git: run external commands as list of strings so we really get correct quoting (Issue #135)

a6dfd14d4b20 from
https://bitbucket.org/conservancy/kallithea/pull-request/17/add-quotes-to-repo-urls-for-git-backend
fixed that issue but did not make it "safe". The vcs git backend still used
command strings but tried to quote them correctly ... but that approach is
almost impossible to get right.

Instead, pass a string list all the way to the subprocess module and let it do
the quoting. This also makes some of the code more simple.
8 files changed with 58 insertions and 92 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/compare.py
Show inline comments
 
@@ -130,13 +130,13 @@ class CompareController(BaseRepoControll
 

	
 
            else:
 
                so, se = org_repo.run_git_command(
 
                    'log --reverse --pretty="format: %%H" -s %s..%s'
 
                        % (org_rev, other_rev)
 
                    ['log', '--reverse', '--pretty=format:%H',
 
                     '-s', '%s..%s' % (org_rev, other_rev)]
 
                )
 
                other_changesets = [org_repo.get_changeset(cs)
 
                              for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
 
                so, se = org_repo.run_git_command(
 
                    'merge-base %s %s' % (org_rev, other_rev)
 
                    ['merge-base', org_rev, other_rev]
 
                )
 
                ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0]
 
            org_changesets = []
kallithea/lib/hooks.py
Show inline comments
 
@@ -447,21 +447,21 @@ def handle_git_receive(repo_path, revs, 
 
                        repo._repo.refs.set_symbolic_ref('HEAD',
 
                                            'refs/heads/%s' % push_ref['name'])
 

	
 
                    cmd = "for-each-ref --format='%(refname)' 'refs/heads/*'"
 
                    cmd = ['for-each-ref', '--format=%(refname)','refs/heads/*']
 
                    heads = repo.run_git_command(cmd)[0]
 
                    cmd = ['log', push_ref['new_rev'],
 
                           '--reverse', '--pretty=format:%H', '--not']
 
                    heads = heads.replace(push_ref['ref'], '')
 
                    heads = ' '.join(map(lambda c: c.strip('\n').strip(),
 
                                         heads.splitlines()))
 
                    cmd = (('log %(new_rev)s' % push_ref) +
 
                           ' --reverse --pretty=format:"%H" --not ' + heads)
 
                    for l in heads.splitlines():
 
                        cmd.append(l.strip())
 
                    git_revs += repo.run_git_command(cmd)[0].splitlines()
 

	
 
                elif push_ref['new_rev'] == EmptyChangeset().raw_id:
 
                    #delete branch case
 
                    git_revs += ['delete_branch=>%s' % push_ref['name']]
 
                else:
 
                    cmd = (('log %(old_rev)s..%(new_rev)s' % push_ref) +
 
                           ' --reverse --pretty=format:"%H"')
 
                    cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref,
 
                           '--reverse', '--pretty=format:%H']
 
                    git_revs += repo.run_git_command(cmd)[0].splitlines()
 

	
 
            elif _type == 'tags':
kallithea/lib/middleware/pygrack.py
Show inline comments
 
@@ -82,13 +82,12 @@ class GitRepository(object):
 
        server_advert = '# service=%s' % git_command
 
        packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
 
        _git_path = kallithea.CONFIG.get('git_path', 'git')
 
        cmd = [_git_path, git_command[4:],
 
               '--stateless-rpc', '--advertise-refs', self.content_path]
 
        log.debug('handling cmd %s', cmd)
 
        try:
 
            out = subprocessio.SubprocessIOChunker(
 
                r'%s %s --stateless-rpc --advertise-refs "%s"' % (
 
                    _git_path, git_command[4:], self.content_path),
 
                starting_values=[
 
                    packet_len + server_advert + '0000'
 
                ]
 
            out = subprocessio.SubprocessIOChunker(cmd,
 
                starting_values=[packet_len + server_advert + '0000']
 
            )
 
        except EnvironmentError, e:
 
            log.error(traceback.format_exc())
 
@@ -118,21 +117,17 @@ class GitRepository(object):
 
        else:
 
            inputstream = environ['wsgi.input']
 

	
 
        gitenv = dict(os.environ)
 
        # forget all configs
 
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 
        cmd = [_git_path, git_command[4:], '--stateless-rpc', self.content_path]
 
        log.debug('handling cmd %s', cmd)
 
        try:
 
            gitenv = os.environ
 
            # forget all configs
 
            gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
 
            opts = dict(
 
                env=gitenv,
 
                cwd=self.content_path,
 
            )
 
            cmd = r'%s %s --stateless-rpc "%s"' % (_git_path, git_command[4:],
 
                                                   self.content_path),
 
            log.debug('handling cmd %s' % cmd)
 
            out = subprocessio.SubprocessIOChunker(
 
                cmd,
 
                inputstream=inputstream,
 
                **opts
 
                env=gitenv,
 
                cwd=self.content_path,
 
            )
 
        except EnvironmentError, e:
 
            log.error(traceback.format_exc())
kallithea/lib/utils.py
Show inline comments
 
@@ -799,7 +799,7 @@ def check_git_version():
 
    if 'git' not in BACKENDS:
 
        return None
 

	
 
    stdout, stderr = GitRepository._run_git_command('--version', _bare=True,
 
    stdout, stderr = GitRepository._run_git_command(['--version'], _bare=True,
 
                                                    _safe=True)
 

	
 
    m = re.search("\d+.\d+.\d+", stdout)
kallithea/lib/vcs/backends/git/changeset.py
Show inline comments
 
@@ -189,7 +189,7 @@ class GitChangeset(BaseChangeset):
 
        """
 
        rev_filter = settings.GIT_REV_FILTER
 
        so, se = self.repository.run_git_command(
 
            "rev-list %s --children" % (rev_filter)
 
            ['rev-list', rev_filter, '--children']
 
        )
 

	
 
        children = []
 
@@ -288,12 +288,12 @@ class GitChangeset(BaseChangeset):
 
        f_path = safe_str(path)
 

	
 
        if limit:
 
            cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % (
 
                      safe_int(limit, 0), cs_id, f_path)
 
            cmd = ['log', '-n', str(safe_int(limit, 0)),
 
                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 

	
 
        else:
 
            cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % (
 
                      cs_id, f_path)
 
            cmd = ['log',
 
                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 
        so, se = self.repository.run_git_command(cmd)
 
        ids = re.findall(r'[0-9a-fA-F]{40}', so)
 
        return [self.repository.get_changeset(sha) for sha in ids]
 
@@ -321,7 +321,7 @@ class GitChangeset(BaseChangeset):
 
        generally not good. Should be replaced with algorithm iterating
 
        commits.
 
        """
 
        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
 
        cmd = ['blame', '-l', '--root', '-r', self.id, '--', path]
 
        # -l     ==> outputs long shas (and we need all 40 characters)
 
        # --root ==> doesn't put '^' character for boundaries
 
        # -r sha ==> blames for the given revision
 
@@ -471,8 +471,8 @@ class GitChangeset(BaseChangeset):
 
    def _diff_name_status(self):
 
        output = []
 
        for parent in self.parents:
 
            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
 
                                                                self.raw_id)
 
            cmd = ['diff', '--name-status', parent.raw_id, self.raw_id,
 
                   '--encoding=utf8']
 
            so, se = self.repository.run_git_command(cmd)
 
            output.append(so.strip())
 
        return '\n'.join(output)
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
@@ -18,18 +18,6 @@ import urllib2
 
import logging
 
import posixpath
 
import string
 
import sys
 
if sys.platform == "win32":
 
    from subprocess import list2cmdline
 
    def quote(s):
 
        return list2cmdline([s])
 
else:
 
    try:
 
        # Python <=2.7
 
        from pipes import quote
 
    except ImportError:
 
        # Python 3.3+
 
        from shlex import quote
 

	
 
from dulwich.objects import Tag
 
from dulwich.repo import Repo, NotGitRepository
 
@@ -135,10 +123,7 @@ class GitRepository(BaseRepository):
 
            del opts['_safe']
 
            safe_call = True
 

	
 
        _str_cmd = False
 
        if isinstance(cmd, basestring):
 
            cmd = [cmd]
 
            _str_cmd = True
 
        assert isinstance(cmd, list), cmd
 

	
 
        gitenv = os.environ
 
        # need to clean fix GIT_DIR !
 
@@ -148,13 +133,11 @@ class GitRepository(BaseRepository):
 

	
 
        _git_path = settings.GIT_EXECUTABLE_PATH
 
        cmd = [_git_path] + _copts + cmd
 
        if _str_cmd:
 
            cmd = ' '.join(cmd)
 

	
 
        try:
 
            _opts = dict(
 
                env=gitenv,
 
                shell=True,
 
                shell=False,
 
            )
 
            _opts.update(opts)
 
            p = subprocessio.SubprocessIOChunker(cmd, **_opts)
 
@@ -268,7 +251,7 @@ class GitRepository(BaseRepository):
 
            return []
 

	
 
        rev_filter = settings.GIT_REV_FILTER
 
        cmd = 'rev-list %s --reverse --date-order' % (rev_filter)
 
        cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
 
        try:
 
            so, se = self.run_git_command(cmd)
 
        except RepositoryError:
 
@@ -551,22 +534,16 @@ class GitRepository(BaseRepository):
 

	
 
        # %H at format means (full) commit hash, initial hashes are retrieved
 
        # in ascending date order
 
        cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
 
        cmd_params = {}
 
        cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
 
        if start_date:
 
            cmd_template += ' --since "$since"'
 
            cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
 
            cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
 
        if end_date:
 
            cmd_template += ' --until "$until"'
 
            cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
 
            cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
 
        if branch_name:
 
            cmd_template += ' $branch_name'
 
            cmd_params['branch_name'] = branch_name
 
            cmd.append(branch_name)
 
        else:
 
            rev_filter = settings.GIT_REV_FILTER
 
            cmd_template += ' %s' % (rev_filter)
 
            cmd.append(settings.GIT_REV_FILTER)
 

	
 
        cmd = string.Template(cmd_template).safe_substitute(**cmd_params)
 
        revs = self.run_git_command(cmd)[0].splitlines()
 
        start_pos = 0
 
        end_pos = len(revs)
 
@@ -622,14 +599,14 @@ class GitRepository(BaseRepository):
 

	
 
        if rev1 == self.EMPTY_CHANGESET:
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['show'] + flags + [rev2])
 
            cmd = ['show'] + flags + [rev2]
 
        else:
 
            rev1 = self.get_changeset(rev1).raw_id
 
            rev2 = self.get_changeset(rev2).raw_id
 
            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
 
            cmd = ['diff'] + flags + [rev1, rev2]
 

	
 
        if path:
 
            cmd += ' -- "%s"' % path
 
            cmd += ['--', path]
 

	
 
        stdout, stderr = self.run_git_command(cmd)
 
        # TODO: don't ignore stderr
 
@@ -663,8 +640,7 @@ class GitRepository(BaseRepository):
 
            cmd.append('--bare')
 
        elif not update_after_clone:
 
            cmd.append('--no-checkout')
 
        cmd += ['--', quote(url), quote(self.path)]
 
        cmd = ' '.join(cmd)
 
        cmd += ['--', url, self.path]
 
        # If error occurs run_git_command raises RepositoryError already
 
        self.run_git_command(cmd)
 

	
 
@@ -673,8 +649,7 @@ class GitRepository(BaseRepository):
 
        Tries to pull changes from external location.
 
        """
 
        url = self._get_url(url)
 
        cmd = ['pull', "--ff-only", quote(url)]
 
        cmd = ' '.join(cmd)
 
        cmd = ['pull', '--ff-only', url]
 
        # If error occurs run_git_command raises RepositoryError already
 
        self.run_git_command(cmd)
 

	
 
@@ -683,13 +658,11 @@ class GitRepository(BaseRepository):
 
        Tries to pull changes from external location.
 
        """
 
        url = self._get_url(url)
 
        so, se = self.run_git_command('ls-remote -h %s' % quote(url))
 
        refs = []
 
        so, se = self.run_git_command(['ls-remote', '-h', url])
 
        cmd = ['fetch', url, '--']
 
        for line in (x for x in so.splitlines()):
 
            sha, ref = line.split('\t')
 
            refs.append(ref)
 
        refs = ' '.join(('+%s:%s' % (r, r) for r in refs))
 
        cmd = '''fetch %s -- %s''' % (quote(url), refs)
 
            cmd.append('+%s:%s' % (ref, ref))
 
        self.run_git_command(cmd)
 

	
 
    def _update_server_info(self):
kallithea/lib/vcs/subprocessio.py
Show inline comments
 
@@ -342,11 +342,9 @@ class SubprocessIOChunker(object):
 
            input_streamer.start()
 
            inputstream = input_streamer.output
 

	
 
        _shell = kwargs.get('shell', True)
 
        if isinstance(cmd, (list, tuple)):
 
            cmd = ' '.join(cmd)
 
        # Note: fragile cmd mangling has been removed for use in Kallithea
 
        assert isinstance(cmd, list), cmd
 

	
 
        kwargs['shell'] = _shell
 
        _p = subprocess.Popen(cmd, bufsize=-1,
 
                              stdin=inputstream,
 
                              stdout=subprocess.PIPE,
kallithea/tests/vcs/test_git.py
Show inline comments
 
@@ -682,34 +682,34 @@ class GitSpecificWithRepoTest(_BackendTe
 
            'base')
 

	
 
    def test_workdir_get_branch(self):
 
        self.repo.run_git_command('checkout -b production')
 
        self.repo.run_git_command(['checkout', '-b', 'production'])
 
        # Regression test: one of following would fail if we don't check
 
        # .git/HEAD file
 
        self.repo.run_git_command('checkout production')
 
        self.repo.run_git_command(['checkout', 'production'])
 
        self.assertEqual(self.repo.workdir.get_branch(), 'production')
 
        self.repo.run_git_command('checkout master')
 
        self.repo.run_git_command(['checkout', 'master'])
 
        self.assertEqual(self.repo.workdir.get_branch(), 'master')
 

	
 
    def test_get_diff_runs_git_command_with_hashes(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(0, 1)
 
        self.repo.run_git_command.assert_called_once_with(
 
          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s' %
 
            (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(0), self.repo._get_revision(1)])
 

	
 
    def test_get_diff_runs_git_command_with_str_hashes(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1)
 
        self.repo.run_git_command.assert_called_once_with(
 
            'show -U%s --full-index --binary -p -M --abbrev=40 %s' %
 
            (3, self.repo._get_revision(1)))
 
            ['show', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(1)])
 

	
 
    def test_get_diff_runs_git_command_with_path_if_its_given(self):
 
        self.repo.run_git_command = mock.Mock(return_value=['', ''])
 
        self.repo.get_diff(0, 1, 'foo')
 
        self.repo.run_git_command.assert_called_once_with(
 
          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s -- "foo"'
 
            % (3, self.repo._get_revision(0), self.repo._get_revision(1)))
 
            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
 
             self.repo._get_revision(0), self.repo._get_revision(1), '--', 'foo'])
 

	
 

	
 
class GitRegressionTest(_BackendTestMixin, unittest.TestCase):
0 comments (0 inline, 0 general)