Changeset e26c0616e003 on branch default [Not reviewed]
Mads Kiilerich <mads@kiilerich.com>, 2019-12-16 02:37:05
Grafted from: 44cc1056a1ac
py3: use global next() function instead of .next() method

From 2to3 -f next.
7 files changed with 21 insertions and 21 deletions:
0 comments (0 inline, 0 general)
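
For readers unfamiliar with the fixer: Python 2 iterators expose a .next() method, while Python 3 renames it to __next__() and only the builtin next() works on both. A minimal illustrative sketch of the rewrite that 2to3 -f next automates (not part of the changeset itself):

lines = iter(['a\n', 'b\n'])
# Python 2 only - calls the method directly:
#     first = lines.next()
# Python 2 and 3 - the builtin dispatches to .next()/__next__():
first = next(lines)           # 'a\n'
second = next(lines, None)    # 'b\n'; the optional default suppresses StopIteration
assert next(lines, None) is None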
kallithea/lib/diffs.py
 
@@ -229,451 +229,451 @@ def wrapped_diff(filenode_old, filenode_
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = [o for o in [filenode_new, filenode_old] if isinstance(o, SubModuleNode)]
 
    if submodules:
 
        return b''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = get_diff(repo, old_raw_id, new_raw_id, filenode_new.path,
 
                           ignore_whitespace, context)
 
    return vcs_gitdiff
 

	
 

	
 
def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
 
    """
 
    A thin wrapper around vcs lib get_diff.
 
    """
 
    try:
 
        return scm_instance.get_diff(rev1, rev2, path=path,
 
                                     ignore_whitespace=ignore_whitespace, context=context)
 
    except MemoryError:
 
        h.flash('MemoryError: Diff is too big', category='error')
 
        return b''
 

	
 

	
 
NEW_FILENODE = 1
 
DEL_FILENODE = 2
 
MOD_FILENODE = 3
 
RENAMED_FILENODE = 4
 
COPIED_FILENODE = 5
 
CHMOD_FILENODE = 6
 
BIN_FILENODE = 7
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _diff_git_re = re.compile(b'^diff --git', re.MULTILINE)
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: the diff size that is considered "big"; beyond
            that, cut-off is triggered. Set to None to show the full diff.
 
        """
 
        if not isinstance(diff, bytes):
 
            raise Exception('Diff must be bytes - got %s' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
        self.removes = 0
 
        self.diff_limit = diff_limit
 
        self.limited_diff = False
 
        self.vcs = vcs
 
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 

	
 
    def _parse_gitdiff(self, inline_diff):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        Might set limited_diff.
 
        Optionally, do an extra pass and add extra markup for one-line changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            if self.diff_limit and end > self.diff_limit:
 
                self.limited_diff = True
 
                continue
 

	
 
            head, diff_lines = _get_header(self.vcs, buffer(self._diff, start, end - start))
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
                'ops': {},
 
            }
 

	
 
            if head['deleted_file_mode']:
 
                op = 'removed'
 
                stats['binary'] = True
 
                stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
            elif head['new_file_mode']:
 
                op = 'added'
 
                stats['binary'] = True
 
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
 
            else:  # modify operation, can be cp, rename, chmod
 
                # CHMOD
 
                if head['new_mode'] and head['old_mode']:
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
 
                                        % (head['old_mode'], head['new_mode']))
 
                # RENAME
 
                if (head['rename_from'] and head['rename_to']
 
                      and head['rename_from'] != head['rename_to']):
 
                    op = 'renamed'
 
                    stats['binary'] = True
 
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
 
                                    % (head['rename_from'], head['rename_to']))
 
                # COPY
 
                if head.get('copy_from') and head.get('copy_to'):
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
 
                                        % (head['copy_from'], head['copy_to']))
 
                # FALL BACK: detect missed old style add or remove
 
                if op is None:
 
                    if not head['a_file'] and head['b_file']:
 
                        op = 'added'
 
                        stats['binary'] = True
 
                        stats['ops'][NEW_FILENODE] = 'new file'
 

	
 
                    elif head['a_file'] and not head['b_file']:
 
                        op = 'removed'
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                chunks, added, deleted = _parse_lines(diff_lines)
 
                stats['binary'] = False
 
                stats['added'] = added
 
                stats['deleted'] = deleted
 
                # explicit mark that it's a modified file
 
                if op == 'modified':
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 
            else:  # Git binary patch (or empty diff)
 
                # Git binary patch
 
                if head['bin_patch']:
 
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
 
                chunks = []
 

	
 
            if op == 'removed' and chunks:
 
                # a way of seeing deleted content could perhaps be nice - but
 
                # not with the current UI
 
                chunks = []
 

	
 
            chunks.insert(0, [{
 
                'old_lineno': '',
 
                'new_lineno': '',
 
                'action':     'context',
 
                'line':       msg,
 
                } for _op, msg in stats['ops'].iteritems()
 
                  if _op not in [MOD_FILENODE]])
 

	
 
            _files.append({
 
                'old_filename':     head['a_path'],
 
                'filename':         head['b_path'],
 
                'old_revision':     head['a_blob_id'],
 
                'new_revision':     head['b_blob_id'],
 
                'chunks':           chunks,
 
                'operation':        op,
 
                'stats':            stats,
 
            })
 

	
 
        if not inline_diff:
 
            return _files
 

	
 
        # highlight inline changes when one del is followed by one add
 
        for diff_data in _files:
 
            for chunk in diff_data['chunks']:
 
                lineiter = iter(chunk)
 
                try:
 
-                    peekline = lineiter.next()
+                    peekline = next(lineiter)
 
                    while True:
 
                        # find a first del line
 
                        while peekline['action'] != 'del':
 
-                            peekline = lineiter.next()
+                            peekline = next(lineiter)
 
                        delline = peekline
 
-                        peekline = lineiter.next()
+                        peekline = next(lineiter)
 
                        # if not followed by add, eat all following del lines
 
                        if peekline['action'] != 'add':
 
                            while peekline['action'] == 'del':
 
-                                peekline = lineiter.next()
+                                peekline = next(lineiter)
 
                            continue
 
                        # found an add - make sure it is the only one
 
                        addline = peekline
 
                        try:
 
-                            peekline = lineiter.next()
+                            peekline = next(lineiter)
 
                        except StopIteration:
 
                            # add was last line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                            raise
 
                        if peekline['action'] != 'add':
 
                            # there was only one add line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                except StopIteration:
 
                    pass
 

	
 
        return _files
 

	
 
    def stat(self):
 
        """
 
        Returns tuple of added, and removed lines for this instance
 
        """
 
        return self.adds, self.removes
 

	
 

	
 
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 

	
 
def _escaper(string):
 
    """
 
    Do HTML escaping/markup
 
    """
 

	
 
    def substitute(m):
 
        groups = m.groups()
 
        if groups[0]:
 
            return '&amp;'
 
        if groups[1]:
 
            return '&lt;'
 
        if groups[2]:
 
            return '&gt;'
 
        if groups[3]:
 
            return '<u>\t</u>'
 
        if groups[4]:
 
            return '<u class="cr"></u>'
 
        if groups[5]:
 
            return ' <i></i>'
 
        assert False
 

	
 
    return _escape_re.sub(substitute, safe_unicode(string))
 

	
 

	
 
_git_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
       ^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_hg_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
    (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
       ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 

	
 

	
 
def _get_header(vcs, diff_chunk):
 
    """
 
    Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 

	
 
    1. A dict with meta info:
 

	
 
        a_path, b_path, similarity_index, rename_from, rename_to,
 
        old_mode, new_mode, new_file_mode, deleted_file_mode,
 
        a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
    2. An iterator yielding lines with simple HTML markup.
 
    """
 
    match = None
 
    if vcs == 'git':
 
        match = _git_header_re.match(diff_chunk)
 
    elif vcs == 'hg':
 
        match = _hg_header_re.match(diff_chunk)
 
    if match is None:
 
        raise Exception('diff not recognized as valid %s diff' % vcs)
 
    meta_info = match.groupdict()
 
    rest = diff_chunk[match.end():]
 
    if rest and _header_next_check.match(rest):
 
        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000]))
 
    diff_lines = (_escaper(m.group(0)) for m in re.finditer(br'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
    return meta_info, diff_lines
 

	
 

	
 
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
_newline_marker = re.compile(r'^\\ No newline at end of file')
 

	
 

	
 
def _parse_lines(diff_lines):
 
    """
 
    Given an iterator of diff body lines, parse them and return a dict per
 
    line and added/removed totals.
 
    """
 
    added = deleted = 0
 
    old_line = old_end = new_line = new_end = None
 

	
 
    chunks = []
 
    try:
 
-        line = diff_lines.next()
+        line = next(diff_lines)
 

	
 
        while True:
 
            lines = []
 
            chunks.append(lines)
 

	
 
            match = _chunk_re.match(line)
 

	
 
            if not match:
 
                raise Exception('error parsing diff @@ line %r' % line)
 

	
 
            gr = match.groups()
 
            (old_line, old_end,
 
             new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
            old_line -= 1
 
            new_line -= 1
 

	
 
            context = len(gr) == 5
 
            old_end += old_line
 
            new_end += new_line
 

	
 
            if context:
 
                # skip context only if it's first line
 
                if int(gr[0]) > 1:
 
                    lines.append({
 
                        'old_lineno': '...',
 
                        'new_lineno': '...',
 
                        'action':     'context',
 
                        'line':       line,
 
                    })
 

	
 
-            line = diff_lines.next()
+            line = next(diff_lines)
 

	
 
            while old_line < old_end or new_line < new_end:
 
                if not line:
 
                    raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                affects_old = affects_new = False
 

	
 
                command = line[0]
 
                if command == '+':
 
                    affects_new = True
 
                    action = 'add'
 
                    added += 1
 
                elif command == '-':
 
                    affects_old = True
 
                    action = 'del'
 
                    deleted += 1
 
                elif command == ' ':
 
                    affects_old = affects_new = True
 
                    action = 'unmod'
 
                else:
 
                    raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                if not _newline_marker.match(line):
 
                    old_line += affects_old
 
                    new_line += affects_new
 
                    lines.append({
 
                        'old_lineno':   affects_old and old_line or '',
 
                        'new_lineno':   affects_new and new_line or '',
 
                        'action':       action,
 
                        'line':         line[1:],
 
                    })
 

	
 
-                line = diff_lines.next()
+                line = next(diff_lines)
 

	
 
                if _newline_marker.match(line):
 
                    # we need to append to lines, since this is not
 
                    # counted in the line specs of diff
 
                    lines.append({
 
                        'old_lineno':   '...',
 
                        'new_lineno':   '...',
 
                        'action':       'context',
 
                        'line':         line,
 
                    })
 
-                    line = diff_lines.next()
+                    line = next(diff_lines)
 
            if old_line > old_end:
 
                raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
            if new_line > new_end:
 
                raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
    except StopIteration:
 
        pass
 
    if old_line != old_end or new_line != new_end:
 
        raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
    return chunks, added, deleted
 

	
 
# Used for inline highlighter word split, must match the substitutions in _escaper
 
_token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 

	
 
def _highlight_inline_diff(old, new):
 
    """
 
    Highlight simple add/remove in two lines given as info dicts. They are
 
    modified in place and given markup with <del>/<ins>.
 
    """
 
    assert old['action'] == 'del'
 
    assert new['action'] == 'add'
 

	
 
    oldwords = _token_re.split(old['line'])
 
    newwords = _token_re.split(new['line'])
 
    sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
    oldfragments, newfragments = [], []
 
    for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
        oldfrag = ''.join(oldwords[i1:i2])
 
        newfrag = ''.join(newwords[j1:j2])
 
        if tag != 'equal':
 
            if oldfrag:
 
                oldfrag = '<del>%s</del>' % oldfrag
 
            if newfrag:
 
                newfrag = '<ins>%s</ins>' % newfrag
 
        oldfragments.append(oldfrag)
 
        newfragments.append(newfrag)
 

	
 
    old['line'] = "".join(oldfragments)
 
    new['line'] = "".join(newfragments)
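
As an aside on _highlight_inline_diff above: it walks difflib.SequenceMatcher opcodes over token lists. A minimal sketch of that walk, using a simplified whitespace split instead of the _token_re used in the real code:

import difflib

old = 'the quick fox'.split()
new = 'the slow fox'.split()
for tag, i1, i2, j1, j2 in difflib.SequenceMatcher(None, old, new).get_opcodes():
    if tag != 'equal':
        # prints: replace ['quick'] ['slow'] - the spans to wrap in <del>/<ins>
        print(tag, old[i1:i2], new[j1:j2])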
kallithea/lib/helpers.py
 
@@ -198,385 +198,385 @@ def FID(raw_id, path):
 
    :param raw_id:
 
    :param path:
 
    """
 

	
 
    return 'C-%s-%s' % (short_id(raw_id), hashlib.md5(safe_bytes(path)).hexdigest()[:12])
 

	
 

	
 
class _FilesBreadCrumbs(object):
 

	
 
    def __call__(self, repo_name, rev, paths):
 
        if isinstance(paths, str):
 
            paths = safe_unicode(paths)
 
        url_l = [link_to(repo_name, url('files_home',
 
                                        repo_name=repo_name,
 
                                        revision=rev, f_path=''),
 
                         class_='ypjax-link')]
 
        paths_l = paths.split('/')
 
        for cnt, p in enumerate(paths_l):
 
            if p != '':
 
                url_l.append(link_to(p,
 
                                     url('files_home',
 
                                         repo_name=repo_name,
 
                                         revision=rev,
 
                                         f_path='/'.join(paths_l[:cnt + 1])
 
                                         ),
 
                                     class_='ypjax-link'
 
                                     )
 
                             )
 

	
 
        return literal('/'.join(url_l))
 

	
 

	
 
files_breadcrumbs = _FilesBreadCrumbs()
 

	
 

	
 
class CodeHtmlFormatter(HtmlFormatter):
 
    """
 
    My code Html Formatter for source codes
 
    """
 

	
 
    def wrap(self, source, outfile):
 
        return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
 

	
 
    def _wrap_code(self, source):
 
        for cnt, it in enumerate(source):
 
            i, t = it
 
            t = '<span id="L%s">%s</span>' % (cnt + 1, t)
 
            yield i, t
 

	
 
    def _wrap_tablelinenos(self, inner):
 
        inner_lines = []
 
        lncount = 0
 
        for t, line in inner:
 
            if t:
 
                lncount += 1
 
            inner_lines.append(line)
 

	
 
        fl = self.linenostart
 
        mw = len(str(lncount + fl - 1))
 
        sp = self.linenospecial
 
        st = self.linenostep
 
        la = self.lineanchors
 
        aln = self.anchorlinenos
 
        nocls = self.noclasses
 
        if sp:
 
            lines = []
 

	
 
            for i in range(fl, fl + lncount):
 
                if i % st == 0:
 
                    if i % sp == 0:
 
                        if aln:
 
                            lines.append('<a href="#%s%d" class="special">%*d</a>' %
 
                                         (la, i, mw, i))
 
                        else:
 
                            lines.append('<span class="special">%*d</span>' % (mw, i))
 
                    else:
 
                        if aln:
 
                            lines.append('<a href="#%s%d">%*d</a>' % (la, i, mw, i))
 
                        else:
 
                            lines.append('%*d' % (mw, i))
 
                else:
 
                    lines.append('')
 
            ls = '\n'.join(lines)
 
        else:
 
            lines = []
 
            for i in range(fl, fl + lncount):
 
                if i % st == 0:
 
                    if aln:
 
                        lines.append('<a href="#%s%d">%*d</a>' % (la, i, mw, i))
 
                    else:
 
                        lines.append('%*d' % (mw, i))
 
                else:
 
                    lines.append('')
 
            ls = '\n'.join(lines)
 

	
 
        # in case you wonder about the seemingly redundant <div> here: since the
 
        # content in the other cell also is wrapped in a div, some browsers in
 
        # some configurations seem to mess up the formatting...
 
        if nocls:
 
            yield 0, ('<table class="%stable">' % self.cssclass +
 
                      '<tr><td><div class="linenodiv">'
 
                      '<pre>' + ls + '</pre></div></td>'
 
                      '<td id="hlcode" class="code">')
 
        else:
 
            yield 0, ('<table class="%stable">' % self.cssclass +
 
                      '<tr><td class="linenos"><div class="linenodiv">'
 
                      '<pre>' + ls + '</pre></div></td>'
 
                      '<td id="hlcode" class="code">')
 
        yield 0, ''.join(inner_lines)
 
        yield 0, '</td></tr></table>'
 

	
 

	
 
_whitespace_re = re.compile(r'(\t)|( )(?=\n|</div>)')
 

	
 

	
 
def _markup_whitespace(m):
 
    groups = m.groups()
 
    if groups[0]:
 
        return '<u>\t</u>'
 
    if groups[1]:
 
        return ' <i></i>'
 

	
 

	
 
def markup_whitespace(s):
 
    return _whitespace_re.sub(_markup_whitespace, s)
 

	
 

	
 
def pygmentize(filenode, **kwargs):
 
    """
 
    pygmentize function using pygments
 

	
 
    :param filenode:
 
    """
 
    lexer = get_custom_lexer(filenode.extension) or filenode.lexer
 
    return literal(markup_whitespace(
 
        code_highlight(safe_unicode(filenode.content), lexer, CodeHtmlFormatter(**kwargs))))
 

	
 

	
 
def hsv_to_rgb(h, s, v):
 
    if s == 0.0:
 
        return v, v, v
 
    i = int(h * 6.0)  # XXX assume int() truncates!
 
    f = (h * 6.0) - i
 
    p = v * (1.0 - s)
 
    q = v * (1.0 - s * f)
 
    t = v * (1.0 - s * (1.0 - f))
 
    i = i % 6
 
    if i == 0:
 
        return v, t, p
 
    if i == 1:
 
        return q, v, p
 
    if i == 2:
 
        return p, v, t
 
    if i == 3:
 
        return p, q, v
 
    if i == 4:
 
        return t, p, v
 
    if i == 5:
 
        return v, p, q
 

	
 

	
 
def gen_color(n=10000):
 
    """generator for getting n of evenly distributed colors using
 
    hsv color and golden ratio. It always return same order of colors
 

	
 
    :returns: RGB tuple
 
    """
 

	
 
    golden_ratio = 0.618033988749895
 
    h = 0.22717784590367374
 

	
 
    for _unused in xrange(n):
 
        h += golden_ratio
 
        h %= 1
 
        HSV_tuple = [h, 0.95, 0.95]
 
        RGB_tuple = hsv_to_rgb(*HSV_tuple)
 
        yield [str(int(x * 256)) for x in RGB_tuple]
 

	
 

	
 
def pygmentize_annotation(repo_name, filenode, **kwargs):
 
    """
 
    pygmentize function for annotation
 

	
 
    :param filenode:
 
    """
 
    cgenerator = gen_color()
 
    color_dict = {}
 

	
 
    def get_color_string(cs):
 
        if cs in color_dict:
 
            col = color_dict[cs]
 
        else:
 
-            col = color_dict[cs] = cgenerator.next()
+            col = color_dict[cs] = next(cgenerator)
 
        return "color: rgb(%s)! important;" % (', '.join(col))
 

	
 
    def url_func(changeset):
 
        author = escape(changeset.author)
 
        date = changeset.date
 
        message = escape(changeset.message)
 
        tooltip_html = ("<b>Author:</b> %s<br/>"
 
                        "<b>Date:</b> %s</b><br/>"
 
                        "<b>Message:</b> %s") % (author, date, message)
 

	
 
        lnk_format = show_id(changeset)
 
        uri = link_to(
 
                lnk_format,
 
                url('changeset_home', repo_name=repo_name,
 
                    revision=changeset.raw_id),
 
                style=get_color_string(changeset.raw_id),
 
                **{'data-toggle': 'popover',
 
                   'data-content': tooltip_html}
 
              )
 

	
 
        uri += '\n'
 
        return uri
 

	
 
    return literal(markup_whitespace(annotate_highlight(filenode, url_func, **kwargs)))
 

	
 

	
 
class _Message(object):
 
    """A message returned by ``pop_flash_messages()``.
 

	
 
    Converting the message to a string returns the message text. Instances
 
    also have the following attributes:
 

	
 
    * ``category``: the category specified when the message was created.
 
    * ``message``: the html-safe message text.
 
    """
 

	
 
    def __init__(self, category, message):
 
        self.category = category
 
        self.message = message
 

	
 

	
 
def _session_flash_messages(append=None, clear=False):
 
    """Manage a message queue in tg.session: return the current message queue
 
    after appending the given message, and possibly clearing the queue."""
 
    key = 'flash'
 
    from tg import session
 
    if key in session:
 
        flash_messages = session[key]
 
    else:
 
        if append is None:  # common fast path - also used for clearing empty queue
 
            return []  # don't bother saving
 
        flash_messages = []
 
        session[key] = flash_messages
 
    if append is not None and append not in flash_messages:
 
        flash_messages.append(append)
 
    if clear:
 
        session.pop(key, None)
 
    session.save()
 
    return flash_messages
 

	
 

	
 
def flash(message, category, logf=None):
 
    """
 
    Show a message to the user _and_ log it through the specified function
 

	
 
    category: notice (default), warning, error, success
 
    logf: a custom log function - such as log.debug
 

	
 
    logf defaults to log.info, unless category equals 'success', in which
 
    case logf defaults to log.debug.
 
    """
 
    assert category in ('error', 'success', 'warning'), category
 
    if hasattr(message, '__html__'):
 
        # render to HTML for storing in cookie
 
        safe_message = unicode(message)
 
    else:
 
        # Apply str - the message might be an exception with __str__
 
        # Escape, so we can trust the result without further escaping, without any risk of injection
 
        safe_message = html_escape(unicode(message))
 
    if logf is None:
 
        logf = log.info
 
        if category == 'success':
 
            logf = log.debug
 

	
 
    logf('Flash %s: %s', category, safe_message)
 

	
 
    _session_flash_messages(append=(category, safe_message))
 

	
 

	
 
def pop_flash_messages():
 
    """Return all accumulated messages and delete them from the session.
 

	
 
    The return value is a list of ``Message`` objects.
 
    """
 
    return [_Message(category, message) for category, message in _session_flash_messages(clear=True)]
 

	
 

	
 
age = lambda x, y=False: _age(x, y)
 
capitalize = lambda x: x.capitalize()
 
email = author_email
 
short_id = lambda x: x[:12]
 
hide_credentials = lambda x: ''.join(credentials_filter(x))
 

	
 

	
 
def show_id(cs):
 
    """
 
    Configurable function that shows the changeset ID;
    by default it's r123:fffeeefffeee.
 

	
 
    :param cs: changeset instance
 
    """
 
    from kallithea import CONFIG
 
    def_len = safe_int(CONFIG.get('show_sha_length', 12))
 
    show_rev = str2bool(CONFIG.get('show_revision_number', False))
 

	
 
    raw_id = cs.raw_id[:def_len]
 
    if show_rev:
 
        return 'r%s:%s' % (cs.revision, raw_id)
 
    else:
 
        return raw_id
 

	
 

	
 
def fmt_date(date):
 
    if date:
 
        return date.strftime("%Y-%m-%d %H:%M:%S")
 
    return ""
 

	
 

	
 
def is_git(repository):
 
    if hasattr(repository, 'alias'):
 
        _type = repository.alias
 
    elif hasattr(repository, 'repo_type'):
 
        _type = repository.repo_type
 
    else:
 
        _type = repository
 
    return _type == 'git'
 

	
 

	
 
def is_hg(repository):
 
    if hasattr(repository, 'alias'):
 
        _type = repository.alias
 
    elif hasattr(repository, 'repo_type'):
 
        _type = repository.repo_type
 
    else:
 
        _type = repository
 
    return _type == 'hg'
 

	
 

	
 
@cache_region('long_term', 'user_attr_or_none')
 
def user_attr_or_none(author, show_attr):
 
    """Try to match email part of VCS committer string with a local user and return show_attr
 
    - or return None if user not found"""
 
    email = author_email(author)
 
    if email:
 
        from kallithea.model.db import User
 
        user = User.get_by_email(email, cache=True) # cache will only use sql_cache_short
 
        if user is not None:
 
            return getattr(user, show_attr)
 
    return None
 

	
 

	
 
def email_or_none(author):
 
    """Try to match email part of VCS committer string with a local user.
 
    Return primary email of user, email part of the specified author name, or None."""
 
    if not author:
 
        return None
 
    email = user_attr_or_none(author, 'email')
 
    if email is not None:
 
        return email # always use user's main email address - not necessarily the one used to find user
 

	
 
    # extract email from the commit string
 
    email = author_email(author)
 
    if email:
 
        return email
 

	
 
    # No valid email, not a valid user in the system, none!
 
    return None
 

	
 

	
 
def person(author, show_attr="username"):
 
    """Find the user identified by 'author', return one of the users attributes,
 
    default to the username attribute, None if there is no user"""
 
    from kallithea.model.db import User
 
    # if author is already an instance use it for extraction
 
    if isinstance(author, User):
 
        return getattr(author, show_attr)
 

	
 
    value = user_attr_or_none(author, show_attr)
 
    if value is not None:
 
        return value
 

	
 
    # Still nothing?  Just pass back the author name if any, else the email
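
The get_color_string change above is the typical call-site shape of this commit: a generator held in a closure, advanced once per cache miss. A minimal sketch with a simplified stand-in for gen_color (illustrative only):

def gen_color(n=10000):
    # stand-in for the golden-ratio generator above
    for i in range(n):
        yield [str(i % 256)] * 3

cgenerator = gen_color()
col = next(cgenerator)   # was cgenerator.next() on Python 2 only
print("color: rgb(%s)! important;" % ', '.join(col))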
kallithea/lib/indexers/__init__.py
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.indexers
 
~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Whoosh indexing module for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Aug 17, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 
import os
 
import sys
 
from os.path import dirname
 

	
 
from whoosh.analysis import IDTokenizer, LowercaseFilter, RegexTokenizer
 
from whoosh.fields import BOOLEAN, DATETIME, ID, NUMERIC, STORED, TEXT, FieldType, Schema
 
from whoosh.formats import Characters
 
from whoosh.highlight import ContextFragmenter, HtmlFormatter
 
from whoosh.highlight import highlight as whoosh_highlight
 

	
 
from kallithea.lib.utils2 import LazyProperty
 

	
 

	
 
# Add location of top level folder to sys.path
 
sys.path.append(dirname(dirname(dirname(os.path.realpath(__file__)))))
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 
# CUSTOM ANALYZER wordsplit + lowercase filter
 
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 

	
 
# CUSTOM ANALYZER wordsplit + lowercase filter, for emailaddr-like text
 
#
 
# This is useful to:
 
# - avoid removing "stop words" from text
 
# - search case-insensitively
 
#
 
EMAILADDRANALYZER = RegexTokenizer() | LowercaseFilter()
 

	
 
# CUSTOM ANALYZER raw-string + lowercase filter
 
#
 
# This is useful to:
 
# - avoid tokenization
 
# - avoid removing "stop words" from text
 
# - search case-insensitively
 
#
 
ICASEIDANALYZER = IDTokenizer() | LowercaseFilter()
 

	
 
# CUSTOM ANALYZER raw-string
 
#
 
# This is useful to:
 
# - avoid tokenization
 
# - avoid removing "stop words" from text
 
#
 
IDANALYZER = IDTokenizer()
 

	
 
# CUSTOM ANALYZER wordsplit + lowercase filter, for pathname-like text
 
#
 
# This is useful to:
 
# - avoid removing "stop words" from text
 
# - search case-insensitively
 
#
 
PATHANALYZER = RegexTokenizer() | LowercaseFilter()
 

	
 
# INDEX SCHEMA DEFINITION
 
SCHEMA = Schema(
 
    fileid=ID(unique=True),
 
    owner=TEXT(analyzer=EMAILADDRANALYZER),
 
    # this field preserves case of repository name for exact matching
 
    repository_rawname=TEXT(analyzer=IDANALYZER),
 
    repository=TEXT(stored=True, analyzer=ICASEIDANALYZER),
 
    path=TEXT(stored=True, analyzer=PATHANALYZER),
 
    content=FieldType(format=Characters(), analyzer=ANALYZER,
 
                      scorable=True, stored=True),
 
    modtime=STORED(),
 
    extension=TEXT(stored=True, analyzer=PATHANALYZER)
 
)
 

	
 
IDX_NAME = 'HG_INDEX'
 
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 
FRAGMENTER = ContextFragmenter(200)
 

	
 
CHGSETS_SCHEMA = Schema(
 
    raw_id=ID(unique=True, stored=True),
 
    date=NUMERIC(stored=True),
 
    last=BOOLEAN(),
 
    owner=TEXT(analyzer=EMAILADDRANALYZER),
 
    # this field preserves case of repository name for exact matching
 
    # and unique-ness in index table
 
    repository_rawname=ID(unique=True),
 
    repository=ID(stored=True, analyzer=ICASEIDANALYZER),
 
    author=TEXT(stored=True, analyzer=EMAILADDRANALYZER),
 
    message=FieldType(format=Characters(), analyzer=ANALYZER,
 
                      scorable=True, stored=True),
 
    parents=TEXT(),
 
    added=TEXT(analyzer=PATHANALYZER),
 
    removed=TEXT(analyzer=PATHANALYZER),
 
    changed=TEXT(analyzer=PATHANALYZER),
 
)
 

	
 
CHGSET_IDX_NAME = 'CHGSET_INDEX'
 

	
 
# used only to generate queries in journal
 
JOURNAL_SCHEMA = Schema(
 
    username=ID(),
 
    date=DATETIME(),
 
    action=TEXT(),
 
    repository=ID(),
 
    ip=TEXT(),
 
)
 

	
 

	
 
class WhooshResultWrapper(object):
 
    def __init__(self, search_type, searcher, matcher, highlight_items,
 
                 repo_location):
 
        self.search_type = search_type
 
        self.searcher = searcher
 
        self.matcher = matcher
 
        self.highlight_items = highlight_items
 
        self.fragment_size = 200
 
        self.repo_location = repo_location
 

	
 
    @LazyProperty
 
    def doc_ids(self):
 
        docs_id = []
 
        while self.matcher.is_active():
 
            docnum = self.matcher.id()
 
            chunks = [offsets for offsets in self.get_chunks()]
 
            docs_id.append([docnum, chunks])
 
-            self.matcher.next()
+            self.matcher.next()  # this looks like a py2 iterator ... but it isn't
 
        return docs_id
 

	
 
    def __str__(self):
 
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
 

	
 
    def __repr__(self):
 
        return self.__str__()
 

	
 
    def __len__(self):
 
        return len(self.doc_ids)
 

	
 
    def __iter__(self):
 
        """
 
        Allows iteration over results, and lazily generates content.
 

	
 
        *Requires* implementation of ``__getitem__`` method.
 
        """
 
        for docid in self.doc_ids:
 
            yield self.get_full_content(docid)
 

	
 
    def __getitem__(self, key):
 
        """
 
        Slicing of resultWrapper
 
        """
 
        i, j = key.start, key.stop
 

	
 
        slices = []
 
        for docid in self.doc_ids[i:j]:
 
            slices.append(self.get_full_content(docid))
 
        return slices
 

	
 
    def get_full_content(self, docid):
 
        res = self.searcher.stored_fields(docid[0])
 
        log.debug('result: %s', res)
 
        if self.search_type == 'content':
 
            full_repo_path = os.path.join(self.repo_location, res['repository'])
 
            f_path = res['path'].split(full_repo_path)[-1]
 
            f_path = f_path.lstrip(os.sep)
 
            content_short = self.get_short_content(res, docid[1])
 
            res.update({'content_short': content_short,
 
                        'content_short_hl': self.highlight(content_short),
 
                        'f_path': f_path
 
            })
 
        elif self.search_type == 'path':
 
            full_repo_path = os.path.join(self.repo_location, res['repository'])
 
            f_path = res['path'].split(full_repo_path)[-1]
 
            f_path = f_path.lstrip(os.sep)
 
            res.update({'f_path': f_path})
 
        elif self.search_type == 'message':
 
            res.update({'message_hl': self.highlight(res['message'])})
 

	
 
        log.debug('result: %s', res)
 

	
 
        return res
 

	
 
    def get_short_content(self, res, chunks):
 
        return u''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
 

	
 
    def get_chunks(self):
 
        """
 
        Smart function that chunks the content without overlapping chunks,
        so it doesn't highlight the same close occurrences twice.
 
        """
 
        memory = [(0, 0)]
 
        try:
 
            supports_positions = self.matcher.supports('positions')
 
        except AttributeError:  # 'NoneType' object has no attribute 'supports' (because matcher never get a format)
 
            supports_positions = False
 
        if supports_positions:
 
            for span in self.matcher.spans():
 
                start = span.startchar or 0
 
                end = span.endchar or 0
 
                start_offseted = max(0, start - self.fragment_size)
 
                end_offseted = end + self.fragment_size
 

	
 
                if start_offseted < memory[-1][1]:
 
                    start_offseted = memory[-1][1]
 
                memory.append((start_offseted, end_offseted,))
 
                yield (start_offseted, end_offseted,)
 

	
 
    def highlight(self, content, top=5):
 
        if self.search_type not in ['content', 'message']:
 
            return ''
 
        hl = whoosh_highlight(
 
            text=content,
 
            terms=self.highlight_items,
 
            analyzer=ANALYZER,
 
            fragmenter=FRAGMENTER,
 
            formatter=FORMATTER,
 
            top=top
 
        )
 
        return hl
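
The doc_ids loop above is the one place where the fixer's mechanical rewrite had to be rejected: Whoosh's matcher.next() is an ordinary method that advances the matcher, not the iterator protocol, so the builtin next() would raise TypeError. A hypothetical sketch of the distinction:

class FakeMatcher:
    # ordinary method that merely happens to be named "next";
    # no __next__ is defined, so this is not an iterator
    def __init__(self):
        self.pos = 0
    def next(self):
        self.pos += 1

m = FakeMatcher()
m.next()       # fine: a plain method call
try:
    next(m)    # TypeError: 'FakeMatcher' object is not an iterator
except TypeError as e:
    print(e)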
kallithea/lib/middleware/wrapper.py
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.middleware.wrapper
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Wrap the app to measure request and response time ... all the way until the
response WSGI iterator has been closed.
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: May 23, 2013
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 
import time
 

	
 
from kallithea.lib.base import _get_ip_addr, get_path_info
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class Meter:
 

	
 
    def __init__(self, start_response):
 
        self._start_response = start_response
 
        self._start = time.time()
 
        self._size = 0
 

	
 
    def duration(self):
 
        return time.time() - self._start
 

	
 
    def start_response(self, status, response_headers, exc_info=None):
 
        write = self._start_response(status, response_headers, exc_info)
 
        def metered_write(s):
 
            self.measure(s)
 
            write(s)
 
        return metered_write
 

	
 
    def measure(self, chunk):
 
        self._size += len(chunk)
 

	
 
    def size(self):
 
        return self._size
 

	
 

	
 
class ResultIter:
 

	
 
    def __init__(self, result, meter, description):
 
        self._result_close = getattr(result, 'close', None) or (lambda: None)
 
-        self._next = iter(result).next
+        self._next = iter(result).__next__
 
        self._meter = meter
 
        self._description = description
 

	
 
    def __iter__(self):
 
        return self
 

	
 
-    def next(self):
+    def __next__(self):
 
        chunk = self._next()
 
        self._meter.measure(chunk)
 
        return chunk
 

	
 
    def close(self):
 
        self._result_close()
 
        log.info("%s responded after %.3fs with %s bytes", self._description, self._meter.duration(), self._meter.size())
 

	
 

	
 
class RequestWrapper(object):
 

	
 
    def __init__(self, app, config):
 
        self.application = app
 
        self.config = config
 

	
 
    def __call__(self, environ, start_response):
 
        meter = Meter(start_response)
 
        description = "Request from %s for %s" % (
 
            _get_ip_addr(environ),
 
            get_path_info(environ),
 
        )
 
        try:
 
            result = self.application(environ, meter.start_response)
 
        finally:
 
            log.info("%s responding after %.3fs", description, meter.duration())
 
        return ResultIter(result, meter, description)
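
ResultIter shows both halves of the protocol rename: the bound-method lookup iter(result).next becomes iter(result).__next__, and def next becomes def __next__. A minimal sketch of an iterator class written this way; the py2-compatibility alias on the last line is a common general pattern, not something this changeset adds:

class CountUpTo:
    def __init__(self, limit):
        self.i, self.limit = 0, limit
    def __iter__(self):
        return self
    def __next__(self):              # Python 3 protocol method
        if self.i >= self.limit:
            raise StopIteration
        self.i += 1
        return self.i
    next = __next__                  # optional Python 2 alias

assert list(CountUpTo(3)) == [1, 2, 3]
step = iter(CountUpTo(2)).__next__   # bound lookup, as in ResultIter
assert step() == 1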
kallithea/lib/vcs/subprocessio.py
 
"""
 
Module provides a class allowing to wrap communication over subprocess.Popen
 
input, output, error streams into a meaningful, non-blocking, concurrent
 
stream processor exposing the output data as an iterator fitting to be a
 
return value passed by a WSGI application to a WSGI server per PEP 3333.
 

	
 
Copyright (c) 2011  Daniel Dotsenko <dotsa[at]hotmail.com>
 

	
 
This file is part of git_http_backend.py Project.
 

	
 
git_http_backend.py Project is free software: you can redistribute it and/or
 
modify it under the terms of the GNU Lesser General Public License as
 
published by the Free Software Foundation, either version 2.1 of the License,
 
or (at your option) any later version.
 

	
 
git_http_backend.py Project is distributed in the hope that it will be useful,
 
but WITHOUT ANY WARRANTY; without even the implied warranty of
 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 
GNU Lesser General Public License for more details.
 

	
 
You should have received a copy of the GNU Lesser General Public License
 
along with git_http_backend.py Project.
 
If not, see <http://www.gnu.org/licenses/>.
 
"""
 
import collections
 
import os
 
import subprocess
 
import threading
 

	
 

	
 
class StreamFeeder(threading.Thread):
 
    """
 
    Normal writing into a pipe-like object blocks once the buffer is filled.
    This thread seeps data from a file-like object into a pipe without
    blocking the main thread.
 
    We close inpipe once the end of the source stream is reached.
 
    """
 

	
 
    def __init__(self, source):
 
        super(StreamFeeder, self).__init__()
 
        self.daemon = True
 
        filelike = False
 
        self.bytes = bytes()
 
        if type(source) in (type(''), bytes, bytearray):  # string-like
 
            self.bytes = bytes(source)
 
        else:  # can be either file pointer or file-like
 
            if isinstance(source, int):  # file pointer it is
 
                # converting file descriptor (int) stdin into file-like
 
                source = os.fdopen(source, 'rb', 16384)
 
            # let's see if source is file-like by now
 
            filelike = hasattr(source, 'read')
 
        if not filelike and not self.bytes:
 
            raise TypeError("StreamFeeder's source object must be a readable "
 
                            "file-like, a file descriptor, or a string-like.")
 
        self.source = source
 
        self.readiface, self.writeiface = os.pipe()
 

	
 
    def run(self):
 
        t = self.writeiface
 
        if self.bytes:
 
            os.write(t, self.bytes)
 
        else:
 
            s = self.source
 
            b = s.read(4096)
 
            while b:
 
                os.write(t, b)
 
                b = s.read(4096)
 
        os.close(t)
 

	
 
    @property
 
    def output(self):
 
        return self.readiface
 

	
 

	
 
class InputStreamChunker(threading.Thread):
 
    def __init__(self, source, target, buffer_size, chunk_size):
 

	
 
        super(InputStreamChunker, self).__init__()
 

	
 
        self.daemon = True  # die die die.
 

	
 
        self.source = source
 
        self.target = target
 
        self.chunk_count_max = int(buffer_size / chunk_size) + 1
 
        self.chunk_size = chunk_size
 

	
 
        self.data_added = threading.Event()
 
        self.data_added.clear()
 

	
 
        self.keep_reading = threading.Event()
 
        self.keep_reading.set()
 

	
 
        self.EOF = threading.Event()
 
        self.EOF.clear()
 

	
 
        self.go = threading.Event()
 
        self.go.set()
 

	
 
    def stop(self):
 
        self.go.clear()
 
        self.EOF.set()
 
        try:
 
            # this is not proper, but is done to force the reader thread let
 
            # go of the input because, if successful, .close() will send EOF
 
            # down the pipe.
 
            self.source.close()
 
        except:
 
            pass
 

	
 
    def run(self):
 
        s = self.source
 
        t = self.target
 
        cs = self.chunk_size
 
        ccm = self.chunk_count_max
 
        kr = self.keep_reading
 
        da = self.data_added
 
        go = self.go
 

	
 
        try:
 
            b = s.read(cs)
 
        except ValueError:
 
            b = ''
 

	
 
        while b and go.is_set():
 
            if len(t) > ccm:
 
                kr.clear()
 
                kr.wait(2)
 
                # # this only works on 2.7.x and up
 
                # if not kr.wait(10):
 
                #     raise Exception("Timed out while waiting for input to be read.")
 
                # instead we'll use this
 
                if len(t) > ccm + 3:
 
                    raise IOError(
 
                        "Timed out while waiting for input from subprocess.")
 
            t.append(b)
 
            da.set()
 
            try:
 
                b = s.read(cs)
 
            except ValueError: # probably "I/O operation on closed file"
 
                b = ''
 

	
 
        self.EOF.set()
 
        da.set()  # for cases when done but there was no input.
 

	
 

	
 
class BufferedGenerator(object):
 
    """
 
    Class behaves as a non-blocking, buffered pipe reader.
 
    Reads chunks of data (through a thread)
 
    from a blocking pipe, and attaches these to an array (Deque) of chunks.
 
    Reading is halted in the thread when max chunks is internally buffered.
 
    The .next() method may operate in a blocking or non-blocking fashion: by
    yielding '' if no data is ready to be sent, or by not returning until
    there is some data to send.
    When we get EOF from the underlying source pipe, we raise the marker to
    raise StopIteration after the last chunk of data is yielded.
 
    """
 

	
 
    def __init__(self, source, buffer_size=65536, chunk_size=4096,
 
                 starting_values=None, bottomless=False):
 
        starting_values = starting_values or []
 
        if bottomless:
 
            maxlen = int(buffer_size / chunk_size)
 
        else:
 
            maxlen = None
 

	
 
        self.data = collections.deque(starting_values, maxlen)
 
        self.worker = InputStreamChunker(source, self.data, buffer_size,
 
                                         chunk_size)
 
        if starting_values:
 
            self.worker.data_added.set()
 
        self.worker.start()
 

	
 
    ####################
 
    # Generator's methods
 
    ####################
 

	
 
    def __iter__(self):
 
        return self
 

	
 
-    def next(self):
+    def __next__(self):
 
        while not len(self.data) and not self.worker.EOF.is_set():
 
            self.worker.data_added.clear()
 
            self.worker.data_added.wait(0.2)
 
        if len(self.data):
 
            self.worker.keep_reading.set()
 
            return bytes(self.data.popleft())
 
        elif self.worker.EOF.is_set():
 
            raise StopIteration
 

	
 
    def throw(self, type, value=None, traceback=None):
 
        if not self.worker.EOF.is_set():
 
            raise type(value)
 

	
 
    def start(self):
 
        self.worker.start()
 

	
 
    def stop(self):
 
        self.worker.stop()
 

	
 
    def close(self):
 
        try:
 
            self.worker.stop()
 
            self.throw(GeneratorExit)
 
        except (GeneratorExit, StopIteration):
 
            pass
 

	
 
    ####################
 
    # Threaded reader's infrastructure.
 
    ####################
 
    @property
 
    def input(self):
 
        return self.worker.w
 

	
 
    @property
 
    def data_added_event(self):
 
        return self.worker.data_added
 

	
 
    @property
 
    def data_added(self):
 
        return self.worker.data_added.is_set()
 

	
 
    @property
 
    def reading_paused(self):
 
        return not self.worker.keep_reading.is_set()
 

	
 
    @property
 
    def done_reading_event(self):
 
        """
 
        Done_reading does not mean that the iterator's buffer is empty.
 
        Iterator might have done reading from underlying source, but the read
 
        chunks might still be available for serving through .next() method.
 

	
 
        :returns: A threading.Event instance.
 
        """
 
        return self.worker.EOF
 

	
 
    @property
 
    def done_reading(self):
 
        """
 
        Done_reading does not mean that the iterator's buffer is empty.
 
        Iterator might have done reading from underlying source, but the read
 
        chunks might still be available for serving through .next() method.
 

	
 
        :returns: A bool value.
 
        """
 
        return self.worker.EOF.is_set()
 

	
 
    @property
 
    def length(self):
 
        """
 
        returns int.
 

	
 
        This is the length of the queue of chunks, not the length of
 
        the combined contents in those chunks.
 

	
 
        __len__() cannot be meaningfully implemented because this
 
        reader is just flying through a bottomless pit of content and
 
        can only know the length of what it already saw.
 

	
 
        If __len__() on WSGI server per PEP 3333 returns a value,
 
        the response's length will be set to that. In order not to
 
        confuse WSGI PEP3333 servers, we will not implement __len__
 
        at all.
 
        """
 
        return len(self.data)
 

	
 
    def prepend(self, x):
 
        self.data.appendleft(x)
 

	
 
    def append(self, x):
 
        self.data.append(x)
 

	
 
    def extend(self, o):
 
        self.data.extend(o)
 

	
 
    def __getitem__(self, i):
 
        return self.data[i]
 

	
 

	
 
class SubprocessIOChunker(object):
 
    """
 
    Processor class wrapping handling of subprocess IO.
 

	
 
    In a way, this is a "communicate()" replacement with a twist.
 

	
 
    - We are multithreaded. Writing in and reading out, err are all sep threads.
 
    - We support concurrent (in and out) stream processing.
 
    - The output is not a stream. It's a queue of read string (bytes, not unicode)
 
      chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
 
    - We are non-blocking in more respects than communicate()
 
      (reading from subprocess out pauses when internal buffer is full, but
 
       does not block the parent calling code. On the flip side, reading from
 
       slow-yielding subprocess may block the iteration until data shows up. This
 
       does not block the parallel inpipe reading occurring parallel thread.)
 

	
 
    The purpose of the object is to allow us to wrap subprocess interactions into
 
    an iterable that can be passed to a WSGI server as the application's return
 
    value. Because of stream-processing-ability, WSGI does not have to read ALL
 
    of the subprocess's output and buffer it, before handing it to WSGI server for
 
    HTTP response. Instead, the class initializer reads just a bit of the stream
 
    to figure out if error occurred or likely to occur and if not, just hands the
 
    further iteration over subprocess output to the server for completion of HTTP
 
    response.
 

	
 
    The real or perceived subprocess error is trapped and raised as one of
 
    EnvironmentError family of exceptions
 

	
 
    Example usage:
 
    #    try:
 
    #        answer = SubprocessIOChunker(
 
    #            cmd,
 
    #            input,
 
    #            buffer_size = 65536,
 
    #            chunk_size = 4096
 
    #            )
 
    #    except (EnvironmentError) as e:
 
    #        print str(e)
 
    #        raise e
 
    #
 
    #    return answer
 

	
 

	
 
    """
 

	
 
    def __init__(self, cmd, inputstream=None, buffer_size=65536,
 
                 chunk_size=4096, starting_values=None, **kwargs):
 
        """
 
        Initializes SubprocessIOChunker
 

	
 
        :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
 
        :param inputstream: (Default: None) A file-like, string, or file pointer.
 
        :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
 
        :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
 
        :param starting_values: (Default: []) An array of strings to put in front of output que.
 
        """
 
        starting_values = starting_values or []
 
        if inputstream:
 
            input_streamer = StreamFeeder(inputstream)
 
            input_streamer.start()
 
            inputstream = input_streamer.output
 

	
 
        # Note: fragile cmd mangling has been removed for use in Kallithea
 
        assert isinstance(cmd, list), cmd
 

	
 
        _p = subprocess.Popen(cmd, bufsize=-1,
 
                              stdin=inputstream,
 
                              stdout=subprocess.PIPE,
 
                              stderr=subprocess.PIPE,
 
                              **kwargs)
 

	
 
        bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size,
 
                                   starting_values)
 
        bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
 

	
 
        while not bg_out.done_reading and not bg_out.reading_paused:
 
            # doing this until we reach either end of file, or end of buffer.
 
            bg_out.data_added_event.wait(1)
 
            bg_out.data_added_event.clear()
 

	
 
        # At this point it is still ambiguous whether we are done reading or just have a full buffer.
 
        # Either way, if there is an error (reported by the terminated process, or implied
 
        # by the presence of output on stderr), we error out.
 
        # Else, we are happy.
 
        returncode = _p.poll()
 
        if (returncode is not None # process has terminated
 
            and returncode != 0
 
        ): # and it failed
 
            bg_out.stop()
 
            out = b''.join(bg_out)
 
            bg_err.stop()
 
            err = b''.join(bg_err)
 
            if (err.strip() == b'fatal: The remote end hung up unexpectedly' and
 
                out.startswith(b'0034shallow ')
 
            ):
 
                # hack inspired by https://github.com/schacon/grack/pull/7
 
                bg_out = iter([out])
 
                _p = None
 
            elif err:
 
                raise EnvironmentError("Subprocess exited due to an error: %s" % err)
 
            else:
 
                raise EnvironmentError(
 
                    "Subprocess exited with non 0 ret code: %s" % returncode)
 
        self.process = _p
 
        self.output = bg_out
 
        self.error = bg_err
 
        self.inputstream = inputstream
 

	
 
    def __iter__(self):
 
        return self
 

	
 
    def next(self):
 
    def __next__(self):
 
        if self.process:
 
            returncode = self.process.poll()
 
            if (returncode is not None # process has terminated
 
                and returncode != 0
 
            ): # and it failed
 
                self.output.stop()
 
                self.error.stop()
 
                err = ''.join(self.error)
 
                raise EnvironmentError("Subprocess exited due to an error:\n" + err)
 
        return self.output.next()
 
        return next(self.output)
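    # Sketch (not from this changeset): 2to3's "next" fixer makes the two
    # renames shown above -- the py2 protocol method next() becomes
    # __next__(), and call sites switch to the builtin next(), e.g.:
    #     it = iter([1, 2, 3])
    #     next(it)   # works on py2.6+ and py3; replaces py2-only it.next()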
 

	
 
    def throw(self, type, value=None, traceback=None):
 
        if self.output.length or not self.output.done_reading:
 
            raise type(value)
 

	
 
    def close(self):
 
        try:
 
            self.process.terminate()
 
        except:
 
            pass
 
        try:
 
            self.output.close()
 
        except:
 
            pass
 
        try:
 
            self.error.close()
 
        except:
 
            pass
 
        try:
 
            os.close(self.inputstream)
 
        except:
 
            pass
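# A hedged sketch of the WSGI usage pattern the class docstring describes; the
# command, path, and content type below are illustrative assumptions, not
# taken from this changeset:
#
#     def application(environ, start_response):
#         try:
#             chunks = SubprocessIOChunker(
#                 ['git', 'upload-pack', '--stateless-rpc', '/srv/repo'],
#                 inputstream=environ['wsgi.input'])
#         except EnvironmentError as e:
#             start_response('500 Internal Server Error',
#                            [('Content-Type', 'text/plain')])
#             return [str(e).encode('utf-8')]
#         start_response('200 OK',
#                        [('Content-Type', 'application/octet-stream')])
#         return chunks  # the server iterates, streaming chunks as they arrive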
kallithea/tests/other/test_vcs_operations.py
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
Test suite for vcs push/pull operations.
 

	
 
The tests need Git > 1.8.1.
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 30, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 

	
 
"""
 

	
 
from __future__ import print_function
 

	
 
import json
 
import os
 
import re
 
import tempfile
 
import time
 
import urllib2
 
from subprocess import PIPE, Popen
 
from tempfile import _RandomNameSequence
 

	
 
import pytest
 

	
 
from kallithea import CONFIG
 
from kallithea.lib.utils2 import ascii_bytes
 
from kallithea.model.db import CacheInvalidation, Repository, Ui, User, UserIpMap, UserLog
 
from kallithea.model.meta import Session
 
from kallithea.model.ssh_key import SshKeyModel
 
from kallithea.model.user import UserModel
 
from kallithea.tests import base
 
from kallithea.tests.fixture import Fixture
 

	
 

	
 
DEBUG = True
 
HOST = '127.0.0.1:4999'  # test host
 

	
 
fixture = Fixture()
 

	
 

	
 
# Parameterize different kinds of VCS testing - both the kind of VCS and the
 
# access method (HTTP/SSH)
 

	
 
# Mixins for using HTTP and SSH URLs
 
class HttpVcsTest(object):
 
    @staticmethod
 
    def repo_url_param(webserver, repo_name, **kwargs):
 
        return webserver.repo_url(repo_name, **kwargs)
 

	
 
class SshVcsTest(object):
 
    public_keys = {
 
        base.TEST_USER_REGULAR_LOGIN: u'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC6Ycnc2oUZHQnQwuqgZqTTdMDZD7ataf3JM7oG2Fw8JR6cdmz4QZLe5mfDwaFwG2pWHLRpVqzfrD/Pn3rIO++bgCJH5ydczrl1WScfryV1hYMJ/4EzLGM657J1/q5EI+b9SntKjf4ax+KP322L0TNQGbZUHLbfG2MwHMrYBQpHUQ== kallithea@localhost',
 
        base.TEST_USER_ADMIN_LOGIN: u'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC6Ycnc2oUZHQnQwuqgZqTTdMDZD7ataf3JM7oG2Fw8JR6cdmz4QZLe5mfDwaFwG2pWHLRpVqzfrD/Pn3rIO++bgCJH5ydczrl1WScfryV1hYMJ/4EzLGM657J1/q5EI+b9SntKjf4ax+KP322L0TNQGbZUHLbfG2MwHMrYBQpHUq== kallithea@localhost',
 
    }
 

	
 
    @classmethod
 
    def repo_url_param(cls, webserver, repo_name, username=base.TEST_USER_ADMIN_LOGIN, password=base.TEST_USER_ADMIN_PASS, client_ip=base.IP_ADDR):
 
        user = User.get_by_username(username)
 
        if user.ssh_keys:
 
            ssh_key = user.ssh_keys[0]
 
        else:
 
            sshkeymodel = SshKeyModel()
 
            ssh_key = sshkeymodel.create(user, u'test key', cls.public_keys[user.username])
 
            Session().commit()
 

	
 
        return cls._ssh_param(repo_name, user, ssh_key, client_ip)
 

	
 
# Mixins for using Mercurial and Git
 
class HgVcsTest(object):
 
    repo_type = 'hg'
 
    repo_name = base.HG_REPO
 

	
 
class GitVcsTest(object):
 
    repo_type = 'git'
 
    repo_name = base.GIT_REPO
 

	
 
# Combine mixins to give the combinations we want to parameterize tests with
 
class HgHttpVcsTest(HgVcsTest, HttpVcsTest):
 
    pass
 

	
 
class GitHttpVcsTest(GitVcsTest, HttpVcsTest):
 
    pass
 

	
 
class HgSshVcsTest(HgVcsTest, SshVcsTest):
 
    @staticmethod
 
    def _ssh_param(repo_name, user, ssh_key, client_ip):
 
        # Specify a custom ssh command on the command line
 
        return r"""--config ui.ssh="bash -c 'SSH_ORIGINAL_COMMAND=\"\$2\" SSH_CONNECTION=\"%s 1024 127.0.0.1 22\" kallithea-cli ssh-serve -c %s %s %s' --" ssh://someuser@somehost/%s""" % (
 
            client_ip,
 
            CONFIG['__file__'],
 
            user.user_id,
 
            ssh_key.user_ssh_key_id,
 
            repo_name)
 

	
 
class GitSshVcsTest(GitVcsTest, SshVcsTest):
 
    @staticmethod
 
    def _ssh_param(repo_name, user, ssh_key, client_ip):
 
        # Set a custom ssh command in the global environment
 
        os.environ['GIT_SSH_COMMAND'] = r"""bash -c 'SSH_ORIGINAL_COMMAND="$2" SSH_CONNECTION="%s 1024 127.0.0.1 22" kallithea-cli ssh-serve -c %s %s %s' --""" % (
 
            client_ip,
 
            CONFIG['__file__'],
 
            user.user_id,
 
            ssh_key.user_ssh_key_id)
 
        return "ssh://someuser@somehost/%s""" % repo_name
 

	
 
parametrize_vcs_test = base.parametrize('vt', [
 
    HgHttpVcsTest,
 
    GitHttpVcsTest,
 
    HgSshVcsTest,
 
    GitSshVcsTest,
 
])
 
parametrize_vcs_test_hg = base.parametrize('vt', [
 
    HgHttpVcsTest,
 
    HgSshVcsTest,
 
])
 
parametrize_vcs_test_http = base.parametrize('vt', [
 
    HgHttpVcsTest,
 
    GitHttpVcsTest,
 
])
 

	
 
class Command(object):
 

	
 
    def __init__(self, cwd):
 
        self.cwd = cwd
 

	
 
    def execute(self, *args, **environ):
 
        """
 
        Runs a command on the system, joining the given ``args`` with simple
 
        spaces and no safe quoting.
 
        """
 
        command = ' '.join(args)
 
        ignoreReturnCode = environ.pop('ignoreReturnCode', False)
 
        if DEBUG:
 
            print('*** CMD %s ***' % command)
 
        testenv = dict(os.environ)
 
        testenv['LANG'] = 'en_US.UTF-8'
 
        testenv['LANGUAGE'] = 'en_US:en'
 
        testenv['HGPLAIN'] = ''
 
        testenv['HGRCPATH'] = ''
 
        testenv.update(environ)
 
        p = Popen(command, shell=True, stdout=PIPE, stderr=PIPE, cwd=self.cwd, env=testenv)
 
        stdout, stderr = p.communicate()
 
        if DEBUG:
 
            if stdout:
 
                print('stdout:', stdout)
 
            if stderr:
 
                print('stderr:', stderr)
 
        if not ignoreReturnCode:
 
            assert p.returncode == 0
 
        return stdout, stderr
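    # A minimal usage sketch (directory and arguments are hypothetical):
    #     stdout, stderr = Command('/tmp/my-clone').execute('hg', 'status',
    #                                                       ignoreReturnCode=True)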
 

	
 

	
 
def _get_tmp_dir(prefix='vcs_operations-', suffix=''):
 
    return tempfile.mkdtemp(dir=base.TESTS_TMP_PATH, prefix=prefix, suffix=suffix)
 

	
 

	
 
def _add_files(vcs, dest_dir, files_no=3):
 
    """
 
    Generate some files, add them to the dest_dir repo and commit them.
 
    vcs is 'git' or 'hg' and defines which VCS the files are committed with.
 

	
 
    :param vcs:
 
    :param dest_dir:
 
    """
 
    added_file = '%ssetup.py' % _RandomNameSequence().next()
 
    added_file = '%ssetup.py' % next(_RandomNameSequence())
 
    open(os.path.join(dest_dir, added_file), 'a').close()
 
    Command(dest_dir).execute(vcs, 'add', added_file)
 

	
 
    email = 'me@example.com'
 
    if os.name == 'nt':
 
        author_str = 'User <%s>' % email
 
    else:
 
        author_str = 'User ǝɯɐᴎ <%s>' % email
 
    for i in xrange(files_no):
 
        cmd = """echo "added_line%s" >> %s""" % (i, added_file)
 
        Command(dest_dir).execute(cmd)
 
        if vcs == 'hg':
 
            cmd = """hg commit -m "committed new %s" -u "%s" "%s" """ % (
 
                i, author_str, added_file
 
            )
 
        elif vcs == 'git':
 
            cmd = """git commit -m "committed new %s" --author "%s" "%s" """ % (
 
                i, author_str, added_file
 
            )
 
        # git commit needs EMAIL on some machines
 
        Command(dest_dir).execute(cmd, EMAIL=email)
 

	
 
def _add_files_and_push(webserver, vt, dest_dir, clone_url, ignoreReturnCode=False, files_no=3):
 
    _add_files(vt.repo_type, dest_dir, files_no=files_no)
 
    # PUSH it back
 
    stdout = stderr = None
 
    if vt.repo_type == 'hg':
 
        stdout, stderr = Command(dest_dir).execute('hg push -f --verbose', clone_url, ignoreReturnCode=ignoreReturnCode)
 
    elif vt.repo_type == 'git':
 
        stdout, stderr = Command(dest_dir).execute('git push -f --verbose', clone_url, "master", ignoreReturnCode=ignoreReturnCode)
 

	
 
    return stdout, stderr
 

	
 

	
 
def _check_outgoing(vcs, cwd, clone_url):
 
    if vcs == 'hg':
 
        # hg removes the password from default URLs, so we have to provide it here via the clone_url
 
        return Command(cwd).execute('hg -q outgoing', clone_url, ignoreReturnCode=True)
 
    elif vcs == 'git':
 
        Command(cwd).execute('git remote update')
 
        return Command(cwd).execute('git log origin/master..master')
 

	
 

	
 
def set_anonymous_access(enable=True):
 
    user = User.get_default_user()
 
    user.active = enable
 
    Session().commit()
 
    if enable != User.get_default_user().active:
 
        raise Exception('Cannot set anonymous access')
 

	
 

	
 
#==============================================================================
 
# TESTS
 
#==============================================================================
 

	
 

	
 
def _check_proper_git_push(stdout, stderr):
 
    assert 'fatal' not in stderr
 
    assert 'rejected' not in stderr
 
    assert 'Pushing to' in stderr
 
    assert 'master -> master' in stderr
 

	
 

	
 
@pytest.mark.usefixtures("test_context_fixture")
 
class TestVCSOperations(base.TestController):
 

	
 
    @classmethod
 
    def setup_class(cls):
 
        # DISABLE ANONYMOUS ACCESS
 
        set_anonymous_access(False)
 

	
 
    @pytest.fixture()
 
    def testhook_cleanup(self):
 
        yield
 
        # remove hook
 
        for hook in ['prechangegroup', 'pretxnchangegroup', 'preoutgoing', 'changegroup', 'outgoing', 'incoming']:
 
            entry = Ui.get_by_key('hooks', '%s.testhook' % hook)
 
            if entry:
 
                Session().delete(entry)
 
        Session().commit()
 

	
 
    @pytest.fixture(scope="module")
 
    def testfork(self):
 
        # create fork so the repo stays untouched
 
        git_fork_name = u'%s_fork%s' % (base.GIT_REPO, _RandomNameSequence().next())
 
        git_fork_name = u'%s_fork%s' % (base.GIT_REPO, next(_RandomNameSequence()))
 
        fixture.create_fork(base.GIT_REPO, git_fork_name)
 
        hg_fork_name = u'%s_fork%s' % (base.HG_REPO, _RandomNameSequence().next())
 
        hg_fork_name = u'%s_fork%s' % (base.HG_REPO, next(_RandomNameSequence()))
 
        fixture.create_fork(base.HG_REPO, hg_fork_name)
 
        return {'git': git_fork_name, 'hg': hg_fork_name}
 

	
 
    @parametrize_vcs_test
 
    def test_clone_repo_by_admin(self, webserver, vt):
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, _get_tmp_dir())
 

	
 
        if vt.repo_type == 'git':
 
            assert 'Cloning into' in stdout + stderr
 
            assert stderr == '' or stdout == ''
 
        elif vt.repo_type == 'hg':
 
            assert 'requesting all changes' in stdout
 
            assert 'adding changesets' in stdout
 
            assert 'adding manifests' in stdout
 
            assert 'adding file changes' in stdout
 
            assert stderr == ''
 

	
 
    @parametrize_vcs_test_http
 
    def test_clone_wrong_credentials(self, webserver, vt):
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name, password='bad!')
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, _get_tmp_dir(), ignoreReturnCode=True)
 
        if vt.repo_type == 'git':
 
            assert 'fatal: Authentication failed' in stderr
 
        elif vt.repo_type == 'hg':
 
            assert 'abort: authorization failed' in stderr
 

	
 
    def test_clone_git_dir_as_hg(self, webserver):
 
        clone_url = HgHttpVcsTest.repo_url_param(webserver, base.GIT_REPO)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute('hg clone', clone_url, _get_tmp_dir(), ignoreReturnCode=True)
 
        assert 'HTTP Error 404: Not Found' in stderr or "not a valid repository" in stdout and 'abort:' in stderr
 

	
 
    def test_clone_hg_repo_as_git(self, webserver):
 
        clone_url = GitHttpVcsTest.repo_url_param(webserver, base.HG_REPO)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute('git clone', clone_url, _get_tmp_dir(), ignoreReturnCode=True)
 
        assert 'not found' in stderr
 

	
 
    @parametrize_vcs_test
 
    def test_clone_non_existing_path(self, webserver, vt):
 
        clone_url = vt.repo_url_param(webserver, 'trololo')
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, _get_tmp_dir(), ignoreReturnCode=True)
 
        if vt.repo_type == 'git':
 
            assert 'not found' in stderr or 'abort: Access to %r denied' % 'trololo' in stderr
 
        elif vt.repo_type == 'hg':
 
            assert 'HTTP Error 404: Not Found' in stderr or 'abort: no suitable response from remote hg' in stderr and 'remote: abort: Access to %r denied' % 'trololo' in stdout
 

	
 
    @parametrize_vcs_test
 
    def test_push_new_repo(self, webserver, vt):
 
        # Clear the log so we know what is added
 
        UserLog.query().delete()
 
        Session().commit()
 

	
 
        # Create an empty server repo using the API
 
        repo_name = u'new_%s_%s' % (vt.repo_type, _RandomNameSequence().next())
 
        repo_name = u'new_%s_%s' % (vt.repo_type, next(_RandomNameSequence()))
 
        usr = User.get_by_username(base.TEST_USER_ADMIN_LOGIN)
 
        params = {
 
            "id": 7,
 
            "api_key": usr.api_key,
 
            "method": 'create_repo',
 
            "args": dict(repo_name=repo_name,
 
                         owner=base.TEST_USER_ADMIN_LOGIN,
 
                         repo_type=vt.repo_type),
 
        }
 
        req = urllib2.Request(
 
            'http://%s:%s/_admin/api' % webserver.server_address,
 
            data=ascii_bytes(json.dumps(params)),
 
            headers={'content-type': 'application/json'})
 
        response = urllib2.urlopen(req)
 
        result = json.loads(response.read())
 
        # Expect something like:
 
        # {u'result': {u'msg': u'Created new repository `new_XXX`', u'task': None, u'success': True}, u'id': 7, u'error': None}
 
        assert result[u'result'][u'success']
 

	
 
        # Create local clone of the empty server repo
 
        local_clone_dir = _get_tmp_dir()
 
        clone_url = vt.repo_url_param(webserver, repo_name)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, local_clone_dir)
 

	
 
        # Make 3 commits and push to the empty server repo.
 
        # The server repo doesn't have any other heads than the
 
        # refs/heads/master we are pushing, but the `git log` in the push hook
 
        # should still list the 3 commits.
 
        stdout, stderr = _add_files_and_push(webserver, vt, local_clone_dir, clone_url=clone_url)
 
        if vt.repo_type == 'git':
 
            _check_proper_git_push(stdout, stderr)
 
        elif vt.repo_type == 'hg':
 
            assert 'pushing to ' in stdout
 
            assert 'remote: added ' in stdout
 

	
 
        # Verify that we got the right events in UserLog. Expect something like:
 
        # <UserLog('id:new_git_XXX:started_following_repo')>
 
        # <UserLog('id:new_git_XXX:user_created_repo')>
 
        # <UserLog('id:new_git_XXX:pull')>
 
        # <UserLog('id:new_git_XXX:push:aed9d4c1732a1927da3be42c47eb9afdc200d427,d38b083a07af10a9f44193486959a96a23db78da,4841ff9a2b385bec995f4679ef649adb3f437622')>
 
        action_parts = [ul.action.split(':', 1) for ul in UserLog.query().order_by(UserLog.user_log_id)]
 
        assert [(t[0], (t[1].count(',') + 1) if len(t) == 2 else 0) for t in action_parts] == ([
 
            (u'started_following_repo', 0),
 
            (u'user_created_repo', 0),
 
            (u'pull', 0),
 
            (u'push', 3)]
 
            if vt.repo_type == 'git' else [
 
            (u'started_following_repo', 0),
 
            (u'user_created_repo', 0),
 
            # (u'pull', 0), # Mercurial outgoing hook is not called for empty clones
 
            (u'push', 3)])
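        # Reading aid (hypothetical values): the comprehension above maps
        # ['push', 'a1,b2,c3'] -> (u'push', 3) and ['pull'] -> (u'pull', 0),
        # i.e. (action, number of revision hashes listed after the colon).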
 

	
 
    @parametrize_vcs_test
 
    def test_push_new_file(self, webserver, testfork, vt):
 
        UserLog.query().delete()
 
        Session().commit()
 

	
 
        dest_dir = _get_tmp_dir()
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, dest_dir)
 

	
 
        clone_url = vt.repo_url_param(webserver, testfork[vt.repo_type])
 
        stdout, stderr = _add_files_and_push(webserver, vt, dest_dir, clone_url=clone_url)
 

	
 
        if vt.repo_type == 'git':
 
            _check_proper_git_push(stdout, stderr)
 
        elif vt.repo_type == 'hg':
 
            assert 'pushing to' in stdout
 
            assert 'Repository size' in stdout
 
            assert 'Last revision is now' in stdout
 

	
 
        action_parts = [ul.action.split(':', 1) for ul in UserLog.query().order_by(UserLog.user_log_id)]
 
        assert [(t[0], (t[1].count(',') + 1) if len(t) == 2 else 0) for t in action_parts] == \
 
            [(u'pull', 0), (u'push', 3)]
 

	
 
    @parametrize_vcs_test
 
    def test_pull(self, webserver, testfork, vt):
 
        UserLog.query().delete()
 
        Session().commit()
 

	
 
        dest_dir = _get_tmp_dir()
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'init', dest_dir)
 

	
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name)
 
        stdout, stderr = Command(dest_dir).execute(vt.repo_type, 'pull', clone_url)
 

	
 
        if vt.repo_type == 'git':
 
            assert 'FETCH_HEAD' in stderr
 
        elif vt.repo_type == 'hg':
 
            assert 'new changesets' in stdout
 

	
 
        action_parts = [ul.action for ul in UserLog.query().order_by(UserLog.user_log_id)]
 
        assert action_parts == [u'pull']
 

	
 
        # Test handling of URLs with extra '/' around repo_name
 
        stdout, stderr = Command(dest_dir).execute(vt.repo_type, 'pull', clone_url.replace('/' + vt.repo_name, '/./%s/' % vt.repo_name), ignoreReturnCode=True)
 
        if issubclass(vt, HttpVcsTest):
 
            if vt.repo_type == 'git':
 
                # NOTE: when pulling from http://hostname/./vcs_test_git/ , the git client will normalize that and issue an HTTP request to /vcs_test_git/info/refs
 
                assert 'Already up to date.' in stdout
 
            else:
 
                assert vt.repo_type == 'hg'
 
                assert "abort: HTTP Error 404: Not Found" in stderr
 
        else:
 
            assert issubclass(vt, SshVcsTest)
 
            if vt.repo_type == 'git':
 
                assert "abort: Access to './%s' denied" % vt.repo_name in stderr
 
            else:
 
                assert "abort: Access to './%s' denied" % vt.repo_name in stdout
 

	
 
        stdout, stderr = Command(dest_dir).execute(vt.repo_type, 'pull', clone_url.replace('/' + vt.repo_name, '/%s/' % vt.repo_name), ignoreReturnCode=True)
 
        if vt.repo_type == 'git':
 
            assert 'Already up to date.' in stdout
 
        else:
 
            assert vt.repo_type == 'hg'
 
            assert "no changes found" in stdout
 
        assert "denied" not in stderr
 
        assert "denied" not in stdout
 
        assert "404" not in stdout
 

	
 
    @parametrize_vcs_test
 
    def test_push_invalidates_cache(self, webserver, testfork, vt):
 
        pre_cached_tip = [repo.get_api_data()['last_changeset']['short_id'] for repo in Repository.query().filter(Repository.repo_name == testfork[vt.repo_type])]
 

	
 
        key = CacheInvalidation.query().filter(CacheInvalidation.cache_key
 
                                               == testfork[vt.repo_type]).scalar()
 
        if not key:
 
            key = CacheInvalidation(testfork[vt.repo_type], testfork[vt.repo_type])
 
            Session().add(key)
 

	
 
        key.cache_active = True
 
        Session().commit()
 

	
 
        dest_dir = _get_tmp_dir()
 
        clone_url = vt.repo_url_param(webserver, testfork[vt.repo_type])
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, dest_dir)
 

	
 
        stdout, stderr = _add_files_and_push(webserver, vt, dest_dir, files_no=1, clone_url=clone_url)
 

	
 
        if vt.repo_type == 'git':
 
            _check_proper_git_push(stdout, stderr)
 

	
 
        post_cached_tip = [repo.get_api_data()['last_changeset']['short_id'] for repo in Repository.query().filter(Repository.repo_name == testfork[vt.repo_type])]
 
        assert pre_cached_tip != post_cached_tip
 

	
 
        key = CacheInvalidation.query().filter(CacheInvalidation.cache_key
 
                                               == testfork[vt.repo_type]).all()
 
        assert key == []
 

	
 
    @parametrize_vcs_test_http
 
    def test_push_wrong_credentials(self, webserver, vt):
 
        dest_dir = _get_tmp_dir()
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, dest_dir)
 

	
 
        clone_url = webserver.repo_url(vt.repo_name, username='bad', password='name')
 
        stdout, stderr = _add_files_and_push(webserver, vt, dest_dir,
 
                                             clone_url=clone_url, ignoreReturnCode=True)
 

	
 
        if vt.repo_type == 'git':
 
            assert 'fatal: Authentication failed' in stderr
 
        elif vt.repo_type == 'hg':
 
            assert 'abort: authorization failed' in stderr
 

	
 
    @parametrize_vcs_test
 
    def test_push_with_readonly_credentials(self, webserver, vt):
 
        UserLog.query().delete()
 
        Session().commit()
 

	
 
        dest_dir = _get_tmp_dir()
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name, username=base.TEST_USER_REGULAR_LOGIN, password=base.TEST_USER_REGULAR_PASS)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, dest_dir)
 

	
 
        stdout, stderr = _add_files_and_push(webserver, vt, dest_dir, ignoreReturnCode=True, clone_url=clone_url)
 

	
 
        if vt.repo_type == 'git':
 
            assert 'The requested URL returned error: 403' in stderr or 'abort: Push access to %r denied' % str(vt.repo_name) in stderr
 
        elif vt.repo_type == 'hg':
 
            assert 'abort: HTTP Error 403: Forbidden' in stderr or 'abort: push failed on remote' in stderr and 'remote: Push access to %r denied' % str(vt.repo_name) in stdout
 

	
 
        action_parts = [ul.action.split(':', 1) for ul in UserLog.query().order_by(UserLog.user_log_id)]
 
        assert [(t[0], (t[1].count(',') + 1) if len(t) == 2 else 0) for t in action_parts] == \
 
            [(u'pull', 0)]
 

	
 
    @parametrize_vcs_test
 
    def test_push_back_to_wrong_url(self, webserver, vt):
 
        dest_dir = _get_tmp_dir()
 
        clone_url = vt.repo_url_param(webserver, vt.repo_name)
 
        stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, dest_dir)
 

	
 
        stdout, stderr = _add_files_and_push(
 
            webserver, vt, dest_dir, clone_url='http://%s:%s/tmp' % (
kallithea/tests/scripts/manual_test_concurrency.py
 
@@ -13,203 +13,203 @@
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.tests.scripts.manual_test_concurrency
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Test suite for exercising push/pull operations
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 30, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 

	
 
"""
 

	
 
from __future__ import print_function
 

	
 
import logging
 
import os
 
import shutil
 
import sys
 
import tempfile
 
from os.path import dirname
 
from subprocess import PIPE, Popen
 

	
 
from paste.deploy import appconfig
 
from sqlalchemy import engine_from_config
 

	
 
from kallithea.config.environment import load_environment
 
from kallithea.lib.auth import get_crypt_password
 
from kallithea.model import meta
 
from kallithea.model.base import init_model
 
from kallithea.model.db import Repository, Ui, User
 
from kallithea.tests.base import HG_REPO, TEST_USER_ADMIN_LOGIN, TEST_USER_ADMIN_PASS
 

	
 

	
 
rel_path = dirname(dirname(dirname(dirname(os.path.abspath(__file__)))))
 
conf = appconfig('config:development.ini', relative_to=rel_path)
 
load_environment(conf.global_conf, conf.local_conf)
 

	
 
USER = TEST_USER_ADMIN_LOGIN
 
PASS = TEST_USER_ADMIN_PASS
 
HOST = 'server.local'
 
METHOD = 'pull'
 
DEBUG = True
 
log = logging.getLogger(__name__)
 

	
 

	
 
class Command(object):
 

	
 
    def __init__(self, cwd):
 
        self.cwd = cwd
 

	
 
    def execute(self, cmd, *args):
 
        """Runs command on the system with given ``args``.
 
        """
 

	
 
        command = cmd + ' ' + ' '.join(args)
 
        log.debug('Executing %s', command)
 
        if DEBUG:
 
            print(command)
 
        p = Popen(command, shell=True, stdout=PIPE, stderr=PIPE, cwd=self.cwd)
 
        stdout, stderr = p.communicate()
 
        if DEBUG:
 
            print(stdout, stderr)
 
        return stdout, stderr
 

	
 

	
 
def get_session():
 
    engine = engine_from_config(conf, 'sqlalchemy.')
 
    init_model(engine)
 
    sa = meta.Session
 
    return sa
 

	
 

	
 
def create_test_user(force=True):
 
    print('creating test user')
 
    sa = get_session()
 

	
 
    user = sa.query(User).filter(User.username == USER).scalar()
 

	
 
    if force and user is not None:
 
        print('removing current user')
 
        for repo in sa.query(Repository).filter(Repository.user == user).all():
 
            sa.delete(repo)
 
        sa.delete(user)
 
        sa.commit()
 

	
 
    if user is None or force:
 
        print('creating new one')
 
        new_usr = User()
 
        new_usr.username = USER
 
        new_usr.password = get_crypt_password(PASS)
 
        new_usr.email = 'mail@example.com'
 
        new_usr.name = 'test'
 
        new_usr.lastname = 'lasttestname'
 
        new_usr.active = True
 
        new_usr.admin = True
 
        sa.add(new_usr)
 
        sa.commit()
 

	
 
    print('done')
 

	
 

	
 
def create_test_repo(force=True):
 
    print('creating test repo')
 
    from kallithea.model.repo import RepoModel
 
    sa = get_session()
 

	
 
    user = sa.query(User).filter(User.username == USER).scalar()
 
    if user is None:
 
        raise Exception('user not found')
 

	
 
    repo = sa.query(Repository).filter(Repository.repo_name == HG_REPO).scalar()
 

	
 
    if repo is None:
 
        print('repo not found, creating it')
 

	
 
        form_data = {'repo_name': HG_REPO,
 
                     'repo_type': 'hg',
 
                     'private': False,
 
                     'clone_uri': ''}
 
        rm = RepoModel()
 
        rm.base_path = '/home/hg'
 
        rm.create(form_data, user)
 

	
 
    print('done')
 

	
 

	
 
def set_anonymous_access(enable=True):
 
    sa = get_session()
 
    user = sa.query(User).filter(User.username == 'default').one()
 
    user.active = enable
 
    sa.add(user)
 
    sa.commit()
 

	
 

	
 
def get_anonymous_access():
 
    sa = get_session()
 
    return sa.query(User).filter(User.username == 'default').one().active
 

	
 

	
 
#==============================================================================
 
# TESTS
 
#==============================================================================
 
def test_clone_with_credentials(no_errors=False, repo=HG_REPO, method=METHOD,
 
                                backend='hg'):
 
    cwd = path = os.path.join(Ui.get_by_key('paths', '/').ui_value, repo)
 

	
 
    try:
 
        shutil.rmtree(path, ignore_errors=True)
 
        os.makedirs(path)
 
        # print('made dirs %s' % path)
 
    except OSError:
 
        raise
 

	
 
    clone_url = 'http://%(user)s:%(pass)s@%(host)s/%(cloned_repo)s' % \
 
                  {'user': USER,
 
                   'pass': PASS,
 
                   'host': HOST,
 
                   'cloned_repo': repo, }
 

	
 
    dest = tempfile.mktemp(dir=path, prefix='dest-')
 
    if method == 'pull':
 
        stdout, stderr = Command(cwd).execute(backend, method, '--cwd', dest, clone_url)
 
    else:
 
        stdout, stderr = Command(cwd).execute(backend, method, clone_url, dest)
 
        if not no_errors:
 
            if backend == 'hg':
 
                assert """adding file changes""" in stdout, 'no messages about cloning'
 
                assert """abort""" not in stderr, 'got error from clone'
 
            elif backend == 'git':
 
                assert """Cloning into""" in stdout, 'no messages about cloning'
 

	
 

	
 
if __name__ == '__main__':
 
    try:
 
        create_test_user(force=False)
 
        import time
 

	
 
        try:
 
            METHOD = sys.argv[3]
 
        except IndexError:
 
            pass
 

	
 
        try:
 
            backend = sys.argv[4]
 
        except IndexError:
 
            backend = 'hg'
 

	
 
        if METHOD == 'pull':
 
            seq = tempfile._RandomNameSequence().next()
 
            seq = next(tempfile._RandomNameSequence())
 
            test_clone_with_credentials(repo=sys.argv[1], method='clone',
 
                                        backend=backend)
 
        s = time.time()
 
        for i in range(1, int(sys.argv[2]) + 1):
 
            print('take', i)
 
            test_clone_with_credentials(repo=sys.argv[1], method=METHOD,
 
                                        backend=backend)
 
        print('time taken %.3f' % (time.time() - s))
 
    except Exception as e:
 
        sys.exit('stop on %s' % e)
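# The script is driven by positional arguments: repo name, iteration count,
# optional method and optional backend. A hypothetical invocation pulling
# vcs_test_hg ten times with the hg backend:
#     python kallithea/tests/scripts/manual_test_concurrency.py vcs_test_hg 10 pull hg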