Changeset - 0d22458bd360
[Not reviewed]
beta
0 1 0
Mads Kiilerich - 13 years ago 2013-04-10 02:55:38
madski@unity3d.com
diff parser: more correct detection and reporting of binary git diffs

git mode is not just used for binary diffs.
1 file changed with 17 insertions and 13 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/diffs.py
Show inline comments
 
@@ -309,150 +309,154 @@ class DiffProcessor(object):
 
                else:
 
                    tag = 'del'
 
                l['line'] = '%s<%s>%s</%s>%s' % (
 
                    l['line'][:start],
 
                    tag,
 
                    l['line'][start:last],
 
                    tag,
 
                    l['line'][last:]
 
                )
 
            do(line)
 
            do(next_)
 

	
 
    def _get_header(self, diff_chunk):
 
        """
 
        parses the diff header, and returns parts, and leftover diff
 
        parts consists of 14 elements::
 

	
 
            a_path, b_path, similarity_index, rename_from, rename_to,
 
            old_mode, new_mode, new_file_mode, deleted_file_mode,
 
            a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
        :param diff_chunk:
 
        :type diff_chunk:
 
        """
 

	
 
        if self.vcs == 'git':
 
            match = self._git_header_re.match(diff_chunk)
 
            diff = diff_chunk[match.end():]
 
            return match.groupdict(), imap(self._escaper, diff.splitlines(1))
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
            diff = diff_chunk[match.end():]
 
            return match.groupdict(), imap(self._escaper, diff.splitlines(1))
 
        else:
 
            raise Exception('VCS type %s is not supported' % self.vcs)
 

	
 
    def _clean_line(self, line, command):
 
        if command in ['+', '-', ' ']:
 
            #only modify the line if it's actually a diff thing
 
            line = line[1:]
 
        return line
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
        _files = []
 
        diff_container = lambda arg: arg
 

	
 
        ##split the diff in chunks of separate --git a/file b/file chunks
 
        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
 
            binary = False
 
            binary_msg = 'unknown binary'
 
            head, diff = self._get_header(raw_diff)
 

	
 
            op = None
 
            stats = None
 
            msg = None
 

	
 
            if not head['a_file'] and head['b_file']:
 
                op = 'A'
 
            elif head['a_file'] and head['b_file']:
 
                op = 'M'
 
            elif head['a_file'] and not head['b_file']:
 
                op = 'D'
 
            else:
 
                #probably we're dealing with a binary file 1
 
                binary = True
 
                if head['deleted_file_mode']:
 
                    op = 'D'
 
                    stats = ['b', DEL_FILENODE]
 
                    binary_msg = 'deleted binary file'
 
                    msg = 'deleted file'
 
                elif head['new_file_mode']:
 
                    op = 'A'
 
                    stats = ['b', NEW_FILENODE]
 
                    binary_msg = 'new binary file %s' % head['new_file_mode']
 
                    msg = 'new file %s' % head['new_file_mode']
 
                else:
 
                    if head['new_mode'] and head['old_mode']:
 
                        stats = ['b', CHMOD_FILENODE]
 
                        op = 'M'
 
                        binary_msg = ('modified binary file chmod %s => %s'
 
                        msg = ('modified file chmod %s => %s'
 
                                      % (head['old_mode'], head['new_mode']))
 
                    elif (head['rename_from'] and head['rename_to']
 
                          and head['rename_from'] != head['rename_to']):
 
                        stats = ['b', RENAMED_FILENODE]
 
                        op = 'M'
 
                        binary_msg = ('file renamed from %s to %s'
 
                        msg = ('file renamed from %s to %s'
 
                                      % (head['rename_from'], head['rename_to']))
 
                    else:
 
                        stats = ['b', MOD_FILENODE]
 
                        op = 'M'
 
                        binary_msg = 'modified binary file'
 
                        msg = 'modified file'
 

	
 
            if not binary:
 
            if head['a_file'] or head['b_file']: # a real diff
 
                try:
 
                    chunks, stats = self._parse_lines(diff)
 
                except DiffLimitExceeded:
 
                    diff_container = lambda _diff: LimitedDiffContainer(
 
                                                self.diff_limit,
 
                                                self.cur_diff_size,
 
                                                _diff)
 
                    break
 
            else:
 
            else: # GIT binary patch (or empty diff)
 
                chunks = []
 
                chunks.append([{
 
                if not msg: # don't overwrite more important message
 
                    msg = 'binary diff not shown'
 

	
 
            if msg:
 
                chunks.insert(0, [{
 
                    'old_lineno': '',
 
                    'new_lineno': '',
 
                    'action':     'binary',
 
                    'line':       binary_msg,
 
                    'line':       msg,
 
                }])
 

	
 
            _files.append({
 
                'filename':         head['b_path'],
 
                'old_revision':     head['a_blob_id'],
 
                'new_revision':     head['b_blob_id'],
 
                'chunks':           chunks,
 
                'operation':        op,
 
                'stats':            stats,
 
            })
 

	
 
        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])
 

	
 
        if not inline_diff:
 
            return diff_container(sorted(_files, key=sorter))
 

	
 
        # highlight inline changes
 
        for diff_data in _files:
 
            for chunk in diff_data['chunks']:
 
                lineiter = iter(chunk)
 
                try:
 
                    while 1:
 
                        line = lineiter.next()
 
                        if line['action'] not in ['unmod', 'context']:
 
                            nextline = lineiter.next()
 
                            if nextline['action'] in ['unmod', 'context'] or \
 
                               nextline['action'] == line['action']:
 
                                continue
 
                            self.differ(line, nextline)
 
                except StopIteration:
 
                    pass
 

	
 
        return diff_container(sorted(_files, key=sorter))
 

	
 
    def _parse_udiff(self, inline_diff=True):
 
        raise NotImplementedError()
 

	
 
    def _parse_lines(self, diff):
 
        """
 
        Parse the diff an return data for the template.
 
        """
 

	
 
        lineiter = iter(diff)
 
        stats = [0, 0]
 

	
 
        try:
 
            chunks = []
 
            line = lineiter.next()
0 comments (0 inline, 0 general)