Changeset - e025863fae9c
[Not reviewed]
default
0 1 0
Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com
diffs: avoid extra copy of diff when trying to create a lazy diff line iterator

Make the generator actually be lazy and avoid creating an extra full copy of
all the lines in a file diff at once.

And consistently call it diff_lines.
1 file changed with 9 insertions and 11 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -20,26 +20,24 @@ Set of diffing helpers, previously part 
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 4, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 
import re
 
import difflib
 
import logging
 

	
 
from itertools import imap
 

	
 
from tg.i18n import ugettext as _
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.helpers import escape
 
from kallithea.lib.utils2 import safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def wrap_to_table(html):
 
@@ -297,40 +295,40 @@ class DiffProcessor(object):
 
        """
 
        match = None
 
        if self.vcs == 'git':
 
            match = self._git_header_re.match(diff_chunk)
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
        if match is None:
 
            raise Exception('diff not recognized as valid %s diff' % self.vcs)
 
        meta_info = match.groupdict()
 
        rest = diff_chunk[match.end():]
 
        if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
            raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
 
        difflines = imap(self._escaper, re.findall(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        return meta_info, difflines
 
        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        return meta_info, diff_lines
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        If diff is truncated, wrap it in LimitedDiffContainer.
 
        Optionally, do an extra pass and to extra markup of one-liner changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 
        diff_container = lambda arg: arg
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            head, diff = self._get_header(buffer(self._diff, start, end - start))
 
            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
                'ops': {},
 
            }
 

	
 
            if head['deleted_file_mode']:
 
                op = 'D'
 
                stats['binary'] = True
 
@@ -372,25 +370,25 @@ class DiffProcessor(object):
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                try:
 
                    chunks, added, deleted = self._parse_lines(diff)
 
                    chunks, added, deleted = self._parse_lines(diff_lines)
 
                    stats['binary'] = False
 
                    stats['added'] = added
 
                    stats['deleted'] = deleted
 
                    # explicit mark that it's a modified file
 
                    if op == 'M':
 
                        stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
                except DiffLimitExceeded:
 
                    diff_container = lambda _diff: \
 
                        LimitedDiffContainer(self.diff_limit,
 
                                            self.cur_diff_size, _diff)
 
                    break
 
@@ -450,35 +448,35 @@ class DiffProcessor(object):
 
                        except StopIteration:
 
                            # add was last line - ok
 
                            self._highlight_inline_diff(delline, addline)
 
                            raise
 
                        if peekline['action'] != 'add':
 
                            # there was only one add line - ok
 
                            self._highlight_inline_diff(delline, addline)
 
                except StopIteration:
 
                    pass
 

	
 
        return diff_container(_files)
 

	
 
    def _parse_lines(self, diff):
 
    def _parse_lines(self, diff_lines):
 
        """
 
        Given an iterator of diff body lines, parse them and return a dict per
 
        line and added/removed totals.
 
        """
 
        added = deleted = 0
 
        old_line = old_end = new_line = new_end = None
 

	
 
        try:
 
            chunks = []
 
            line = diff.next()
 
            line = diff_lines.next()
 

	
 
            while True:
 
                lines = []
 
                chunks.append(lines)
 

	
 
                match = self._chunk_re.match(line)
 

	
 
                if not match:
 
                    raise Exception('error parsing diff @@ line %r' % line)
 

	
 
                gr = match.groups()
 
                (old_line, old_end,
 
@@ -491,25 +489,25 @@ class DiffProcessor(object):
 
                new_end += new_line
 

	
 
                if context:
 
                    # skip context only if it's first line
 
                    if int(gr[0]) > 1:
 
                        lines.append({
 
                            'old_lineno': '...',
 
                            'new_lineno': '...',
 
                            'action':     'context',
 
                            'line':       line,
 
                        })
 

	
 
                line = diff.next()
 
                line = diff_lines.next()
 

	
 
                while old_line < old_end or new_line < new_end:
 
                    if not line:
 
                        raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                    affects_old = affects_new = False
 

	
 
                    command = line[0]
 
                    if command == '+':
 
                        affects_new = True
 
                        action = 'add'
 
                        added += 1
 
@@ -524,36 +522,36 @@ class DiffProcessor(object):
 
                        raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                    if not self._newline_marker.match(line):
 
                        old_line += affects_old
 
                        new_line += affects_new
 
                        lines.append({
 
                            'old_lineno':   affects_old and old_line or '',
 
                            'new_lineno':   affects_new and new_line or '',
 
                            'action':       action,
 
                            'line':         line[1:],
 
                        })
 

	
 
                    line = diff.next()
 
                    line = diff_lines.next()
 

	
 
                    if self._newline_marker.match(line):
 
                        # we need to append to lines, since this is not
 
                        # counted in the line specs of diff
 
                        lines.append({
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         line,
 
                        })
 
                        line = diff.next()
 
                        line = diff_lines.next()
 
                if old_line > old_end:
 
                    raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
                if new_line > new_end:
 
                    raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
        except StopIteration:
 
            pass
 
        if old_line != old_end or new_line != new_end:
 
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
        return chunks, added, deleted
 

	
 
    def _safe_id(self, idstring):
0 comments (0 inline, 0 general)