kallithea Changeset - e025863fae9c

Changeset - e025863fae9c

Parent rev.

Child rev.

[Not reviewed]

default

0 1 0

Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com

diffs: avoid extra copy of diff when trying to create a lazy diff line iterator

Make the generator actually be lazy and avoid creating an extra full copy of
all the lines in a file diff at once.

And consistently call it diff_lines.

1 file changed with 9 insertions and 11 deletions:

kallithea/lib/diffs.py

0 comments (0 inline, 0 general)

kallithea/lib/diffs.py

➞

Show inline comments

@@ -8,50 +8,48 @@
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.lib.diffs
 ~~~~~~~~~~~~~~~~~~~
 Set of diffing helpers, previously part of vcs
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Dec 4, 2011
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import re
 import difflib
 import logging
 from itertools import imap
 from tg.i18n import ugettext as _
 from kallithea.lib.vcs.exceptions import VCSError
 from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 from kallithea.lib.vcs.backends.base import EmptyChangeset
 from kallithea.lib.helpers import escape
 from kallithea.lib.utils2 import safe_unicode
 log = logging.getLogger(__name__)
 def wrap_to_table(html):
     """Given a string with html, return it wrapped in a table, similar to what
     DiffProcessor returns."""
     return '''\
               <table class="code-difftable">
                 <tr class="line no-comment">
                 <td class="lineno new"></td>
                 <td class="code no-comment"><pre>%s</pre></td>
                 </tr>
               </table>''' % html
 def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
@@ -285,64 +283,64 @@ class DiffProcessor(object):
     def _get_header(self, diff_chunk):
         """
         Parses a Git diff for a single file (header and chunks) and returns a tuple with:
         1. A dict with meta info:
             a_path, b_path, similarity_index, rename_from, rename_to,
             old_mode, new_mode, new_file_mode, deleted_file_mode,
             a_blob_id, b_blob_id, b_mode, a_file, b_file
         2. An iterator yielding lines with simple HTML markup.
         """
         match = None
         if self.vcs == 'git':
             match = self._git_header_re.match(diff_chunk)
         elif self.vcs == 'hg':
             match = self._hg_header_re.match(diff_chunk)
         if match is None:
             raise Exception('diff not recognized as valid %s diff' % self.vcs)
         meta_info = match.groupdict()
         rest = diff_chunk[match.end():]
         if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
-        difflines = imap(self._escaper, re.findall(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
-        return meta_info, difflines
+        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
+        return meta_info, diff_lines
     def _parse_gitdiff(self, inline_diff=True):
         """Parse self._diff and return a list of dicts with meta info and chunks for each file.
         If diff is truncated, wrap it in LimitedDiffContainer.
         Optionally, do an extra pass and to extra markup of one-liner changes.
         """
         _files = [] # list of dicts with meta info and chunks
         diff_container = lambda arg: arg
         starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
         starts.append(len(self._diff))
         for start, end in zip(starts, starts[1:]):
-            head, diff = self._get_header(buffer(self._diff, start, end - start))
+            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
             op = None
             stats = {
                 'added': 0,
                 'deleted': 0,
                 'binary': False,
                 'ops': {},
             }
             if head['deleted_file_mode']:
                 op = 'D'
                 stats['binary'] = True
                 stats['ops'][DEL_FILENODE] = 'deleted file'
             elif head['new_file_mode']:
                 op = 'A'
                 stats['binary'] = True
                 stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
             else:  # modify operation, can be cp, rename, chmod
                 # CHMOD
                 if head['new_mode'] and head['old_mode']:
                     op = 'M'
                     stats['binary'] = True
                     stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
@@ -360,49 +358,49 @@ class DiffProcessor(object):
                     stats['binary'] = True
                     stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                         % (head['copy_from'], head['copy_to']))
                 # FALL BACK: detect missed old style add or remove
                 if op is None:
                     if not head['a_file'] and head['b_file']:
                         op = 'A'
                         stats['binary'] = True
                         stats['ops'][NEW_FILENODE] = 'new file'
                     elif head['a_file'] and not head['b_file']:
                         op = 'D'
                         stats['binary'] = True
                         stats['ops'][DEL_FILENODE] = 'deleted file'
                 # it's not ADD not DELETE
                 if op is None:
                     op = 'M'
                     stats['binary'] = True
                     stats['ops'][MOD_FILENODE] = 'modified file'
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
                 try:
-                    chunks, added, deleted = self._parse_lines(diff)
+                    chunks, added, deleted = self._parse_lines(diff_lines)
                     stats['binary'] = False
                     stats['added'] = added
                     stats['deleted'] = deleted
                     # explicit mark that it's a modified file
                     if op == 'M':
                         stats['ops'][MOD_FILENODE] = 'modified file'
                 except DiffLimitExceeded:
                     diff_container = lambda _diff: \
                         LimitedDiffContainer(self.diff_limit,
                                             self.cur_diff_size, _diff)
                     break
             else:  # Git binary patch (or empty diff)
                 # Git binary patch
                 if head['bin_patch']:
                     stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                 chunks = []
             if op == 'D' and chunks:
                 # a way of seeing deleted content could perhaps be nice - but
                 # not with the current UI
                 chunks = []
             chunks.insert(0, [{
@@ -438,134 +436,134 @@ class DiffProcessor(object):
                             peekline = lineiter.next()
                         delline = peekline
                         peekline = lineiter.next()
                         # if not followed by add, eat all following del lines
                         if peekline['action'] != 'add':
                             while peekline['action'] == 'del':
                                 peekline = lineiter.next()
                             continue
                         # found an add - make sure it is the only one
                         addline = peekline
                         try:
                             peekline = lineiter.next()
                         except StopIteration:
                             # add was last line - ok
                             self._highlight_inline_diff(delline, addline)
                             raise
                         if peekline['action'] != 'add':
                             # there was only one add line - ok
                             self._highlight_inline_diff(delline, addline)
                 except StopIteration:
                     pass
         return diff_container(_files)
-    def _parse_lines(self, diff):
+    def _parse_lines(self, diff_lines):
         """
         Given an iterator of diff body lines, parse them and return a dict per
         line and added/removed totals.
         """
         added = deleted = 0
         old_line = old_end = new_line = new_end = None
         try:
             chunks = []
-            line = diff.next()
+            line = diff_lines.next()
             while True:
                 lines = []
                 chunks.append(lines)
                 match = self._chunk_re.match(line)
                 if not match:
                     raise Exception('error parsing diff @@ line %r' % line)
                 gr = match.groups()
                 (old_line, old_end,
                  new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                 old_line -= 1
                 new_line -= 1
                 context = len(gr) == 5
                 old_end += old_line
                 new_end += new_line
                 if context:
                     # skip context only if it's first line
                     if int(gr[0]) > 1:
                         lines.append({
                             'old_lineno': '...',
                             'new_lineno': '...',
                             'action':     'context',
                             'line':       line,
                         })
-                line = diff.next()
+                line = diff_lines.next()
                 while old_line < old_end or new_line < new_end:
                     if not line:
                         raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
                     affects_old = affects_new = False
                     command = line[0]
                     if command == '+':
                         affects_new = True
                         action = 'add'
                         added += 1
                     elif command == '-':
                         affects_old = True
                         action = 'del'
                         deleted += 1
                     elif command == ' ':
                         affects_old = affects_new = True
                         action = 'unmod'
                     else:
                         raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
                     if not self._newline_marker.match(line):
                         old_line += affects_old
                         new_line += affects_new
                         lines.append({
                             'old_lineno':   affects_old and old_line or '',
                             'new_lineno':   affects_new and new_line or '',
                             'action':       action,
                             'line':         line[1:],
                         })
-                    line = diff.next()
+                    line = diff_lines.next()
                     if self._newline_marker.match(line):
                         # we need to append to lines, since this is not
                         # counted in the line specs of diff
                         lines.append({
                             'old_lineno':   '...',
                             'new_lineno':   '...',
                             'action':       'context',
                             'line':         line,
                         })
-                        line = diff.next()
+                        line = diff_lines.next()
                 if old_line > old_end:
                     raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
                 if new_line > new_end:
                     raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
         except StopIteration:
             pass
         if old_line != old_end or new_line != new_end:
             raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
         return chunks, added, deleted
     def _safe_id(self, idstring):
         """Make a string safe for including in an id attribute.
         The HTML spec says that id attributes 'must begin with
         a letter ([A-Za-z]) and may be followed by any number
         of letters, digits ([0-9]), hyphens ("-"), underscores
         ("_"), colons (":"), and periods (".")'. These regexps
         are slightly over-zealous, in that they remove colons
         and periods unnecessarily.
         Whitespace is transformed into underscores, and then
         anything which is not a hyphen or a character that
         matches \w (alphanumerics and underscore) is removed.

0 comments (0 inline, 0 general)