Changeset - 24a9bec8138c
[Not reviewed]
default
0 6 0
Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com
diffs: inline prepare() into __init__ and make the result available as .parsed

Make it more clear what the DiffProcessor is: Something that works on a raw
diff as input, mainly compute when initialized, and returns an object where the
result is available in different ways.
6 files changed with 29 insertions and 49 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/changeset.py
Show inline comments
 
@@ -270,29 +270,28 @@ class ChangesetController(BaseRepoContro
 
            cs1 = changeset.parents[0].raw_id if changeset.parents else EmptyChangeset().raw_id
 
            context_lcl = get_line_ctx('', request.GET)
 
            ign_whitespace_lcl = get_ignore_ws('', request.GET)
 

	
 
            raw_diff = c.db_repo_scm_instance.get_diff(cs1, cs2,
 
                ignore_whitespace=ign_whitespace_lcl, context=context_lcl)
 
            diff_limit = None if c.fulldiff else self.cut_off_limit
 
            file_diff_data = []
 
            if method == 'show':
 
                diff_processor = diffs.DiffProcessor(raw_diff,
 
                                                     vcs=c.db_repo_scm_instance.alias,
 
                                                     diff_limit=diff_limit)
 
                _parsed = diff_processor.prepare()
 
                c.limited_diff = False
 
                if isinstance(_parsed, LimitedDiffContainer):
 
                if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
                    c.limited_diff = True
 
                for f in _parsed:
 
                for f in diff_processor.parsed:
 
                    st = f['stats']
 
                    c.lines_added += st['added']
 
                    c.lines_deleted += st['deleted']
 
                    filename = f['filename']
 
                    fid = h.FID(changeset.raw_id, filename)
 
                    url_fid = h.FID('', filename)
 
                    diff = diff_processor.as_html(enable_comments=enable_comments,
 
                                                  parsed_lines=[f])
 
                    file_diff_data.append((fid, url_fid, f['operation'], f['old_filename'], filename, diff, st))
 
            else:
 
                # downloads/raw we only need RAW diff nothing else
 
                file_diff_data.append(('', None, None, None, raw_diff, None))
kallithea/controllers/compare.py
Show inline comments
 
@@ -258,37 +258,36 @@ class CompareController(BaseRepoControll
 
        else: # comparing tips, not necessarily linearly related
 
            if org_repo != other_repo:
 
                # TODO: we could do this by using hg unionrepo
 
                log.error('cannot compare across repos %s and %s', org_repo, other_repo)
 
                h.flash(_('Cannot compare repositories without using common ancestor'), category='error')
 
                raise HTTPBadRequest
 
            rev1 = c.a_rev
 

	
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        log.debug('running diff between %s and %s in %s',
 
                  rev1, c.cs_rev, org_repo.scm_instance.path)
 
        txtdiff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
        raw_diff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=line_context)
 

	
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 
        for f in _parsed:
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            diff = diff_processor.as_html(enable_comments=False,
 
                                          parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, diff, st))
 

	
 
        return render('compare/compare_diff.html')
kallithea/controllers/feed.py
Show inline comments
 
@@ -65,31 +65,31 @@ class FeedController(BaseRepoController)
 
        # branches, tags, bookmarks
 
        if cs.branch:
 
            desc_msg.append('branch: %s<br/>' % cs.branch)
 
        for book in cs.bookmarks:
 
            desc_msg.append('bookmark: %s<br/>' % book)
 
        for tag in cs.tags:
 
            desc_msg.append('tag: %s<br/>' % tag)
 

	
 
        changes = []
 
        diff_limit = safe_int(CONFIG.get('rss_cut_off_limit', 32 * 1024))
 
        raw_diff = cs.diff()
 
        diff_processor = DiffProcessor(raw_diff,
 
                                       diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare(inline_diff=False)
 
                                       diff_limit=diff_limit,
 
                                       inline_diff=False)
 
        limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            limited_diff = True
 

	
 
        for st in _parsed:
 
        for st in diff_processor.parsed:
 
            st.update({'added': st['stats']['added'],
 
                       'removed': st['stats']['deleted']})
 
            changes.append('\n %(operation)s %(filename)s '
 
                           '(%(added)s lines added, %(removed)s lines removed)'
 
                            % st)
 
        if limited_diff:
 
            changes = changes + ['\n ' +
 
                                 _('Changeset was too big and was cut off...')]
 

	
 
        # rev link
 
        _url = h.canonical_url('changeset_home', repo_name=c.db_repo.repo_name,
 
                   revision=cs.raw_id)
kallithea/controllers/pullrequests.py
Show inline comments
 
@@ -583,41 +583,40 @@ class PullrequestsController(BaseRepoCon
 

	
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        fulldiff = request.GET.get('fulldiff')
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        # we swap org/other ref since we run a simple diff on one repo
 
        log.debug('running diff between %s and %s in %s',
 
                  c.a_rev, c.cs_rev, org_scm_instance.path)
 
        try:
 
            txtdiff = org_scm_instance.get_diff(rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
            raw_diff = org_scm_instance.get_diff(rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
                                                ignore_whitespace=ignore_whitespace,
 
                                                context=line_context)
 
        except ChangesetDoesNotExistError:
 
            txtdiff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 
            raw_diff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 

	
 
        for f in _parsed:
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            diff = diff_processor.as_html(enable_comments=True,
 
                                          parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, diff, st))
 

	
 
        # inline comments
 
        c.inline_cnt = 0
 
        c.inline_comments = cc_model.get_inline_comments(
kallithea/lib/diffs.py
Show inline comments
 
@@ -63,31 +63,30 @@ def wrapped_diff(filenode_old, filenode_
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    op = None
 
    a_path = filenode_old.path # default, might be overriden by actual rename in diff
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        diff = wrap_to_table(_('Binary file'))
 
        stats = (0, 0)
 

	
 
    elif diff_limit != -1 and (
 
            diff_limit is None or
 
            (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
 

	
 
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
 
        raw_diff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(f_gitdiff)
 
        _parsed = diff_processor.prepare()
 
        if _parsed: # there should be exactly one element, for the specified file
 
            f = _parsed[0]
 
        diff_processor = DiffProcessor(raw_diff)
 
        if diff_processor.parsed: # there should be exactly one element, for the specified file
 
            f = diff_processor.parsed[0]
 
            op = f['operation']
 
            a_path = f['old_filename']
 

	
 
        diff = diff_processor.as_html(enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 

	
 
    else:
 
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 

	
 
    if not diff:
 
@@ -194,45 +193,43 @@ class DiffProcessor(object):
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    # Used for inline highlighter word split, must match the substitutions in _escaper
 
    _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None):
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
        self.removes = 0
 
        # calculate diff size
 
        self.diff_size = len(diff)
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = 0
 
        self.parsed = False
 
        self.parsed_diff = []
 
        self.vcs = vcs
 
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 

	
 
    def _escaper(self, string):
 
        """
 
        Do HTML escaping/markup and check the diff limit
 
        """
 
        self.cur_diff_size += len(string)
 

	
 
        # escaper gets iterated on each .next() call and it checks if each
 
        # parsed line doesn't exceed the diff limit
 
        if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
 
            raise DiffLimitExceeded('Diff Limit Exceeded')
 

	
 
@@ -298,25 +295,25 @@ class DiffProcessor(object):
 
            match = self._git_header_re.match(diff_chunk)
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
        if match is None:
 
            raise Exception('diff not recognized as valid %s diff' % self.vcs)
 
        meta_info = match.groupdict()
 
        rest = diff_chunk[match.end():]
 
        if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
            raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
 
        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        return meta_info, diff_lines
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
    def _parse_gitdiff(self, inline_diff):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        If diff is truncated, wrap it in LimitedDiffContainer.
 
        Optionally, do an extra pass and to extra markup of one-liner changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 
        diff_container = lambda arg: arg
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
 
@@ -566,57 +563,45 @@ class DiffProcessor(object):
 

	
 
        Whitespace is transformed into underscores, and then
 
        anything which is not a hyphen or a character that
 
        matches \w (alphanumerics and underscore) is removed.
 

	
 
        """
 
        # Transform all whitespace to underscore
 
        idstring = re.sub(r'\s', "_", idstring)
 
        # Remove everything that is not a hyphen or a member of \w
 
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
        return idstring
 

	
 
    def prepare(self, inline_diff=True):
 
        """
 
        Prepare the passed udiff for HTML rendering. It'll return a list
 
        of dicts with diff information
 
        """
 
        parsed = self._parse_gitdiff(inline_diff=inline_diff)
 
        self.parsed = True
 
        self.parsed_diff = parsed
 
        return parsed
 

	
 
    def as_html(self, table_class='code-difftable', line_class='line',
 
                old_lineno_class='lineno old', new_lineno_class='lineno new',
 
                no_lineno_class='lineno',
 
                code_class='code', enable_comments=False, parsed_lines=None):
 
        """
 
        Return given diff as html table with customized css classes
 
        """
 
        def _link_to_if(condition, label, url):
 
            """
 
            Generates a link if condition is meet or just the label if not.
 
            """
 

	
 
            if condition:
 
                return '''<a href="%(url)s">%(label)s</a>''' % {
 
                    'url': url,
 
                    'label': label
 
                }
 
            else:
 
                return label
 
        if not self.parsed:
 
            self.prepare()
 

	
 
        diff_lines = self.parsed_diff
 
        diff_lines = self.parsed
 
        if parsed_lines:
 
            diff_lines = parsed_lines
 

	
 
        _html_empty = True
 
        _html = []
 
        _html.append('''<table class="%(table_class)s">\n''' % {
 
            'table_class': table_class
 
        })
 

	
 
        for diff in diff_lines:
 
            for line in diff['chunks']:
 
                _html_empty = False
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -266,39 +266,37 @@ DIFF_FIXTURES = {
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
}
 

	
 

	
 
class TestDiffLib(TestController):
 

	
 
    @parametrize('diff_fixture', DIFF_FIXTURES)
 
    def test_diff(self, diff_fixture):
 
        diff = fixture.load_resource(diff_fixture, strip=False)
 
        raw_diff = fixture.load_resource(diff_fixture, strip=False)
 
        vcs = 'hg'
 
        if diff_fixture.startswith('git_'):
 
            vcs = 'git'
 
        diff_proc = DiffProcessor(diff, vcs=vcs)
 
        diff_proc_d = diff_proc.prepare()
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
 
        diff_processor = DiffProcessor(raw_diff, vcs=vcs)
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
 
        expected_data = DIFF_FIXTURES[diff_fixture]
 
        assert expected_data == data
 

	
 
    def test_diff_markup(self):
 
        diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_proc = DiffProcessor(diff)
 
        diff_proc_d = diff_proc.prepare()
 
        chunks = diff_proc_d[0]['chunks']
 
        raw_diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_processor = DiffProcessor(raw_diff)
 
        chunks = diff_processor.parsed[0]['chunks']
 
        assert not chunks[0]
 
        #from pprint import pprint; pprint(chunks[1])
 
        l = ['\n']
 
        for d in chunks[1]:
 
            l.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
 
        s = ''.join(l)
 
        print s
 
        assert s == r'''
 
context ... ... u'@@ -51,6 +51,13 @@\n'
 
unmod    51  51 u'<u>\t</u>begin();\n'
 
unmod    52  52 u'<u>\t</u>\n'
 
add      53     u'<u>\t</u>int foo;<u class="cr"></u>\n'
0 comments (0 inline, 0 general)