Changeset - 24a9bec8138c
[Not reviewed]
default
0 6 0
Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com
diffs: inline prepare() into __init__ and make the result available as .parsed

Make it more clear what the DiffProcessor is: Something that works on a raw
diff as input, mainly compute when initialized, and returns an object where the
result is available in different ways.
6 files changed with 29 insertions and 49 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/changeset.py
Show inline comments
 
@@ -234,101 +234,100 @@ class ChangesetController(BaseRepoContro
 
        c.changes = OrderedDict()
 

	
 
        c.lines_added = 0  # count of lines added
 
        c.lines_deleted = 0  # count of lines removes
 

	
 
        c.changeset_statuses = ChangesetStatus.STATUSES
 
        comments = dict()
 
        c.statuses = []
 
        c.inline_comments = []
 
        c.inline_cnt = 0
 

	
 
        # Iterate over ranges (default changeset view is always one changeset)
 
        for changeset in c.cs_ranges:
 
            if method == 'show':
 
                c.statuses.extend([ChangesetStatusModel().get_status(
 
                            c.db_repo.repo_id, changeset.raw_id)])
 

	
 
                # Changeset comments
 
                comments.update((com.comment_id, com)
 
                                for com in ChangesetCommentsModel()
 
                                .get_comments(c.db_repo.repo_id,
 
                                              revision=changeset.raw_id))
 

	
 
                # Status change comments - mostly from pull requests
 
                comments.update((st.comment_id, st.comment)
 
                                for st in ChangesetStatusModel()
 
                                .get_statuses(c.db_repo.repo_id,
 
                                              changeset.raw_id, with_revisions=True)
 
                                if st.comment_id is not None)
 

	
 
                inlines = ChangesetCommentsModel() \
 
                            .get_inline_comments(c.db_repo.repo_id,
 
                                                 revision=changeset.raw_id)
 
                c.inline_comments.extend(inlines)
 

	
 
            cs2 = changeset.raw_id
 
            cs1 = changeset.parents[0].raw_id if changeset.parents else EmptyChangeset().raw_id
 
            context_lcl = get_line_ctx('', request.GET)
 
            ign_whitespace_lcl = get_ignore_ws('', request.GET)
 

	
 
            raw_diff = c.db_repo_scm_instance.get_diff(cs1, cs2,
 
                ignore_whitespace=ign_whitespace_lcl, context=context_lcl)
 
            diff_limit = None if c.fulldiff else self.cut_off_limit
 
            file_diff_data = []
 
            if method == 'show':
 
                diff_processor = diffs.DiffProcessor(raw_diff,
 
                                                     vcs=c.db_repo_scm_instance.alias,
 
                                                     diff_limit=diff_limit)
 
                _parsed = diff_processor.prepare()
 
                c.limited_diff = False
 
                if isinstance(_parsed, LimitedDiffContainer):
 
                if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
                    c.limited_diff = True
 
                for f in _parsed:
 
                for f in diff_processor.parsed:
 
                    st = f['stats']
 
                    c.lines_added += st['added']
 
                    c.lines_deleted += st['deleted']
 
                    filename = f['filename']
 
                    fid = h.FID(changeset.raw_id, filename)
 
                    url_fid = h.FID('', filename)
 
                    diff = diff_processor.as_html(enable_comments=enable_comments,
 
                                                  parsed_lines=[f])
 
                    file_diff_data.append((fid, url_fid, f['operation'], f['old_filename'], filename, diff, st))
 
            else:
 
                # downloads/raw we only need RAW diff nothing else
 
                file_diff_data.append(('', None, None, None, raw_diff, None))
 
            c.changes[changeset.raw_id] = (cs1, cs2, file_diff_data)
 

	
 
        # sort comments in creation order
 
        c.comments = [com for com_id, com in sorted(comments.items())]
 

	
 
        # count inline comments
 
        for __, lines in c.inline_comments:
 
            for comments in lines.values():
 
                c.inline_cnt += len(comments)
 

	
 
        if len(c.cs_ranges) == 1:
 
            c.changeset = c.cs_ranges[0]
 
            c.parent_tmpl = ''.join(['# Parent  %s\n' % x.raw_id
 
                                     for x in c.changeset.parents])
 
        if method == 'download':
 
            response.content_type = 'text/plain'
 
            response.content_disposition = 'attachment; filename=%s.diff' \
 
                                            % revision[:12]
 
            return raw_diff
 
        elif method == 'patch':
 
            response.content_type = 'text/plain'
 
            c.diff = safe_unicode(raw_diff)
 
            return render('changeset/patch_changeset.html')
 
        elif method == 'raw':
 
            response.content_type = 'text/plain'
 
            return raw_diff
 
        elif method == 'show':
 
            self.__load_data()
 
            if len(c.cs_ranges) == 1:
 
                return render('changeset/changeset.html')
 
            else:
 
                c.cs_ranges_org = None
 
                c.cs_comments = {}
 
                revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
 
                c.jsdata = graph_data(c.db_repo_scm_instance, revs)
 
                return render('changeset/changeset_range.html')
kallithea/controllers/compare.py
Show inline comments
 
@@ -222,73 +222,72 @@ class CompareController(BaseRepoControll
 
            c.a_repo.scm_instance.alias, c.a_repo.scm_instance, c.a_rev,
 
            c.cs_repo.scm_instance, c.cs_rev)
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
 
        c.cs_statuses = c.cs_repo.statuses(raw_ids)
 

	
 
        revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
 
        c.jsdata = graph_data(c.cs_repo.scm_instance, revs)
 

	
 
        if partial:
 
            return render('compare/compare_cs.html')
 

	
 
        org_repo = c.a_repo
 
        other_repo = c.cs_repo
 

	
 
        if merge:
 
            rev1 = msg = None
 
            if not c.cs_ranges:
 
                msg = _('Cannot show empty diff')
 
            elif not c.ancestors:
 
                msg = _('No ancestor found for merge diff')
 
            elif len(c.ancestors) == 1:
 
                rev1 = c.ancestors[0]
 
            else:
 
                msg = _('Multiple merge ancestors found for merge compare')
 
            if rev1 is None:
 
                h.flash(msg, category='error')
 
                log.error(msg)
 
                raise HTTPNotFound
 

	
 
            # case we want a simple diff without incoming changesets,
 
            # previewing what will be merged.
 
            # Make the diff on the other repo (which is known to have other_rev)
 
            log.debug('Using ancestor %s as rev1 instead of %s',
 
                      rev1, c.a_rev)
 
            org_repo = other_repo
 
        else: # comparing tips, not necessarily linearly related
 
            if org_repo != other_repo:
 
                # TODO: we could do this by using hg unionrepo
 
                log.error('cannot compare across repos %s and %s', org_repo, other_repo)
 
                h.flash(_('Cannot compare repositories without using common ancestor'), category='error')
 
                raise HTTPBadRequest
 
            rev1 = c.a_rev
 

	
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        log.debug('running diff between %s and %s in %s',
 
                  rev1, c.cs_rev, org_repo.scm_instance.path)
 
        txtdiff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
        raw_diff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=line_context)
 

	
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 
        for f in _parsed:
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            diff = diff_processor.as_html(enable_comments=False,
 
                                          parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, diff, st))
 

	
 
        return render('compare/compare_diff.html')
kallithea/controllers/feed.py
Show inline comments
 
@@ -29,103 +29,103 @@ Original author and date, and relevant c
 
import logging
 

	
 
from tg import response, tmpl_context as c
 
from tg.i18n import ugettext as _
 

	
 
from beaker.cache import cache_region, region_invalidate
 
from webhelpers.feedgenerator import Atom1Feed, Rss201rev2Feed
 

	
 
from kallithea import CONFIG
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import LoginRequired, HasRepoPermissionLevelDecorator
 
from kallithea.lib.base import BaseRepoController
 
from kallithea.lib.diffs import DiffProcessor, LimitedDiffContainer
 
from kallithea.model.db import CacheInvalidation
 
from kallithea.lib.utils2 import safe_int, str2bool, safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
language = 'en-us'
 
ttl = "5"
 

	
 

	
 
class FeedController(BaseRepoController):
 

	
 
    @LoginRequired(api_access=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def _before(self, *args, **kwargs):
 
        super(FeedController, self)._before(*args, **kwargs)
 

	
 
    def _get_title(self, cs):
 
        return h.shorter(cs.message, 160)
 

	
 
    def __get_desc(self, cs):
 
        desc_msg = [(_('%s committed on %s')
 
                     % (h.person(cs.author), h.fmt_date(cs.date))) + '<br/>']
 
        # branches, tags, bookmarks
 
        if cs.branch:
 
            desc_msg.append('branch: %s<br/>' % cs.branch)
 
        for book in cs.bookmarks:
 
            desc_msg.append('bookmark: %s<br/>' % book)
 
        for tag in cs.tags:
 
            desc_msg.append('tag: %s<br/>' % tag)
 

	
 
        changes = []
 
        diff_limit = safe_int(CONFIG.get('rss_cut_off_limit', 32 * 1024))
 
        raw_diff = cs.diff()
 
        diff_processor = DiffProcessor(raw_diff,
 
                                       diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare(inline_diff=False)
 
                                       diff_limit=diff_limit,
 
                                       inline_diff=False)
 
        limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            limited_diff = True
 

	
 
        for st in _parsed:
 
        for st in diff_processor.parsed:
 
            st.update({'added': st['stats']['added'],
 
                       'removed': st['stats']['deleted']})
 
            changes.append('\n %(operation)s %(filename)s '
 
                           '(%(added)s lines added, %(removed)s lines removed)'
 
                            % st)
 
        if limited_diff:
 
            changes = changes + ['\n ' +
 
                                 _('Changeset was too big and was cut off...')]
 

	
 
        # rev link
 
        _url = h.canonical_url('changeset_home', repo_name=c.db_repo.repo_name,
 
                   revision=cs.raw_id)
 
        desc_msg.append('changeset: <a href="%s">%s</a>' % (_url, cs.raw_id[:8]))
 

	
 
        desc_msg.append('<pre>')
 
        desc_msg.append(h.urlify_text(cs.message))
 
        desc_msg.append('\n')
 
        desc_msg.extend(changes)
 
        if str2bool(CONFIG.get('rss_include_diff', False)):
 
            desc_msg.append('\n\n')
 
            desc_msg.append(raw_diff)
 
        desc_msg.append('</pre>')
 
        return map(safe_unicode, desc_msg)
 

	
 
    def atom(self, repo_name):
 
        """Produce an atom-1.0 feed via feedgenerator module"""
 

	
 
        @cache_region('long_term', '_get_feed_from_cache')
 
        def _get_feed_from_cache(key, kind):
 
            feed = Atom1Feed(
 
                title=_('%s %s feed') % (c.site_name, repo_name),
 
                link=h.canonical_url('summary_home', repo_name=repo_name),
 
                description=_('Changes on %s repository') % repo_name,
 
                language=language,
 
                ttl=ttl
 
            )
 

	
 
            rss_items_per_page = safe_int(CONFIG.get('rss_items_per_page', 20))
 
            for cs in reversed(list(c.db_repo_scm_instance[-rss_items_per_page:])):
 
                feed.add_item(title=self._get_title(cs),
 
                              link=h.canonical_url('changeset_home', repo_name=repo_name,
 
                                       revision=cs.raw_id),
 
                              author_name=cs.author,
 
                              description=''.join(self.__get_desc(cs)),
 
                              pubdate=cs.date,
 
                              )
 

	
 
            response.content_type = feed.mime_type
kallithea/controllers/pullrequests.py
Show inline comments
 
@@ -547,113 +547,112 @@ class PullrequestsController(BaseRepoCon
 
                                                               org_scm_instance.path,
 
                                                               other_scm_instance.path)
 
                        else:
 
                            hgrepo = org_scm_instance._repo
 
                        show = set(hgrepo.revs('::%ld & !::parents(%s) & !::%s',
 
                                               avail_revs, revs[0], targethead))
 
                        c.update_msg = _('The following additional changes are available on %s:') % c.cs_branch_name
 
                    else:
 
                        show = set()
 
                        avail_revs = set() # drop revs[0]
 
                        c.update_msg = _('No additional changesets found for iterating on this pull request.')
 

	
 
                    # TODO: handle branch heads that not are tip-most
 
                    brevs = org_scm_instance._repo.revs('%s - %ld - %s', c.cs_branch_name, avail_revs, revs[0])
 
                    if brevs:
 
                        # also show changesets that are on branch but neither ancestors nor descendants
 
                        show.update(org_scm_instance._repo.revs('::%ld - ::%ld - ::%s', brevs, avail_revs, c.a_branch_name))
 
                        show.add(revs[0]) # make sure graph shows this so we can see how they relate
 
                        c.update_msg_other = _('Note: Branch %s has another head: %s.') % (c.cs_branch_name,
 
                            h.short_id(org_scm_instance.get_changeset((max(brevs))).raw_id))
 

	
 
                    avail_show = sorted(show, reverse=True)
 

	
 
            elif org_scm_instance.alias == 'git':
 
                c.cs_repo.scm_instance.get_changeset(c.cs_rev) # check it exists - raise ChangesetDoesNotExistError if not
 
                c.update_msg = _("Git pull requests don't support iterating yet.")
 
        except ChangesetDoesNotExistError:
 
            c.update_msg = _('Error: some changesets not found when displaying pull request from %s.') % c.cs_rev
 

	
 
        c.avail_revs = avail_revs
 
        c.avail_cs = [org_scm_instance.get_changeset(r) for r in avail_show]
 
        c.avail_jsdata = graph_data(org_scm_instance, avail_show)
 

	
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
 
        c.cs_statuses = c.cs_repo.statuses(raw_ids)
 

	
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        fulldiff = request.GET.get('fulldiff')
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        # we swap org/other ref since we run a simple diff on one repo
 
        log.debug('running diff between %s and %s in %s',
 
                  c.a_rev, c.cs_rev, org_scm_instance.path)
 
        try:
 
            txtdiff = org_scm_instance.get_diff(rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
            raw_diff = org_scm_instance.get_diff(rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
                                                ignore_whitespace=ignore_whitespace,
 
                                                context=line_context)
 
        except ChangesetDoesNotExistError:
 
            txtdiff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 
            raw_diff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 

	
 
        for f in _parsed:
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            diff = diff_processor.as_html(enable_comments=True,
 
                                          parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, diff, st))
 

	
 
        # inline comments
 
        c.inline_cnt = 0
 
        c.inline_comments = cc_model.get_inline_comments(
 
                                c.db_repo.repo_id,
 
                                pull_request=pull_request_id)
 
        # count inline comments
 
        for __, lines in c.inline_comments:
 
            for comments in lines.values():
 
                c.inline_cnt += len(comments)
 
        # comments
 
        c.comments = cc_model.get_comments(c.db_repo.repo_id, pull_request=pull_request_id)
 

	
 
        # (badly named) pull-request status calculation based on reviewer votes
 
        (c.pull_request_reviewers,
 
         c.pull_request_pending_reviewers,
 
         c.current_voting_result,
 
         ) = cs_model.calculate_pull_request_result(c.pull_request)
 
        c.changeset_statuses = ChangesetStatus.STATUSES
 

	
 
        c.is_ajax_preview = False
 
        c.ancestors = None # [c.a_rev] ... but that is shown in an other way
 
        return render('/pullrequests/pullrequest_show.html')
 

	
 
    @LoginRequired()
 
    @NotAnonymous()
 
    @HasRepoPermissionLevelDecorator('read')
 
    @jsonify
 
    def comment(self, repo_name, pull_request_id):
 
        pull_request = PullRequest.get_or_404(pull_request_id)
 

	
 
        status = request.POST.get('changeset_status')
 
        close_pr = request.POST.get('save_close')
 
        delete = request.POST.get('save_delete')
 
        f_path = request.POST.get('f_path')
 
        line_no = request.POST.get('line')
 

	
 
        if (status or close_pr or delete) and (f_path or line_no):
 
            # status votes and closing is only possible in general comments
 
            raise HTTPBadRequest()
kallithea/lib/diffs.py
Show inline comments
 
@@ -27,103 +27,102 @@ Original author and date, and relevant c
 
"""
 
import re
 
import difflib
 
import logging
 

	
 
from tg.i18n import ugettext as _
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.helpers import escape
 
from kallithea.lib.utils2 import safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def wrap_to_table(html):
 
    """Given a string with html, return it wrapped in a table, similar to what
 
    DiffProcessor returns."""
 
    return '''\
 
              <table class="code-difftable">
 
                <tr class="line no-comment">
 
                <td class="lineno new"></td>
 
                <td class="code no-comment"><pre>%s</pre></td>
 
                </tr>
 
              </table>''' % html
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
 
                ignore_whitespace=True, line_context=3,
 
                enable_comments=False):
 
    """
 
    Returns a file diff wrapped into a table.
 
    Checks for diff_limit and presents a message if the diff is too big.
 
    """
 
    if filenode_old is None:
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    op = None
 
    a_path = filenode_old.path # default, might be overriden by actual rename in diff
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        diff = wrap_to_table(_('Binary file'))
 
        stats = (0, 0)
 

	
 
    elif diff_limit != -1 and (
 
            diff_limit is None or
 
            (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
 

	
 
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
 
        raw_diff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(f_gitdiff)
 
        _parsed = diff_processor.prepare()
 
        if _parsed: # there should be exactly one element, for the specified file
 
            f = _parsed[0]
 
        diff_processor = DiffProcessor(raw_diff)
 
        if diff_processor.parsed: # there should be exactly one element, for the specified file
 
            f = diff_processor.parsed[0]
 
            op = f['operation']
 
            a_path = f['old_filename']
 

	
 
        diff = diff_processor.as_html(enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 

	
 
    else:
 
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 

	
 
    if not diff:
 
        submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                            [filenode_new, filenode_old])
 
        if submodules:
 
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
 
        else:
 
            diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
    return cs1, cs2, a_path, diff, stats, op
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                        [filenode_new, filenode_old])
 
    if submodules:
 
        return ''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
 
                                ignore_whitespace, context)
 
    return vcs_gitdiff
 
@@ -158,201 +157,199 @@ class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _diff_git_re = re.compile('^diff --git', re.MULTILINE)
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
           ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    # Used for inline highlighter word split, must match the substitutions in _escaper
 
    _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None):
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
        self.removes = 0
 
        # calculate diff size
 
        self.diff_size = len(diff)
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = 0
 
        self.parsed = False
 
        self.parsed_diff = []
 
        self.vcs = vcs
 
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 

	
 
    def _escaper(self, string):
 
        """
 
        Do HTML escaping/markup and check the diff limit
 
        """
 
        self.cur_diff_size += len(string)
 

	
 
        # escaper gets iterated on each .next() call and it checks if each
 
        # parsed line doesn't exceed the diff limit
 
        if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
 
            raise DiffLimitExceeded('Diff Limit Exceeded')
 

	
 
        def substitute(m):
 
            groups = m.groups()
 
            if groups[0]:
 
                return '&amp;'
 
            if groups[1]:
 
                return '&lt;'
 
            if groups[2]:
 
                return '&gt;'
 
            if groups[3]:
 
                return '<u>\t</u>'
 
            if groups[4]:
 
                return '<u class="cr"></u>'
 
            if groups[5]:
 
                return ' <i></i>'
 
            assert False
 

	
 
        return self._escape_re.sub(substitute, safe_unicode(string))
 

	
 
    def _highlight_inline_diff(self, old, new):
 
        """
 
        Highlight simple add/remove in two lines given as info dicts. They are
 
        modified in place and given markup with <del>/<ins>.
 
        """
 
        assert old['action'] == 'del'
 
        assert new['action'] == 'add'
 

	
 
        oldwords = self._token_re.split(old['line'])
 
        newwords = self._token_re.split(new['line'])
 
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
        oldfragments, newfragments = [], []
 
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
            oldfrag = ''.join(oldwords[i1:i2])
 
            newfrag = ''.join(newwords[j1:j2])
 
            if tag != 'equal':
 
                if oldfrag:
 
                    oldfrag = '<del>%s</del>' % oldfrag
 
                if newfrag:
 
                    newfrag = '<ins>%s</ins>' % newfrag
 
            oldfragments.append(oldfrag)
 
            newfragments.append(newfrag)
 

	
 
        old['line'] = "".join(oldfragments)
 
        new['line'] = "".join(newfragments)
 

	
 
    def _get_header(self, diff_chunk):
 
        """
 
        Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 

	
 
        1. A dict with meta info:
 

	
 
            a_path, b_path, similarity_index, rename_from, rename_to,
 
            old_mode, new_mode, new_file_mode, deleted_file_mode,
 
            a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
        2. An iterator yielding lines with simple HTML markup.
 
        """
 
        match = None
 
        if self.vcs == 'git':
 
            match = self._git_header_re.match(diff_chunk)
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
        if match is None:
 
            raise Exception('diff not recognized as valid %s diff' % self.vcs)
 
        meta_info = match.groupdict()
 
        rest = diff_chunk[match.end():]
 
        if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
            raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
 
        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        return meta_info, diff_lines
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
    def _parse_gitdiff(self, inline_diff):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        If diff is truncated, wrap it in LimitedDiffContainer.
 
        Optionally, do an extra pass and to extra markup of one-liner changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 
        diff_container = lambda arg: arg
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
                'ops': {},
 
            }
 

	
 
            if head['deleted_file_mode']:
 
                op = 'D'
 
                stats['binary'] = True
 
                stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
            elif head['new_file_mode']:
 
                op = 'A'
 
                stats['binary'] = True
 
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
 
            else:  # modify operation, can be cp, rename, chmod
 
                # CHMOD
 
                if head['new_mode'] and head['old_mode']:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
 
                                        % (head['old_mode'], head['new_mode']))
 
                # RENAME
 
                if (head['rename_from'] and head['rename_to']
 
                      and head['rename_from'] != head['rename_to']):
 
                    op = 'R'
 
                    stats['binary'] = True
 
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
 
                                    % (head['rename_from'], head['rename_to']))
 
                # COPY
 
                if head.get('copy_from') and head.get('copy_to'):
 
                    op = 'M'
 
                    stats['binary'] = True
 
@@ -530,129 +527,117 @@ class DiffProcessor(object):
 
                            'action':       action,
 
                            'line':         line[1:],
 
                        })
 

	
 
                    line = diff_lines.next()
 

	
 
                    if self._newline_marker.match(line):
 
                        # we need to append to lines, since this is not
 
                        # counted in the line specs of diff
 
                        lines.append({
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         line,
 
                        })
 
                        line = diff_lines.next()
 
                if old_line > old_end:
 
                    raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
                if new_line > new_end:
 
                    raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
        except StopIteration:
 
            pass
 
        if old_line != old_end or new_line != new_end:
 
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
        return chunks, added, deleted
 

	
 
    def _safe_id(self, idstring):
 
        """Make a string safe for including in an id attribute.
 

	
 
        The HTML spec says that id attributes 'must begin with
 
        a letter ([A-Za-z]) and may be followed by any number
 
        of letters, digits ([0-9]), hyphens ("-"), underscores
 
        ("_"), colons (":"), and periods (".")'. These regexps
 
        are slightly over-zealous, in that they remove colons
 
        and periods unnecessarily.
 

	
 
        Whitespace is transformed into underscores, and then
 
        anything which is not a hyphen or a character that
 
        matches \w (alphanumerics and underscore) is removed.
 

	
 
        """
 
        # Transform all whitespace to underscore
 
        idstring = re.sub(r'\s', "_", idstring)
 
        # Remove everything that is not a hyphen or a member of \w
 
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
        return idstring
 

	
 
    def prepare(self, inline_diff=True):
 
        """
 
        Prepare the passed udiff for HTML rendering. It'll return a list
 
        of dicts with diff information
 
        """
 
        parsed = self._parse_gitdiff(inline_diff=inline_diff)
 
        self.parsed = True
 
        self.parsed_diff = parsed
 
        return parsed
 

	
 
    def as_html(self, table_class='code-difftable', line_class='line',
 
                old_lineno_class='lineno old', new_lineno_class='lineno new',
 
                no_lineno_class='lineno',
 
                code_class='code', enable_comments=False, parsed_lines=None):
 
        """
 
        Return given diff as html table with customized css classes
 
        """
 
        def _link_to_if(condition, label, url):
 
            """
 
            Generates a link if condition is meet or just the label if not.
 
            """
 

	
 
            if condition:
 
                return '''<a href="%(url)s">%(label)s</a>''' % {
 
                    'url': url,
 
                    'label': label
 
                }
 
            else:
 
                return label
 
        if not self.parsed:
 
            self.prepare()
 

	
 
        diff_lines = self.parsed_diff
 
        diff_lines = self.parsed
 
        if parsed_lines:
 
            diff_lines = parsed_lines
 

	
 
        _html_empty = True
 
        _html = []
 
        _html.append('''<table class="%(table_class)s">\n''' % {
 
            'table_class': table_class
 
        })
 

	
 
        for diff in diff_lines:
 
            for line in diff['chunks']:
 
                _html_empty = False
 
                for change in line:
 
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
 
                        'lc': line_class,
 
                        'action': change['action']
 
                    })
 
                    anchor_old_id = ''
 
                    anchor_new_id = ''
 
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['old_lineno']
 
                    }
 
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['new_lineno']
 
                    }
 
                    cond_old = (change['old_lineno'] != '...' and
 
                                change['old_lineno'])
 
                    cond_new = (change['new_lineno'] != '...' and
 
                                change['new_lineno'])
 
                    no_lineno = (change['old_lineno'] == '...' and
 
                                 change['new_lineno'] == '...')
 
                    if cond_old:
 
                        anchor_old_id = 'id="%s"' % anchor_old
 
                    if cond_new:
 
                        anchor_new_id = 'id="%s"' % anchor_new
 
                    ###########################################################
 
                    # OLD LINE NUMBER
 
                    ###########################################################
 
                    _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
 
                        'a_id': anchor_old_id,
 
                        'olc': no_lineno_class if no_lineno else old_lineno_class,
 
                        'colspan': 'colspan="2"' if no_lineno else ''
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(not no_lineno, change['old_lineno'],
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -230,88 +230,86 @@ DIFF_FIXTURES = {
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'hg_diff_copy_file.diff': [
 
        ('file2', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
 
    ],
 
    'hg_diff_copy_and_modify_file.diff': [
 
        ('file3', 'M',
 
         {'added': 1,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_and_chmod_file.diff': [
 
        ('file4', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
 
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
 
    ],
 
    'hg_diff_copy_chmod_and_edit_file.diff': [
 
        ('file5', 'M',
 
         {'added': 2,
 
          'deleted': 1,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_rename_space_cr.diff': [
 
        ('oh yes', 'R',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
}
 

	
 

	
 
class TestDiffLib(TestController):
 

	
 
    @parametrize('diff_fixture', DIFF_FIXTURES)
 
    def test_diff(self, diff_fixture):
 
        diff = fixture.load_resource(diff_fixture, strip=False)
 
        raw_diff = fixture.load_resource(diff_fixture, strip=False)
 
        vcs = 'hg'
 
        if diff_fixture.startswith('git_'):
 
            vcs = 'git'
 
        diff_proc = DiffProcessor(diff, vcs=vcs)
 
        diff_proc_d = diff_proc.prepare()
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
 
        diff_processor = DiffProcessor(raw_diff, vcs=vcs)
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
 
        expected_data = DIFF_FIXTURES[diff_fixture]
 
        assert expected_data == data
 

	
 
    def test_diff_markup(self):
 
        diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_proc = DiffProcessor(diff)
 
        diff_proc_d = diff_proc.prepare()
 
        chunks = diff_proc_d[0]['chunks']
 
        raw_diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_processor = DiffProcessor(raw_diff)
 
        chunks = diff_processor.parsed[0]['chunks']
 
        assert not chunks[0]
 
        #from pprint import pprint; pprint(chunks[1])
 
        l = ['\n']
 
        for d in chunks[1]:
 
            l.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
 
        s = ''.join(l)
 
        print s
 
        assert s == r'''
 
context ... ... u'@@ -51,6 +51,13 @@\n'
 
unmod    51  51 u'<u>\t</u>begin();\n'
 
unmod    52  52 u'<u>\t</u>\n'
 
add      53     u'<u>\t</u>int foo;<u class="cr"></u>\n'
 
add      54     u'<u>\t</u>int bar; <u class="cr"></u>\n'
 
add      55     u'<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
 
add      56     u'<u>\t</u>int space; <i></i>'
 
add      57     u'<u>\t</u>int tab;<u>\t</u>\n'
 
add      58     u'<u>\t</u>\n'
 
unmod    59  53 u' <i></i>'
 
del          54 u'<u>\t</u>#define MAX_STEPS (48)\n'
 
add      60     u'<u>\t</u><u class="cr"></u>\n'
 
add      61     u'<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
 
unmod    62  55 u'\n'
 
del          56 u'<u>\t</u>#define MIN_STEPS (<del>48</del>)\n'
 
add      63     u'<u>\t</u>#define MIN_STEPS (<ins>42</ins>)\n'
 
'''
0 comments (0 inline, 0 general)