Changeset - 24a9bec8138c
[Not reviewed]
default
Mads Kiilerich - 8 years ago 2017-10-03 00:14:40
mads@kiilerich.com
diffs: inline prepare() into __init__ and make the result available as .parsed

Make it clearer what DiffProcessor is: something that takes a raw diff as
input, does most of its computation when initialized, and makes the result
available in different ways on the resulting object.
6 files changed with 29 insertions and 49 deletions:
0 comments (0 inline, 0 general)
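
For orientation, here is a minimal before/after usage sketch of the API change. The raw_diff value and the small driver below are invented for illustration; the DiffProcessor constructor arguments, the .parsed attribute, the LimitedDiffContainer check, and the per-file dict keys are taken from the diff that follows.

    # Minimal sketch of how call sites change (hypothetical input, illustration only).
    from kallithea.lib.diffs import DiffProcessor, LimitedDiffContainer

    raw_diff = (
        'diff --git a/hello.txt b/hello.txt\n'
        '--- a/hello.txt\n'
        '+++ b/hello.txt\n'
        '@@ -1,1 +1,1 @@\n'
        '-hello\n'
        '+hello world\n'
    )

    # Before: parsing was a separate step that returned the parsed diff.
    #   diff_processor = DiffProcessor(raw_diff, vcs='hg', diff_limit=None)
    #   _parsed = diff_processor.prepare(inline_diff=True)

    # After: parsing happens in __init__, inline_diff is a constructor argument,
    # and the result is available as the .parsed attribute.
    diff_processor = DiffProcessor(raw_diff, vcs='hg', diff_limit=None, inline_diff=True)

    limited = isinstance(diff_processor.parsed, LimitedDiffContainer)  # True if cut off

    for f in diff_processor.parsed:
        st = f['stats']
        print('%s %s: +%s -%s' % (f['operation'], f['filename'], st['added'], st['deleted']))

The controller changes below follow the same pattern: construct the processor once, check .parsed for LimitedDiffContainer, iterate .parsed for per-file stats, and render each file with as_html(parsed_lines=[f]).
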
kallithea/controllers/changeset.py
 
@@ -186,197 +186,196 @@ def create_comment(text, status, f_path,
 
        status_change=ChangesetStatus.get_status_lbl(status) if status else None,
 
        closing_pr=closing_pr,
 
    )
 

	
 
    return comment
 

	
 

	
 
class ChangesetController(BaseRepoController):
 

	
 
    def _before(self, *args, **kwargs):
 
        super(ChangesetController, self)._before(*args, **kwargs)
 
        c.affected_files_cut_off = 60
 

	
 
    def __load_data(self):
 
        repo_model = RepoModel()
 
        c.users_array = repo_model.get_users_js()
 
        c.user_groups_array = repo_model.get_user_groups_js()
 

	
 
    def _index(self, revision, method):
 
        c.pull_request = None
 
        c.anchor_url = anchor_url
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        c.fulldiff = request.GET.get('fulldiff') # for reporting number of changed files
 
        # get ranges of revisions if present
 
        rev_range = revision.split('...')[:2]
 
        enable_comments = True
 
        c.cs_repo = c.db_repo
 
        try:
 
            if len(rev_range) == 2:
 
                enable_comments = False
 
                rev_start = rev_range[0]
 
                rev_end = rev_range[1]
 
                rev_ranges = c.db_repo_scm_instance.get_changesets(start=rev_start,
 
                                                             end=rev_end)
 
            else:
 
                rev_ranges = [c.db_repo_scm_instance.get_changeset(revision)]
 

	
 
            c.cs_ranges = list(rev_ranges)
 
            if not c.cs_ranges:
 
                raise RepositoryError('Changeset range returned empty result')
 

	
 
        except (ChangesetDoesNotExistError, EmptyRepositoryError):
 
            log.debug(traceback.format_exc())
 
            msg = _('Such revision does not exist for this repository')
 
            h.flash(msg, category='error')
 
            raise HTTPNotFound()
 

	
 
        c.changes = OrderedDict()
 

	
 
        c.lines_added = 0  # count of lines added
 
        c.lines_deleted = 0  # count of lines removed
 

	
 
        c.changeset_statuses = ChangesetStatus.STATUSES
 
        comments = dict()
 
        c.statuses = []
 
        c.inline_comments = []
 
        c.inline_cnt = 0
 

	
 
        # Iterate over ranges (default changeset view is always one changeset)
 
        for changeset in c.cs_ranges:
 
            if method == 'show':
 
                c.statuses.extend([ChangesetStatusModel().get_status(
 
                            c.db_repo.repo_id, changeset.raw_id)])
 

	
 
                # Changeset comments
 
                comments.update((com.comment_id, com)
 
                                for com in ChangesetCommentsModel()
 
                                .get_comments(c.db_repo.repo_id,
 
                                              revision=changeset.raw_id))
 

	
 
                # Status change comments - mostly from pull requests
 
                comments.update((st.comment_id, st.comment)
 
                                for st in ChangesetStatusModel()
 
                                .get_statuses(c.db_repo.repo_id,
 
                                              changeset.raw_id, with_revisions=True)
 
                                if st.comment_id is not None)
 

	
 
                inlines = ChangesetCommentsModel() \
 
                            .get_inline_comments(c.db_repo.repo_id,
 
                                                 revision=changeset.raw_id)
 
                c.inline_comments.extend(inlines)
 

	
 
            cs2 = changeset.raw_id
 
            cs1 = changeset.parents[0].raw_id if changeset.parents else EmptyChangeset().raw_id
 
            context_lcl = get_line_ctx('', request.GET)
 
            ign_whitespace_lcl = get_ignore_ws('', request.GET)
 

	
 
            raw_diff = c.db_repo_scm_instance.get_diff(cs1, cs2,
 
                ignore_whitespace=ign_whitespace_lcl, context=context_lcl)
 
            diff_limit = None if c.fulldiff else self.cut_off_limit
 
            file_diff_data = []
 
            if method == 'show':
 
                diff_processor = diffs.DiffProcessor(raw_diff,
 
                                                     vcs=c.db_repo_scm_instance.alias,
 
                                                     diff_limit=diff_limit)
 
                _parsed = diff_processor.prepare()
 
                c.limited_diff = False
 
                if isinstance(_parsed, LimitedDiffContainer):
 
                if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
                    c.limited_diff = True
 
                for f in _parsed:
 
                for f in diff_processor.parsed:
 
                    st = f['stats']
 
                    c.lines_added += st['added']
 
                    c.lines_deleted += st['deleted']
 
                    filename = f['filename']
 
                    fid = h.FID(changeset.raw_id, filename)
 
                    url_fid = h.FID('', filename)
 
                    diff = diff_processor.as_html(enable_comments=enable_comments,
 
                                                  parsed_lines=[f])
 
                    file_diff_data.append((fid, url_fid, f['operation'], f['old_filename'], filename, diff, st))
 
            else:
 
                # downloads/raw we only need RAW diff nothing else
 
                file_diff_data.append(('', None, None, None, raw_diff, None))
 
            c.changes[changeset.raw_id] = (cs1, cs2, file_diff_data)
 

	
 
        # sort comments in creation order
 
        c.comments = [com for com_id, com in sorted(comments.items())]
 

	
 
        # count inline comments
 
        for __, lines in c.inline_comments:
 
            for comments in lines.values():
 
                c.inline_cnt += len(comments)
 

	
 
        if len(c.cs_ranges) == 1:
 
            c.changeset = c.cs_ranges[0]
 
            c.parent_tmpl = ''.join(['# Parent  %s\n' % x.raw_id
 
                                     for x in c.changeset.parents])
 
        if method == 'download':
 
            response.content_type = 'text/plain'
 
            response.content_disposition = 'attachment; filename=%s.diff' \
 
                                            % revision[:12]
 
            return raw_diff
 
        elif method == 'patch':
 
            response.content_type = 'text/plain'
 
            c.diff = safe_unicode(raw_diff)
 
            return render('changeset/patch_changeset.html')
 
        elif method == 'raw':
 
            response.content_type = 'text/plain'
 
            return raw_diff
 
        elif method == 'show':
 
            self.__load_data()
 
            if len(c.cs_ranges) == 1:
 
                return render('changeset/changeset.html')
 
            else:
 
                c.cs_ranges_org = None
 
                c.cs_comments = {}
 
                revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
 
                c.jsdata = graph_data(c.db_repo_scm_instance, revs)
 
                return render('changeset/changeset_range.html')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    def index(self, revision, method='show'):
 
        return self._index(revision, method=method)
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    def changeset_raw(self, revision):
 
        return self._index(revision, method='raw')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    def changeset_patch(self, revision):
 
        return self._index(revision, method='patch')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    def changeset_download(self, revision):
 
        return self._index(revision, method='download')
 

	
 
    @LoginRequired()
 
    @NotAnonymous()
 
    @HasRepoPermissionLevelDecorator('read')
 
    @jsonify
 
    def comment(self, repo_name, revision):
 
        assert request.environ.get('HTTP_X_PARTIAL_XHR')
 

	
 
        status = request.POST.get('changeset_status')
 
        text = request.POST.get('text', '').strip()
 

	
 
        c.comment = create_comment(
 
            text,
 
            status,
 
            revision=revision,
 
            f_path=request.POST.get('f_path'),
 
            line_no=request.POST.get('line'),
 
        )
 

	
 
        # get status if set !
 
        if status:
 
            # if latest status was from pull request and it's closed
 
            # disallow changing status ! RLY?
 
            try:
 
                ChangesetStatusModel().set_status(
 
                    c.db_repo.repo_id,
 
                    status,
 
                    request.authuser.user_id,
kallithea/controllers/compare.py
 
@@ -174,121 +174,120 @@ class CompareController(BaseRepoControll
 
        return render('compare/compare_diff.html')
 

	
 
    @LoginRequired()
 
    @HasRepoPermissionLevelDecorator('read')
 
    def compare(self, repo_name, org_ref_type, org_ref_name, other_ref_type, other_ref_name):
 
        org_ref_name = org_ref_name.strip()
 
        other_ref_name = other_ref_name.strip()
 

	
 
        # If merge is True:
 
        #   Show what org would get if merged with other:
 
        #   List changesets that are ancestors of other but not of org.
 
        #   New changesets in org is thus ignored.
 
        #   Diff will be from common ancestor, and merges of org to other will thus be ignored.
 
        # If merge is False:
 
        #   Make a raw diff from org to other, no matter if related or not.
 
        #   Changesets in one and not in the other will be ignored
 
        merge = bool(request.GET.get('merge'))
 
        # fulldiff disables cut_off_limit
 
        fulldiff = request.GET.get('fulldiff')
 
        # partial uses compare_cs.html template directly
 
        partial = request.environ.get('HTTP_X_PARTIAL_XHR')
 
        # is_ajax_preview puts hidden input field with changeset revisions
 
        c.is_ajax_preview = partial and request.GET.get('is_ajax_preview')
 
        # swap url for compare_diff page - never partial and never is_ajax_preview
 
        c.swap_url = h.url('compare_url',
 
            repo_name=c.cs_repo.repo_name,
 
            org_ref_type=other_ref_type, org_ref_name=other_ref_name,
 
            other_repo=c.a_repo.repo_name,
 
            other_ref_type=org_ref_type, other_ref_name=org_ref_name,
 
            merge=merge or '')
 

	
 
        # set callbacks for generating markup for icons
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 

	
 
        c.a_rev = self._get_ref_rev(c.a_repo, org_ref_type, org_ref_name,
 
            returnempty=True)
 
        c.cs_rev = self._get_ref_rev(c.cs_repo, other_ref_type, other_ref_name)
 

	
 
        c.compare_home = False
 
        c.a_ref_name = org_ref_name
 
        c.a_ref_type = org_ref_type
 
        c.cs_ref_name = other_ref_name
 
        c.cs_ref_type = other_ref_type
 

	
 
        c.cs_ranges, c.cs_ranges_org, c.ancestors = self._get_changesets(
 
            c.a_repo.scm_instance.alias, c.a_repo.scm_instance, c.a_rev,
 
            c.cs_repo.scm_instance, c.cs_rev)
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
 
        c.cs_statuses = c.cs_repo.statuses(raw_ids)
 

	
 
        revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
 
        c.jsdata = graph_data(c.cs_repo.scm_instance, revs)
 

	
 
        if partial:
 
            return render('compare/compare_cs.html')
 

	
 
        org_repo = c.a_repo
 
        other_repo = c.cs_repo
 

	
 
        if merge:
 
            rev1 = msg = None
 
            if not c.cs_ranges:
 
                msg = _('Cannot show empty diff')
 
            elif not c.ancestors:
 
                msg = _('No ancestor found for merge diff')
 
            elif len(c.ancestors) == 1:
 
                rev1 = c.ancestors[0]
 
            else:
 
                msg = _('Multiple merge ancestors found for merge compare')
 
            if rev1 is None:
 
                h.flash(msg, category='error')
 
                log.error(msg)
 
                raise HTTPNotFound
 

	
 
            # case we want a simple diff without incoming changesets,
 
            # previewing what will be merged.
 
            # Make the diff on the other repo (which is known to have other_rev)
 
            log.debug('Using ancestor %s as rev1 instead of %s',
 
                      rev1, c.a_rev)
 
            org_repo = other_repo
 
        else: # comparing tips, not necessarily linearly related
 
            if org_repo != other_repo:
 
                # TODO: we could do this by using hg unionrepo
 
                log.error('cannot compare across repos %s and %s', org_repo, other_repo)
 
                h.flash(_('Cannot compare repositories without using common ancestor'), category='error')
 
                raise HTTPBadRequest
 
            rev1 = c.a_rev
 

	
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        log.debug('running diff between %s and %s in %s',
 
                  rev1, c.cs_rev, org_repo.scm_instance.path)
 
        txtdiff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
        raw_diff = org_repo.scm_instance.get_diff(rev1=rev1, rev2=c.cs_rev,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=line_context)
 

	
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 
        for f in _parsed:
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            diff = diff_processor.as_html(enable_comments=False,
 
                                          parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, diff, st))
 

	
 
        return render('compare/compare_diff.html')
kallithea/controllers/feed.py
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.feed
 
~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Feed controller for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 23, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 

	
 
from tg import response, tmpl_context as c
 
from tg.i18n import ugettext as _
 

	
 
from beaker.cache import cache_region, region_invalidate
 
from webhelpers.feedgenerator import Atom1Feed, Rss201rev2Feed
 

	
 
from kallithea import CONFIG
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import LoginRequired, HasRepoPermissionLevelDecorator
 
from kallithea.lib.base import BaseRepoController
 
from kallithea.lib.diffs import DiffProcessor, LimitedDiffContainer
 
from kallithea.model.db import CacheInvalidation
 
from kallithea.lib.utils2 import safe_int, str2bool, safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
language = 'en-us'
 
ttl = "5"
 

	
 

	
 
class FeedController(BaseRepoController):
 

	
 
    @LoginRequired(api_access=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def _before(self, *args, **kwargs):
 
        super(FeedController, self)._before(*args, **kwargs)
 

	
 
    def _get_title(self, cs):
 
        return h.shorter(cs.message, 160)
 

	
 
    def __get_desc(self, cs):
 
        desc_msg = [(_('%s committed on %s')
 
                     % (h.person(cs.author), h.fmt_date(cs.date))) + '<br/>']
 
        # branches, tags, bookmarks
 
        if cs.branch:
 
            desc_msg.append('branch: %s<br/>' % cs.branch)
 
        for book in cs.bookmarks:
 
            desc_msg.append('bookmark: %s<br/>' % book)
 
        for tag in cs.tags:
 
            desc_msg.append('tag: %s<br/>' % tag)
 

	
 
        changes = []
 
        diff_limit = safe_int(CONFIG.get('rss_cut_off_limit', 32 * 1024))
 
        raw_diff = cs.diff()
 
        diff_processor = DiffProcessor(raw_diff,
 
                                       diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare(inline_diff=False)
 
                                       diff_limit=diff_limit,
 
                                       inline_diff=False)
 
        limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            limited_diff = True
 

	
 
        for st in _parsed:
 
        for st in diff_processor.parsed:
 
            st.update({'added': st['stats']['added'],
 
                       'removed': st['stats']['deleted']})
 
            changes.append('\n %(operation)s %(filename)s '
 
                           '(%(added)s lines added, %(removed)s lines removed)'
 
                            % st)
 
        if limited_diff:
 
            changes = changes + ['\n ' +
 
                                 _('Changeset was too big and was cut off...')]
 

	
 
        # rev link
 
        _url = h.canonical_url('changeset_home', repo_name=c.db_repo.repo_name,
 
                   revision=cs.raw_id)
 
        desc_msg.append('changeset: <a href="%s">%s</a>' % (_url, cs.raw_id[:8]))
 

	
 
        desc_msg.append('<pre>')
 
        desc_msg.append(h.urlify_text(cs.message))
 
        desc_msg.append('\n')
 
        desc_msg.extend(changes)
 
        if str2bool(CONFIG.get('rss_include_diff', False)):
 
            desc_msg.append('\n\n')
 
            desc_msg.append(raw_diff)
 
        desc_msg.append('</pre>')
 
        return map(safe_unicode, desc_msg)
 

	
 
    def atom(self, repo_name):
 
        """Produce an atom-1.0 feed via feedgenerator module"""
 

	
 
        @cache_region('long_term', '_get_feed_from_cache')
 
        def _get_feed_from_cache(key, kind):
 
            feed = Atom1Feed(
 
                title=_('%s %s feed') % (c.site_name, repo_name),
 
                link=h.canonical_url('summary_home', repo_name=repo_name),
 
                description=_('Changes on %s repository') % repo_name,
 
                language=language,
 
                ttl=ttl
 
            )
 

	
 
            rss_items_per_page = safe_int(CONFIG.get('rss_items_per_page', 20))
 
            for cs in reversed(list(c.db_repo_scm_instance[-rss_items_per_page:])):
 
                feed.add_item(title=self._get_title(cs),
 
                              link=h.canonical_url('changeset_home', repo_name=repo_name,
 
                                       revision=cs.raw_id),
 
                              author_name=cs.author,
 
                              description=''.join(self.__get_desc(cs)),
 
                              pubdate=cs.date,
 
                              )
 

	
 
            response.content_type = feed.mime_type
 
            return feed.writeString('utf-8')
 

	
 
        kind = 'ATOM'
 
        valid = CacheInvalidation.test_and_set_valid(repo_name, kind)
 
        if not valid:
 
            region_invalidate(_get_feed_from_cache, None, '_get_feed_from_cache', repo_name, kind)
 
        return _get_feed_from_cache(repo_name, kind)
 

	
 
    def rss(self, repo_name):
 
        """Produce an rss2 feed via feedgenerator module"""
 

	
 
        @cache_region('long_term', '_get_feed_from_cache')
 
        def _get_feed_from_cache(key, kind):
 
            feed = Rss201rev2Feed(
 
                title=_('%s %s feed') % (c.site_name, repo_name),
 
                link=h.canonical_url('summary_home', repo_name=repo_name),
 
                description=_('Changes on %s repository') % repo_name,
 
                language=language,
 
                ttl=ttl
 
            )
 

	
 
            rss_items_per_page = safe_int(CONFIG.get('rss_items_per_page', 20))
 
            for cs in reversed(list(c.db_repo_scm_instance[-rss_items_per_page:])):
 
                feed.add_item(title=self._get_title(cs),
 
                              link=h.canonical_url('changeset_home', repo_name=repo_name,
 
                                       revision=cs.raw_id),
 
                              author_name=cs.author,
 
                              description=''.join(self.__get_desc(cs)),
 
                              pubdate=cs.date,
 
                             )
 

	
 
            response.content_type = feed.mime_type
 
            return feed.writeString('utf-8')
 

	
 
        kind = 'RSS'
 
        valid = CacheInvalidation.test_and_set_valid(repo_name, kind)
 
        if not valid:
 
            region_invalidate(_get_feed_from_cache, None, '_get_feed_from_cache', repo_name, kind)
 
        return _get_feed_from_cache(repo_name, kind)
kallithea/controllers/pullrequests.py
 
@@ -499,209 +499,208 @@ class PullrequestsController(BaseRepoCon
 
        try:
 
            if c.a_ref_type == 'rev': # this looks like a free range where target is ancestor
 
                cs_a = org_scm_instance.get_changeset(c.a_rev)
 
                root_parents = c.cs_ranges[0].parents
 
                c.is_range = cs_a in root_parents
 
                #c.merge_root = len(root_parents) > 1 # a range starting with a merge might deserve a warning
 
        except ChangesetDoesNotExistError: # probably because c.a_rev not found
 
            pass
 
        except IndexError: # probably because c.cs_ranges is empty, probably because revisions are missing
 
            pass
 

	
 
        avail_revs = set()
 
        avail_show = []
 
        c.cs_branch_name = c.cs_ref_name
 
        c.a_branch_name = None
 
        other_scm_instance = c.a_repo.scm_instance
 
        c.update_msg = ""
 
        c.update_msg_other = ""
 
        try:
 
            if not c.cs_ranges:
 
                c.update_msg = _('Error: changesets not found when displaying pull request from %s.') % c.cs_rev
 
            elif org_scm_instance.alias == 'hg' and c.a_ref_name != 'ancestor':
 
                if c.cs_ref_type != 'branch':
 
                    c.cs_branch_name = org_scm_instance.get_changeset(c.cs_ref_name).branch # use ref_type ?
 
                c.a_branch_name = c.a_ref_name
 
                if c.a_ref_type != 'branch':
 
                    try:
 
                        c.a_branch_name = other_scm_instance.get_changeset(c.a_ref_name).branch # use ref_type ?
 
                    except EmptyRepositoryError:
 
                        c.a_branch_name = 'null' # not a branch name ... but close enough
 
                # candidates: descendants of old head that are on the right branch
 
                #             and not are the old head itself ...
 
                #             and nothing at all if old head is a descendant of target ref name
 
                if not c.is_range and other_scm_instance._repo.revs('present(%s)::&%s', c.cs_ranges[-1].raw_id, c.a_branch_name):
 
                    c.update_msg = _('This pull request has already been merged to %s.') % c.a_branch_name
 
                elif c.pull_request.is_closed():
 
                    c.update_msg = _('This pull request has been closed and can not be updated.')
 
                else: # look for descendants of PR head on source branch in org repo
 
                    avail_revs = org_scm_instance._repo.revs('%s:: & branch(%s)',
 
                                                             revs[0], c.cs_branch_name)
 
                    if len(avail_revs) > 1: # more than just revs[0]
 
                        # also show changesets that not are descendants but would be merged in
 
                        targethead = other_scm_instance.get_changeset(c.a_branch_name).raw_id
 
                        if org_scm_instance.path != other_scm_instance.path:
 
                            # Note: org_scm_instance.path must come first so all
 
                            # valid revision numbers are 100% org_scm compatible
 
                            # - both for avail_revs and for revset results
 
                            hgrepo = unionrepo.unionrepository(org_scm_instance.baseui,
 
                                                               org_scm_instance.path,
 
                                                               other_scm_instance.path)
 
                        else:
 
                            hgrepo = org_scm_instance._repo
 
                        show = set(hgrepo.revs('::%ld & !::parents(%s) & !::%s',
 
                                               avail_revs, revs[0], targethead))
 
                        c.update_msg = _('The following additional changes are available on %s:') % c.cs_branch_name
 
                    else:
 
                        show = set()
 
                        avail_revs = set() # drop revs[0]
 
                        c.update_msg = _('No additional changesets found for iterating on this pull request.')
 

	
 
                    # TODO: handle branch heads that not are tip-most
 
                    brevs = org_scm_instance._repo.revs('%s - %ld - %s', c.cs_branch_name, avail_revs, revs[0])
 
                    if brevs:
 
                        # also show changesets that are on branch but neither ancestors nor descendants
 
                        show.update(org_scm_instance._repo.revs('::%ld - ::%ld - ::%s', brevs, avail_revs, c.a_branch_name))
 
                        show.add(revs[0]) # make sure graph shows this so we can see how they relate
 
                        c.update_msg_other = _('Note: Branch %s has another head: %s.') % (c.cs_branch_name,
 
                            h.short_id(org_scm_instance.get_changeset((max(brevs))).raw_id))
 

	
 
                    avail_show = sorted(show, reverse=True)
 

	
 
            elif org_scm_instance.alias == 'git':
 
                c.cs_repo.scm_instance.get_changeset(c.cs_rev) # check it exists - raise ChangesetDoesNotExistError if not
 
                c.update_msg = _("Git pull requests don't support iterating yet.")
 
        except ChangesetDoesNotExistError:
 
            c.update_msg = _('Error: some changesets not found when displaying pull request from %s.') % c.cs_rev
 

	
 
        c.avail_revs = avail_revs
 
        c.avail_cs = [org_scm_instance.get_changeset(r) for r in avail_show]
 
        c.avail_jsdata = graph_data(org_scm_instance, avail_show)
 

	
 
        raw_ids = [x.raw_id for x in c.cs_ranges]
 
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
 
        c.cs_statuses = c.cs_repo.statuses(raw_ids)
 

	
 
        ignore_whitespace = request.GET.get('ignorews') == '1'
 
        line_context = safe_int(request.GET.get('context'), 3)
 
        c.ignorews_url = _ignorews_url
 
        c.context_url = _context_url
 
        fulldiff = request.GET.get('fulldiff')
 
        diff_limit = None if fulldiff else self.cut_off_limit
 

	
 
        # we swap org/other ref since we run a simple diff on one repo
 
        log.debug('running diff between %s and %s in %s',
 
                  c.a_rev, c.cs_rev, org_scm_instance.path)
 
        try:
 
            txtdiff = org_scm_instance.get_diff(rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
            raw_diff = org_scm_instance.get_diff(rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
                                                ignore_whitespace=ignore_whitespace,
 
                                                context=line_context)
 
        except ChangesetDoesNotExistError:
 
            txtdiff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(txtdiff or '', diff_limit=diff_limit)
 
        _parsed = diff_processor.prepare()
 
            raw_diff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 

	
 
        c.limited_diff = False
 
        if isinstance(_parsed, LimitedDiffContainer):
 
        if isinstance(diff_processor.parsed, LimitedDiffContainer):
 
            c.limited_diff = True
 

	
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 

	
 
        for f in _parsed:
 
        for f in diff_processor.parsed:
 
            st = f['stats']
 
            c.lines_added += st['added']
 
            c.lines_deleted += st['deleted']
 
            filename = f['filename']
 
            fid = h.FID('', filename)
 
            diff = diff_processor.as_html(enable_comments=True,
 
                                          parsed_lines=[f])
 
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, diff, st))
 

	
 
        # inline comments
 
        c.inline_cnt = 0
 
        c.inline_comments = cc_model.get_inline_comments(
 
                                c.db_repo.repo_id,
 
                                pull_request=pull_request_id)
 
        # count inline comments
 
        for __, lines in c.inline_comments:
 
            for comments in lines.values():
 
                c.inline_cnt += len(comments)
 
        # comments
 
        c.comments = cc_model.get_comments(c.db_repo.repo_id, pull_request=pull_request_id)
 

	
 
        # (badly named) pull-request status calculation based on reviewer votes
 
        (c.pull_request_reviewers,
 
         c.pull_request_pending_reviewers,
 
         c.current_voting_result,
 
         ) = cs_model.calculate_pull_request_result(c.pull_request)
 
        c.changeset_statuses = ChangesetStatus.STATUSES
 

	
 
        c.is_ajax_preview = False
 
        c.ancestors = None # [c.a_rev] ... but that is shown in an other way
 
        return render('/pullrequests/pullrequest_show.html')
 

	
 
    @LoginRequired()
 
    @NotAnonymous()
 
    @HasRepoPermissionLevelDecorator('read')
 
    @jsonify
 
    def comment(self, repo_name, pull_request_id):
 
        pull_request = PullRequest.get_or_404(pull_request_id)
 

	
 
        status = request.POST.get('changeset_status')
 
        close_pr = request.POST.get('save_close')
 
        delete = request.POST.get('save_delete')
 
        f_path = request.POST.get('f_path')
 
        line_no = request.POST.get('line')
 

	
 
        if (status or close_pr or delete) and (f_path or line_no):
 
            # status votes and closing is only possible in general comments
 
            raise HTTPBadRequest()
 

	
 
        allowed_to_change_status = self._get_is_allowed_change_status(pull_request)
 
        if not allowed_to_change_status:
 
            if status or close_pr:
 
                h.flash(_('No permission to change pull request status'), 'error')
 
                raise HTTPForbidden()
 

	
 
        if delete == "delete":
 
            if (pull_request.owner_id == request.authuser.user_id or
 
                h.HasPermissionAny('hg.admin')() or
 
                h.HasRepoPermissionLevel('admin')(pull_request.org_repo.repo_name) or
 
                h.HasRepoPermissionLevel('admin')(pull_request.other_repo.repo_name)
 
                ) and not pull_request.is_closed():
 
                PullRequestModel().delete(pull_request)
 
                Session().commit()
 
                h.flash(_('Successfully deleted pull request %s') % pull_request_id,
 
                        category='success')
 
                return {
 
                   'location': url('my_pullrequests'), # or repo pr list?
 
                }
 
                raise HTTPFound(location=url('my_pullrequests')) # or repo pr list?
 
            raise HTTPForbidden()
 

	
 
        text = request.POST.get('text', '').strip()
 

	
 
        comment = create_comment(
 
            text,
 
            status,
 
            pull_request_id=pull_request_id,
 
            f_path=f_path,
 
            line_no=line_no,
 
            closing_pr=close_pr,
 
        )
 

	
 
        action_logger(request.authuser,
 
                      'user_commented_pull_request:%s' % pull_request_id,
 
                      c.db_repo, request.ip_addr)
 

	
 
        if status:
 
            ChangesetStatusModel().set_status(
 
                c.db_repo.repo_id,
 
                status,
 
                request.authuser.user_id,
 
                comment,
 
                pull_request=pull_request_id
 
            )
 

	
 
        if close_pr:
kallithea/lib/diffs.py
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.diffs
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Set of diffing helpers, previously part of vcs
 

	
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 4, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 
import re
 
import difflib
 
import logging
 

	
 
from tg.i18n import ugettext as _
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.helpers import escape
 
from kallithea.lib.utils2 import safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def wrap_to_table(html):
 
    """Given a string with html, return it wrapped in a table, similar to what
 
    DiffProcessor returns."""
 
    return '''\
 
              <table class="code-difftable">
 
                <tr class="line no-comment">
 
                <td class="lineno new"></td>
 
                <td class="code no-comment"><pre>%s</pre></td>
 
                </tr>
 
              </table>''' % html
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
 
                ignore_whitespace=True, line_context=3,
 
                enable_comments=False):
 
    """
 
    Returns a file diff wrapped into a table.
 
    Checks for diff_limit and presents a message if the diff is too big.
 
    """
 
    if filenode_old is None:
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    op = None
 
    a_path = filenode_old.path # default, might be overridden by actual rename in diff
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        diff = wrap_to_table(_('Binary file'))
 
        stats = (0, 0)
 

	
 
    elif diff_limit != -1 and (
 
            diff_limit is None or
 
            (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
 

	
 
        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
 
        raw_diff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(f_gitdiff)
 
        _parsed = diff_processor.prepare()
 
        if _parsed: # there should be exactly one element, for the specified file
 
            f = _parsed[0]
 
        diff_processor = DiffProcessor(raw_diff)
 
        if diff_processor.parsed: # there should be exactly one element, for the specified file
 
            f = diff_processor.parsed[0]
 
            op = f['operation']
 
            a_path = f['old_filename']
 

	
 
        diff = diff_processor.as_html(enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 

	
 
    else:
 
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 

	
 
    if not diff:
 
        submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                            [filenode_new, filenode_old])
 
        if submodules:
 
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
 
        else:
 
            diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
    return cs1, cs2, a_path, diff, stats, op
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                        [filenode_new, filenode_old])
 
    if submodules:
 
        return ''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
 
                                ignore_whitespace, context)
 
    return vcs_gitdiff
 

	
 

	
 
NEW_FILENODE = 1
 
DEL_FILENODE = 2
 
MOD_FILENODE = 3
 
RENAMED_FILENODE = 4
 
COPIED_FILENODE = 5
 
CHMOD_FILENODE = 6
 
BIN_FILENODE = 7
 

	
 

	
 
class DiffLimitExceeded(Exception):
 
    pass
 

	
 

	
 
class LimitedDiffContainer(object):
 

	
 
    def __init__(self, diff_limit, cur_diff_size, diff):
 
        self.diff = diff
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = cur_diff_size
 

	
 
    def __iter__(self):
 
        for l in self.diff:
 
            yield l
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _diff_git_re = re.compile('^diff --git', re.MULTILINE)
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
           ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    # Used for inline highlighter word split, must match the substitutions in _escaper
 
    _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 
    def __init__(self, diff, vcs='hg', diff_limit=None):
 
    def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
        self.removes = 0
 
        # calculate diff size
 
        self.diff_size = len(diff)
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = 0
 
        self.parsed = False
 
        self.parsed_diff = []
 
        self.vcs = vcs
 
        self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 

	
 
    def _escaper(self, string):
 
        """
 
        Do HTML escaping/markup and check the diff limit
 
        """
 
        self.cur_diff_size += len(string)
 

	
 
        # escaper gets iterated on each .next() call and it checks if each
 
        # parsed line doesn't exceed the diff limit
 
        if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
 
            raise DiffLimitExceeded('Diff Limit Exceeded')
 

	
 
        def substitute(m):
 
            groups = m.groups()
 
            if groups[0]:
 
                return '&amp;'
 
            if groups[1]:
 
                return '&lt;'
 
            if groups[2]:
 
                return '&gt;'
 
            if groups[3]:
 
                return '<u>\t</u>'
 
            if groups[4]:
 
                return '<u class="cr"></u>'
 
            if groups[5]:
 
                return ' <i></i>'
 
            assert False
 

	
 
        return self._escape_re.sub(substitute, safe_unicode(string))
 

	
 
    def _highlight_inline_diff(self, old, new):
 
        """
 
        Highlight simple add/remove in two lines given as info dicts. They are
 
        modified in place and given markup with <del>/<ins>.
 
        """
 
        assert old['action'] == 'del'
 
        assert new['action'] == 'add'
 

	
 
        oldwords = self._token_re.split(old['line'])
 
        newwords = self._token_re.split(new['line'])
 
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
        oldfragments, newfragments = [], []
 
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
            oldfrag = ''.join(oldwords[i1:i2])
 
            newfrag = ''.join(newwords[j1:j2])
 
            if tag != 'equal':
 
                if oldfrag:
 
                    oldfrag = '<del>%s</del>' % oldfrag
 
                if newfrag:
 
                    newfrag = '<ins>%s</ins>' % newfrag
 
            oldfragments.append(oldfrag)
 
            newfragments.append(newfrag)
 

	
 
        old['line'] = "".join(oldfragments)
 
        new['line'] = "".join(newfragments)
 

	
 
    def _get_header(self, diff_chunk):
 
        """
 
        Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 

	
 
        1. A dict with meta info:
 

	
 
            a_path, b_path, similarity_index, rename_from, rename_to,
 
            old_mode, new_mode, new_file_mode, deleted_file_mode,
 
            a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
        2. An iterator yielding lines with simple HTML markup.
 
        """
 
        match = None
 
        if self.vcs == 'git':
 
            match = self._git_header_re.match(diff_chunk)
 
        elif self.vcs == 'hg':
 
            match = self._hg_header_re.match(diff_chunk)
 
        if match is None:
 
            raise Exception('diff not recognized as valid %s diff' % self.vcs)
 
        meta_info = match.groupdict()
 
        rest = diff_chunk[match.end():]
 
        if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
            raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
 
        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
        return meta_info, diff_lines
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
 
    def _parse_gitdiff(self, inline_diff):
 
        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
 
        If diff is truncated, wrap it in LimitedDiffContainer.
 
        Optionally, do an extra pass and add extra markup of one-liner changes.
 
        """
 
        _files = [] # list of dicts with meta info and chunks
 
        diff_container = lambda arg: arg
 

	
 
        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
 
        starts.append(len(self._diff))
 

	
 
        for start, end in zip(starts, starts[1:]):
 
            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
 

	
 
            op = None
 
            stats = {
 
                'added': 0,
 
                'deleted': 0,
 
                'binary': False,
 
                'ops': {},
 
            }
 

	
 
            if head['deleted_file_mode']:
 
                op = 'D'
 
                stats['binary'] = True
 
                stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
            elif head['new_file_mode']:
 
                op = 'A'
 
                stats['binary'] = True
 
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
 
            else:  # modify operation, can be cp, rename, chmod
 
                # CHMOD
 
                if head['new_mode'] and head['old_mode']:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
 
                                        % (head['old_mode'], head['new_mode']))
 
                # RENAME
 
                if (head['rename_from'] and head['rename_to']
 
                      and head['rename_from'] != head['rename_to']):
 
                    op = 'R'
 
                    stats['binary'] = True
 
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
 
                                    % (head['rename_from'], head['rename_to']))
 
                # COPY
 
                if head.get('copy_from') and head.get('copy_to'):
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
 
                                        % (head['copy_from'], head['copy_to']))
 
                # FALL BACK: detect missed old style add or remove
 
                if op is None:
 
                    if not head['a_file'] and head['b_file']:
 
                        op = 'A'
 
                        stats['binary'] = True
 
                        stats['ops'][NEW_FILENODE] = 'new file'
 

	
 
                    elif head['a_file'] and not head['b_file']:
 
                        op = 'D'
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'M'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                try:
 
                    chunks, added, deleted = self._parse_lines(diff_lines)
 
                    stats['binary'] = False
 
                    stats['added'] = added
 
                    stats['deleted'] = deleted
 
                    # explicit mark that it's a modified file
 
                    if op == 'M':
 
                        stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
                except DiffLimitExceeded:
 
                    diff_container = lambda _diff: \
 
                        LimitedDiffContainer(self.diff_limit,
 
                                            self.cur_diff_size, _diff)
 
                    break
 
            else:  # Git binary patch (or empty diff)
 
                # Git binary patch
 
                if head['bin_patch']:
 
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
 
                chunks = []
 

	
 
            if op == 'D' and chunks:
 
                # a way of seeing deleted content could perhaps be nice - but
 
                # not with the current UI
 
                chunks = []
 

	
 
            chunks.insert(0, [{
 
@@ -482,219 +479,207 @@ class DiffProcessor(object):
 
                (old_line, old_end,
 
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
                old_line -= 1
 
                new_line -= 1
 

	
 
                context = len(gr) == 5
 
                old_end += old_line
 
                new_end += new_line
 

	
 
                if context:
 
                    # skip context only if it's first line
 
                    if int(gr[0]) > 1:
 
                        lines.append({
 
                            'old_lineno': '...',
 
                            'new_lineno': '...',
 
                            'action':     'context',
 
                            'line':       line,
 
                        })
 

	
 
                line = diff_lines.next()
 

	
 
                while old_line < old_end or new_line < new_end:
 
                    if not line:
 
                        raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                    affects_old = affects_new = False
 

	
 
                    command = line[0]
 
                    if command == '+':
 
                        affects_new = True
 
                        action = 'add'
 
                        added += 1
 
                    elif command == '-':
 
                        affects_old = True
 
                        action = 'del'
 
                        deleted += 1
 
                    elif command == ' ':
 
                        affects_old = affects_new = True
 
                        action = 'unmod'
 
                    else:
 
                        raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                    if not self._newline_marker.match(line):
 
                        old_line += affects_old
 
                        new_line += affects_new
 
                        lines.append({
 
                            'old_lineno':   affects_old and old_line or '',
 
                            'new_lineno':   affects_new and new_line or '',
 
                            'action':       action,
 
                            'line':         line[1:],
 
                        })
 

	
 
                    line = diff_lines.next()
 

	
 
                    if self._newline_marker.match(line):
 
                        # we need to append to lines, since this is not
 
                        # counted in the line specs of diff
 
                        lines.append({
 
                            'old_lineno':   '...',
 
                            'new_lineno':   '...',
 
                            'action':       'context',
 
                            'line':         line,
 
                        })
 
                        line = diff_lines.next()
 
                if old_line > old_end:
 
                    raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
                if new_line > new_end:
 
                    raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
        except StopIteration:
 
            pass
 
        if old_line != old_end or new_line != new_end:
 
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
        return chunks, added, deleted
 

	
 
    def _safe_id(self, idstring):
 
        """Make a string safe for including in an id attribute.
 

	
 
        The HTML spec says that id attributes 'must begin with
 
        a letter ([A-Za-z]) and may be followed by any number
 
        of letters, digits ([0-9]), hyphens ("-"), underscores
 
        ("_"), colons (":"), and periods (".")'. These regexps
 
        are slightly over-zealous, in that they remove colons
 
        and periods unnecessarily.
 

	
 
        Whitespace is transformed into underscores, and then
 
        anything which is not a hyphen or a character that
 
        matches \w (alphanumerics and underscore) is removed.
 

	
 
        """
 
        # Transform all whitespace to underscore
 
        idstring = re.sub(r'\s', "_", idstring)
 
        # Remove everything that is not a hyphen or a member of \w
 
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
        return idstring
 

	
 
    def prepare(self, inline_diff=True):
 
        """
 
        Prepare the passed udiff for HTML rendering. It'll return a list
 
        of dicts with diff information
 
        """
 
        parsed = self._parse_gitdiff(inline_diff=inline_diff)
 
        self.parsed = True
 
        self.parsed_diff = parsed
 
        return parsed
 

	
 
    def as_html(self, table_class='code-difftable', line_class='line',
 
                old_lineno_class='lineno old', new_lineno_class='lineno new',
 
                no_lineno_class='lineno',
 
                code_class='code', enable_comments=False, parsed_lines=None):
 
        """
 
        Return given diff as html table with customized css classes
 
        """
 
        def _link_to_if(condition, label, url):
 
            """
 
            Generates a link if condition is met, or just the label if not.
 
            """
 

	
 
            if condition:
 
                return '''<a href="%(url)s">%(label)s</a>''' % {
 
                    'url': url,
 
                    'label': label
 
                }
 
            else:
 
                return label
 
        if not self.parsed:
 
            self.prepare()
 

	
 
        diff_lines = self.parsed_diff
 
        diff_lines = self.parsed
 
        if parsed_lines:
 
            diff_lines = parsed_lines
 

	
 
        _html_empty = True
 
        _html = []
 
        _html.append('''<table class="%(table_class)s">\n''' % {
 
            'table_class': table_class
 
        })
 

	
 
        for diff in diff_lines:
 
            for line in diff['chunks']:
 
                _html_empty = False
 
                for change in line:
 
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
 
                        'lc': line_class,
 
                        'action': change['action']
 
                    })
 
                    anchor_old_id = ''
 
                    anchor_new_id = ''
 
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['old_lineno']
 
                    }
 
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
 
                        'filename': self._safe_id(diff['filename']),
 
                        'oldline_no': change['new_lineno']
 
                    }
 
                    cond_old = (change['old_lineno'] != '...' and
 
                                change['old_lineno'])
 
                    cond_new = (change['new_lineno'] != '...' and
 
                                change['new_lineno'])
 
                    no_lineno = (change['old_lineno'] == '...' and
 
                                 change['new_lineno'] == '...')
 
                    if cond_old:
 
                        anchor_old_id = 'id="%s"' % anchor_old
 
                    if cond_new:
 
                        anchor_new_id = 'id="%s"' % anchor_new
 
                    ###########################################################
 
                    # OLD LINE NUMBER
 
                    ###########################################################
 
                    _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
 
                        'a_id': anchor_old_id,
 
                        'olc': no_lineno_class if no_lineno else old_lineno_class,
 
                        'colspan': 'colspan="2"' if no_lineno else ''
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(not no_lineno, change['old_lineno'],
 
                                            '#%s' % anchor_old)
 
                    })
 
                    _html.append('''</td>\n''')
 
                    ###########################################################
 
                    # NEW LINE NUMBER
 
                    ###########################################################
 

	
 
                    if not no_lineno:
 
                        _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
 
                            'a_id': anchor_new_id,
 
                            'nlc': new_lineno_class
 
                        })
 

	
 
                        _html.append('''%(link)s''' % {
 
                            'link': _link_to_if(True, change['new_lineno'],
 
                                                '#%s' % anchor_new)
 
                        })
 
                        _html.append('''</td>\n''')
 
                    ###########################################################
 
                    # CODE
 
                    ###########################################################
 
                    comments = '' if enable_comments else 'no-comment'
 
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
 
                        'cc': code_class,
 
                        'inc': comments
 
                    })
 
                    _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
 
                        'code': change['line']
 
                    })
 

	
 
                    _html.append('''\t</td>''')
 
                    _html.append('''\n</tr>\n''')
 
        _html.append('''</table>''')
 
        if _html_empty:
 
            return None
 
        return ''.join(_html)
 

	
 
    def stat(self):
 
        """
 
        Returns a tuple of added and removed lines for this instance
 
        """
 
        return self.adds, self.removes
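
Taken together with the updated tests below, typical use of the processor after this change looks like the following hedged sketch (it assumes kallithea is importable; the diff file name is a placeholder):

    from kallithea.lib.diffs import DiffProcessor  # module touched by this diff

    raw_diff = open('some.diff').read()                  # any raw unified diff text
    diff_processor = DiffProcessor(raw_diff, vcs='hg')   # parsing happens here now
    for f in diff_processor.parsed:                      # one dict per changed file
        print f['filename'], f['operation'], f['stats']
    html = diff_processor.as_html(enable_comments=True)  # None when the diff is empty
    added, removed = diff_processor.stat()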
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -182,136 +182,134 @@ DIFF_FIXTURES = {
 
         {'added': 34,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('less/scaffolding.less', 'M',
 
         {'added': 1,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('readme.markdown', 'M',
 
         {'added': 1,
 
          'deleted': 10,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'diff_with_diff_data.diff': [
 
        ('vcs/backends/base.py', 'M',
 
         {'added': 18,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/git/repository.py', 'M',
 
         {'added': 46,
 
          'deleted': 15,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/hg.py', 'M',
 
         {'added': 22,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_git.py', 'M',
 
         {'added': 5,
 
          'deleted': 5,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_repository.py', 'M',
 
         {'added': 174,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'git_diff_modify_binary_file.diff': [
 
        ('file.name', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'hg_diff_copy_file.diff': [
 
        ('file2', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
 
    ],
 
    'hg_diff_copy_and_modify_file.diff': [
 
        ('file3', 'M',
 
         {'added': 1,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_and_chmod_file.diff': [
 
        ('file4', 'M',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
 
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
 
    ],
 
    'hg_diff_copy_chmod_and_edit_file.diff': [
 
        ('file5', 'M',
 
         {'added': 2,
 
          'deleted': 1,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_rename_space_cr.diff': [
 
        ('oh yes', 'R',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
}
 

	
 

	
 
class TestDiffLib(TestController):
 

	
 
    @parametrize('diff_fixture', DIFF_FIXTURES)
 
    def test_diff(self, diff_fixture):
 
        diff = fixture.load_resource(diff_fixture, strip=False)
 
        raw_diff = fixture.load_resource(diff_fixture, strip=False)
 
        vcs = 'hg'
 
        if diff_fixture.startswith('git_'):
 
            vcs = 'git'
 
        diff_proc = DiffProcessor(diff, vcs=vcs)
 
        diff_proc_d = diff_proc.prepare()
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_proc_d]
 
        diff_processor = DiffProcessor(raw_diff, vcs=vcs)
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
 
        expected_data = DIFF_FIXTURES[diff_fixture]
 
        assert expected_data == data
 

	
 
    def test_diff_markup(self):
 
        diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_proc = DiffProcessor(diff)
 
        diff_proc_d = diff_proc.prepare()
 
        chunks = diff_proc_d[0]['chunks']
 
        raw_diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_processor = DiffProcessor(raw_diff)
 
        chunks = diff_processor.parsed[0]['chunks']
 
        assert not chunks[0]
 
        #from pprint import pprint; pprint(chunks[1])
 
        l = ['\n']
 
        for d in chunks[1]:
 
            l.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
 
        s = ''.join(l)
 
        print s
 
        assert s == r'''
 
context ... ... u'@@ -51,6 +51,13 @@\n'
 
unmod    51  51 u'<u>\t</u>begin();\n'
 
unmod    52  52 u'<u>\t</u>\n'
 
add      53     u'<u>\t</u>int foo;<u class="cr"></u>\n'
 
add      54     u'<u>\t</u>int bar; <u class="cr"></u>\n'
 
add      55     u'<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
 
add      56     u'<u>\t</u>int space; <i></i>'
 
add      57     u'<u>\t</u>int tab;<u>\t</u>\n'
 
add      58     u'<u>\t</u>\n'
 
unmod    59  53 u' <i></i>'
 
del          54 u'<u>\t</u>#define MAX_STEPS (48)\n'
 
add      60     u'<u>\t</u><u class="cr"></u>\n'
 
add      61     u'<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
 
unmod    62  55 u'\n'
 
del          56 u'<u>\t</u>#define MIN_STEPS (<del>48</del>)\n'
 
add      63     u'<u>\t</u>#define MIN_STEPS (<ins>42</ins>)\n'
 
'''
0 comments (0 inline, 0 general)