Changeset - 79676fef1ae0
[Not reviewed]
default
0 2 0
Mads Kiilerich - 9 years ago 2016-09-06 00:51:18
madski@unity3d.com
diff: show correct operation for file diffs instead of '???'

Fix issue seen on the url $repo/diff/$filename?diff2=hash2&diff1=hash2 .

Drop returning unused size from diffs.wrapped_diff and return the operation
instead.
2 files changed with 12 insertions and 8 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/files.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.files
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Files controller for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 21, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import os
 
import posixpath
 
import logging
 
import traceback
 
import tempfile
 
import shutil
 

	
 
from pylons import request, response, tmpl_context as c, url
 
from pylons.i18n.translation import _
 
from webob.exc import HTTPFound
 

	
 
from kallithea.lib.utils import jsonify, action_logger
 
from kallithea.lib import diffs
 
from kallithea.lib import helpers as h
 

	
 
from kallithea.lib.compat import OrderedDict
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_str, \
 
    str2bool, safe_int
 
from kallithea.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
 
from kallithea.lib.base import BaseRepoController, render
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import RepositoryError, \
 
    ChangesetDoesNotExistError, EmptyRepositoryError, \
 
    ImproperArchiveTypeError, VCSError, NodeAlreadyExistsError, \
 
    NodeDoesNotExistError, ChangesetError, NodeError
 
from kallithea.lib.vcs.nodes import FileNode
 

	
 
from kallithea.model.repo import RepoModel
 
from kallithea.model.scm import ScmModel
 
from kallithea.model.db import Repository
 

	
 
from kallithea.controllers.changeset import anchor_url, _ignorews_url, \
 
    _context_url, get_line_ctx, get_ignore_ws
 
from webob.exc import HTTPNotFound
 
from kallithea.lib.exceptions import NonRelativePathError
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class FilesController(BaseRepoController):
 

	
 
    def __before__(self):
        """
        Run the base controller setup, then publish this controller's
        cut-off limit on the template context.
        """
        super(FilesController, self).__before__()
        limit = self.cut_off_limit
        c.cut_off_limit = limit
 

	
 
    def __get_cs(self, rev, silent_empty=False):
        """
        Look up changeset ``rev`` in the current repository.

        Lookup failures are reported to the user as a flash message and
        turned into a 404 response.

        :param rev: revision to fetch
        :param silent_empty: when true, return None for an empty repository
            instead of flashing a warning and raising
        :returns: the changeset, or None (empty repository with
            ``silent_empty`` set)
        :raises HTTPNotFound: the repository is empty (and ``silent_empty``
            is false), the revision is unknown, or the lookup failed with
            another RepositoryError
        """
        try:
            return c.db_repo_scm_instance.get_changeset(rev)
        except EmptyRepositoryError:
            if not silent_empty:
                # the warning carries a link to the "add file" page
                add_url = url('files_add_home',
                              repo_name=c.repo_name,
                              revision=0, f_path='', anchor='edit')
                add_link = h.link_to(_('Click here to add new file'), add_url,
                                     class_="alert-link")
                h.flash(h.literal(_('There are no files yet. %s') % add_link),
                        category='warning')
                raise HTTPNotFound()
            return None
        except (ChangesetDoesNotExistError, LookupError):
            h.flash(_('Such revision does not exist for this repository'),
                    category='error')
            raise HTTPNotFound()
        except RepositoryError as err:
            h.flash(safe_str(err), category='error')
            raise HTTPNotFound()
 

	
 
    def __get_filenode(self, cs, path):
        """
        Fetch the node at ``path`` from changeset ``cs``, requiring that it
        is not a directory.

        :param cs: changeset to look in
        :param path: path of the node to fetch
        :returns: the node found at ``path``
        :raises HTTPNotFound: after flashing an error, if the revision does
            not exist, the path is a directory or the lookup fails with
            another RepositoryError
        """
        try:
            node = cs.get_node(path)
            if node.is_dir():
                raise RepositoryError('given path is a directory')
        except ChangesetDoesNotExistError:
            h.flash(_('Such revision does not exist for this repository'),
                    category='error')
            raise HTTPNotFound()
        except RepositoryError as err:
            h.flash(safe_str(err), category='error')
            raise HTTPNotFound()
        else:
            return node
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def index(self, repo_name, revision, f_path, annotate=False):
        """
        Browse the file or directory at ``f_path`` as of ``revision``.

        Fills the template context with the changeset, the previous/next
        navigation urls, per-file details and the revision selector options,
        then renders ``files/files.html`` (or ``files/files_ypjax.html`` when
        the request environment has ``HTTP_X_PARTIAL_XHR`` set).

        :raises HTTPNotFound: if the revision or the path cannot be resolved
        """
        # a revision posted from the form is only looked up here; an unknown
        # one ends in a 404 raised by __get_cs
        # FIXME: the changeset that was found is not used for anything
        posted_rev = request.POST.get('at_rev', None)
        if posted_rev:
            self.__get_cs(posted_rev)

        c.revision = revision
        c.changeset = self.__get_cs(revision)
        c.branch = request.GET.get('branch', None)
        c.f_path = f_path
        c.annotate = annotate
        cur_rev = c.changeset.revision
        c.fulldiff = request.GET.get('fulldiff')

        # link to the previous changeset ('#' when there is none)
        try:
            prev_cs = c.db_repo_scm_instance.get_changeset(cur_rev).prev(c.branch)
            prev_url = url('files_home', repo_name=c.repo_name,
                           revision=prev_cs.raw_id, f_path=f_path)
            if c.branch:
                prev_url += '?branch=%s' % c.branch
            c.url_prev = prev_url
        except (ChangesetDoesNotExistError, VCSError):
            c.url_prev = '#'

        # link to the next changeset ('#' when there is none)
        try:
            next_cs = c.db_repo_scm_instance.get_changeset(cur_rev).next(c.branch)
            next_url = url('files_home', repo_name=c.repo_name,
                           revision=next_cs.raw_id, f_path=f_path)
            if c.branch:
                next_url += '?branch=%s' % c.branch
            c.url_next = next_url
        except (ChangesetDoesNotExistError, VCSError):
            c.url_next = '#'

        # file or directory details
        try:
            c.file = c.changeset.get_node(f_path)

            if not c.file.is_file():
                c.authors = c.file_history = []
            else:
                c.load_full_history = False
                # are we looking at a branch head (given by name or by hash)?
                branches = c.db_repo_scm_instance.branches
                c.on_branch_head = revision in branches.keys() + branches.values()
                history_entries = []
                c.file_history = []
                if c.load_full_history:
                    c.file_history, history_entries = \
                        self._get_node_history(c.changeset, f_path)

                c.authors = [(h.email(author), h.person(author))
                             for author in set(x.author for x in history_entries)]
        except RepositoryError as err:
            h.flash(safe_str(err), category='error')
            raise HTTPNotFound()

        if request.environ.get('HTTP_X_PARTIAL_XHR'):
            return render('files/files_ypjax.html')

        # TODO: tags and bookmarks?
        # current changeset first, then the branches, then the closed branches
        current = (c.changeset.raw_id,
                   _('%s at %s') % (c.changeset.branch, h.short_id(c.changeset.raw_id)))
        options = [current]
        options.extend((n, b) for b, n in c.db_repo_scm_instance.branches.items())
        if c.db_repo_scm_instance.closed_branches:
            prefix = _('(closed)') + ' '
            options.append(('-', '-'))
            options.extend((n, prefix + b)
                           for b, n in c.db_repo_scm_instance.closed_branches.items())
        c.revision_options = options

        return render('files/files.html')
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    @jsonify
    def history(self, repo_name, revision, f_path):
        """
        Return the history of the file at ``f_path`` as grouped option data.

        :returns: ``{'more': False, 'results': [...]}`` where every result
            has a ``text`` and a list of ``children`` (``id``/``text``
            pairs), or None when ``f_path`` is not a file
        """
        changeset = self.__get_cs(revision)
        node = changeset.get_node(f_path)
        if not node.is_file():
            return None

        file_history, _unused = self._get_node_history(changeset, f_path)

        # each history group holds its entries at [0] and its label at [1]
        results = [
            {
                'text': group[1],
                'children': [{'id': entry[0], 'text': entry[1]}
                             for entry in group[0]]
            }
            for group in file_history
        ]
        return {
            'more': False,
            'results': results
        }
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def authors(self, repo_name, revision, f_path):
        """
        Render the history box listing the distinct authors of the file at
        ``f_path``.

        :returns: the rendered ``files/files_history_box.html``, or None
            when ``f_path`` is not a file
        """
        changeset = self.__get_cs(revision)
        node = changeset.get_node(f_path)
        if not node.is_file():
            return None

        _unused, history_entries = self._get_node_history(changeset, f_path)
        # one (email, person) pair per distinct author
        c.authors = [(h.email(author), h.person(author))
                     for author in set(x.author for x in history_entries)]
        return render('files/files_history_box.html')
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def rawfile(self, repo_name, revision, f_path):
        """
        Send the file at ``f_path`` in ``revision`` as an attachment, using
        the last path component as file name and the node's own mimetype.

        :returns: the file content
        :raises HTTPNotFound: if the revision or the file cannot be resolved
        """
        node = self.__get_filenode(self.__get_cs(revision), f_path)

        basename = f_path.split(Repository.url_sep())[-1]
        response.content_disposition = 'attachment; filename=%s' % safe_str(basename)

        response.content_type = node.mimetype
        return node.content
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def raw(self, repo_name, revision, f_path):
        """
        Serve the raw content of the file at ``f_path`` in ``revision``.

        Known image types are served inline with their own mimetype. Any
        other file is either offered as an ``application/octet-stream``
        download (binary content) or shown inline as ``text/plain``.

        :returns: the file content
        :raises HTTPNotFound: if the revision or the file cannot be resolved
        """
        cs = self.__get_cs(revision)
        file_node = self.__get_filenode(cs, f_path)

        raw_mimetype_mapping = {
            # map original mimetype to a mimetype used for "show as raw"
            # you can also provide a content-disposition to override the
            # default "attachment" disposition.
            # orig_type: (new_type, new_dispo)

            # show images inline:
            'image/x-icon': ('image/x-icon', 'inline'),
            'image/png': ('image/png', 'inline'),
            'image/gif': ('image/gif', 'inline'),
            'image/jpeg': ('image/jpeg', 'inline'),
            'image/svg+xml': ('image/svg+xml', 'inline'),
        }

        mimetype = file_node.mimetype
        try:
            mimetype, dispo = raw_mimetype_mapping[mimetype]
        except KeyError:
            # we don't know anything special about this, handle it safely
            if file_node.is_binary:
                # do same as download raw for binary files
                mimetype, dispo = 'application/octet-stream', 'attachment'
            else:
                # do not just use the original mimetype, but force text/plain,
                # otherwise it would serve text/html and that might be unsafe.
                # Note: underlying vcs library fakes text/plain mimetype if the
                # mimetype can not be determined and it thinks it is not
                # binary. This might lead to erroneous text display in some
                # cases, but helps in other cases, like with text files
                # without extension.
                mimetype, dispo = 'text/plain', 'inline'

        if dispo == 'attachment':
            # f_path is a repository path, so take its last component by
            # splitting on the repository url separator (as rawfile does),
            # not on the path separator of the server's operating system
            dispo = 'attachment; filename=%s' % \
                        safe_str(f_path.split(Repository.url_sep())[-1])

        response.content_disposition = dispo
        response.content_type = mimetype
        return file_node.content
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    def delete(self, repo_name, revision, f_path):
        """
        Show the delete form for the file at ``f_path`` and, on POST, commit
        its removal on top of ``revision``.

        The repository must not be locked and ``revision`` must be a branch
        name or a branch hash; otherwise the user is redirected with a
        warning.

        :returns: the rendered ``files/files_delete.html`` when nothing was
            posted
        :raises HTTPFound: redirect after a refused request or after the
            commit attempt (successful or not)
        """
        db_repo = c.db_repo
        if db_repo.enable_locking and db_repo.locked[0]:
            lock_msg = _('This repository has been locked by %s on %s')
            locked_by = h.person_by_id(db_repo.locked[0])
            locked_on = h.fmt_date(h.time_to_datetime(db_repo.locked[1]))
            h.flash(lock_msg % (locked_by, locked_on), 'warning')
            raise HTTPFound(location=h.url('files_home',
                                  repo_name=repo_name, revision='tip'))

        # check if revision is a branch identifier - basically we cannot
        # create multiple heads via file editing
        branches = db_repo.scm_instance.branches
        # branch names as well as branch hashes are accepted
        branch_refs = branches.keys() + branches.values()
        if revision not in branch_refs:
            h.flash(_('You can only delete files with revision '
                      'being a valid branch'), category='warning')
            raise HTTPFound(location=h.url('files_home',
                                  repo_name=repo_name, revision='tip',
                                  f_path=f_path))

        posted = request.POST

        c.cs = self.__get_cs(revision)
        c.file = self.__get_filenode(c.cs, f_path)

        c.default_message = _('Deleted file %s via Kallithea') % f_path
        c.f_path = f_path
        author = self.authuser.full_contact

        if not posted:
            return render('files/files_delete.html')

        message = posted.get('message') or c.default_message

        try:
            self.scm_model.delete_nodes(
                user=c.authuser.user_id, repo=c.db_repo,
                message=message,
                nodes={f_path: {'content': ''}},
                parent_cs=c.cs,
                author=author,
            )

            h.flash(_('Successfully deleted file %s') % f_path,
                    category='success')
        except Exception:
            # any failure is logged and reported; the redirect below
            # happens either way
            log.error(traceback.format_exc())
            h.flash(_('Error occurred during commit'), category='error')
        raise HTTPFound(location=url('changeset_home',
                            repo_name=c.repo_name, revision='tip'))
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    def edit(self, repo_name, revision, f_path):
        """
        Show the edit form for the file at ``f_path`` and, on POST, commit
        the posted content on top of ``revision``.

        The repository must not be locked, ``revision`` must be a branch
        name or a branch hash and the file must not be binary; otherwise the
        user is redirected.

        :returns: the rendered ``files/files_edit.html`` when nothing was
            posted
        :raises HTTPFound: redirect after a refused request, when the
            content is unchanged, or after the commit attempt
        """
        db_repo = c.db_repo
        if db_repo.enable_locking and db_repo.locked[0]:
            lock_msg = _('This repository has been locked by %s on %s')
            locked_by = h.person_by_id(db_repo.locked[0])
            locked_on = h.fmt_date(h.time_to_datetime(db_repo.locked[1]))
            h.flash(lock_msg % (locked_by, locked_on), 'warning')
            raise HTTPFound(location=h.url('files_home',
                                  repo_name=repo_name, revision='tip'))

        # check if revision is a branch identifier - basically we cannot
        # create multiple heads via file editing
        branches = db_repo.scm_instance.branches
        # branch names as well as branch hashes are accepted
        branch_refs = branches.keys() + branches.values()
        if revision not in branch_refs:
            h.flash(_('You can only edit files with revision '
                      'being a valid branch'), category='warning')
            raise HTTPFound(location=h.url('files_home',
                                  repo_name=repo_name, revision='tip',
                                  f_path=f_path))

        posted = request.POST

        c.cs = self.__get_cs(revision)
        c.file = self.__get_filenode(c.cs, f_path)

        if c.file.is_binary:
            # binary files are not edited: go back to the file page
            raise HTTPFound(location=url('files_home', repo_name=c.repo_name,
                            revision=c.cs.raw_id, f_path=f_path))
        c.default_message = _('Edited file %s via Kallithea') % f_path
        c.f_path = f_path

        if not posted:
            return render('files/files_edit.html')

        # convert the posted text to the line ending mode detected on the
        # first line of the current content
        current = c.file.content
        current_lines = current.splitlines(1)
        first_line = ''
        if current_lines:
            first_line = current_lines[0]
        # modes:  0 - Unix, 1 - Mac, 2 - DOS
        mode = detect_mode(first_line, 0)
        content = convert_line_endings(posted.get('content', ''), mode)

        message = posted.get('message') or c.default_message
        author = self.authuser.full_contact

        if content == current:
            h.flash(_('No changes'), category='warning')
            raise HTTPFound(location=url('changeset_home', repo_name=c.repo_name,
                                revision='tip'))
        try:
            self.scm_model.commit_change(repo=c.db_repo_scm_instance,
                                         repo_name=repo_name, cs=c.cs,
                                         user=self.authuser.user_id,
                                         author=author, message=message,
                                         content=content, f_path=f_path)
            h.flash(_('Successfully committed to %s') % f_path,
                    category='success')
        except Exception:
            # any failure is logged and reported; the redirect below
            # happens either way
            log.error(traceback.format_exc())
            h.flash(_('Error occurred during commit'), category='error')
        raise HTTPFound(location=url('changeset_home',
                            repo_name=c.repo_name, revision='tip'))
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    def add(self, repo_name, revision, f_path):
        """
        Show the "add file" form and, on POST, commit the new file on top of
        ``revision`` (or on an empty changeset for an empty repository).

        The content comes either from the posted ``content`` text or from an
        uploaded ``upload_file``, which also supplies the file name.

        :returns: the rendered ``files/files_add.html`` when nothing was
            posted
        :raises HTTPFound: redirect when the repository is locked, when
            content or file name is missing, or after the commit attempt
        """
        def tip_redirect():
            # redirect to the tip changeset; used by every POST outcome
            return HTTPFound(location=url('changeset_home',
                                          repo_name=c.repo_name,
                                          revision='tip'))

        db_repo = Repository.get_by_repo_name(repo_name)
        if db_repo.enable_locking and db_repo.locked[0]:
            lock_msg = _('This repository has been locked by %s on %s')
            locked_by = h.person_by_id(db_repo.locked[0])
            locked_on = h.fmt_date(h.time_to_datetime(db_repo.locked[1]))
            h.flash(lock_msg % (locked_by, locked_on), 'warning')
            raise HTTPFound(location=h.url('files_home',
                                  repo_name=repo_name, revision='tip'))

        posted = request.POST
        parent_cs = self.__get_cs(revision, silent_empty=True)
        if parent_cs is None:
            # empty repository: commit on top of an empty changeset
            parent_cs = EmptyChangeset(alias=c.db_repo_scm_instance.alias)
        c.cs = parent_cs
        c.default_message = _('Added file via Kallithea')
        c.f_path = f_path

        if not posted:
            return render('files/files_add.html')

        unix_mode = 0
        content = convert_line_endings(posted.get('content', ''), unix_mode)

        message = posted.get('message') or c.default_message
        filename = posted.get('filename')
        location = posted.get('location', '')
        upload = posted.get('upload_file', None)

        if upload is not None and hasattr(upload, 'filename'):
            # an uploaded file overrides the posted name and content
            filename = upload.filename
            content = upload.file

            if hasattr(content, 'file'):
                # non posix systems store real file under file attr
                content = content.file

        if not content:
            h.flash(_('No content'), category='warning')
            raise tip_redirect()
        if not filename:
            h.flash(_('No filename'), category='warning')
            raise tip_redirect()
        # keep only the base name of whatever file name was given
        filename = os.path.basename(filename)
        node_path = posixpath.join(location, filename)
        author = self.authuser.full_contact

        try:
            self.scm_model.create_nodes(
                user=c.authuser.user_id, repo=c.db_repo,
                message=message,
                nodes={node_path: {'content': content}},
                parent_cs=c.cs,
                author=author,
            )

            h.flash(_('Successfully committed to %s') % node_path,
                    category='success')
        except NonRelativePathError:
            h.flash(_('Location must be relative path and must not '
                      'contain .. in path'), category='warning')
            raise tip_redirect()
        except (NodeError, NodeAlreadyExistsError) as err:
            h.flash(_(err), category='error')
        except Exception:
            log.error(traceback.format_exc())
            h.flash(_('Error occurred during commit'), category='error')
        raise tip_redirect()
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def archivefile(self, repo_name, fname):
        """
        Stream an archive of the repository as a download.

        ``fname`` is a revision followed by one of the extensions listed in
        ``settings.ARCHIVE_SPECS``. When ``archive_cache_dir`` is configured
        (and no subrepos are requested) archives are stored in and served
        from that directory; otherwise a temporary file is created for the
        request and removed once it has been streamed.

        :returns: a generator yielding the archive in 16 kB chunks, or a
            plain message when downloads are disabled, the revision is
            unknown, the repository is empty or the archive type is unknown
        """
        fileformat = None
        revision = None
        ext = None
        subrepos = request.GET.get('subrepos') == 'true'

        # fname must end with a known archive extension; whatever comes
        # before the extension is the revision
        for a_type, ext_data in settings.ARCHIVE_SPECS.items():
            archive_spec = fname.split(ext_data[1])
            if len(archive_spec) == 2 and archive_spec[1] == '':
                fileformat = a_type or ext_data[1]
                revision = archive_spec[0]
                ext = ext_data[1]

        try:
            dbrepo = RepoModel().get_by_repo_name(repo_name)
            if not dbrepo.enable_downloads:
                return _('Downloads disabled') # TODO: do something else?

            if c.db_repo_scm_instance.alias == 'hg':
                # patch and reset hooks section of UI config to not run any
                # hooks on fetching archives with subrepos
                for k, v in c.db_repo_scm_instance._repo.ui.configitems('hooks'):
                    c.db_repo_scm_instance._repo.ui.setconfig('hooks', k, None)

            cs = c.db_repo_scm_instance.get_changeset(revision)
            content_type = settings.ARCHIVE_SPECS[fileformat][0]
        except ChangesetDoesNotExistError:
            return _('Unknown revision %s') % revision
        except EmptyRepositoryError:
            return _('Empty repository')
        except (ImproperArchiveTypeError, KeyError):
            return _('Unknown archive type')

        from kallithea import CONFIG
        rev_name = cs.raw_id[:12]
        archive_name = '%s-%s%s' % (safe_str(repo_name.replace('/', '_')),
                                    safe_str(rev_name), ext)

        archive_path = None
        cached_archive_path = None
        archive_cache_dir = CONFIG.get('archive_cache_dir')
        if archive_cache_dir and not subrepos: # TODO: subrepo caching?
            if not os.path.isdir(archive_cache_dir):
                os.makedirs(archive_cache_dir)
            cached_archive_path = os.path.join(archive_cache_dir, archive_name)
            if os.path.isfile(cached_archive_path):
                log.debug('Found cached archive in %s', cached_archive_path)
                archive_path = cached_archive_path
            else:
                log.debug('Archive %s is not yet cached', archive_name)

        if archive_path is None:
            # generate new archive
            fd, archive_path = tempfile.mkstemp()
            log.debug('Creating new temp archive in %s', archive_path)
            with os.fdopen(fd, 'wb') as stream:
                cs.fill_archive(stream=stream, kind=fileformat, subrepos=subrepos)
                # stream (and thus fd) has been closed by cs.fill_archive
            if cached_archive_path is not None:
                # we generated the archive - move it to cache
                log.debug('Storing new archive in %s', cached_archive_path)
                shutil.move(archive_path, cached_archive_path)
                archive_path = cached_archive_path

        def get_chunked_archive(archive_path):
            # Yield the archive in chunks. The file is closed and a temporary
            # (not cached) archive is removed in all cases once iteration
            # has started: when the last chunk was sent, when reading fails
            # and when the generator is closed early (aborted download).
            try:
                with open(archive_path, 'rb') as stream:
                    while True:
                        data = stream.read(16 * 1024)
                        if not data:
                            break
                        yield data
            finally:
                if archive_path != cached_archive_path:
                    log.debug('Destroying temp archive %s', archive_path)
                    os.remove(archive_path)

        action_logger(user=c.authuser,
                      action='user_downloaded_archive:%s' % (archive_name),
                      repo=repo_name, ipaddr=self.ip_addr, commit=True)

        response.content_disposition = str('attachment; filename=%s' % (archive_name))
        response.content_type = str(content_type)
        return get_chunked_archive(archive_path)
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def diff(self, repo_name, f_path):
        """
        Show, or return as raw text, the diff of the file at ``f_path``
        between the revisions given as the ``diff1`` and ``diff2`` request
        parameters (``diff1`` falls back to ``diff2`` when empty).

        The ``diff`` request parameter selects the output: ``download``
        returns the raw diff as an attachment, ``raw`` returns it as plain
        text, anything else renders ``files/file_diff.html``. A file missing
        on one side is compared as an empty file.

        :raises HTTPFound: redirect to the file view when ``show_rev`` is
            requested, or to the files home when a revision or node cannot
            be resolved
        """
        ignore_whitespace = request.GET.get('ignorews') == '1'
        line_context = safe_int(request.GET.get('context'), 3)
        diff2 = request.GET.get('diff2', '')
        diff1 = request.GET.get('diff1', '') or diff2
        c.action = request.GET.get('diff')
        c.no_changes = diff1 == diff2
        c.f_path = f_path
        c.big_diff = False
        c.anchor_url = anchor_url
        c.ignorews_url = _ignorews_url
        c.context_url = _context_url
        c.changes = OrderedDict()
        c.changes[diff2] = []

        # special case: only showing a revision is implemented here as a
        # redirect, to reduce JS and callbacks

        if request.GET.get('show_rev'):
            if str2bool(request.GET.get('annotate', 'False')):
                _url = url('files_annotate_home', repo_name=c.repo_name,
                           revision=diff1, f_path=c.f_path)
            else:
                _url = url('files_home', repo_name=c.repo_name,
                           revision=diff1, f_path=c.f_path)

            raise HTTPFound(location=_url)
        try:
            # old side: an empty/null revision or a missing file is
            # represented by an empty file node on an EmptyChangeset
            if diff1 not in ['', None, 'None', '0' * 12, '0' * 40]:
                c.changeset_1 = c.db_repo_scm_instance.get_changeset(diff1)
                try:
                    node1 = c.changeset_1.get_node(f_path)
                    if node1.is_dir():
                        raise NodeError('%s path is a %s not a file'
                                        % (node1, type(node1)))
                except NodeDoesNotExistError:
                    c.changeset_1 = EmptyChangeset(cs=diff1,
                                                   revision=c.changeset_1.revision,
                                                   repo=c.db_repo_scm_instance)
                    node1 = FileNode(f_path, '', changeset=c.changeset_1)
            else:
                c.changeset_1 = EmptyChangeset(repo=c.db_repo_scm_instance)
                node1 = FileNode(f_path, '', changeset=c.changeset_1)

            # new side: same handling as for the old side
            if diff2 not in ['', None, 'None', '0' * 12, '0' * 40]:
                c.changeset_2 = c.db_repo_scm_instance.get_changeset(diff2)
                try:
                    node2 = c.changeset_2.get_node(f_path)
                    if node2.is_dir():
                        raise NodeError('%s path is a %s not a file'
                                        % (node2, type(node2)))
                except NodeDoesNotExistError:
                    c.changeset_2 = EmptyChangeset(cs=diff2,
                                                   revision=c.changeset_2.revision,
                                                   repo=c.db_repo_scm_instance)
                    node2 = FileNode(f_path, '', changeset=c.changeset_2)
            else:
                c.changeset_2 = EmptyChangeset(repo=c.db_repo_scm_instance)
                node2 = FileNode(f_path, '', changeset=c.changeset_2)
        except (RepositoryError, NodeError):
            log.error(traceback.format_exc())
            raise HTTPFound(location=url('files_home', repo_name=c.repo_name,
                                f_path=f_path))

        if c.action == 'download':
            _diff = diffs.get_gitdiff(node1, node2,
                                      ignore_whitespace=ignore_whitespace,
                                      context=line_context)
            diff = diffs.DiffProcessor(_diff, format='gitdiff')

            diff_name = '%s_vs_%s.diff' % (diff1, diff2)
            response.content_type = 'text/plain'
            response.content_disposition = (
                'attachment; filename=%s' % diff_name
            )
            return diff.as_raw()

        elif c.action == 'raw':
            _diff = diffs.get_gitdiff(node1, node2,
                                      ignore_whitespace=ignore_whitespace,
                                      context=line_context)
            diff = diffs.DiffProcessor(_diff, format='gitdiff')
            response.content_type = 'text/plain'
            return diff.as_raw()

        else:
            fid = h.FID(diff2, node2.path)
            line_context_lcl = get_line_ctx(fid, request.GET)
            ign_whitespace_lcl = get_ignore_ws(fid, request.GET)

            lim = request.GET.get('fulldiff') or self.cut_off_limit
            # wrapped_diff is expected to return the operation as its third
            # value (the interface this changeset introduces); it is passed
            # on to the template instead of a hard-coded empty string
            cs1, cs2, op, diff, st = diffs.wrapped_diff(filenode_old=node1,
                                         filenode_new=node2,
                                         cut_off_limit=lim,
                                         ignore_whitespace=ign_whitespace_lcl,
                                         line_context=line_context_lcl,
                                         enable_comments=False)
            filename = node1.path
            cs_changes = {
                'fid': [cs1, cs2, op, filename, diff, st]
            }
            c.changes = cs_changes

        return render('files/file_diff.html')
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    def diff_2way(self, repo_name, f_path):
        """
        Render the two-way diff page for ``f_path`` between two revisions.

        The revisions are taken from the ``diff1`` and ``diff2`` query
        parameters. An empty or all-zero revision, or a revision in which
        ``f_path`` does not exist, is represented by an empty file node
        attached to an ``EmptyChangeset``.

        :param repo_name: name of the repository (not used in the body)
        :param f_path: path of the file to compare
        :raises HTTPNotFound: if one of the revisions does not exist
        :raises NodeError: if ``f_path`` is a directory in one of the revisions
        """
        diff1 = request.GET.get('diff1', '')
        diff2 = request.GET.get('diff2', '')
        repo = c.db_repo_scm_instance
        # revision values that stand for "no revision"
        null_revisions = ('', None, 'None', '0' * 12, '0' * 40)

        def load_side(rev):
            """Return ``(changeset, file node)`` for ``f_path`` at ``rev``."""
            if rev in null_revisions:
                changeset = EmptyChangeset(repo=repo)
                return changeset, FileNode(f_path, '', changeset=changeset)

            changeset = repo.get_changeset(rev)
            try:
                node = changeset.get_node(f_path)
                if node.is_dir():
                    raise NodeError('%s path is a %s not a file'
                                    % (node, type(node)))
            except NodeDoesNotExistError:
                # the file is missing in this revision: use an empty file
                # that still carries the revision information
                changeset = EmptyChangeset(cs=rev,
                                           revision=changeset.revision,
                                           repo=repo)
                node = FileNode(f_path, '', changeset=changeset)
            return changeset, node

        try:
            c.changeset_1, node1 = load_side(diff1)
            c.changeset_2, node2 = load_side(diff2)
        except ChangesetDoesNotExistError:
            msg = _('Such revision does not exist for this repository')
            h.flash(msg, category='error')
            raise HTTPNotFound()

        c.node1 = node1
        c.node2 = node2
        c.cs1 = c.changeset_1
        c.cs2 = c.changeset_2

        return render('files/diff_2way.html')
 

	
 
    def _get_node_history(self, cs, f_path, changesets=None):
        """
        Build the grouped revision list for the history of a file.

        :param cs: changeset used to calculate the history when the file
            is not available from tip
        :param f_path: path of the node to calculate the history for
        :param changesets: if passed, the history is not calculated and
            the changesets in this list are used instead
        :returns: tuple of the list of ``(entries, label)`` groups
            (changesets, branches, tags) and the changesets that were used
        """
        repo = c.db_repo_scm_instance
        # the history is calculated from tip by default
        tip_cs = repo.get_changeset()
        if changesets is None:
            try:
                changesets = tip_cs.get_file_history(f_path)
            except (NodeDoesNotExistError, ChangesetError):
                # the node is not present at tip
                changesets = cs.get_file_history(f_path)

        changesets_label = _("Changesets")
        branches_label = _("Branches")
        tags_label = _("Tags")

        changeset_entries = [
            (chs.raw_id, '%s (%s)' % (h.show_id(chs), chs.branch))
            for chs in changesets
        ]
        branch_entries = [(rev, name) for name, rev in repo.branches.items()]
        tag_entries = [(rev, name) for name, rev in repo.tags.items()]

        hist_l = [
            (changeset_entries, changesets_label),
            (branch_entries, branches_label),
            (tag_entries, tags_label),
        ]
        return hist_l, changesets
 

	
 
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.read', 'repository.write',
                                   'repository.admin')
    @jsonify
    def nodelist(self, repo_name, revision, f_path):
        """
        Return the nodes found below ``f_path`` at ``revision``.

        Only requests that carry the ``HTTP_X_PARTIAL_XHR`` environ key get
        an answer; for any other request None is returned.

        :returns: dict with the key ``nodes`` holding the two lists returned
            by ``ScmModel.get_nodes`` concatenated, or None
        """
        if not request.environ.get('HTTP_X_PARTIAL_XHR'):
            return None

        cs = self.__get_cs(revision)
        # presumably directories first and files second - verify against
        # ScmModel.get_nodes
        dir_nodes, file_nodes = ScmModel().get_nodes(repo_name, cs.raw_id,
                                                     f_path, flat=False)
        return {'nodes': dir_nodes + file_nodes}
kallithea/lib/diffs.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.lib.diffs
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Set of diffing helpers, previously part of vcs
 

	
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 4, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 
import re
 
import difflib
 
import logging
 

	
 
from itertools import tee, imap
 

	
 
from pylons.i18n.translation import _
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.helpers import escape
 
from kallithea.lib.utils2 import safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def wrap_to_table(str_):
    """
    Wrap ``str_`` in a single-cell ``code-difftable`` HTML table.

    ``str_`` is inserted as is; no escaping is done here.
    """
    template = '''<table class="code-difftable">
                <tr class="line no-comment">
                <td class="lineno new"></td>
                <td class="code no-comment"><pre>%s</pre></td>
                </tr>
              </table>'''
    return template % str_
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                ignore_whitespace=True, line_context=3,
                enable_comments=False):
    """
    Return the diff between two file nodes wrapped into an HTML table.

    No diff is calculated for binary files or when the cut off limit
    applies; a table with a short message is returned instead.

    :param filenode_old: old file node, or None to diff against an empty
        file with the path of ``filenode_new``
    :param filenode_new: new file node
    :param cut_off_limit: the diff is only calculated when this is None or
        when both file sizes are below it; -1 never calculates the diff
    :param ignore_whitespace: passed on to ``get_gitdiff``
    :param line_context: number of context lines, passed on to ``get_gitdiff``
    :param enable_comments: passed on to ``DiffProcessor.as_html``
    :returns: tuple ``(cs1, cs2, op, diff, stats)`` with the raw ids of the
        old and new changeset, the operation reported by the diff parser
        (None when nothing was parsed), the HTML and the diff statistics
    """

    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    op = None
    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('Binary file'))
        stats = (0, 0)

    elif cut_off_limit != -1 and (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and filenode_new.size < cut_off_limit)):

        f_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                ignore_whitespace=ignore_whitespace,
                                context=line_context)
        diff_processor = DiffProcessor(f_gitdiff, format='gitdiff')
        _parsed = diff_processor.prepare()
        if _parsed:  # there should be exactly one element, for the specified file
            op = _parsed[0]['operation']

        diff = diff_processor.as_html(enable_comments=enable_comments)
        stats = diff_processor.stat()

    else:
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))
        stats = (0, 0)

    if not diff:
        # an empty diff is either a submodule change or no change at all
        submodules = [node for node in (filenode_new, filenode_old)
                      if isinstance(node, SubModuleNode)]
        if submodules:
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return cs1, cs2, op, diff, stats
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Build the git style diff of ``filenode_old`` against ``filenode_new``.

    An empty string is returned when either node is a submodule.

    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines; a false value falls back to 3
    :raises VCSError: if one of the nodes is not a FileNode
    """
    nodes = (filenode_old, filenode_new)

    # make sure a default context is passed on
    if not context:
        context = 3

    if any(isinstance(node, SubModuleNode) for node in nodes):
        return ''

    for node in nodes:
        if isinstance(node, FileNode):
            continue
        raise VCSError("Given object should be FileNode object, not %s"
            % node.__class__)

    repository = filenode_new.changeset.repository
    raw_ids = [getattr(node.changeset, 'raw_id', repository.EMPTY_CHANGESET)
               for node in nodes]

    return repository.get_diff(raw_ids[0], raw_ids[1], filenode_new.path,
                               ignore_whitespace, context)
 

	
 
# Keys of the ``stats['ops']`` dict built by DiffProcessor._parse_gitdiff;
# each one identifies a kind of change reported for a file.
NEW_FILENODE = 1  # 'new file'
DEL_FILENODE = 2  # 'deleted file'
MOD_FILENODE = 3  # 'modified file'
RENAMED_FILENODE = 4  # 'file renamed from ... to ...'
COPIED_FILENODE = 5  # 'file copied from ... to ...'
CHMOD_FILENODE = 6  # 'modified file chmod ... => ...'
BIN_FILENODE = 7  # 'binary diff not shown'
 

	
 

	
 
class DiffLimitExceeded(Exception):
    """Raised while escaping diff lines once the diff limit is exceeded."""
 

	
 

	
 
class LimitedDiffContainer(object):
    """
    Iterable wrapper around a diff that also records the diff limit and
    the diff size reached so far.
    """

    def __init__(self, diff_limit, cur_diff_size, diff):
        self.diff_limit = diff_limit
        self.cur_diff_size = cur_diff_size
        self.diff = diff

    def __iter__(self):
        """Yield the items of the wrapped diff in order."""
        for entry in self.diff:
            yield entry
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        # has already been split on this:
 
        # ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        # has already been split on this:
 
        # ^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
           ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
        (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
           ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    # Used for inline highlighter word split, must match the substitutions in _escaper
 
    _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 
    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
 

	
 

	
 
    def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
        """
        :param diff: a text in diff format
        :param vcs: type of version control, ``hg`` or ``git``
        :param format: format of the diff passed, ``udiff`` or ``gitdiff``
        :param diff_limit: size above which a diff is considered "big" and
            gets cut off; None shows the full diff
        :raises Exception: if ``diff`` is not a string
        """
        if not isinstance(diff, basestring):
            raise Exception('Diff must be a basestring got %s instead' % type(diff))

        self.vcs = vcs
        self._diff = diff
        self._format = format

        # line counters
        self.adds = 0
        self.removes = 0

        # size bookkeeping used for enforcing diff_limit
        self.diff_size = len(diff)
        self.diff_limit = diff_limit
        self.cur_diff_size = 0

        # parse state, filled in by prepare()
        self.parsed = False
        self.parsed_diff = []

        is_gitdiff = format == 'gitdiff'
        self.differ = (self._highlight_line_difflib if is_gitdiff
                       else self._highlight_line_udiff)
        self._parser = self._parse_gitdiff if is_gitdiff else self._parse_udiff
 

	
 
    def _copy_iterator(self):
 
        """
 
        make a fresh copy of generator, we should not iterate thru
 
        an original as it's needed for repeating operations on
 
        this instance of DiffProcessor
 
        """
 
        self.__udiff, iterator_copy = tee(self.__udiff)
 
        return iterator_copy
 

	
 
    def _escaper(self, string):
        """
        Escape special characters in a piece of diff text and enforce the
        diff limit.

        The length of ``string`` is added to ``self.cur_diff_size`` first.

        :param string: diff text to escape
        :raises DiffLimitExceeded: if ``diff_limit`` is set and the size
            seen so far is above it
        """
        self.cur_diff_size += len(string)

        # this runs for every line pulled through the parser, so the limit
        # is checked while the diff is being consumed
        limit = self.diff_limit
        if limit is not None and self.cur_diff_size > limit:
            raise DiffLimitExceeded('Diff Limit Exceeded')

        # one replacement per capture group of _escape_re, in group order
        replacements = (
            '&amp;',
            '&lt;',
            '&gt;',
            '<u>\t</u>',
            '<u class="cr"></u>',
            ' <i></i>',
        )

        def substitute(match):
            for captured, replacement in zip(match.groups(), replacements):
                if captured:
                    return replacement
            assert False

        return self._escape_re.sub(substitute, safe_unicode(string))
 

	
 
    def _line_counter(self, l):
        """
        Bump the total adds/removes of this diff for one line.

        Lines starting with ``+++`` or ``---`` are not counted.

        :param l: line of the diff
        :returns: the line passed through ``safe_unicode``
        """
        if not l.startswith(('+++', '---')):
            if l.startswith('+'):
                self.adds += 1
            elif l.startswith('-'):
                self.removes += 1
        return safe_unicode(l)
 

	
 
    def _highlight_line_difflib(self, old, new):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 

	
 
        assert old['action'] == 'del'
 
        assert new['action'] == 'add'
 

	
 
        oldwords = self._token_re.split(old['line'])
 
        newwords = self._token_re.split(new['line'])
 
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
        oldfragments, newfragments = [], []
 
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
            oldfrag = ''.join(oldwords[i1:i2])
 
            newfrag = ''.join(newwords[j1:j2])
 
            if tag != 'equal':
 
                if oldfrag:
 
                    oldfrag = '<del>%s</del>' % oldfrag
 
                if newfrag:
 
                    newfrag = '<ins>%s</ins>' % newfrag
 
            oldfragments.append(oldfrag)
 
            newfragments.append(newfrag)
 

	
 
        old['line'] = "".join(oldfragments)
 
        new['line'] = "".join(newfragments)
 

	
 
    def _highlight_line_udiff(self, line, next_):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 
        start = 0
 
        limit = min(len(line['line']), len(next_['line']))
 
        while start < limit and line['line'][start] == next_['line'][start]:
 
            start += 1
 
        end = -1
 
        limit -= start
 
        while -end <= limit and line['line'][end] == next_['line'][end]:
 
            end -= 1
 
        end += 1
 
        if start or end:
 
            def do(l):
 
                last = end + len(l['line'])
 
                if l['action'] == 'add':
 
                    tag = 'ins'
 
                else:
 
                    tag = 'del'
 
                l['line'] = '%s<%s>%s</%s>%s' % (
 
                    l['line'][:start],
 
                    tag,
 
                    l['line'][start:last],
 
                    tag,
 
                    l['line'][last:]
 
                )
 
            do(line)
 
            do(next_)
 

	
 
    def _get_header(self, diff_chunk):
        """
        Parse the header of the diff of a single file.

        :param diff_chunk: text of one file diff, already split off at
            ``diff --git``
        :returns: tuple of a dict with the named groups of the header
            regexp (a_path, b_path, similarity_index, rename_from,
            rename_to, old_mode, new_mode, new_file_mode,
            deleted_file_mode, a_blob_id, b_blob_id, b_mode, a_file,
            b_file, ...) and an iterator over the escaped lines that
            follow the header
        :raises Exception: if the header is not recognized or is followed
            by something unexpected
        """
        if self.vcs == 'git':
            header_re = self._git_header_re
        elif self.vcs == 'hg':
            header_re = self._hg_header_re
        else:
            header_re = None

        match = None
        if header_re is not None:
            match = header_re.match(diff_chunk)
        if match is None:
            raise Exception('diff not recognized as valid %s diff' % self.vcs)

        header_end = match.end()
        rest = diff_chunk[header_end:]
        # after the header only hunks or binary patch data are expected
        if rest and not rest.startswith(('@', 'literal ', 'delta ')):
            raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:header_end], rest[:1000]))

        # don't split on \r as str.splitlines do
        raw_lines = re.findall(r'.*\n|.+$', rest)
        return match.groupdict(), imap(self._escaper, raw_lines)
 

	
 
    def _clean_line(self, line, command):
 
        if command in ['+', '-', ' ']:
 
            #only modify the line if it's actually a diff thing
 
            line = line[1:]
 
        return line
 

	
 
    def _parse_gitdiff(self, inline_diff=True):
        """
        Parse ``self._diff`` as a git style diff.

        :param inline_diff: when true, inline changes are also marked with
            ``self.differ`` where one del line is followed by one add line
        :returns: list with one dict per file, with the keys ``filename``,
            ``old_revision``, ``new_revision``, ``chunks``, ``operation``
            and ``stats``; the list is wrapped in a LimitedDiffContainer
            when parsing was stopped by DiffLimitExceeded
        """
        _files = []
        # identity by default, replaced when the diff limit is exceeded
        diff_container = lambda arg: arg

        # split the diff in chunks of separate --git a/file b/file chunks
        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
            head, diff = self._get_header(raw_diff)

            op = None
            stats = {
                'added': 0,
                'deleted': 0,
                'binary': False,
                'ops': {},
            }

            # 'binary' is set in every branch below and only reset once a
            # textual diff has been parsed successfully
            if head['deleted_file_mode']:
                op = 'D'
                stats['binary'] = True
                stats['ops'][DEL_FILENODE] = 'deleted file'

            elif head['new_file_mode']:
                op = 'A'
                stats['binary'] = True
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
            else:  # modify operation, can be cp, rename, chmod
                # CHMOD
                if head['new_mode'] and head['old_mode']:
                    op = 'M'
                    stats['binary'] = True
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
                                        % (head['old_mode'], head['new_mode']))
                # RENAME
                if (head['rename_from'] and head['rename_to']
                      and head['rename_from'] != head['rename_to']):
                    op = 'R'
                    stats['binary'] = True
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
                                    % (head['rename_from'], head['rename_to']))
                # COPY
                # .get() because only the hg header regexp has copy groups
                if head.get('copy_from') and head.get('copy_to'):
                    op = 'M'
                    stats['binary'] = True
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
                                        % (head['copy_from'], head['copy_to']))
                # FALL BACK: detect missed old style add or remove
                if op is None:
                    if not head['a_file'] and head['b_file']:
                        op = 'A'
                        stats['binary'] = True
                        stats['ops'][NEW_FILENODE] = 'new file'

                    elif head['a_file'] and not head['b_file']:
                        op = 'D'
                        stats['binary'] = True
                        stats['ops'][DEL_FILENODE] = 'deleted file'

                # it's not ADD not DELETE
                if op is None:
                    op = 'M'
                    stats['binary'] = True
                    stats['ops'][MOD_FILENODE] = 'modified file'

            # a real non-binary diff
            if head['a_file'] or head['b_file']:
                try:
                    chunks, _stats = self._parse_lines(diff)
                    stats['binary'] = False
                    stats['added'] = _stats[0]
                    stats['deleted'] = _stats[1]
                    # explicit mark that it's a modified file
                    if op == 'M':
                        stats['ops'][MOD_FILENODE] = 'modified file'

                except DiffLimitExceeded:
                    # stop parsing here; the file that hit the limit is not
                    # added to the result
                    diff_container = lambda _diff: \
                        LimitedDiffContainer(self.diff_limit,
                                            self.cur_diff_size, _diff)
                    break
            else:  # Git binary patch (or empty diff)
                # Git binary patch
                if head['bin_patch']:
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                chunks = []

            if op == 'D' and chunks:
                # a way of seeing deleted content could perhaps be nice - but
                # not with the current UI
                chunks = []

            # first chunk: one context line per recorded operation, except
            # for the plain "modified file" one
            chunks.insert(0, [{
                'old_lineno': '',
                'new_lineno': '',
                'action':     'context',
                'line':       msg,
                } for _op, msg in stats['ops'].iteritems()
                  if _op not in [MOD_FILENODE]])

            _files.append({
                'filename':         head['b_path'],
                'old_revision':     head['a_blob_id'],
                'new_revision':     head['b_blob_id'],
                'chunks':           chunks,
                'operation':        op,
                'stats':            stats,
            })

        if not inline_diff:
            return diff_container(_files)

        # highlight inline changes when one del is followed by one add
        for diff_data in _files:
            for chunk in diff_data['chunks']:
                lineiter = iter(chunk)
                # running out of lines (StopIteration) ends the scan of
                # this chunk
                try:
                    peekline = lineiter.next()
                    while True:
                        # find a first del line
                        while peekline['action'] != 'del':
                            peekline = lineiter.next()
                        delline = peekline
                        peekline = lineiter.next()
                        # if not followed by add, eat all following del lines
                        if peekline['action'] != 'add':
                            while peekline['action'] == 'del':
                                peekline = lineiter.next()
                            continue
                        # found an add - make sure it is the only one
                        addline = peekline
                        try:
                            peekline = lineiter.next()
                        except StopIteration:
                            # add was last line - ok
                            self.differ(delline, addline)
                            raise
                        if peekline['action'] != 'add':
                            # there was only one add line - ok
                            self.differ(delline, addline)
                except StopIteration:
                    pass

        return diff_container(_files)
 

	
 
    def _parse_udiff(self, inline_diff=True):
 
        raise NotImplementedError()
 

	
 
    def _parse_lines(self, diff):
        """
        Parse the hunks of one file diff and return data for the template.

        :param diff: iterator over the lines that follow the file header
        :returns: tuple ``(chunks, stats)``; ``chunks`` has one list per
            hunk with a dict (``old_lineno``, ``new_lineno``, ``action``,
            ``line``) per line, ``stats`` is ``[added, deleted]``
        :raises Exception: if the lines do not match the hunk headers
        """

        # number of added and deleted lines
        stats = [0, 0]
        (old_line, old_end, new_line, new_end) = (None, None, None, None)

        # running out of lines (StopIteration) is the normal way out of the
        # loops below
        try:
            chunks = []
            line = diff.next()

            while True:
                lines = []
                chunks.append(lines)

                match = self._chunk_re.match(line)

                if not match:
                    raise Exception('error parsing diff @@ line %r' % line)

                gr = match.groups()
                # a length that is left out in the @@ line counts as 1
                (old_line, old_end,
                 new_line, new_end) = [int(x or 1) for x in gr[:-1]]
                # the counters hold the number of the last line consumed
                old_line -= 1
                new_line -= 1

                # NOTE(review): _chunk_re has five groups, so this looks
                # like it is always true - confirm
                context = len(gr) == 5
                # turn the lengths into the last line number of the hunk
                old_end += old_line
                new_end += new_line

                if context:
                    # skip context only if it's first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action':     'context',
                            'line':       line,
                        })

                line = diff.next()

                # consume lines until both sides of the hunk are complete
                while old_line < old_end or new_line < new_end:
                    if not line:
                        raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))

                    affects_old = affects_new = False

                    command = line[0]
                    if command == '+':
                        affects_new = True
                        action = 'add'
                        stats[0] += 1
                    elif command == '-':
                        affects_old = True
                        action = 'del'
                        stats[1] += 1
                    elif command == ' ':
                        affects_old = affects_new = True
                        action = 'unmod'
                    else:
                        raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))

                    if not self._newline_marker.match(line):
                        # the booleans are added as 0 or 1
                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno':   affects_old and old_line or '',
                            'new_lineno':   affects_new and new_line or '',
                            'action':       action,
                            'line':         self._clean_line(line, command)
                        })

                    line = diff.next()

                    if self._newline_marker.match(line):
                        # we need to append to lines, since this is not
                        # counted in the line specs of diff
                        lines.append({
                            'old_lineno':   '...',
                            'new_lineno':   '...',
                            'action':       'context',
                            'line':         self._clean_line(line, command)
                        })
                        line = diff.next()
                if old_line > old_end:
                        raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
                if new_line > new_end:
                        raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
        except StopIteration:
            pass
        # the input must not end in the middle of a hunk
        if old_line != old_end or new_line != new_end:
            raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))

        return chunks, stats
 

	
 
    def _safe_id(self, idstring):
 
        """Make a string safe for including in an id attribute.
 

	
 
        The HTML spec says that id attributes 'must begin with
 
        a letter ([A-Za-z]) and may be followed by any number
 
        of letters, digits ([0-9]), hyphens ("-"), underscores
 
        ("_"), colons (":"), and periods (".")'. These regexps
 
        are slightly over-zealous, in that they remove colons
 
        and periods unnecessarily.
 

	
 
        Whitespace is transformed into underscores, and then
 
        anything which is not a hyphen or a character that
 
        matches \w (alphanumerics and underscore) is removed.
 

	
 
        """
 
        # Transform all whitespace to underscore
 
        idstring = re.sub(r'\s', "_", idstring)
 
        # Remove everything that is not a hyphen or a member of \w
 
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
        return idstring
 

	
 
    def prepare(self, inline_diff=True):
 
        """
 
        Prepare the passed udiff for HTML rendering. It'll return a list
 
        of dicts with diff information
 
        """
 
        parsed = self._parser(inline_diff=inline_diff)
 
        self.parsed = True
 
        self.parsed_diff = parsed
 
        return parsed
 

	
 
    def as_raw(self, diff_lines=None):
 
        """
 
        Returns raw string diff
 
        """
 
        return self._diff
 
        #return u''.join(imap(self._line_counter, self._diff.splitlines(1)))
 

	
 
    def as_html(self, table_class='code-difftable', line_class='line',
                old_lineno_class='lineno old', new_lineno_class='lineno new',
                no_lineno_class='lineno',
                code_class='code', enable_comments=False, parsed_lines=None):
        """
        Render the parsed diff as a single HTML ``<table>`` string.

        Every change of every chunk becomes one ``<tr>`` holding an old line
        number cell, a new line number cell and a code cell. Rows where both
        line numbers are ``'...'`` get one line number cell with
        ``colspan="2"`` instead of two separate cells.

        :param table_class: css class of the ``<table>`` element
        :param line_class: css class put on every ``<tr>`` (next to the
            change's action)
        :param old_lineno_class: css class of the old line number cell
        :param new_lineno_class: css class of the new line number cell
        :param no_lineno_class: css class of the merged line number cell
        :param code_class: css class of the code cell
        :param enable_comments: when false, ``no-comment`` is added to the
            code cell's classes
        :param parsed_lines: if truthy, rendered instead of
            ``self.parsed_diff``
        :returns: the HTML string, or ``None`` if no file had any chunk
        """
        def _anchor_link(label, target):
            # Line numbers are always emitted as links, '...' markers too.
            return '<a href="%s">%s</a>' % (target, label)

        if not self.parsed:
            self.prepare()

        file_diffs = parsed_lines or self.parsed_diff

        # Loop invariant: extra class on the code cell.
        comment_class = '' if enable_comments else 'no-comment'

        saw_chunk = False
        html = ['<table class="%s">\n' % (table_class,)]

        for file_diff in file_diffs:
            for chunk in file_diff['chunks']:
                # A chunk counts as content even if it holds no changes.
                saw_chunk = True
                for change in chunk:
                    action = change['action']
                    file_id = self._safe_id(file_diff['filename'])
                    old_no = change['old_lineno']
                    new_no = change['new_lineno']
                    old_anchor = '%s_o%s' % (file_id, old_no)
                    new_anchor = '%s_n%s' % (file_id, new_no)

                    # Rows with '...' on both sides have no real line numbers.
                    is_marker = old_no == '...' and new_no == '...'

                    # Only real (non-'...', non-empty) numbers get an id.
                    old_id_attr = ''
                    new_id_attr = ''
                    if old_no != '...' and old_no:
                        old_id_attr = 'id="%s"' % old_anchor
                    if new_no != '...' and new_no:
                        new_id_attr = 'id="%s"' % new_anchor

                    if is_marker:
                        first_class = no_lineno_class
                        colspan = 'colspan="2"'
                    else:
                        first_class = old_lineno_class
                        colspan = ''

                    # Row opening and old line number cell.
                    cells = [
                        '<tr class="%s %s">\n' % (line_class, action),
                        '\t<td %s class="%s" %s>' % (old_id_attr, first_class,
                                                     colspan),
                        _anchor_link(old_no, '#%s' % old_anchor),
                        '</td>\n',
                    ]

                    # New line number cell, unless the old one spans both.
                    if not is_marker:
                        cells.extend([
                            '\t<td %s class="%s">' % (new_id_attr,
                                                      new_lineno_class),
                            _anchor_link(new_no, '#%s' % new_anchor),
                            '</td>\n',
                        ])

                    # Code cell.
                    # NOTE(review): change['line'] is inserted verbatim -
                    # presumably already HTML-escaped by the parser; confirm.
                    cells.extend([
                        '\t<td class="%s %s">' % (code_class, comment_class),
                        '\n\t\t<div class="add-bubble"><div>&nbsp;</div></div>'
                        '<pre>%s</pre>\n' % (change['line'],),
                        '\t</td>',
                        '\n</tr>\n',
                    ])
                    html.extend(cells)

        if not saw_chunk:
            return None
        html.append('</table>')
        return ''.join(html)
 

	
 
    def stat(self):
        """
        Return the ``(adds, removes)`` pair read from ``self.adds`` and
        ``self.removes``, i.e. the added and removed line counts of this
        instance.
        """
        return (self.adds, self.removes)
0 comments (0 inline, 0 general)