Changeset - 9203621cae03
[Not reviewed]
default
0 18 0
Mads Kiilerich - 6 years ago 2019-12-28 01:08:48
mads@kiilerich.com
Grafted from: bcf2dec5faa9
vcs: always return bytes from node.content

We will rather have the unicode conversions explicit.

Note: Py3 bytes doesn't have .startswith - replace that with a regexp.
18 files changed with 43 insertions and 51 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/admin/gists.py
Show inline comments
 
@@ -179,13 +179,13 @@ class GistsController(BaseController):
 
            c.file_changeset, c.files = GistModel().get_gist_files(gist_id,
 
                                                            revision=revision)
 
        except VCSError:
 
            log.error(traceback.format_exc())
 
            raise HTTPNotFound()
 
        if format == 'raw':
 
            content = '\n\n'.join([f.content for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)])
 
            content = '\n\n'.join([safe_unicode(f.content) for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)])
 
            response.content_type = 'text/plain'
 
            return content
 
        return render('admin/gists/show.html')
 

	
 
    @LoginRequired()
 
    def edit(self, gist_id, format='html'):
kallithea/controllers/compare.py
Show inline comments
 
@@ -269,13 +269,13 @@ class CompareController(BaseRepoControll
 
        log.debug('running diff between %s and %s in %s',
 
                  rev1, c.cs_rev, org_repo.scm_instance.path)
 
        raw_diff = diffs.get_diff(org_repo.scm_instance, rev1=rev1, rev2=c.cs_rev,
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=line_context)
 

	
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 
        diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
 
        c.limited_diff = diff_processor.limited_diff
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 
        for f in diff_processor.parsed:
 
            st = f['stats']
kallithea/controllers/feed.py
Show inline comments
 
@@ -91,13 +91,13 @@ class FeedController(BaseRepoController)
 
        desc_msg.append('<pre>')
 
        desc_msg.append(h.urlify_text(cs.message))
 
        desc_msg.append('\n')
 
        desc_msg.extend(changes)
 
        if str2bool(CONFIG.get('rss_include_diff', False)):
 
            desc_msg.append('\n\n')
 
            desc_msg.append(raw_diff)
 
            desc_msg.append(safe_unicode(raw_diff))
 
        desc_msg.append('</pre>')
 
        return [safe_unicode(chunk) for chunk in desc_msg]
 

	
 
    def _feed(self, repo_name, feeder):
 
        """Produce a simple feed"""
 

	
kallithea/controllers/files.py
Show inline comments
 
@@ -43,13 +43,13 @@ from kallithea.controllers.changeset imp
 
from kallithea.lib import diffs
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.base import BaseRepoController, jsonify, render
 
from kallithea.lib.exceptions import NonRelativePathError
 
from kallithea.lib.utils import action_logger
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, str2bool
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, safe_unicode, str2bool
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (
 
    ChangesetDoesNotExistError, ChangesetError, EmptyRepositoryError, ImproperArchiveTypeError, NodeAlreadyExistsError, NodeDoesNotExistError, NodeError, RepositoryError, VCSError)
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.model.db import Repository
 
@@ -362,14 +362,13 @@ class FilesController(BaseRepoController
 
            raise HTTPFound(location=url('files_home', repo_name=c.repo_name,
 
                            revision=c.cs.raw_id, f_path=f_path))
 
        c.default_message = _('Edited file %s via Kallithea') % (f_path)
 
        c.f_path = f_path
 

	
 
        if r_post:
 

	
 
            old_content = c.file.content
 
            old_content = safe_unicode(c.file.content)
 
            sl = old_content.splitlines(1)
 
            first_line = sl[0] if sl else ''
 
            # modes:  0 - Unix, 1 - Mac, 2 - DOS
 
            mode = detect_mode(first_line, 0)
 
            content = convert_line_endings(r_post.get('content', ''), mode)
 

	
kallithea/controllers/pullrequests.py
Show inline comments
 
@@ -588,13 +588,13 @@ class PullrequestsController(BaseRepoCon
 
                  c.a_rev, c.cs_rev, org_scm_instance.path)
 
        try:
 
            raw_diff = diffs.get_diff(org_scm_instance, rev1=safe_str(c.a_rev), rev2=safe_str(c.cs_rev),
 
                                      ignore_whitespace=ignore_whitespace, context=line_context)
 
        except ChangesetDoesNotExistError:
 
            raw_diff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 
        diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
 
        c.limited_diff = diff_processor.limited_diff
 
        c.file_diff_data = []
 
        c.lines_added = 0
 
        c.lines_deleted = 0
 

	
 
        for f in diff_processor.parsed:
kallithea/controllers/summary.py
Show inline comments
 
@@ -43,13 +43,13 @@ from kallithea.config.conf import ALL_EX
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.base import BaseRepoController, jsonify, render
 
from kallithea.lib.celerylib.tasks import get_commits_stats
 
from kallithea.lib.compat import json
 
from kallithea.lib.markup_renderer import MarkupRenderer
 
from kallithea.lib.page import Page
 
from kallithea.lib.utils2 import safe_int
 
from kallithea.lib.utils2 import safe_int, safe_unicode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, NodeDoesNotExistError
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.model.db import Statistics
 

	
 

	
 
@@ -81,13 +81,13 @@ class SummaryController(BaseRepoControll
 
                        readme = cs.get_node(f)
 
                        if not isinstance(readme, FileNode):
 
                            continue
 
                        readme_file = f
 
                        log.debug('Found README file `%s` rendering...',
 
                                  readme_file)
 
                        readme_data = renderer.render(readme.content,
 
                        readme_data = renderer.render(safe_unicode(readme.content),
 
                                                      filename=f)
 
                        break
 
                    except NodeDoesNotExistError:
 
                        continue
 
            except ChangesetError:
 
                log.error(traceback.format_exc())
kallithea/lib/annotate.py
Show inline comments
 
@@ -27,12 +27,13 @@ Original author and date, and relevant c
 

	
 
from pygments import highlight
 
from pygments.formatters import HtmlFormatter
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.lib.vcs.utils import safe_unicode
 

	
 

	
 
def annotate_highlight(filenode, annotate_from_changeset_func=None,
 
        order=None, headers=None, **options):
 
    """
 
    Returns html portion containing annotated table with 3 columns: line
 
@@ -50,13 +51,13 @@ def annotate_highlight(filenode, annotat
 
    from kallithea.lib.pygmentsutils import get_custom_lexer
 
    options['linenos'] = True
 
    formatter = AnnotateHtmlFormatter(filenode=filenode, order=order,
 
        headers=headers,
 
        annotate_from_changeset_func=annotate_from_changeset_func, **options)
 
    lexer = get_custom_lexer(filenode.extension) or filenode.lexer
 
    highlighted = highlight(filenode.content, lexer, formatter)
 
    highlighted = highlight(safe_unicode(filenode.content), lexer, formatter)
 
    return highlighted
 

	
 

	
 
class AnnotateHtmlFormatter(HtmlFormatter):
 

	
 
    def __init__(self, filenode, annotate_from_changeset_func=None,
kallithea/lib/diffs.py
Show inline comments
 
@@ -286,14 +286,14 @@ class DiffProcessor(object):
 
        :param diff:   a text in diff format
 
        :param vcs: type of version control hg or git
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 
        if not isinstance(diff, bytes):
 
            raise Exception('Diff must be bytes - got %s' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
        self.removes = 0
 
        self.diff_limit = diff_limit
 
        self.limited_diff = False
 
@@ -513,12 +513,15 @@ _hg_header_re = re.compile(r"""
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 

	
 

	
 
def _get_header(vcs, diff_chunk):
 
    """
 
    Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 

	
 
    1. A dict with meta info:
 

	
 
@@ -534,13 +537,13 @@ def _get_header(vcs, diff_chunk):
 
    elif vcs == 'hg':
 
        match = _hg_header_re.match(diff_chunk)
 
    if match is None:
 
        raise Exception('diff not recognized as valid %s diff' % vcs)
 
    meta_info = match.groupdict()
 
    rest = diff_chunk[match.end():]
 
    if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
    if rest and _header_next_check.match(rest):
 
        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000]))
 
    diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
    return meta_info, diff_lines
 

	
 

	
 
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
kallithea/lib/helpers.py
Show inline comments
 
@@ -327,13 +327,13 @@ def pygmentize(filenode, **kwargs):
 
    pygmentize function using pygments
 

	
 
    :param filenode:
 
    """
 
    lexer = get_custom_lexer(filenode.extension) or filenode.lexer
 
    return literal(markup_whitespace(
 
        code_highlight(filenode.content, lexer, CodeHtmlFormatter(**kwargs))))
 
        code_highlight(safe_unicode(filenode.content), lexer, CodeHtmlFormatter(**kwargs))))
 

	
 

	
 
def pygmentize_annotation(repo_name, filenode, **kwargs):
 
    """
 
    pygmentize function for annotation
 

	
kallithea/lib/indexers/daemon.py
Show inline comments
 
@@ -179,18 +179,19 @@ class WhooshIndexingDaemon(object):
 
        except (ChangesetError, NodeDoesNotExistError):
 
            log.debug("    >> %s - not found in %s %s", path, repo, index_rev)
 
            return 0, 0
 

	
 
        indexed = indexed_w_content = 0
 
        if self.is_indexable_node(node):
 
            u_content = node.content
 
            if not isinstance(u_content, unicode):
 
            bytes_content = node.content
 
            if b'\0' in bytes_content:
 
                log.warning('    >> %s - no text content', path)
 
                u_content = u''
 
            else:
 
                log.debug('    >> %s', path)
 
                u_content = safe_unicode(bytes_content)
 
                indexed_w_content += 1
 

	
 
        else:
 
            log.debug('    >> %s - not indexable', path)
 
            # just index file name without it's content
 
            u_content = u''
kallithea/lib/vcs/backends/git/inmemory.py
Show inline comments
 
@@ -65,17 +65,13 @@ class GitInMemoryChangeset(BaseInMemoryC
 
                    parent = self.repository._repo[dir_id]
 
                    ancestors.append((curdir, parent))
 
            # Now parent is deepest existing tree and we need to create subtrees
 
            # for dirnames (in reverse order) [this only applies for nodes from added]
 
            new_trees = []
 

	
 
            if not node.is_binary:
 
                content = node.content.encode(ENCODING)
 
            else:
 
                content = node.content
 
            blob = objects.Blob.from_string(content)
 
            blob = objects.Blob.from_string(node.content)
 

	
 
            node_path = safe_bytes(node.name)
 
            if dirnames:
 
                # If there are trees which should be created we need to build
 
                # them now (in reverse order)
 
                reversed_dirnames = list(reversed(dirnames))
kallithea/lib/vcs/backends/hg/inmemory.py
Show inline comments
 
@@ -49,24 +49,22 @@ class MercurialInMemoryChangeset(BaseInM
 
                return None
 

	
 
            # check if this path is added
 
            for node in self.added:
 
                if node.path == path:
 
                    return memfilectx(_repo, memctx, path=node.path,
 
                        data=(node.content.encode('utf-8')
 
                              if not node.is_binary else node.content),
 
                        data=node.content,
 
                        islink=False,
 
                        isexec=node.is_executable,
 
                        copysource=False)
 

	
 
            # or changed
 
            for node in self.changed:
 
                if node.path == path:
 
                    return memfilectx(_repo, memctx, path=node.path,
 
                        data=(node.content.encode('utf-8')
 
                              if not node.is_binary else node.content),
 
                        data=node.content,
 
                        islink=False,
 
                        isexec=node.is_executable,
 
                        copysource=False)
 

	
 
            raise RepositoryError("Given path haven't been marked as added,"
 
                                  "changed or removed (%s)" % path)
kallithea/lib/vcs/nodes.py
Show inline comments
 
@@ -13,13 +13,13 @@ import functools
 
import mimetypes
 
import posixpath
 
import stat
 

	
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.exceptions import NodeError, RemovedFileNodeError
 
from kallithea.lib.vcs.utils import safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 

	
 

	
 
class NodeKind:
 
    SUBMODULE = -1
 
    DIR = 1
 
@@ -260,12 +260,16 @@ class FileNode(Node):
 
        """
 

	
 
        if content and changeset:
 
            raise NodeError("Cannot use both content and changeset")
 
        super(FileNode, self).__init__(path, kind=NodeKind.FILE)
 
        self.changeset = changeset
 
        if not isinstance(content, bytes) and content is not None:
 
            # File content is one thing that inherently must be bytes ... but
 
            # VCS module tries to be "user friendly" and support unicode ...
 
            content = safe_bytes(content)
 
        self._content = content
 
        self._mode = mode or 0o100644
 

	
 
    @LazyProperty
 
    def mode(self):
 
        """
 
@@ -275,31 +279,23 @@ class FileNode(Node):
 
        if self.changeset:
 
            mode = self.changeset.get_file_mode(self.path)
 
        else:
 
            mode = self._mode
 
        return mode
 

	
 
    def _get_content(self):
 
    @property
 
    def content(self):
 
        """
 
        Returns lazily byte content of the FileNode.
 
        """
 
        if self.changeset:
 
            content = self.changeset.get_file_content(self.path)
 
        else:
 
            content = self._content
 
        return content
 

	
 
    @property
 
    def content(self):
 
        """
 
        Returns lazily content of the FileNode. If possible, would try to
 
        decode content from UTF-8.
 
        """
 
        content = self._get_content()
 

	
 
        if bool(content and '\0' in content):
 
            return content
 
        return safe_unicode(content)
 

	
 
    @LazyProperty
 
    def size(self):
 
        if self.changeset:
 
            return self.changeset.get_file_size(self.path)
 
        raise NodeError("Cannot retrieve size of the file without related "
 
            "changeset attribute")
 
@@ -363,13 +359,13 @@ class FileNode(Node):
 
        """
 
        Returns pygment's lexer class. Would try to guess lexer taking file's
 
        content, name and mimetype.
 
        """
 
        from pygments import lexers
 
        try:
 
            lexer = lexers.guess_lexer_for_filename(self.name, self.content, stripnl=False)
 
            lexer = lexers.guess_lexer_for_filename(self.name, safe_unicode(self.content), stripnl=False)
 
        except lexers.ClassNotFound:
 
            lexer = lexers.TextLexer(stripnl=False)
 
        # returns first alias
 
        return lexer
 

	
 
    @LazyProperty
 
@@ -411,14 +407,13 @@ class FileNode(Node):
 

	
 
    @property
 
    def is_binary(self):
 
        """
 
        Returns True if file has binary content.
 
        """
 
        _bin = '\0' in self._get_content()
 
        return _bin
 
        return b'\0' in self.content
 

	
 
    def is_browser_compatible_image(self):
 
        return self.mimetype in [
 
            "image/gif",
 
            "image/jpeg",
 
            "image/png",
kallithea/lib/vcs/utils/annotate.py
Show inline comments
 
from pygments import highlight
 
from pygments.formatters import HtmlFormatter
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.lib.vcs.utils import safe_unicode
 

	
 

	
 
def annotate_highlight(filenode, annotate_from_changeset_func=None,
 
        order=None, headers=None, **options):
 
    """
 
    Returns html portion containing annotated table with 3 columns: line
 
@@ -21,15 +22,13 @@ def annotate_highlight(filenode, annotat
 
      parameter)
 
    """
 
    options['linenos'] = True
 
    formatter = AnnotateHtmlFormatter(filenode=filenode, order=order,
 
        headers=headers,
 
        annotate_from_changeset_func=annotate_from_changeset_func, **options)
 
    lexer = filenode.lexer
 
    highlighted = highlight(filenode.content, lexer, formatter)
 
    return highlighted
 
    return highlight(safe_unicode(filenode.content), filenode.lexer, formatter)
 

	
 

	
 
class AnnotateHtmlFormatter(HtmlFormatter):
 

	
 
    def __init__(self, filenode, annotate_from_changeset_func=None,
 
            order=None, **options):
kallithea/templates/admin/gists/edit.html
Show inline comments
 
@@ -70,13 +70,13 @@
 
                        <input type="hidden" value="${h.safe_unicode(file.path)}" name="org_files">
 
                        <input class="form-control" id="filename_${h.FID('f',file.path)}" name="files" size="30" type="text" value="${h.safe_unicode(file.path)}">
 
                        <select class="form-control" id="mimetype_${h.FID('f',file.path)}" name="mimetypes"></select>
 
                    </div>
 
                    <div class="panel-body no-padding">
 
                        <div id="editor_container">
 
                            <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${file.content}</textarea>
 
                            <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${safe_unicode(file.content)}</textarea>
 
                        </div>
 
                    </div>
 
                </div>
 

	
 
                ## dynamic edit box.
 
                <script type="text/javascript">
kallithea/templates/files/files_edit.html
Show inline comments
 
@@ -56,13 +56,13 @@ ${self.repo_context_bar('files')}
 
                        ${h.link_to(_('Source'),h.url('files_home',repo_name=c.repo_name,revision=c.cs.raw_id,f_path=c.f_path),class_="btn btn-default btn-xs")}
 
                       % endif
 
                      % endif
 
                    </span>
 
              </div>
 
              <div class="panel-body no-padding">
 
                <textarea id="editor" name="content" style="display:none">${h.escape(c.file.content)|n}</textarea>
 
                <textarea id="editor" name="content" style="display:none">${h.escape(safe_unicode(c.file.content))|n}</textarea>
 
              </div>
 
            </div>
 
            <div>
 
              <div class="form-group">
 
                  <label>${_('Commit Message')}</label>
 
                  <textarea class="form-control" id="commit" name="message" placeholder="${c.default_message}"></textarea>
kallithea/tests/vcs/test_git.py
Show inline comments
 
@@ -593,17 +593,17 @@ class TestGitChangeset(object):
 
            assert isinstance(cs.message, unicode)
 

	
 
    def test_changeset_author_is_unicode(self):
 
        for cs in self.repo:
 
            assert isinstance(cs.author, unicode)
 

	
 
    def test_repo_files_content_is_unicode(self):
 
    def test_repo_files_content_is_bytes(self):
 
        changeset = self.repo.get_changeset()
 
        for node in changeset.get_node('/'):
 
            if node.is_file():
 
                assert isinstance(node.content, unicode)
 
                assert isinstance(node.content, bytes)
 

	
 
    def test_wrong_path(self):
 
        # There is 'setup.py' in the root dir but not there:
 
        path = 'foo/bar/setup.py'
 
        tip = self.repo.get_changeset()
 
        with pytest.raises(VCSError):
kallithea/tests/vcs/test_hg.py
Show inline comments
 
@@ -541,17 +541,17 @@ class TestMercurialChangeset(object):
 
            assert isinstance(cm.message, unicode)
 

	
 
    def test_changeset_author_is_unicode(self):
 
        for cm in self.repo:
 
            assert isinstance(cm.author, unicode)
 

	
 
    def test_repo_files_content_is_unicode(self):
 
    def test_repo_files_content_is_bytes(self):
 
        test_changeset = self.repo.get_changeset(100)
 
        for node in test_changeset.get_node('/'):
 
            if node.is_file():
 
                assert isinstance(node.content, unicode)
 
                assert isinstance(node.content, bytes)
 

	
 
    def test_wrong_path(self):
 
        # There is 'setup.py' in the root dir but not there:
 
        path = 'foo/bar/setup.py'
 
        with pytest.raises(VCSError):
 
            self.repo.get_changeset().get_node(path)
0 comments (0 inline, 0 general)