Changeset - 9203621cae03
[Not reviewed]
default
0 18 0
Mads Kiilerich - 6 years ago 2019-12-28 01:08:48
mads@kiilerich.com
Grafted from: bcf2dec5faa9
vcs: always return bytes from node.content

We would rather have the unicode conversions be explicit.

Note: Py3 bytes.startswith doesn't accept str prefixes - replace those checks with a bytes regexp.
18 files changed with 43 insertions and 51 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/admin/gists.py
Show inline comments
 
@@ -182,7 +182,7 @@ class GistsController(BaseController):
 
            log.error(traceback.format_exc())
 
            raise HTTPNotFound()
 
        if format == 'raw':
 
            content = '\n\n'.join([f.content for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)])
 
            content = '\n\n'.join([safe_unicode(f.content) for f in c.files if (f_path is None or safe_unicode(f.path) == f_path)])
 
            response.content_type = 'text/plain'
 
            return content
 
        return render('admin/gists/show.html')
kallithea/controllers/compare.py
Show inline comments
 
@@ -272,7 +272,7 @@ class CompareController(BaseRepoControll
 
                                      ignore_whitespace=ignore_whitespace,
 
                                      context=line_context)
 

	
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 
        diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
 
        c.limited_diff = diff_processor.limited_diff
 
        c.file_diff_data = []
 
        c.lines_added = 0
kallithea/controllers/feed.py
Show inline comments
 
@@ -94,7 +94,7 @@ class FeedController(BaseRepoController)
 
        desc_msg.extend(changes)
 
        if str2bool(CONFIG.get('rss_include_diff', False)):
 
            desc_msg.append('\n\n')
 
            desc_msg.append(raw_diff)
 
            desc_msg.append(safe_unicode(raw_diff))
 
        desc_msg.append('</pre>')
 
        return [safe_unicode(chunk) for chunk in desc_msg]
 

	
kallithea/controllers/files.py
Show inline comments
 
@@ -46,7 +46,7 @@ from kallithea.lib.auth import HasRepoPe
 
from kallithea.lib.base import BaseRepoController, jsonify, render
 
from kallithea.lib.exceptions import NonRelativePathError
 
from kallithea.lib.utils import action_logger
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, str2bool
 
from kallithea.lib.utils2 import convert_line_endings, detect_mode, safe_int, safe_str, safe_unicode, str2bool
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (
 
@@ -365,8 +365,7 @@ class FilesController(BaseRepoController
 
        c.f_path = f_path
 

	
 
        if r_post:
 

	
 
            old_content = c.file.content
 
            old_content = safe_unicode(c.file.content)
 
            sl = old_content.splitlines(1)
 
            first_line = sl[0] if sl else ''
 
            # modes:  0 - Unix, 1 - Mac, 2 - DOS
kallithea/controllers/pullrequests.py
Show inline comments
 
@@ -591,7 +591,7 @@ class PullrequestsController(BaseRepoCon
 
                                      ignore_whitespace=ignore_whitespace, context=line_context)
 
        except ChangesetDoesNotExistError:
 
            raw_diff = _("The diff can't be shown - the PR revisions could not be found.")
 
        diff_processor = diffs.DiffProcessor(raw_diff or '', diff_limit=diff_limit)
 
        diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
 
        c.limited_diff = diff_processor.limited_diff
 
        c.file_diff_data = []
 
        c.lines_added = 0
kallithea/controllers/summary.py
Show inline comments
 
@@ -46,7 +46,7 @@ from kallithea.lib.celerylib.tasks impor
 
from kallithea.lib.compat import json
 
from kallithea.lib.markup_renderer import MarkupRenderer
 
from kallithea.lib.page import Page
 
from kallithea.lib.utils2 import safe_int
 
from kallithea.lib.utils2 import safe_int, safe_unicode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, NodeDoesNotExistError
 
from kallithea.lib.vcs.nodes import FileNode
 
@@ -84,7 +84,7 @@ class SummaryController(BaseRepoControll
 
                        readme_file = f
 
                        log.debug('Found README file `%s` rendering...',
 
                                  readme_file)
 
                        readme_data = renderer.render(readme.content,
 
                        readme_data = renderer.render(safe_unicode(readme.content),
 
                                                      filename=f)
 
                        break
 
                    except NodeDoesNotExistError:
kallithea/lib/annotate.py
Show inline comments
 
@@ -30,6 +30,7 @@ from pygments.formatters import HtmlForm
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.lib.vcs.utils import safe_unicode
 

	
 

	
 
def annotate_highlight(filenode, annotate_from_changeset_func=None,
 
@@ -53,7 +54,7 @@ def annotate_highlight(filenode, annotat
 
        headers=headers,
 
        annotate_from_changeset_func=annotate_from_changeset_func, **options)
 
    lexer = get_custom_lexer(filenode.extension) or filenode.lexer
 
    highlighted = highlight(filenode.content, lexer, formatter)
 
    highlighted = highlight(safe_unicode(filenode.content), lexer, formatter)
 
    return highlighted
 

	
 

	
kallithea/lib/diffs.py
Show inline comments
 
@@ -289,8 +289,8 @@ class DiffProcessor(object):
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 
        if not isinstance(diff, bytes):
 
            raise Exception('Diff must be bytes - got %s' % type(diff))
 

	
 
        self._diff = diff
 
        self.adds = 0
 
@@ -516,6 +516,9 @@ _hg_header_re = re.compile(r"""
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 

	
 

	
 
def _get_header(vcs, diff_chunk):
 
    """
 
    Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 
@@ -537,7 +540,7 @@ def _get_header(vcs, diff_chunk):
 
        raise Exception('diff not recognized as valid %s diff' % vcs)
 
    meta_info = match.groupdict()
 
    rest = diff_chunk[match.end():]
 
    if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
 
    if rest and _header_next_check.match(rest):
 
        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000]))
 
    diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
    return meta_info, diff_lines
kallithea/lib/helpers.py
Show inline comments
 
@@ -330,7 +330,7 @@ def pygmentize(filenode, **kwargs):
 
    """
 
    lexer = get_custom_lexer(filenode.extension) or filenode.lexer
 
    return literal(markup_whitespace(
 
        code_highlight(filenode.content, lexer, CodeHtmlFormatter(**kwargs))))
 
        code_highlight(safe_unicode(filenode.content), lexer, CodeHtmlFormatter(**kwargs))))
 

	
 

	
 
def pygmentize_annotation(repo_name, filenode, **kwargs):
kallithea/lib/indexers/daemon.py
Show inline comments
 
@@ -182,12 +182,13 @@ class WhooshIndexingDaemon(object):
 

	
 
        indexed = indexed_w_content = 0
 
        if self.is_indexable_node(node):
 
            u_content = node.content
 
            if not isinstance(u_content, unicode):
 
            bytes_content = node.content
 
            if b'\0' in bytes_content:
 
                log.warning('    >> %s - no text content', path)
 
                u_content = u''
 
            else:
 
                log.debug('    >> %s', path)
 
                u_content = safe_unicode(bytes_content)
 
                indexed_w_content += 1
 

	
 
        else:
kallithea/lib/vcs/backends/git/inmemory.py
Show inline comments
 
@@ -68,11 +68,7 @@ class GitInMemoryChangeset(BaseInMemoryC
 
            # for dirnames (in reverse order) [this only applies for nodes from added]
 
            new_trees = []
 

	
 
            if not node.is_binary:
 
                content = node.content.encode(ENCODING)
 
            else:
 
                content = node.content
 
            blob = objects.Blob.from_string(content)
 
            blob = objects.Blob.from_string(node.content)
 

	
 
            node_path = safe_bytes(node.name)
 
            if dirnames:
kallithea/lib/vcs/backends/hg/inmemory.py
Show inline comments
 
@@ -52,8 +52,7 @@ class MercurialInMemoryChangeset(BaseInM
 
            for node in self.added:
 
                if node.path == path:
 
                    return memfilectx(_repo, memctx, path=node.path,
 
                        data=(node.content.encode('utf-8')
 
                              if not node.is_binary else node.content),
 
                        data=node.content,
 
                        islink=False,
 
                        isexec=node.is_executable,
 
                        copysource=False)
 
@@ -62,8 +61,7 @@ class MercurialInMemoryChangeset(BaseInM
 
            for node in self.changed:
 
                if node.path == path:
 
                    return memfilectx(_repo, memctx, path=node.path,
 
                        data=(node.content.encode('utf-8')
 
                              if not node.is_binary else node.content),
 
                        data=node.content,
 
                        islink=False,
 
                        isexec=node.is_executable,
 
                        copysource=False)
kallithea/lib/vcs/nodes.py
Show inline comments
 
@@ -16,7 +16,7 @@ import stat
 

	
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.exceptions import NodeError, RemovedFileNodeError
 
from kallithea.lib.vcs.utils import safe_str, safe_unicode
 
from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 

	
 

	
 
@@ -263,6 +263,10 @@ class FileNode(Node):
 
            raise NodeError("Cannot use both content and changeset")
 
        super(FileNode, self).__init__(path, kind=NodeKind.FILE)
 
        self.changeset = changeset
 
        if not isinstance(content, bytes) and content is not None:
 
            # File content is one thing that inherently must be bytes ... but
 
            # VCS module tries to be "user friendly" and support unicode ...
 
            content = safe_bytes(content)
 
        self._content = content
 
        self._mode = mode or 0o100644
 

	
 
@@ -278,25 +282,17 @@ class FileNode(Node):
 
            mode = self._mode
 
        return mode
 

	
 
    def _get_content(self):
 
    @property
 
    def content(self):
 
        """
 
        Returns lazily byte content of the FileNode.
 
        """
 
        if self.changeset:
 
            content = self.changeset.get_file_content(self.path)
 
        else:
 
            content = self._content
 
        return content
 

	
 
    @property
 
    def content(self):
 
        """
 
        Returns lazily content of the FileNode. If possible, would try to
 
        decode content from UTF-8.
 
        """
 
        content = self._get_content()
 

	
 
        if bool(content and '\0' in content):
 
            return content
 
        return safe_unicode(content)
 

	
 
    @LazyProperty
 
    def size(self):
 
        if self.changeset:
 
@@ -366,7 +362,7 @@ class FileNode(Node):
 
        """
 
        from pygments import lexers
 
        try:
 
            lexer = lexers.guess_lexer_for_filename(self.name, self.content, stripnl=False)
 
            lexer = lexers.guess_lexer_for_filename(self.name, safe_unicode(self.content), stripnl=False)
 
        except lexers.ClassNotFound:
 
            lexer = lexers.TextLexer(stripnl=False)
 
        # returns first alias
 
@@ -414,8 +410,7 @@ class FileNode(Node):
 
        """
 
        Returns True if file has binary content.
 
        """
 
        _bin = '\0' in self._get_content()
 
        return _bin
 
        return b'\0' in self.content
 

	
 
    def is_browser_compatible_image(self):
 
        return self.mimetype in [
kallithea/lib/vcs/utils/annotate.py
Show inline comments
 
@@ -3,6 +3,7 @@ from pygments.formatters import HtmlForm
 

	
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.lib.vcs.utils import safe_unicode
 

	
 

	
 
def annotate_highlight(filenode, annotate_from_changeset_func=None,
 
@@ -24,9 +25,7 @@ def annotate_highlight(filenode, annotat
 
    formatter = AnnotateHtmlFormatter(filenode=filenode, order=order,
 
        headers=headers,
 
        annotate_from_changeset_func=annotate_from_changeset_func, **options)
 
    lexer = filenode.lexer
 
    highlighted = highlight(filenode.content, lexer, formatter)
 
    return highlighted
 
    return highlight(safe_unicode(filenode.content), filenode.lexer, formatter)
 

	
 

	
 
class AnnotateHtmlFormatter(HtmlFormatter):
kallithea/templates/admin/gists/edit.html
Show inline comments
 
@@ -73,7 +73,7 @@
 
                    </div>
 
                    <div class="panel-body no-padding">
 
                        <div id="editor_container">
 
                            <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${file.content}</textarea>
 
                            <textarea id="editor_${h.FID('f',file.path)}" name="contents" style="display:none">${safe_unicode(file.content)}</textarea>
 
                        </div>
 
                    </div>
 
                </div>
kallithea/templates/files/files_edit.html
Show inline comments
 
@@ -59,7 +59,7 @@ ${self.repo_context_bar('files')}
 
                    </span>
 
              </div>
 
              <div class="panel-body no-padding">
 
                <textarea id="editor" name="content" style="display:none">${h.escape(c.file.content)|n}</textarea>
 
                <textarea id="editor" name="content" style="display:none">${h.escape(safe_unicode(c.file.content))|n}</textarea>
 
              </div>
 
            </div>
 
            <div>
kallithea/tests/vcs/test_git.py
Show inline comments
 
@@ -596,11 +596,11 @@ class TestGitChangeset(object):
 
        for cs in self.repo:
 
            assert isinstance(cs.author, unicode)
 

	
 
    def test_repo_files_content_is_unicode(self):
 
    def test_repo_files_content_is_bytes(self):
 
        changeset = self.repo.get_changeset()
 
        for node in changeset.get_node('/'):
 
            if node.is_file():
 
                assert isinstance(node.content, unicode)
 
                assert isinstance(node.content, bytes)
 

	
 
    def test_wrong_path(self):
 
        # There is 'setup.py' in the root dir but not there:
kallithea/tests/vcs/test_hg.py
Show inline comments
 
@@ -544,11 +544,11 @@ class TestMercurialChangeset(object):
 
        for cm in self.repo:
 
            assert isinstance(cm.author, unicode)
 

	
 
    def test_repo_files_content_is_unicode(self):
 
    def test_repo_files_content_is_bytes(self):
 
        test_changeset = self.repo.get_changeset(100)
 
        for node in test_changeset.get_node('/'):
 
            if node.is_file():
 
                assert isinstance(node.content, unicode)
 
                assert isinstance(node.content, bytes)
 

	
 
    def test_wrong_path(self):
 
        # There is 'setup.py' in the root dir but not there:
0 comments (0 inline, 0 general)