Changeset - 07f026cdf7ac
[Not reviewed]
default
0 1 0
domruf - 9 years ago 2017-05-04 20:51:26
dominikruf@gmail.com
vcs: catch MemoryErrors when calling Git diff

Binary diffs can make the diffs VERY big and cause MemoryError exceptions.

Before giving MemoryError, the system might start swapping, any process might
fail when allocating memory, random processes might get killed, and our process
might fail in other places. The proper fix would be to avoid the problem by not
trying to process more data than we can handle - for example by not processing
more than a certain amount of Git output.

Before, memory errors were shown to the user as a 500 Internal Server Error
page.

Now, as long as we have no better/safer way get the diff, catch the MemoryError
and show the page with a flash error message and no diff.

The error handling is placed in the diffs module to avoid leaking flash
messages into the vcs lib.
1 file changed with 8 insertions and 4 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -28,16 +28,16 @@ Original author and date, and relevant c
 
import re
 
import difflib
 
import logging
 

	
 
from tg.i18n import ugettext as _
 

	
 
from kallithea.lib import helpers as h
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.helpers import escape
 
from kallithea.lib.utils2 import safe_unicode
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def _safe_id(idstring):
 
@@ -215,13 +215,13 @@ def wrapped_diff(filenode_old, filenode_
 
        stats = (0, 0)
 

	
 
    if not html_diff:
 
        submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                            [filenode_new, filenode_old])
 
        if submodules:
 
            html_diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
 
            html_diff = wrap_to_table(h.escape('Submodule %r' % submodules[0]))
 
        else:
 
            html_diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
@@ -254,14 +254,18 @@ def get_gitdiff(filenode_old, filenode_n
 

	
 

	
 
def get_diff(scm_instance, rev1, rev2, path=None, ignore_whitespace=False, context=3):
 
    """
 
    A thin wrapper around vcs lib get_diff.
 
    """
 
    return scm_instance.get_diff(rev1, rev2, path=path,
 
                                 ignore_whitespace=ignore_whitespace, context=context)
 
    try:
 
        return scm_instance.get_diff(rev1, rev2, path=path,
 
                                     ignore_whitespace=ignore_whitespace, context=context)
 
    except MemoryError:
 
        h.flash('MemoryError: Diff is too big', category='error')
 
        return ''
 

	
 

	
 
NEW_FILENODE = 1
 
DEL_FILENODE = 2
 
MOD_FILENODE = 3
 
RENAMED_FILENODE = 4
0 comments (0 inline, 0 general)