Changeset - 631e8000eae8
[Not reviewed]
beta
0 1 0
Mads Kiilerich - 13 years ago 2013-04-10 02:54:56
madski@unity3d.com
diff parser: match the header order of hg diff --git patches

The output might look like:

diff --git a/A b/B
old mode 100644
new mode 100755
rename from A
rename to B
--- a/A
+++ b/B

Such files were shown as 'modified binary file chmod 100644 => 100755' without
diff.

Now the chmod and diff will be shown ... but still not the rename.

Correct parsing of headers do require a better parser - one do not just use a
regexp.
1 file changed with 2 insertions and 2 deletions:
0 comments (0 inline, 0 general)
rhodecode/lib/diffs.py
Show inline comments
 
@@ -79,197 +79,197 @@ def wrapped_diff(filenode_old, filenode_
 
        stats = diff_processor.stat()
 
        size = len(diff or '')
 
    else:
 
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
 
                               'diff menu to display this diff'))
 
        stats = (0, 0)
 
        size = 0
 
    if not diff:
 
        submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                            [filenode_new, filenode_old])
 
        if submodules:
 
            diff = wrap_to_table(escape('Submodule %r' % submodules[0]))
 
        else:
 
            diff = wrap_to_table(_('No changes detected'))
 

	
 
    cs1 = filenode_old.changeset.raw_id
 
    cs2 = filenode_new.changeset.raw_id
 

	
 
    return size, cs1, cs2, diff, stats
 

	
 

	
 
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
 
    """
 
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
 

	
 
    :param ignore_whitespace: ignore whitespaces in diff
 
    """
 
    # make sure we pass in default context
 
    context = context or 3
 
    submodules = filter(lambda o: isinstance(o, SubModuleNode),
 
                        [filenode_new, filenode_old])
 
    if submodules:
 
        return ''
 

	
 
    for filenode in (filenode_old, filenode_new):
 
        if not isinstance(filenode, FileNode):
 
            raise VCSError("Given object should be FileNode object, not %s"
 
                % filenode.__class__)
 

	
 
    repo = filenode_new.changeset.repository
 
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
 

	
 
    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
 
                                ignore_whitespace, context)
 
    return vcs_gitdiff
 

	
 
NEW_FILENODE = 1
 
DEL_FILENODE = 2
 
MOD_FILENODE = 3
 
RENAMED_FILENODE = 4
 
CHMOD_FILENODE = 5
 

	
 

	
 
class DiffLimitExceeded(Exception):
 
    pass
 

	
 

	
 
class LimitedDiffContainer(object):
 

	
 
    def __init__(self, diff_limit, cur_diff_size, diff):
 
        self.diff = diff
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = cur_diff_size
 

	
 
    def __iter__(self):
 
        for l in self.diff:
 
            yield l
 

	
 

	
 
class DiffProcessor(object):
 
    """
 
    Give it a unified or git diff and it returns a list of the files that were
 
    mentioned in the diff together with a dict of meta information that
 
    can be used to render it in a HTML template.
 
    """
 
    _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
    _newline_marker = re.compile(r'^\\ No newline at end of file')
 
    _git_header_re = re.compile(r"""
 
        #^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
           ^rename[ ]from[ ](?P<rename_from>\S+)\n
 
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 
    _hg_header_re = re.compile(r"""
 
        #^diff[ ]--git
 
            [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
        (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
 
           ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
 
        (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
           ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
        (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
        (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
        (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
            \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
        (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
 
        (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
 
    """, re.VERBOSE | re.MULTILINE)
 

	
 
    #used for inline highlighter word split
 
    _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
 

	
 
    def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
 
        """
 
        :param diff:   a text in diff format
 
        :param vcs: type of version controll hg or git
 
        :param format: format of diff passed, `udiff` or `gitdiff`
 
        :param diff_limit: define the size of diff that is considered "big"
 
            based on that parameter cut off will be triggered, set to None
 
            to show full diff
 
        """
 
        if not isinstance(diff, basestring):
 
            raise Exception('Diff must be a basestring got %s instead' % type(diff))
 

	
 
        self._diff = diff
 
        self._format = format
 
        self.adds = 0
 
        self.removes = 0
 
        # calculate diff size
 
        self.diff_size = len(diff)
 
        self.diff_limit = diff_limit
 
        self.cur_diff_size = 0
 
        self.parsed = False
 
        self.parsed_diff = []
 
        self.vcs = vcs
 

	
 
        if format == 'gitdiff':
 
            self.differ = self._highlight_line_difflib
 
            self._parser = self._parse_gitdiff
 
        else:
 
            self.differ = self._highlight_line_udiff
 
            self._parser = self._parse_udiff
 

	
 
    def _copy_iterator(self):
 
        """
 
        make a fresh copy of generator, we should not iterate thru
 
        an original as it's needed for repeating operations on
 
        this instance of DiffProcessor
 
        """
 
        self.__udiff, iterator_copy = tee(self.__udiff)
 
        return iterator_copy
 

	
 
    def _escaper(self, string):
 
        """
 
        Escaper for diff escapes special chars and checks the diff limit
 

	
 
        :param string:
 
        :type string:
 
        """
 

	
 
        self.cur_diff_size += len(string)
 

	
 
        # escaper get's iterated on each .next() call and it checks if each
 
        # parsed line doesn't exceed the diff limit
 
        if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
 
            raise DiffLimitExceeded('Diff Limit Exceeded')
 

	
 
        return safe_unicode(string).replace('&', '&amp;')\
 
                .replace('<', '&lt;')\
 
                .replace('>', '&gt;')
 

	
 
    def _line_counter(self, l):
 
        """
 
        Checks each line and bumps total adds/removes for this diff
 

	
 
        :param l:
 
        """
 
        if l.startswith('+') and not l.startswith('+++'):
 
            self.adds += 1
 
        elif l.startswith('-') and not l.startswith('---'):
 
            self.removes += 1
 
        return safe_unicode(l)
 

	
 
    def _highlight_line_difflib(self, line, next_):
 
        """
 
        Highlight inline changes in both lines.
 
        """
 

	
 
        if line['action'] == 'del':
 
            old, new = line, next_
 
        else:
 
            old, new = next_, line
 

	
 
        oldwords = self._token_re.split(old['line'])
 
        newwords = self._token_re.split(new['line'])
 
        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
0 comments (0 inline, 0 general)