# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import os
import re
import shutil
import subprocess
import tempfile


do_debug = False  # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Forward the arguments to ``print()``, but only when ``do_debug`` is set."""
    if do_debug:
        print(*args, **kwargs)


def runcmd(cmd, *args, **kwargs):
    """Run ``cmd`` (a list of strings) through ``subprocess.check_call``.

    The command line is echoed via ``debug()`` first. Extra positional and
    keyword arguments are passed straight to ``subprocess.check_call``, which
    raises ``subprocess.CalledProcessError`` on a non-zero exit status.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)


# Boilerplate lines that are removed from the comment block preceding the PO
# header entry: the template title, the "FIRST AUTHOR" placeholder line (with
# an optional following bare "#" line) and the fuzzy marker of the header.
# The <EMAIL@ADDRESS> placeholder is optional so both spellings are matched.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ](?:<EMAIL@ADDRESS>)?,[ ]\d+[.] \n
    (?:[#] \n)?
    |
    ^
    (?:[#] \n)?
    [#],[ ]fuzzy \n
    |
    ^
    [#][ ][#],[ ]fuzzy \n
    ''',
    re.MULTILINE|re.VERBOSE)

# Header fields that are removed entirely when normalizing. ``[^\\]*`` also
# spans quotes and newlines, so a field value wrapped over several quoted
# lines is removed up to and including its terminating ``\n"`` line.
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
    [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)


def _normalize_po(raw_content):
    r"""Return ``raw_content`` (the text of a PO/POT file) in normalized form.

    The text is handled as three parts:

    * everything before the header entry (``msgid ""``): lines matched by
      ``header_comment_strip_re`` are removed, other comments are kept;
    * the header entry itself (up to the first empty line): fields matched
      by ``header_normalize_re`` are removed and the charset spelling is
      changed to ``UTF-8``;
    * the remaining chunks, separated by empty lines: comment lines are
      removed, and chunks that are flagged fuzzy, contain no lines at all,
      or are message entries with only empty ``msgstr`` strings are dropped.

    The parts are joined with single empty lines between them.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # find() returns -1 when there is no '\nmsgid ""\n'; the +1 then makes the
    # header start at offset 0, i.e. the text is treated as having no leading
    # comment block.
    header_start = raw_content.find('\nmsgid ""\n') + 1
    # The header runs up to (and including) the newline that ends its last
    # line; without any empty line the whole remainder is the header.
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
        .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
        .replace(
            r'"Content-Type: text/plain; charset=utf-8\n"',
            r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
        .strip(),
        '']  # preserve normalized header
    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        # The flag might be like "#, fuzzy, python-format". After splitting,
        # a chunk has no leading newline, so the flag can also be its very
        # first line - check for both positions.
        if raw_chunk.startswith('#, fuzzy') or '\n#, fuzzy' in raw_chunk:
            continue  # drop crazy auto translation that is worse than useless
        # strip all comment lines from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue
        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue
        chunks.append('\n'.join(chunk_lines) + '\n')
    return '\n'.join(chunks)


def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
    """Normalize the file ``po_file`` in place.

    If ``merge_pot_file`` is given, ``msgmerge --update`` is run first to
    merge that template into ``po_file``. If ``strip`` is true, the file
    content is then rewritten with the result of ``_normalize_po``.

    Raises ``subprocess.CalledProcessError`` if ``msgmerge`` fails.
    """
    if merge_pot_file:
        runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
                '--update', po_file, '-q', merge_pot_file])
    if strip:
        # Write to a sibling file and rename it over the original, so the
        # original is only replaced once the new content is complete.
        po_tmp = po_file + '.tmp'
        try:
            with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
                raw_content = src.read()
                normalized_content = _normalize_po(raw_content)
                dest.write(normalized_content)
            os.rename(po_tmp, po_file)
        except Exception:
            # Don't leave a half-written temporary file next to the PO file.
            if os.path.exists(po_tmp):
                os.remove(po_tmp)
            raise


def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
    """Run ``diff -u`` on normalized temporary copies of ``file1`` and ``file2``.

    Both copies are passed through ``_normalize_po_file`` with the given
    ``merge_pot_file`` and ``strip`` before being compared; the original
    files are not modified.

    Returns ``None`` if ``diff`` exits with status 0, otherwise the non-zero
    exit status of ``diff``.
    """
    # Create temporary copies of both files; the context managers make sure
    # they are closed and removed again, also if normalization fails.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
        _normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
    return None