diff --git a/scripts/build/msgfmt.py b/scripts/build/msgfmt.py index c673d177481..b692a300665 100755 --- a/scripts/build/msgfmt.py +++ b/scripts/build/msgfmt.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python2 -# -*- coding: iso-8859-1 -*- +#! /usr/bin/env python3 # Written by Martin v. Löwis """Generate binary message catalog from textual translation description. @@ -31,21 +30,22 @@ import ast import getopt import struct import array +from email.parser import HeaderParser __version__ = "1.1" MESSAGES = {} - + def usage(code, msg=''): - print >> sys.stderr, __doc__ + print(__doc__, file=sys.stderr) if msg: - print >> sys.stderr, msg + print(msg, file=sys.stderr) sys.exit(code) - + def add(id, str, fuzzy): "Add a non-fuzzy translation to the dictionary." global MESSAGES @@ -53,21 +53,20 @@ def add(id, str, fuzzy): MESSAGES[id] = str - + def generate(): "Return the generated output." global MESSAGES - keys = MESSAGES.keys() # the keys are sorted in the .mo file - keys.sort() + keys = sorted(MESSAGES.keys()) offsets = [] - ids = strs = '' + ids = strs = b'' for id in keys: # For each string, we need size and file offset. Each string is NUL # terminated; the NUL does not count into the size. offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) - ids += id + '\0' - strs += MESSAGES[id] + '\0' + ids += id + b'\0' + strs += MESSAGES[id] + b'\0' output = '' # The header is 7 32-bit unsigned integers. We don't use hash tables, so # the keys start right after the index tables. @@ -84,7 +83,7 @@ def generate(): voffsets += [l2, o2+valuestart] offsets = koffsets + voffsets output = struct.pack("Iiiiiii", - 0x950412deL, # Magic + 0x950412de, # Magic 0, # Version len(keys), # # of entries 7*4, # start of key index @@ -96,7 +95,7 @@ def generate(): return output - + def make(filename, outfile): ID = 1 STR = 2 @@ -110,17 +109,22 @@ def make(filename, outfile): outfile = os.path.splitext(infile)[0] + '.mo' try: - lines = open(infile).readlines() - except IOError, msg: - print >> sys.stderr, msg + lines = open(infile, 'rb').readlines() + except IOError as msg: + print(msg, file=sys.stderr) sys.exit(1) section = None fuzzy = 0 + # Start off assuming Latin-1, so everything decodes without failure, + # until we know the exact encoding + encoding = 'latin-1' + # Parse the catalog lno = 0 for l in lines: + l = l.decode(encoding) lno += 1 # If we get a comment line after a msgstr, this is a new entry if l[0] == '#' and section == STR: @@ -137,34 +141,40 @@ def make(filename, outfile): if l.startswith('msgid') and not l.startswith('msgid_plural'): if section == STR: add(msgid, msgstr, fuzzy) + if not msgid: + # See whether there is an encoding declaration + p = HeaderParser() + charset = p.parsestr(msgstr.decode(encoding)).get_content_charset() + if charset: + encoding = charset section = ID l = l[5:] - msgid = msgstr = '' + msgid = msgstr = b'' is_plural = False # This is a message with plural forms elif l.startswith('msgid_plural'): if section != ID: - print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\ - (infile, lno) + print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno), + file=sys.stderr) sys.exit(1) l = l[12:] - msgid += '\0' # separator of singular and plural + msgid += b'\0' # separator of singular and plural is_plural = True # Now we are in a msgstr section elif l.startswith('msgstr'): section = STR if l.startswith('msgstr['): if not is_plural: - print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\ - (infile, lno) + print('plural without msgid_plural on %s:%d' % (infile, lno), + file=sys.stderr) sys.exit(1) l = l.split(']', 1)[1] if msgstr: - msgstr += '\0' # Separator of the various plural forms + msgstr += b'\0' # Separator of the various plural forms else: if is_plural: - print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\ - (infile, lno) + print('indexed msgstr required for plural on %s:%d' % (infile, lno), + file=sys.stderr) sys.exit(1) l = l[6:] # Skip empty lines @@ -173,13 +183,13 @@ def make(filename, outfile): continue l = ast.literal_eval(l) if section == ID: - msgid += l + msgid += l.encode(encoding) elif section == STR: - msgstr += l + msgstr += l.encode(encoding) else: - print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \ - 'before:' - print >> sys.stderr, l + print('Syntax error on %s:%d' % (infile, lno), \ + 'before:', file=sys.stderr) + print(l, file=sys.stderr) sys.exit(1) # Add last entry if section == STR: @@ -190,16 +200,16 @@ def make(filename, outfile): try: open(outfile,"wb").write(output) - except IOError,msg: - print >> sys.stderr, msg + except IOError as msg: + print(msg, file=sys.stderr) + - def main(): try: opts, args = getopt.getopt(sys.argv[1:], 'hVo:', ['help', 'version', 'output-file=']) - except getopt.error, msg: + except getopt.error as msg: usage(1, msg) outfile = None @@ -208,14 +218,14 @@ def main(): if opt in ('-h', '--help'): usage(0) elif opt in ('-V', '--version'): - print >> sys.stderr, "msgfmt.py", __version__ + print("msgfmt.py", __version__) sys.exit(0) elif opt in ('-o', '--output-file'): outfile = arg # do it if not args: - print >> sys.stderr, 'No input file given' - print >> sys.stderr, "Try `msgfmt --help' for more information." + print('No input file given', file=sys.stderr) + print("Try `msgfmt --help' for more information.", file=sys.stderr) return for filename in args: