From be7cc43b0845591b2a993731509bbf95e4705b17 Mon Sep 17 00:00:00 2001 From: Miodrag Milanovic Date: Sun, 21 Feb 2016 14:45:38 +0100 Subject: [PATCH] Placed official version, and removed other files since work only with parsing python (nw) --- scripts/build/check_po.py | 490 --------------------------- scripts/build/msgfmt.py | 83 ++--- scripts/build/msgmerge.py | 675 ------------------------------------- scripts/build/pygettext.py | 672 ------------------------------------ 4 files changed, 45 insertions(+), 1875 deletions(-) delete mode 100644 scripts/build/check_po.py delete mode 100644 scripts/build/msgmerge.py delete mode 100644 scripts/build/pygettext.py diff --git a/scripts/build/check_po.py b/scripts/build/check_po.py deleted file mode 100644 index 5328b76b1b4..00000000000 --- a/scripts/build/check_po.py +++ /dev/null @@ -1,490 +0,0 @@ -#! /usr/bin/env python -# -# check_po - a gramps tool to check validity of po files -# -# Copyright (C) 2006-2006 Kees Bakker -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -# -# TODO -# -# * Check for HTML text in msgstr when there is none in msgid -# * Check for matching HTML tag/endtag in msgstr -# - -# Adapted for Umit by Guilherme Polo, original file: -# https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po - -import re -import sys -from optparse import OptionParser - -APP = "Umit" - -all_total = {} -all_fuzzy = {} -all_untranslated = {} -all_percent_s = {} -all_named_s = {} -all_bnamed_s = {} -all_context = {} -all_coverage = {} -all_template_coverage = {} - -def strip_quotes(st): - if len(st) >= 2 and st[0] == '"' and st[len(st)-1] == '"': - st = st.strip()[1:-1] - return st - -# This is a base class for all checks -class Check: - def __init__( self ): - self.msgs = [] - def diag( self ): - if len( self.msgs ): - print - print self.diag_header - for m in self.msgs: - m.diag() - def summary( self ): - print "%-20s%d" % ( self.summary_text, len(self.msgs) ) - -class Check_fmt( Check ): - def __init__( self, fmt ): - Check.__init__( self ) - self.diag_header = "-------- %s mismatches --------------" % fmt - self.summary_text = "%s mismatches:" % fmt - self.fmt = fmt - def process( self, msg ): - msgid = msg.msgid - msgstr = msg.msgstr - cnt1 = msgid.count( self.fmt ) - cnt2 = msgstr.count( self.fmt ) - if cnt1 != cnt2: - self.msgs.append( msg ) - -class Check_named_fmt( Check ): - # A pattern to find all %() - find_named_fmt_pat = re.compile('% \( \w+ \) \d* \D', re.VERBOSE) - - def __init__( self ): - Check.__init__( self ) - self.diag_header = "-------- %() name mismatches --------------" - self.summary_text = "%() name mismatches:" - def process( self, msg ): - msgid = msg.msgid - msgstr = msg.msgstr - # Same number of named formats? - fmts1 = self.find_named_fmt_pat.findall( msgid ) - fmts2 = self.find_named_fmt_pat.findall( msgstr ) - if len( fmts1 ) != len( fmts2 ): - self.msgs.append( msg ) - else: - # Do we have the same named formats? - fmts1.sort() - fmts2.sort() - if fmts1 != fmts2: - self.msgs.append( msg ) - -class Check_missing_sd( Check ): - # A pattern to find %() without s or d - # Here is a command to use for testing - # print re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' ) - find_named_fmt_pat2 = re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE) - - def __init__( self ): - Check.__init__( self ) - self.diag_header = "-------- %() without 's' or 'd' mismatches --------------" - self.summary_text = "%() missing s/d:" - def process( self, msg ): - msgstr = msg.msgstr - fmts = self.find_named_fmt_pat2.findall( msgstr ) - for f in fmts: - if not f in ('s', 'd'): - self.msgs.append( msg ) - break - -class Check_runaway( Check ): - def __init__( self ): - Check.__init__( self ) - self.diag_header = "-------- Runaway context in translation ---------" - self.summary_text = "Runaway context:" - def process( self, msg ): - msgid = msg.msgid - msgstr = msg.msgstr - - # Runaway context. In the translated part we only to see - # the translation of the word after the | - if msgid.count('|') > 0 and msgstr.count('|') > 0 and msgid != msgstr: - self.msgs.append( msg ) - -class Check_xml_chars( Check ): - # Special XML characters - # It is not allowed to have a quote, an ampersand or an angle bracket - xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE ) - - def __init__( self ): - Check.__init__( self ) - self.diag_header = "-------- unescaped XML special characters ---------" - self.summary_text = "XML special chars:" - def process( self, msg ): - msgid = msg.msgid - msgstr = msg.msgstr - - # XML errors - # Only look at messages in the tips.xml - if msg.is_tips_xml: - if self.xml_chars_pat.search( msgstr ): - self.msgs.append( msg ) - -class Check_last_char( Check ): - def __init__( self ): - Check.__init__( self ) - self.diag_header = "-------- last character not identical ---------" - self.summary_text = "Last character:" - def process( self, msg ): - msgid = msg.msgid - msgstr = msg.msgstr - - # Last character of msgid? White space? Period? - if msg.is_fuzzy: - return - - msgid_last = msgid[-1:] - msgstr_last = msgstr[-1:] - if msgid_last.isspace() != msgstr_last.isspace(): - self.msgs.append( msg ) - elif (msgid_last == '.') != (msgstr_last == '.'): - self.msgs.append( msg ) - -class Check_shortcut_trans( Check ): - def __init__( self ): - Check.__init__( self ) - self.diag_header = "-------- shortcut key in translation ---------" - self.summary_text = "Shortcut in msgstr:" - def process( self, msg ): - msgid = msg.msgid - msgstr = msg.msgstr - - if msgid.count('_') == 0 and msgstr.count('_') > 0: - self.msgs.append( msg ) - -class Msgid: - fuzzy_pat = re.compile( 'fuzzy' ) - tips_xml_pat = re.compile( r'tips\.xml' ) - def __init__( self, msgnr, lineno ): - self._msgid = [] - self._msgstr = [] - self.msgid = '' - self.msgstr = '' - self._cmnt = [] - self.nr = msgnr - self.lineno = lineno - self.is_fuzzy = 0 - self.is_tips_xml = 0 - - def diag( self ): - if 1: - print - print "msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, self.is_fuzzy and " (fuzzy)" or "" ) - sys.stdout.write( ''.join( self._msgid ) ) - sys.stdout.write( ''.join( self._msgstr ) ) - else: - # Compatible with the old check_po - print "%d '%s' : '%s'" % ( self.lineno, self.msgid, self.msgstr ) - - def add_msgid( self, line, lineno ): - self._msgid.append( line ) - line = re.sub( r'msgid\s+', '', line ) - line = line.strip() - if line[0] != '"' or line[-1:] != '"': - print "ERROR at line %d: Missing quote." % lineno - line = strip_quotes( line ) - self.msgid += line - - def add_msgstr( self, line, lineno ): - self._msgstr.append( line ) - line = re.sub( r'msgstr\s+', '', line ) - line = line.strip() - if line[0] != '"' or line[-1:] != '"': - print "ERROR at line %d: Missing quote." % lineno - line = strip_quotes( line ) - self.msgstr += line - - def add_cmnt( self, line ): - self._cmnt.append( line ) - if not self.is_fuzzy and self.fuzzy_pat.search( line ): - self.is_fuzzy = 1 - if not self.is_tips_xml and self.tips_xml_pat.search( line ): - self.is_tips_xml = 1 - -def read_msgs( fname ): - empty_pat = re.compile( r'^ \s* $', re.VERBOSE ) - comment_pat = re.compile( r'\#', re.VERBOSE ) - msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE ) - msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE ) - str_pat = re.compile( r'"', re.VERBOSE ) - old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE ) - - msgnr = 0 # This is the message number of the next message to read. The first real message is 1. - f = open( fname ) - lines = f.readlines() - - # parse it like a statemachine - NONE = 0 # Nothing detected, yet - CMNT = 1 # Inside comment part - MSGID = 2 # Inside msgid part - MSGSTR = 3 # Inside msgstr part - STR = 4 # A continuation string - OLD = 5 # An old pattern with #~ - - state = NONE - msg = None - msgs = [] - - for ix in range( len(lines) ): # Use line numbers for messages - line = lines[ix] - lineno = ix + 1 - - m = empty_pat.match( line ) - if m: - continue # Empty lines are not interesting - - # What's the next state? - if old_pat.match( line ): - next_state = OLD - elif comment_pat.match( line ): - next_state = CMNT - elif msgid_pat.match( line ): - next_state = MSGID - elif msgstr_pat.match( line ): - next_state = MSGSTR - elif str_pat.match( line ): - next_state = STR - else: - print 'WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars() - next_state = NONE - - #print "%(state)d->%(next_state)d\t%(line)s" % vars() - if state == NONE: - # expect msgid or comment or old stuff - if next_state == CMNT: - state = CMNT - msg = Msgid( msgnr, lineno ) # Start with an empty new item - msgnr += 1 - msgs.append( msg ) - msg.add_cmnt( line ) - - elif next_state == MSGID: - state = MSGID - msg = Msgid( msgnr, lineno ) # Start with an empty new item - msgnr += 1 - msgs.append( msg ) - msg.add_msgid( line, lineno ) - - elif next_state == MSGSTR: - print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars() - state = MSGSTR - msg = Msgid( msgnr, lineno ) # Start with an empty new item - msgnr += 1 - msgs.append( msg ) - msg.add_msgstr( line, lineno ) - - elif next_state == STR: - print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars() - - elif next_state == OLD: - pass # Just skip - - elif state == CMNT: - if next_state == CMNT: - if msg: - msg.add_cmnt( line ) - else: - # Note. We may need to do something about these comments - # Skip for now - pass - - elif next_state == MSGID: - state = MSGID - if not msg: - msg = Msgid( msgnr, lineno ) # Start with an empty new item - msgnr += 1 - msgs.append( msg ) - msg.add_msgid( line, lineno ) - - elif next_state == MSGSTR: - print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars() - state = MSGSTR - msg = Msgid( msgnr, lineno ) # Start with an empty new item - msgnr += 1 - msgs.append( msg ) - msg.add_msgstr( line, lineno ) - - elif next_state == STR: - print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars() - - elif next_state == OLD: - msg = None - pass # Just skip - - elif state == MSGID: - if next_state == CMNT: - # Hmmm. A comment here? - print 'WARNING: Unexpted comment at %(fname)s:%(lineno)d' % vars() - - elif next_state == MSGID: - raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % vars() ) - - elif next_state == MSGSTR: - state = MSGSTR - msg.add_msgstr( line, lineno ) - - elif next_state == STR: - msg.add_msgid( line, lineno ) - - elif next_state == OLD: - msg = None - pass # Just skip - - elif state == MSGSTR: - if next_state == CMNT: - # A comment probably starts a new item - state = CMNT - msg = Msgid( msgnr, lineno ) - msgnr += 1 - msgs.append( msg ) - msg.add_cmnt( line ) - - elif next_state == MSGID: - state = MSGID - msg = Msgid( msgnr, lineno ) - msgnr += 1 - msgs.append( msg ) - msg.add_msgid( line, lineno ) - - elif next_state == MSGSTR: - raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % vars() ) - - elif next_state == STR: - msg.add_msgstr( line, lineno ) - - elif next_state == OLD: - msg = None - pass # Just skip - - else: - raise Exception( 'Unexpected state in po parsing (state = %d)' % state ) - - # Strip items with just comments. (Can this happen?) - msgs1 = [] - for m in msgs: - if not m.msgid and not m.msgstr: - #print "INFO: No msgid or msgstr at %s:%s" % ( fname, m.lineno ) - pass - else: - msgs1.append( m ) - msgs = msgs1 - return msgs - -def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ): - nr_fuzzy = 0 - nr_untranslated = 0 - - checks = [] - checks.append( Check_fmt( '%s' ) ) - checks.append( Check_fmt( '%d' ) ) - checks.append( Check_named_fmt() ) - checks.append( Check_missing_sd() ) - checks.append( Check_runaway() ) - checks.append( Check_xml_chars() ) - checks.append( Check_last_char() ) - checks.append( Check_shortcut_trans() ) - - for msg in msgs: - msgid = msg.msgid - msgstr = msg.msgstr - #print - #print "msgid: %(msgid)s" % vars() - #print "msgstr: %(msgstr)s" % vars() - - if not msgstr: - nr_untranslated += 1 - continue - - if msg.is_fuzzy: - nr_fuzzy += 1 - if options.skip_fuzzy: - continue - - for c in checks: - c.process( msg ) - - nr_msgs = len(msgs) - if nth > 0: - print - print "=====================================" - print "%-20s%s" % ( "File:", fname ) - print "%-20s%d" % ( "Template total:", nr_templates ) - print "%-20s%d" % ( "PO total:", nr_msgs ) - print "%-20s%d" % ( "Fuzzy:", nr_fuzzy ) - print "%-20s%d" % ( "Untranslated:", nr_untranslated ) - - for c in checks: - c.summary() - - po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100 - print "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage ) - - template_coverage = po_coverage * float(nr_msgs) / float(nr_templates) - print "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ) - - if not options.only_summary: - for c in checks: - c.diag() - -def main(args): - if len(sys.argv) < 2: - print "Error: Especify the umit.pot file path" - sys.exit(1) - - parser = OptionParser(description="This program validates a PO file for " - "%s." % APP, usage='%prog [options] po-file...' ) - - parser.add_option("", "--skip-fuzzy", - action="store_true", dest="skip_fuzzy", default=False, - help="skip fuzzies") - - parser.add_option("-s", "--only-summary", - action="store_true", dest="only_summary", default=False, - help="only give the summary") - - options, args = parser.parse_args() - - try: - pot_msgs = read_msgs(sys.argv[1]) - nr_templates = len(pot_msgs) - nth = 0 - for fname in args: - msgs = read_msgs(fname) - analyze_msgs(options, fname, msgs, nr_templates, nth) - nth += 1 - - except Exception, e: - print e - -if __name__ == "__main__": - main(sys.argv) diff --git a/scripts/build/msgfmt.py b/scripts/build/msgfmt.py index 0ac8fd60a8d..f28c2fcf6c6 100644 --- a/scripts/build/msgfmt.py +++ b/scripts/build/msgfmt.py @@ -1,14 +1,6 @@ -#! /usr/bin/env python +#!/usr/bin/env python2 # -*- coding: iso-8859-1 -*- # Written by Martin v. Löwis -# -# Changelog: (Guilherme Polo) -# 2008-04-11 -# - Support for files with BOM UTF8 mark. -# -# 2008-04-10 -# - Support for fuzzy strings in output. -# - Bumped to version 1.1.1 """Generate binary message catalog from textual translation description. @@ -24,10 +16,6 @@ Options: Specify the output file to write to. If omitted, output will go to a file named filename.mo (based off the input file name). - -f - --use-fuzzy - Use fuzzy entries in output - -h --help Print this message and exit. @@ -35,23 +23,21 @@ Options: -V --version Display version information and exit. - -Before using the -f (fuzzy) option, read this: - http://www.finesheer.com:8457/cgi-bin/info2html?(gettext)Fuzzy%20Entries&lang=en """ -import sys import os +import sys +import ast import getopt import struct import array -import codecs -__version__ = "1.1.1" +__version__ = "1.1" MESSAGES = {} + def usage(code, msg=''): print >> sys.stderr, __doc__ if msg: @@ -59,13 +45,15 @@ def usage(code, msg=''): sys.exit(code) -def add(id, str, fuzzy, use_fuzzy): - "Add a translation to the dictionary." + +def add(id, str, fuzzy): + "Add a non-fuzzy translation to the dictionary." global MESSAGES - if (not fuzzy or use_fuzzy) and str: + if not fuzzy and str: MESSAGES[id] = str + def generate(): "Return the generated output." global MESSAGES @@ -108,7 +96,8 @@ def generate(): return output -def make(filename, outfile, use_fuzzy): + +def make(filename, outfile): ID = 1 STR = 2 @@ -122,8 +111,6 @@ def make(filename, outfile, use_fuzzy): try: lines = open(infile).readlines() - if lines[0].startswith(codecs.BOM_UTF8): - lines[0] = lines[0][len(codecs.BOM_UTF8):] except IOError, msg: print >> sys.stderr, msg sys.exit(1) @@ -137,7 +124,7 @@ def make(filename, outfile, use_fuzzy): lno += 1 # If we get a comment line after a msgstr, this is a new entry if l[0] == '#' and section == STR: - add(msgid, msgstr, fuzzy, use_fuzzy) + add(msgid, msgstr, fuzzy) section = None fuzzy = 0 # Record a fuzzy mark @@ -147,22 +134,44 @@ def make(filename, outfile, use_fuzzy): if l[0] == '#': continue # Now we are in a msgid section, output previous section - if l.startswith('msgid'): + if l.startswith('msgid') and not l.startswith('msgid_plural'): if section == STR: - add(msgid, msgstr, fuzzy, use_fuzzy) + add(msgid, msgstr, fuzzy) section = ID l = l[5:] msgid = msgstr = '' + is_plural = False + # This is a message with plural forms + elif l.startswith('msgid_plural'): + if section != ID: + print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\ + (infile, lno) + sys.exit(1) + l = l[12:] + msgid += '\0' # separator of singular and plural + is_plural = True # Now we are in a msgstr section elif l.startswith('msgstr'): section = STR - l = l[6:] + if l.startswith('msgstr['): + if not is_plural: + print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\ + (infile, lno) + sys.exit(1) + l = l.split(']', 1)[1] + if msgstr: + msgstr += '\0' # Separator of the various plural forms + else: + if is_plural: + print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\ + (infile, lno) + sys.exit(1) + l = l[6:] # Skip empty lines l = l.strip() if not l: continue - # XXX: Does this always follow Python escape semantics? - l = eval(l) + l = ast.literal_eval(l) if section == ID: msgid += l elif section == STR: @@ -174,7 +183,7 @@ def make(filename, outfile, use_fuzzy): sys.exit(1) # Add last entry if section == STR: - add(msgid, msgstr, fuzzy, use_fuzzy) + add(msgid, msgstr, fuzzy) # Compute output output = generate() @@ -185,15 +194,15 @@ def make(filename, outfile, use_fuzzy): print >> sys.stderr, msg + def main(): try: - opts, args = getopt.getopt(sys.argv[1:], 'hVo:f', - ['help', 'version', 'output-file=', 'use-fuzzy']) + opts, args = getopt.getopt(sys.argv[1:], 'hVo:', + ['help', 'version', 'output-file=']) except getopt.error, msg: usage(1, msg) outfile = None - use_fuzzy = False # parse options for opt, arg in opts: if opt in ('-h', '--help'): @@ -201,8 +210,6 @@ def main(): elif opt in ('-V', '--version'): print >> sys.stderr, "msgfmt.py", __version__ sys.exit(0) - elif opt in ('-f', '--use-fuzzy'): - use_fuzzy = True elif opt in ('-o', '--output-file'): outfile = arg # do it @@ -212,7 +219,7 @@ def main(): return for filename in args: - make(filename, outfile, use_fuzzy) + make(filename, outfile) if __name__ == '__main__': diff --git a/scripts/build/msgmerge.py b/scripts/build/msgmerge.py deleted file mode 100644 index 82fe3efdb97..00000000000 --- a/scripts/build/msgmerge.py +++ /dev/null @@ -1,675 +0,0 @@ -#! /usr/bin/env python -# -*- coding: iso-8859-1 -*- -# -# Copyright Terje Røsten Nov. 2003. -# -'''Merge two Uniforum style .po files together. - -This is a implementation (not complete) in Python of the GNU -msgmerge(1) program. It can be used on the command line (or as a Python -module). - -Usage: msgmerge.py [OPTIONS] def.po ref.pot - -The def.po file is an existing PO file with translations. The ref.pot -file is the last created PO file with up-to-date source references but -old translations, or a PO Template file. - -Options: - -U, --update update def.po, - do nothing if def.po is already up to date. - -o, --output-file=FILE write output to file FILE. Output is written to - stdout if set to - or if the option is not present. - -D, --docstrings don\'t remove docstring flag. - -h, --help display help text and exit. - -V, --version display version and exit. - -q, --quiet, --silent suppress progress indicators. -''' -from __future__ import generators - -if not __name__ == '__main__': - __doc__ += '''\ - -When used as module the interesting functions are merge() and -merge_dir(). - -The merge() function does the same as the command line version, and -the arguments are as follows. The first argument is the def.po file, -then the ref.pot file. The third argument controls whether do work in -update mode or not, then the next argument sets the output file. Set -the next argument to False to remove docstring flags. The last -argument can be used to suppress progress indicators. The default is -to work in update mode with progress indicators. - -Example: - merge("def.po", "ref.pot") - merge the files def.po and ref.pot and write output to def.po if - there are any changes. - merge("def.po", "red.pot", docstrings = False, verbose = False, - update = False, outfile = "-") - merge the files def.po and ref.pot and write output to stdout, - remove docstring flag and be quiet. - -The merge_dir() function is useful when merging a directory of po -files. The only required argument is the name of the directory with po -files and the pot file. It will use simple glob to find the files. The -second argument can be used to specify the pot file (in the -directory). Third argument is a list of po files (then globbing will -not be used) and the next argument is list of filename to exclude. The -last argument can be used to suppress progress indicators. Docstring -flag will not be removed. - -Example: - merge_dir("po") - merge (and update) all po files in directory po with the single pot - file in the same directory. - -The module raises the MsgmergeError exception in case of error. -''' -__revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $' -__version__ = '0.1' -name = 'msgmerge.py' - -__all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ] - -import sys -import re -import string -import getopt -import difflib -import glob -import os.path -import codecs - -try: - True, False -except NameError: - True, False = 1, 0 - -class Msgs: - '''Class to hold information about messages.''' - width = 80 - file = '' - def __init__(self, msgid, msgstr, flag, lno, entry, **kwds): - self.id = msgid - self.str = msgstr - self.cmt = kwds.get('cmt', '') - self.ref = kwds.get('ref', '') - self.autocmt = kwds.get('autocmt', '') - self.flag = flag - self.entry = entry - self.lno = lno - self.count = 0 - def wash(self): - self.id = wash(self.id, width = self.width, - filename = self.file, lno = self.lno) - self.str = wash(self.str, 'msgstr', width = self.width, - filename = self.file, lno = self.lno) - def used(self): - self.count += 1 - def get_clean_id(self): - return self.id.replace('msgid "','', 1) - def obsolete(self): - self.width -= len('#~ ') - self.wash() - t = [ '#~ %s\n' % s for s in self.id.splitlines() ] - self.id = ''.join(t) - t = [ '#~ %s\n' % s for s in self.str.splitlines() ] - self.str = ''.join(t) - -class Options: - '''Class to hold options''' - def __init__(self, cmdline = False, **kwds): - if not cmdline: - self.update = kwds.get('update', True) - self.outfile = kwds.get('outfile', '-') - self.docstrings = kwds.get('docstrings', True) - self.verbose = kwds.get('verbose', False) - self.suffix = kwds.get('suffix', '~') - self.backup = kwds.get('backup', True) - else: - self.update = False - self.outfile = False - self.docstrings = False - self.verbose = True - self.suffix = '~' - self.backup = True - -class MsgmergeError(Exception): - '''Exception class for msgmerge''' - -def gen(lines): - ''' - Generator which returns a line (with the obsolete prefix removed) - from the list of lines in , the line number is also - returned. - ''' - lno = 0 - for l in lines: - lno += 1 - yield l.replace('#~ ', '', 1), lno - yield l, lno - -def slurp(s, g, sign): - ''' - The string returned from iterator \'s next() method is added to - the string if string returned is beginning with the string - . The return value is the first returned string which do not - start with , the line number, the iterator and the - (possibly) updated string . - ''' - l, lno = g.next() - while l.startswith(sign) or (sign == '# ' and l.strip() == '#'): - s += l - l, lno = g.next() - return l, lno, g, s - -def splitted_fit(chunk, line, width, break_always, break_after_space): - ''' - Check if string can be splitted by newline to fit into - string with width smaller than . The return value is - a tuple where the first element is the part of chunk which fits - and the second element is the rest of chunk. - ''' - ret = '', chunk - l = len(chunk) - for i in range(l - 1, -1, -1): - if chunk[i] in break_always and len(chunk[0:i] + line) <= width: - ret = chunk[0:i], chunk[i:] - break - elif chunk[i] in break_after_space and i and chunk[i-1].strip() == '': - ret = chunk[0:i], chunk[i:] - break - elif chunk[i] == '\\' and len(chunk[i:]) > 1 and chunk[i+1] == '"' \ - and len(chunk[0:i] + line) <= width: - ret = chunk[0:i], chunk[i:] - break - return ret - -def wrap(msg, width): - ''' - Accept a list of strings to wrap, each string is wrapped to - width and surrounded with a pair of ". The return value is - a string with these wrapped strings joined together with newlines. - ''' - if msg.isspace() or not msg: - return '"%s"' % msg - - # \ and " is here, but " is special in po files. - break_always = '$%+({[' - # XXX what about: « © » ¦ § etc? - break_after_space = '_-=^`~\'<|>&*#@' - enders = '.:,;!?/])}|%-' - extra = string.punctuation - for c in enders: - extra = extra.replace(c, '') - escaped = { 'enders' : re.escape(enders), - 'extra' : re.escape(extra) } - regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped - r = re.compile(regex, re.UNICODE) - msg = [ m for m in r.split(msg) if not m == ''] - - lines = [] - line = msg.pop(0) - - # Handle \n on end of line - if len(msg) > 1 and msg[-1] == 'n' and len(msg[-2]) > 0 \ - and msg[-2][-1] == '\\': - msg[-2] += msg[-1] - msg.pop() - # Do not allow a single \n on a line - if len(msg) > 2 and msg[-1] == '\\n': - msg[-2] += msg[-1] - msg.pop() - - for m in msg: - if len(line) > width or len(m) > width or len(line + m) > width: - fit, rest = splitted_fit(m, line, width, break_always, - break_after_space) - line += fit - lines.append(line) - line = rest - else: - line += m - lines.append(line) - lines = [ '"%s"' % l for l in lines ] - return '\n'.join(lines) - -def normalize(lines): - ''' - Normalize : e.g "\n\nText\n\n" becomes: - "\n" - "\n" - "Text\n" - "\n" - ''' - if 0 < lines.find('\\n') < len(lines) - 3: - if lines[-3:] == '\\n"': - lines = lines[:-3].replace('\\n','\\n"\n"').replace('""\n','') \ - + '\\n"' - else: - lines = lines.replace('\\n','\\n"\n"').replace('""\n','') - return lines - -def wash(msg, idx = 'msgid', width = 80, **kwds): - ''' - Do washing on the msgstr or msgid fields. Wrap the text to fit in - width . is a list of lines that makes up the field. - indicate msgid or msgstr, holds the width. - and (line number) is picked up from . - Returns the washed field as a string. - ''' - msg = normalize(msg) - lines = msg.splitlines() - size = len(lines) - if size > 1 or len(msg) > width: - washed = [] - # The first line is special - m = re.match('^%s "(.*)"$' % (idx, ), lines[0]) - if not m: - print lines[0] - kwds['lno'] -= size + 1 - raise MsgmergeError('parse error: %(filename)s:%(lno)s.' - % kwds) - washed.append(m.group(1)) - if m.group(1).endswith(r'\n'): - washed.append('') - i = 0 - for line in lines[1:]: - m = re.match('^"(\s*.*)"$', line) - i += 1 - if not m: - print line - kwds['lno'] -= size - i + 1 - raise MsgmergeError('parse error: %(filename)s:%(lno)s.' - % kwds) - washed[-1] += m.group(1) - if m.group(1).endswith(r'\n'): - washed.append('') - if washed[0] == '': - washed.pop(0) - if washed[-1] == '': - washed.pop() - - washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed. - - # One line or multiline - if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width: - washed = '%s %s\n' % (idx, washed[0]) - else: - washed = '%s ""\n%s\n' % (idx, '\n'.join(washed)) - else: - washed = msg - - return washed - -def parse(filename, entry): - ''' - Parse po or pot file with name . Set the variable - to msgid/msgstr to indicate pot/po file. The return value - is a dict with msgid (washed) as key and Msgs instances as - values. - ''' - lines = io(filename).readlines() - Msgs.file = filename - messages = {} - last = len(lines) - g = gen(lines) - cmt = autocmt = ref = flag = '' - msgid = False - lno = 0 - while not lno == last: - l, lno = g.next() - if l.startswith('# '): - l, lno, g, cmt = slurp(l, g, '# ') - if l.startswith('#.'): - l, lno, g, autocmt = slurp(l, g, '#.') - if l.startswith('#:'): - l, lno, g, ref = slurp(l, g, '#:') - if l.startswith('#,'): - l, lno, g, flag = slurp(l, g, '#,') - if l.startswith('msgid'): - l, lno, g, msgid = slurp(l, g, '"') - if l.startswith('msgstr'): - l, lno, g, msgstr = slurp(l, g, '"') - - if not lno == last and not l.strip() == '': - raise MsgmergeError('parse error: %s:%s.' % (filename, lno)) - - if msgid and entry == 'msgstr': - idx = wash(msgid, filename = filename, lno = lno) - messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, cmt = cmt) - msgid = False; msgstr = cmt = autocmt = ref = flag = '' - elif msgid and entry == 'msgid': - idx = wash(msgid, filename = filename, lno = lno) - messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, - autocmt = autocmt, ref = ref) - msgid = False; msgstr = cmt = autocmt = ref = flag = '' - - for m in messages.values(): - m.wash() - return messages - -def fuzzy_match(pot, defs): - ''' - Try to find the best difflib match (with ratio > 0.6) between - id of Msgs object and Msgs in the dict . - Return value is the Msgs object in with highest ratio, - False is returned if no suitable Msgs is found. - ''' - limit = 0.6 - l, po = limit - 0.01, False - s = difflib.SequenceMatcher(lambda x: x == ' "', '', pot.get_clean_id()) - len2 = len(pot.get_clean_id()) - for candidate in defs.values(): - if candidate.str == 'msgstr ""\n': # Empty translation - continue - if candidate.id == 'msgid ""\n': # Empty msgid (header) - continue - len1 = len(candidate.get_clean_id()) - if len2 > 2 * len1 or len1 > 1.5 * len2: # Simple and fast tests first - continue - s.set_seq1(candidate.get_clean_id()) - if s.quick_ratio() < l: - continue - r = s.ratio() # This is expensive - if r > l: - l, po = r, candidate - return po - -def flags(po, pot, fuzzy = False, obs = False): - ''' - Create flag field from flag field in Msgs objects and - . When is true \'s flags are ignored and the - fuzzy flag is added. If is set then most flags but fuzzy are - removed. If the global variable option.docstrings is set then - docstring flags will not be removed. The return value is a string - which holds the combined flag. - ''' - global option - flag = '' - if po.flag or pot.flag or fuzzy: - if not fuzzy: - flag = '%s, %s' % (po.flag.strip(), pot.flag.strip()) - else: - flag = '%s, %s' % ('#, fuzzy', pot.flag.strip()) - flag = flag.split(', ') - fl = {} - flag = [fl.setdefault(f, f) for f in flag if f not in fl and f] - if not option.docstrings: - try: - flag.remove('docstring') - except ValueError: - pass - if obs: - removes = ['c-format', 'python-format', 'docstring'] - for remove in removes: - try: - flag.remove(remove) - except ValueError: - pass - # Put fuzzy first - if 'fuzzy' in flag and not flag.index('fuzzy') == 1: - i = flag.index('fuzzy') - flag[1], flag[i] = flag[i], flag[1] - - if len(flag) == 1: - flag = '' - else: - flag = ', '.join(flag) + '\n' - return flag - -def add(pot, po, fuzzy = False): - ''' - Build a new entry from the Msgs objects and . If - is true, \'s flag field is ignored (in - flags()). Returns a multiline string with a up to date entry. - ''' - msg = [] - msg.append(po.cmt) - msg.append(pot.autocmt) - msg.append(pot.ref) - msg.append(flags(po, pot, fuzzy = fuzzy)) - msg.append(pot.id) - msg.append(po.str) - return ''.join(msg) - -def header(pot, defs): - ''' - Update date in header entry. Returns the updated header entry. - ''' - try: - [po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ] - except ValueError: - raise MsgmergeError('Error: did not find header in po file.') - - r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)', - re.MULTILINE | re.DOTALL) - m = r.match(pot.str) - if not m: - raise MsgmergeError( - 'Error: did not find POT-Creation-Date field in pot file.') - - subs = '\\1%s\\3' % m.group(2) - _, count = r.subn(subs, po.str) - if not count == 1: - raise MsgmergeError( - 'Error: did not find POT-Creation-Date field in po file.') - return po - -def match(defs, refs): - ''' - Try to match Msgs objects in with Msgs objects in - . The return value is a list with po entries. - ''' - global option - matches = [] - empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str') - deco = [(r.lno, r) for r in refs.values()] - deco.sort() - po = header(deco.pop(0)[1], defs) # Header entry - matches.append(add(empty, po)) - po.used() - sorted = [ a[1] for a in deco ] - for pot in sorted: - if option.verbose: - sys.stderr.write('.') - po = defs.get(pot.id, False) # Perfect match - if po: - matches.append(add(pot, po)) - po.used(); pot.used() - continue - po = fuzzy_match(pot, defs) # Fuzzy match - if po: - matches.append(add(pot, po, fuzzy = True)) - po.used(); pot.used() - continue - matches.append(add(pot, empty)) # No match - - obsolete(defs, matches) - return matches - -def obsolete(defs, matches): - '''Handle obsolete translations.''' - deco = [ (d.lno, d) for d in defs.values() if - d.count == 0 and not d.str == 'msgstr ""\n' ] - deco.sort() - empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str') - obs = [ o[1] for o in deco ] - for o in obs: - o.flag = flags(o, empty, obs = True) - o.obsolete() - matches.append('%s%s%s' % (o.flag, o.id, o.str)) - -def help(): - '''Print help text and exit.''' - print __doc__ - sys.exit(0) - -def cmdline(): - '''Parse options and arguments from command line.''' - advice = 'Try `%(name)s --help\' for more information.' - try: - long_opt = ['help', 'version', 'update', 'output-file=', - 'quiet', 'silent', 'docstrings', 'suffix', 'backup'] - opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt) - except getopt.error, msg: - print '%s: %s\n%s' % ('%(name)s', msg, advice) % globals() - sys.exit(1) - - option = Options(cmdline = True) - for opt, arg in opts: - if opt in ['-h', '--help']: - help() - elif opt in ['-V', '--version']: - print '%(name)s %(__version__)s' % globals() - sys.exit(0) - elif opt in ['-o', '--output-file']: - option.outfile = arg - elif opt in ['-U', '--update']: - option.update = True - elif opt in ['-q', '--silent', '--quiet']: - option.verbose = False - elif opt in ['-D', '--docstrings']: - option.docstrings = True - elif opt in ['--suffix']: - option.suffix = arg - elif opt in ['--backup']: - option.backup = arg - - # Sanity checks - warn = False - if option.update and option.outfile: - warn = '--update and --output-file are mutually exclusive.' - if len(args) == 0: - warn = 'no input files given.' - elif len(args) == 1 or len(args) > 2: - warn = 'exactly 2 input files required.' - if warn: - print '%s: %s\n%s' % ('%(name)s', warn, advice) % globals() - sys.exit(1) - - if option.update: - option.outfile = args[0] - elif not option.outfile: - option.outfile = '-' - - defs, refs = args - - try: - merge(defs, refs, option = option) - except MsgmergeError, err: - print '%(name)s: ' % globals() + '%s' % err - sys.exit(1) - -def io(iofile, mode = 'rU'): - '''Wrapper around open().''' - try: - fo = open(iofile, mode) - if 'r' in mode and fo.read(3) != codecs.BOM_UTF8: - fo.seek(0) - - except IOError, msg: - raise MsgmergeError('error while opening file: %s: %s.' % - (msg[1], iofile)) - return fo - -def backup(infile): - '''Handle backup of files in update mode''' - os.environ.get('VERSION_CONTROL', '') - suffix = os.environ.get('SIMPLE_BACKUP_SUFFIX', '~') - - backup_file = '%s%s' % (infile, suffix) - -def changes(new, old): - return cmp(''.join(old), '\n'.join(new)) - -def write(matches, outfile): - '''Write the list to file ''' - if not outfile == '-': - fd = io(outfile, 'w') - else: - fd = sys.stdout - fd.write('\n'.join(matches)) - -def merge(def_file, ref_file, update = True, outfile = '-', - docstrings = True, suffix = '~', backup = True, - verbose = True, **kwds): - ''' - Merge po file with pot file . If is - set to True then only update if there are changes to the po - file. Set outfile to write updated po file to an another file. Set - to `-\' for writing to standard out. If docstrings is False - docstrings flag will removed. Set verbose to False to suppress - progress indicators. is used to pass options from the - command line interface. - ''' - global option - option = kwds.get('option', Options(update = update, - outfile = outfile, - docstrings = docstrings, - suffix = suffix, - backup = backup, - verbose = verbose)) - def_msgs = parse(def_file, 'msgstr') - ref_msgs = parse(ref_file, 'msgid') - if verbose and not __name__ == '__main__': - print >> sys.stderr, 'Merging %s with %s' % (ref_file, def_file) - updated_lines = match(def_msgs, ref_msgs) - if option.verbose: - print >> sys.stderr, ' done.' - if not option.update: - write(updated_lines, option.outfile) - elif option.update and changes(updated_lines, io(def_file).readlines()): - write(updated_lines, def_file) - -def merge_dir(directory, pot = False, include = [], exclude = [], - verbose = True): - ''' - Tries to merge a directory of po files. Uses simple glob to find - po files and pot file. The parameter can be used to specify - the pot file in the directory. If the list is given only - files in this list is merged. Use the list to exclude - files to be merged. This function is only useful if po files and - pot file are in the same directory. Set to get - information when running. - ''' - if directory[-1] == '/': - directory = os.path.dirname(directory) - if pot: - pot = os.path.basename(pot) - else: - pot = glob.glob('%s/*.pot' % directory) - if not pot: - raise MsgmergeError('No pot file found.') - elif len(pot) > 1: - raise MsgmergeError('More than one pot file found: %s.' % pot) - pot = os.path.basename(pot[0]) - - if not include: - pos = glob.glob('%s/*po' % directory) - if not len(pos) > 1: - raise MsgmergeError('No po file(s) found.') - pos = [ os.path.basename(po) for po in pos ] - else: - pos = [ os.path.basename(po) for po in include ] - - for po in exclude: - try: - pos.remove(po) - except ValueError: - pass - format = '%s/%s' - for po in pos: - try: - merge(format % (directory, po), format % (directory, pot), - update = True, verbose = verbose, - outfile = format % (directory, po)) - except MsgmergeError, err: - if verbose: - print >> sys.stderr, '%s Not updated.' % err - else: - print >> sys.stderr, '%s %s not updated.' % (err, po) - -if __name__ == '__main__': - cmdline() diff --git a/scripts/build/pygettext.py b/scripts/build/pygettext.py deleted file mode 100644 index c20e4320d7a..00000000000 --- a/scripts/build/pygettext.py +++ /dev/null @@ -1,672 +0,0 @@ -#! /usr/bin/env python -# -*- coding: iso-8859-1 -*- -# Originally written by Barry Warsaw -# -# Minimally patched to make it even more xgettext compatible -# by Peter Funk -# -# 2002-11-22 Jürgen Hermann -# Added checks that _() only contains string literals, and -# command line args are resolved to module lists, i.e. you -# can now pass a filename, a module or package name, or a -# directory (including globbing chars, important for Win32). -# Made docstring fit in 80 chars wide displays using pydoc. -# - -# for selftesting -try: - import fintl - _ = fintl.gettext -except ImportError: - _ = lambda s: s - -__doc__ = _("""pygettext -- Python equivalent of xgettext(1) - -Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the -internationalization of C programs. Most of these tools are independent of -the programming language and can be used from within Python programs. -Martin von Loewis' work[1] helps considerably in this regard. - -There's one problem though; xgettext is the program that scans source code -looking for message strings, but it groks only C (or C++). Python -introduces a few wrinkles, such as dual quoting characters, triple quoted -strings, and raw strings. xgettext understands none of this. - -Enter pygettext, which uses Python's standard tokenize module to scan -Python source code, generating .pot files identical to what GNU xgettext[2] -generates for C and C++ code. From there, the standard GNU tools can be -used. - -A word about marking Python strings as candidates for translation. GNU -xgettext recognizes the following keywords: gettext, dgettext, dcgettext, -and gettext_noop. But those can be a lot of text to include all over your -code. C and C++ have a trick: they use the C preprocessor. Most -internationalized C source includes a #define for gettext() to _() so that -what has to be written in the source is much less. Thus these are both -translatable strings: - - gettext("Translatable String") - _("Translatable String") - -Python of course has no preprocessor so this doesn't work so well. Thus, -pygettext searches only for _() by default, but see the -k/--keyword flag -below for how to augment this. - - [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html - [2] http://www.gnu.org/software/gettext/gettext.html - -NOTE: pygettext attempts to be option and feature compatible with GNU -xgettext where ever possible. However some options are still missing or are -not fully implemented. Also, xgettext's use of command line switches with -option arguments is broken, and in these cases, pygettext just defines -additional switches. - -Usage: pygettext [options] inputfile ... - -Options: - - -a - --extract-all - Extract all strings. - - -d name - --default-domain=name - Rename the default output file from messages.pot to name.pot. - - -E - --escape - Replace non-ASCII characters with octal escape sequences. - - -D - --docstrings - Extract module, class, method, and function docstrings. These do - not need to be wrapped in _() markers, and in fact cannot be for - Python to consider them docstrings. (See also the -X option). - - -h - --help - Print this help message and exit. - - -k word - --keyword=word - Keywords to look for in addition to the default set, which are: - %(DEFAULTKEYWORDS)s - - You can have multiple -k flags on the command line. - - -K - --no-default-keywords - Disable the default set of keywords (see above). Any keywords - explicitly added with the -k/--keyword option are still recognized. - - --no-location - Do not write filename/lineno location comments. - - -n - --add-location - Write filename/lineno location comments indicating where each - extracted string is found in the source. These lines appear before - each msgid. The style of comments is controlled by the -S/--style - option. This is the default. - - -o filename - --output=filename - Rename the default output file from messages.pot to filename. If - filename is `-' then the output is sent to standard out. - - -p dir - --output-dir=dir - Output files will be placed in directory dir. - - -S stylename - --style stylename - Specify which style to use for location comments. Two styles are - supported: - - Solaris # File: filename, line: line-number - GNU #: filename:line - - The style name is case insensitive. GNU style is the default. - - -v - --verbose - Print the names of the files being processed. - - -V - --version - Print the version of pygettext and exit. - - -w columns - --width=columns - Set width of output to columns. - - -x filename - --exclude-file=filename - Specify a file that contains a list of strings that are not be - extracted from the input files. Each string to be excluded must - appear on a line by itself in the file. - - -X filename - --no-docstrings=filename - Specify a file that contains a list of files (one per line) that - should not have their docstrings extracted. This is only useful in - conjunction with the -D option above. - -If `inputfile' is -, standard input is read. -""") - -import os -import imp -import sys -import glob -import time -import getopt -import token -import tokenize -import operator - -from umit.pm.core.const import PM_VERSION - -__version__ = '1.5' - -default_keywords = ['_'] -DEFAULTKEYWORDS = ', '.join(default_keywords) - -EMPTYSTRING = '' - - - -# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's -# there. -pot_header = _('''\ -# PacketManipulator catalog. -# Copyright (C) 2009 Adriano Montero Marques -# Francesco Piccinno , 2009 -# -msgid "" -msgstr "" -"Project-Id-Version: PacketManipulator %(pm_version)s\\n" -"POT-Creation-Date: %(time)s\\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" -"Last-Translator: FULL NAME \\n" -"Language-Team: LANGUAGE \\n" -"MIME-Version: 1.0\\n" -"Content-Type: text/plain; charset=UTF-8\\n" -"Content-Transfer-Encoding: 8bit\\n" -"Generated-By: pygettext.py %(version)s\\n" - -''') - - -def usage(code, msg=''): - print >> sys.stderr, __doc__ % globals() - if msg: - print >> sys.stderr, msg - sys.exit(code) - - - -escapes = [] - -def make_escapes(pass_iso8859): - global escapes - if pass_iso8859: - # Allow iso-8859 characters to pass through so that e.g. 'msgid - # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we - # escape any character outside the 32..126 range. - mod = 128 - else: - mod = 256 - for i in range(256): - if 32 <= (i % mod) <= 126: - escapes.append(chr(i)) - else: - escapes.append("\\%03o" % i) - escapes[ord('\\')] = '\\\\' - escapes[ord('\t')] = '\\t' - escapes[ord('\r')] = '\\r' - escapes[ord('\n')] = '\\n' - escapes[ord('\"')] = '\\"' - - -def escape(s): - global escapes - s = list(s) - for i in range(len(s)): - s[i] = escapes[ord(s[i])] - return EMPTYSTRING.join(s) - - -def safe_eval(s): - # unwrap quotes, safely - return eval(s, {'__builtins__':{}}, {}) - - -def normalize(s): - # This converts the various Python string types into a format that is - # appropriate for .po files, namely much closer to C style. - lines = s.split('\n') - if len(lines) == 1: - s = '"' + escape(s) + '"' - else: - if not lines[-1]: - del lines[-1] - lines[-1] = lines[-1] + '\n' - for i in range(len(lines)): - lines[i] = escape(lines[i]) - lineterm = '\\n"\n"' - s = '""\n"' + lineterm.join(lines) + '"' - return s - - -def containsAny(str, set): - """Check whether 'str' contains ANY of the chars in 'set'""" - return 1 in [c in str for c in set] - - -def _visit_pyfiles(list, dirname, names): - """Helper for getFilesForName().""" - # get extension for python source files - if not globals().has_key('_py_ext'): - global _py_ext - _py_ext = [triple[0] for triple in imp.get_suffixes() - if triple[2] == imp.PY_SOURCE][0] - - # don't recurse into CVS directories - if 'CVS' in names: - names.remove('CVS') - - # add all *.py files to list - list.extend( - [os.path.join(dirname, file) for file in names - if os.path.splitext(file)[1] == _py_ext] - ) - - -def _get_modpkg_path(dotted_name, pathlist=None): - """Get the filesystem path for a module or a package. - - Return the file system path to a file for a module, and to a directory for - a package. Return None if the name is not found, or is a builtin or - extension module. - """ - # split off top-most name - parts = dotted_name.split('.', 1) - - if len(parts) > 1: - # we have a dotted path, import top-level package - try: - file, pathname, description = imp.find_module(parts[0], pathlist) - if file: file.close() - except ImportError: - return None - - # check if it's indeed a package - if description[2] == imp.PKG_DIRECTORY: - # recursively handle the remaining name parts - pathname = _get_modpkg_path(parts[1], [pathname]) - else: - pathname = None - else: - # plain name - try: - file, pathname, description = imp.find_module( - dotted_name, pathlist) - if file: - file.close() - if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: - pathname = None - except ImportError: - pathname = None - - return pathname - - -def getFilesForName(name): - """Get a list of module files for a filename, a module or package name, - or a directory. - """ - if not os.path.exists(name): - # check for glob chars - if containsAny(name, "*?[]"): - files = glob.glob(name) - list = [] - for file in files: - list.extend(getFilesForName(file)) - return list - - # try to find module or package - name = _get_modpkg_path(name) - if not name: - return [] - - if os.path.isdir(name): - # find all python files in directory - list = [] - os.path.walk(name, _visit_pyfiles, list) - return list - elif os.path.exists(name): - # a single file - return [name] - - return [] - - -class TokenEater: - def __init__(self, options): - self.__options = options - self.__messages = {} - self.__state = self.__waiting - self.__data = [] - self.__lineno = -1 - self.__freshmodule = 1 - self.__curfile = None - - def __call__(self, ttype, tstring, stup, etup, line): - # dispatch -## import token -## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ -## 'tstring:', tstring - self.__state(ttype, tstring, stup[0]) - - def __waiting(self, ttype, tstring, lineno): - opts = self.__options - # Do docstring extractions, if enabled - if opts.docstrings and not opts.nodocstrings.get(self.__curfile): - # module docstring? - if self.__freshmodule: - if ttype == tokenize.STRING: - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) - self.__freshmodule = 0 - elif ttype not in (tokenize.COMMENT, tokenize.NL): - self.__freshmodule = 0 - return - # class docstring? - if ttype == tokenize.NAME and tstring in ('class', 'def'): - self.__state = self.__suiteseen - return - if ttype == tokenize.NAME and tstring in opts.keywords: - self.__state = self.__keywordseen - - def __suiteseen(self, ttype, tstring, lineno): - # ignore anything until we see the colon - if ttype == tokenize.OP and tstring == ':': - self.__state = self.__suitedocstring - - def __suitedocstring(self, ttype, tstring, lineno): - # ignore any intervening noise - if ttype == tokenize.STRING: - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) - self.__state = self.__waiting - elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, - tokenize.COMMENT): - # there was no class docstring - self.__state = self.__waiting - - def __keywordseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == '(': - self.__data = [] - self.__lineno = lineno - self.__state = self.__openseen - else: - self.__state = self.__waiting - - def __openseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == ')': - # We've seen the last of the translatable strings. Record the - # line number of the first line of the strings and update the list - # of messages seen. Reset state for the next batch. If there - # were no strings inside _(), then just ignore this entry. - if self.__data: - self.__addentry(EMPTYSTRING.join(self.__data)) - self.__state = self.__waiting - elif ttype == tokenize.STRING: - self.__data.append(safe_eval(tstring)) - elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, - token.NEWLINE, tokenize.NL]: - # warn if we see anything else than STRING or whitespace - print >> sys.stderr, _( - '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' - ) % { - 'token': tstring, - 'file': self.__curfile, - 'lineno': self.__lineno - } - self.__state = self.__waiting - - def __addentry(self, msg, lineno=None, isdocstring=0): - if lineno is None: - lineno = self.__lineno - if not msg in self.__options.toexclude: - entry = (self.__curfile, lineno) - self.__messages.setdefault(msg, {})[entry] = isdocstring - - def set_filename(self, filename): - self.__curfile = filename - self.__freshmodule = 1 - - def write(self, fp): - options = self.__options - timestamp = time.strftime('%Y-%m-%d %H:%M+%Z') - # The time stamp in the header doesn't have the same format as that - # generated by xgettext... - print >> fp, pot_header % {'time': timestamp, 'version': __version__, - 'pm_version': PM_VERSION} - # Sort the entries. First sort each particular entry's keys, then - # sort all the entries by their first item. - reverse = {} - for k, v in self.__messages.items(): - keys = v.keys() - keys.sort() - reverse.setdefault(tuple(keys), []).append((k, v)) - rkeys = reverse.keys() - rkeys.sort() - for rkey in rkeys: - rentries = reverse[rkey] - rentries.sort() - for k, v in rentries: - isdocstring = 0 - # If the entry was gleaned out of a docstring, then add a - # comment stating so. This is to aid translators who may wish - # to skip translating some unimportant docstrings. - if reduce(operator.__add__, v.values()): - isdocstring = 1 - # k is the message string, v is a dictionary-set of (filename, - # lineno) tuples. We want to sort the entries in v first by - # file name and then by line number. - v = v.keys() - v.sort() - if not options.writelocations: - pass - # location comments are different b/w Solaris and GNU: - elif options.locationstyle == options.SOLARIS: - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - print >>fp, _( - '# File: %(filename)s, line: %(lineno)d') % d - elif options.locationstyle == options.GNU: - # fit as many locations on one line, as long as the - # resulting line length doesn't exceeds 'options.width' - locline = '#:' - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - s = _(' %(filename)s:%(lineno)d') % d - if len(locline) + len(s) <= options.width: - locline = locline + s - else: - print >> fp, locline - locline = "#:" + s - if len(locline) > 2: - print >> fp, locline - if isdocstring: - print >> fp, '#, docstring' - print >> fp, 'msgid', normalize(k) - print >> fp, 'msgstr ""\n' - - - -def main(): - global default_keywords - try: - opts, args = getopt.getopt( - sys.argv[1:], - 'ad:DEhk:Kno:p:S:Vvw:x:X:', - ['extract-all', 'default-domain=', 'escape', 'help', - 'keyword=', 'no-default-keywords', - 'add-location', 'no-location', 'output=', 'output-dir=', - 'style=', 'verbose', 'version', 'width=', 'exclude-file=', - 'docstrings', 'no-docstrings', - ]) - except getopt.error, msg: - usage(1, msg) - - # for holding option values - class Options: - # constants - GNU = 1 - SOLARIS = 2 - # defaults - extractall = 0 # FIXME: currently this option has no effect at all. - escape = 0 - keywords = [] - outpath = '' - outfile = 'messages.pot' - writelocations = 1 - locationstyle = GNU - verbose = 0 - width = 78 - excludefilename = '' - docstrings = 0 - nodocstrings = {} - - options = Options() - locations = {'gnu' : options.GNU, - 'solaris' : options.SOLARIS, - } - - # parse options - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-a', '--extract-all'): - options.extractall = 1 - elif opt in ('-d', '--default-domain'): - options.outfile = arg + '.pot' - elif opt in ('-E', '--escape'): - options.escape = 1 - elif opt in ('-D', '--docstrings'): - options.docstrings = 1 - elif opt in ('-k', '--keyword'): - options.keywords.append(arg) - elif opt in ('-K', '--no-default-keywords'): - default_keywords = [] - elif opt in ('-n', '--add-location'): - options.writelocations = 1 - elif opt in ('--no-location',): - options.writelocations = 0 - elif opt in ('-S', '--style'): - options.locationstyle = locations.get(arg.lower()) - if options.locationstyle is None: - usage(1, _('Invalid value for --style: %s') % arg) - elif opt in ('-o', '--output'): - options.outfile = arg - elif opt in ('-p', '--output-dir'): - options.outpath = arg - elif opt in ('-v', '--verbose'): - options.verbose = 1 - elif opt in ('-V', '--version'): - print _('pygettext.py (xgettext for Python) %s') % __version__ - sys.exit(0) - elif opt in ('-w', '--width'): - try: - options.width = int(arg) - except ValueError: - usage(1, _('--width argument must be an integer: %s') % arg) - elif opt in ('-x', '--exclude-file'): - options.excludefilename = arg - elif opt in ('-X', '--no-docstrings'): - fp = open(arg) - try: - while 1: - line = fp.readline() - if not line: - break - options.nodocstrings[line[:-1]] = 1 - finally: - fp.close() - - # calculate escapes - make_escapes(options.escape) - - # calculate all keywords - options.keywords.extend(default_keywords) - - # initialize list of strings to exclude - if options.excludefilename: - try: - fp = open(options.excludefilename) - options.toexclude = fp.readlines() - fp.close() - except IOError: - print >> sys.stderr, _( - "Can't read --exclude-file: %s") % options.excludefilename - sys.exit(1) - else: - options.toexclude = [] - - # resolve args to module lists - expanded = [] - for arg in args: - if arg == '-': - expanded.append(arg) - else: - expanded.extend(getFilesForName(arg)) - args = expanded - - # slurp through all the files - eater = TokenEater(options) - for filename in args: - if filename == '-': - if options.verbose: - print _('Reading standard input') - fp = sys.stdin - closep = 0 - else: - if options.verbose: - print _('Working on %s') % filename - fp = open(filename) - closep = 1 - try: - eater.set_filename(filename) - try: - tokenize.tokenize(fp.readline, eater) - except tokenize.TokenError, e: - print >> sys.stderr, '%s: %s, line %d, column %d' % ( - e[0], filename, e[1][0], e[1][1]) - finally: - if closep: - fp.close() - - # write the output - if options.outfile == '-': - fp = sys.stdout - closep = 0 - else: - if options.outpath: - options.outfile = os.path.join(options.outpath, options.outfile) - fp = open(options.outfile, 'w') - closep = 1 - try: - eater.write(fp) - finally: - if closep: - fp.close() - - -if __name__ == '__main__': - main() - # some more test strings - _(u'a unicode string') - # this one creates a warning - _('*** Seen unexpected token "%(token)s"') % {'token': 'test'} - _('more' 'than' 'one' 'string')