def strip_quotes(st):
    """Return st without its enclosing double quotes.

    The quotes are removed only when st is at least two characters long
    and both its first and its last character are a double quote; any
    other string is returned unchanged.
    """
    is_quoted = len(st) >= 2 and st.startswith('"') and st.endswith('"')
    if not is_quoted:
        return st
    return st.strip()[1:-1]
class Check_named_fmt( Check ):
    """Check that msgid and msgstr use the same named format specifiers.

    A message is collected in self.msgs when the sorted lists of
    specifiers found in its msgid and in its msgstr are not equal.
    """
    # A pattern to find all %(): a '%', a parenthesised name, optional
    # digits and one non-digit character.  Written as a raw string so the
    # backslashes reach the regex engine without relying on Python leaving
    # unknown string escapes untouched.
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    def __init__( self ):
        Check.__init__( self )
        self.diag_header = "-------- %() name mismatches --------------"
        self.summary_text = "%() name mismatches:"

    def process( self, msg ):
        """Record msg when its named formats differ between msgid and msgstr."""
        fmts1 = self.find_named_fmt_pat.findall( msg.msgid )
        fmts2 = self.find_named_fmt_pat.findall( msg.msgstr )
        # Sorting makes the comparison independent of the order in which
        # the specifiers appear.  Lists of different length are unequal
        # too, so no separate length test is needed.
        fmts1.sort()
        fmts2.sort()
        if fmts1 != fmts2:
            self.msgs.append( msg )
class Check_xml_chars( Check ):
    """Check tips.xml translations for unescaped XML special characters."""
    # Special XML characters
    # The pattern reports a '>' that follows a non-word character, any
    # double quote, and any '&' that does not start one of the entity
    # references listed in the lookahead.  '&lt;' and '&apos;' are listed
    # as well: they are correctly escaped characters and must not be
    # reported as a bare ampersand.
    xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|lt|apos|amp);)', re.VERBOSE )

    def __init__( self ):
        Check.__init__( self )
        self.diag_header = "-------- unescaped XML special characters ---------"
        self.summary_text = "XML special chars:"

    def process( self, msg ):
        """Record msg when it belongs to tips.xml and its msgstr matches the pattern."""
        # XML errors
        # Only look at messages in the tips.xml
        if msg.is_tips_xml and self.xml_chars_pat.search( msg.msgstr ):
            self.msgs.append( msg )
class Msgid:
    """One entry of a PO file: its raw lines, its text and two flags.

    msgid and msgstr hold the concatenated, unquoted text of the entry;
    _msgid, _msgstr and _cmnt keep the raw input lines.  is_fuzzy and
    is_tips_xml are set from the comment lines by add_cmnt().
    """
    fuzzy_pat = re.compile( 'fuzzy' )
    tips_xml_pat = re.compile( r'tips\.xml' )

    def __init__( self, msgnr, lineno ):
        self._msgid = []
        self._msgstr = []
        self.msgid = ''
        self.msgstr = ''
        self._cmnt = []
        self.nr = msgnr
        self.lineno = lineno
        self.is_fuzzy = 0
        self.is_tips_xml = 0

    def diag( self ):
        """Write the entry number, its line number and its raw lines to stdout."""
        fuzzy_note = ""
        if self.is_fuzzy:
            fuzzy_note = " (fuzzy)"
        out = sys.stdout.write
        out( "\n" )
        out( "msg nr: %d, lineno: %d%s\n" % ( self.nr, self.lineno, fuzzy_note ) )
        out( ''.join( self._msgid ) )
        out( ''.join( self._msgstr ) )

    def _payload( self, keyword, line, lineno ):
        """Return line stripped of a leading keyword and of outer whitespace.

        A complaint is written to stdout when the result is not enclosed
        in double quotes; the (still quoted) text is returned either way.
        """
        # Anchor at the start of the line: the keyword must only be removed
        # where it introduces the entry, never inside the message text or
        # on a continuation line.
        line = re.sub( r'^%s\s+' % keyword, '', line ).strip()
        # Slices instead of line[0] so an empty line cannot raise IndexError.
        if line[:1] != '"' or line[-1:] != '"':
            sys.stdout.write( "ERROR at line %d: Missing quote.\n" % lineno )
        return line

    def add_msgid( self, line, lineno ):
        """Append a msgid line (first or continuation) to this entry."""
        self._msgid.append( line )
        self.msgid += strip_quotes( self._payload( 'msgid', line, lineno ) )

    def add_msgstr( self, line, lineno ):
        """Append a msgstr line (first or continuation) to this entry."""
        self._msgstr.append( line )
        self.msgstr += strip_quotes( self._payload( 'msgstr', line, lineno ) )

    def add_cmnt( self, line ):
        """Store a comment line and update is_fuzzy / is_tips_xml from it."""
        self._cmnt.append( line )
        # PO flags live on '#,' lines; a source reference or translator
        # comment that merely contains the word must not mark the entry.
        if not self.is_fuzzy and line.startswith( '#,' ) and self.fuzzy_pat.search( line ):
            self.is_fuzzy = 1
        if not self.is_tips_xml and self.tips_xml_pat.search( line ):
            self.is_tips_xml = 1
+ if old_pat.match( line ): + next_state = OLD + elif comment_pat.match( line ): + next_state = CMNT + elif msgid_pat.match( line ): + next_state = MSGID + elif msgstr_pat.match( line ): + next_state = MSGSTR + elif str_pat.match( line ): + next_state = STR + else: + print 'WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars() + next_state = NONE + + #print "%(state)d->%(next_state)d\t%(line)s" % vars() + if state == NONE: + # expect msgid or comment or old stuff + if next_state == CMNT: + state = CMNT + msg = Msgid( msgnr, lineno ) # Start with an empty new item + msgnr += 1 + msgs.append( msg ) + msg.add_cmnt( line ) + + elif next_state == MSGID: + state = MSGID + msg = Msgid( msgnr, lineno ) # Start with an empty new item + msgnr += 1 + msgs.append( msg ) + msg.add_msgid( line, lineno ) + + elif next_state == MSGSTR: + print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars() + state = MSGSTR + msg = Msgid( msgnr, lineno ) # Start with an empty new item + msgnr += 1 + msgs.append( msg ) + msg.add_msgstr( line, lineno ) + + elif next_state == STR: + print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars() + + elif next_state == OLD: + pass # Just skip + + elif state == CMNT: + if next_state == CMNT: + if msg: + msg.add_cmnt( line ) + else: + # Note. 
We may need to do something about these comments + # Skip for now + pass + + elif next_state == MSGID: + state = MSGID + if not msg: + msg = Msgid( msgnr, lineno ) # Start with an empty new item + msgnr += 1 + msgs.append( msg ) + msg.add_msgid( line, lineno ) + + elif next_state == MSGSTR: + print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars() + state = MSGSTR + msg = Msgid( msgnr, lineno ) # Start with an empty new item + msgnr += 1 + msgs.append( msg ) + msg.add_msgstr( line, lineno ) + + elif next_state == STR: + print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars() + + elif next_state == OLD: + msg = None + pass # Just skip + + elif state == MSGID: + if next_state == CMNT: + # Hmmm. A comment here? + print 'WARNING: Unexpted comment at %(fname)s:%(lineno)d' % vars() + + elif next_state == MSGID: + raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % vars() ) + + elif next_state == MSGSTR: + state = MSGSTR + msg.add_msgstr( line, lineno ) + + elif next_state == STR: + msg.add_msgid( line, lineno ) + + elif next_state == OLD: + msg = None + pass # Just skip + + elif state == MSGSTR: + if next_state == CMNT: + # A comment probably starts a new item + state = CMNT + msg = Msgid( msgnr, lineno ) + msgnr += 1 + msgs.append( msg ) + msg.add_cmnt( line ) + + elif next_state == MSGID: + state = MSGID + msg = Msgid( msgnr, lineno ) + msgnr += 1 + msgs.append( msg ) + msg.add_msgid( line, lineno ) + + elif next_state == MSGSTR: + raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % vars() ) + + elif next_state == STR: + msg.add_msgstr( line, lineno ) + + elif next_state == OLD: + msg = None + pass # Just skip + + else: + raise Exception( 'Unexpected state in po parsing (state = %d)' % state ) + + # Strip items with just comments. (Can this happen?) 
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
    """Run every check over msgs and print the report for one PO file.

    options      -- object providing the skip_fuzzy and only_summary flags
    fname        -- file name shown in the report
    msgs         -- messages to analyze; each needs msgid, msgstr and
                    is_fuzzy
    nr_templates -- number of messages in the template; None counts as 0
    nth          -- position of this file in the run; a separator is
                    printed first when it is greater than 0

    Untranslated messages are only counted.  Fuzzy messages are counted
    and, when options.skip_fuzzy is set, not checked.  A coverage whose
    denominator is zero is reported as 0.00%.
    """
    nr_fuzzy = 0
    nr_untranslated = 0

    checks = [
        Check_fmt( '%s' ),
        Check_fmt( '%d' ),
        Check_named_fmt(),
        Check_missing_sd(),
        Check_runaway(),
        Check_xml_chars(),
        Check_last_char(),
        Check_shortcut_trans(),
    ]

    for msg in msgs:
        if not msg.msgstr:
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            if options.skip_fuzzy:
                continue

        for c in checks:
            c.process( msg )

    nr_msgs = len(msgs)
    if nr_templates is None:
        # No template size given: "%d" and float() cannot take None.
        nr_templates = 0

    # Single-argument print(...) behaves the same as a print statement.
    if nth > 0:
        print( "" )
        print( "=====================================" )
    print( "%-20s%s" % ( "File:", fname ) )
    print( "%-20s%d" % ( "Template total:", nr_templates ) )
    print( "%-20s%d" % ( "PO total:", nr_msgs ) )
    print( "%-20s%d" % ( "Fuzzy:", nr_fuzzy ) )
    print( "%-20s%d" % ( "Untranslated:", nr_untranslated ) )

    for c in checks:
        c.summary()

    # Guard both divisions: an empty PO file or an empty template would
    # otherwise abort the report with ZeroDivisionError.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print( "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage ) )

    if nr_templates:
        template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
    else:
        template_coverage = 0.0
    print( "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ) )

    if not options.only_summary:
        for c in checks:
            c.diag()
def add(id, str, fuzzy, use_fuzzy=False):
    """Add a translation to the dictionary.

    An empty translation is never stored.  A fuzzy translation is stored
    only when use_fuzzy is true; use_fuzzy defaults to False so that a
    three-argument call keeps dropping fuzzy entries.
    """
    # MESSAGES is only mutated, never rebound, so no global statement is
    # needed.
    if str and (use_fuzzy or not fuzzy):
        MESSAGES[id] = str
make(filename, outfile): sys.exit(1) # Add last entry if section == STR: - add(msgid, msgstr, fuzzy) + add(msgid, msgstr, fuzzy, use_fuzzy) # Compute output output = generate() @@ -194,15 +185,15 @@ def make(filename, outfile): print >> sys.stderr, msg - def main(): try: - opts, args = getopt.getopt(sys.argv[1:], 'hVo:', - ['help', 'version', 'output-file=']) + opts, args = getopt.getopt(sys.argv[1:], 'hVo:f', + ['help', 'version', 'output-file=', 'use-fuzzy']) except getopt.error, msg: usage(1, msg) outfile = None + use_fuzzy = False # parse options for opt, arg in opts: if opt in ('-h', '--help'): @@ -210,6 +201,8 @@ def main(): elif opt in ('-V', '--version'): print >> sys.stderr, "msgfmt.py", __version__ sys.exit(0) + elif opt in ('-f', '--use-fuzzy'): + use_fuzzy = True elif opt in ('-o', '--output-file'): outfile = arg # do it @@ -219,7 +212,7 @@ def main(): return for filename in args: - make(filename, outfile) + make(filename, outfile, use_fuzzy) if __name__ == '__main__': diff --git a/scripts/build/msgmerge.py b/scripts/build/msgmerge.py new file mode 100644 index 00000000000..82fe3efdb97 --- /dev/null +++ b/scripts/build/msgmerge.py @@ -0,0 +1,675 @@ +#! /usr/bin/env python +# -*- coding: iso-8859-1 -*- +# +# Copyright Terje Røsten Nov. 2003. +# +'''Merge two Uniforum style .po files together. + +This is a implementation (not complete) in Python of the GNU +msgmerge(1) program. It can be used on the command line (or as a Python +module). + +Usage: msgmerge.py [OPTIONS] def.po ref.pot + +The def.po file is an existing PO file with translations. The ref.pot +file is the last created PO file with up-to-date source references but +old translations, or a PO Template file. + +Options: + -U, --update update def.po, + do nothing if def.po is already up to date. + -o, --output-file=FILE write output to file FILE. Output is written to + stdout if set to - or if the option is not present. + -D, --docstrings don\'t remove docstring flag. 
+ -h, --help display help text and exit. + -V, --version display version and exit. + -q, --quiet, --silent suppress progress indicators. +''' +from __future__ import generators + +if not __name__ == '__main__': + __doc__ += '''\ + +When used as a module the interesting functions are merge() and +merge_dir(). + +The merge() function does the same as the command line version, and +the arguments are as follows. The first argument is the def.po file, +then the ref.pot file. The third argument controls whether to work in +update mode or not, then the next argument sets the output file. Set +the next argument to False to remove docstring flags. The last +argument can be used to suppress progress indicators. The default is +to work in update mode with progress indicators. + +Example: + merge("def.po", "ref.pot") + merge the files def.po and ref.pot and write output to def.po if + there are any changes. + merge("def.po", "ref.pot", docstrings = False, verbose = False, + update = False, outfile = "-") + merge the files def.po and ref.pot and write output to stdout, + remove docstring flag and be quiet. + +The merge_dir() function is useful when merging a directory of po +files. The only required argument is the name of the directory with po +files and the pot file. It will use simple glob to find the files. The +second argument can be used to specify the pot file (in the +directory). The third argument is a list of po files (then globbing will +not be used) and the next argument is a list of filenames to exclude. The +last argument can be used to suppress progress indicators. Docstring +flag will not be removed. + +Example: + merge_dir("po") + merge (and update) all po files in directory po with the single pot + file in the same directory. + +The module raises the MsgmergeError exception in case of error.
class Options:
    '''Holder for the settings of one merge run.'''
    def __init__(self, cmdline = False, **kwds):
        '''
        With a false cmdline every setting is taken from kwds, falling
        back to its default.  With a true cmdline the fixed start values
        below are used and kwds is not consulted.
        '''
        if cmdline:
            self.update = False
            self.outfile = False
            self.docstrings = False
            self.verbose = True
            self.suffix = '~'
            self.backup = True
            return

        defaults = (
            ('update', True),
            ('outfile', '-'),
            ('docstrings', True),
            ('verbose', False),
            ('suffix', '~'),
            ('backup', True),
        )
        for key, fallback in defaults:
            setattr(self, key, kwds.get(key, fallback))
def splitted_fit(chunk, line, width, break_always, break_after_space):
    '''
    Look, from the end of chunk backwards, for a position where chunk
    can be split so that its head can be appended to line.

    A position qualifies when its character is in break_always and
    head plus line is at most width long, when its character is in
    break_after_space, is not the first one and follows whitespace
    (the width is not consulted here), or when it is a backslash
    directly followed by a double quote and head plus line is at most
    width long.  The return value is the tuple (head, rest); head is
    empty and rest is the whole chunk when no position qualifies.
    '''
    end = len(chunk)
    pos = end
    while pos > 0:
        pos -= 1
        char = chunk[pos]
        head = chunk[:pos]
        fits = len(head) + len(line) <= width
        after_blank = pos and chunk[pos - 1].strip() == ''
        escaped_quote = char == '\\' and pos + 1 < end and chunk[pos + 1] == '"'
        if (char in break_always and fits) \
           or (char in break_after_space and after_blank) \
           or (escaped_quote and fits):
            return head, chunk[pos:]
    return '', chunk
def normalize(lines):
    '''
    Break a quoted field after every escaped newline, so that
    "a\\nb" becomes "a\\n" followed by "b" on a line of its own.

    Nothing is changed unless the first escaped newline lies after the
    first character and more than three characters before the end.  A
    closing \\n" is kept as it is, and empty "" lines produced by the
    splitting are removed.
    '''
    first = lines.find('\\n')
    if not (0 < first < len(lines) - 3):
        return lines

    tail = ''
    body = lines
    if lines.endswith('\\n"'):
        # Keep the final escaped newline out of the splitting.
        tail = '\\n"'
        body = lines[:-3]
    body = body.replace('\\n', '\\n"\n"')
    body = body.replace('""\n', '')
    return body + tail
+ % kwds) + washed.append(m.group(1)) + if m.group(1).endswith(r'\n'): + washed.append('') + i = 0 + for line in lines[1:]: + m = re.match('^"(\s*.*)"$', line) + i += 1 + if not m: + print line + kwds['lno'] -= size - i + 1 + raise MsgmergeError('parse error: %(filename)s:%(lno)s.' + % kwds) + washed[-1] += m.group(1) + if m.group(1).endswith(r'\n'): + washed.append('') + if washed[0] == '': + washed.pop(0) + if washed[-1] == '': + washed.pop() + + washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed. + + # One line or multiline + if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width: + washed = '%s %s\n' % (idx, washed[0]) + else: + washed = '%s ""\n%s\n' % (idx, '\n'.join(washed)) + else: + washed = msg + + return washed + +def parse(filename, entry): + ''' + Parse po or pot file with name . Set the variable + to msgid/msgstr to indicate pot/po file. The return value + is a dict with msgid (washed) as key and Msgs instances as + values. + ''' + lines = io(filename).readlines() + Msgs.file = filename + messages = {} + last = len(lines) + g = gen(lines) + cmt = autocmt = ref = flag = '' + msgid = False + lno = 0 + while not lno == last: + l, lno = g.next() + if l.startswith('# '): + l, lno, g, cmt = slurp(l, g, '# ') + if l.startswith('#.'): + l, lno, g, autocmt = slurp(l, g, '#.') + if l.startswith('#:'): + l, lno, g, ref = slurp(l, g, '#:') + if l.startswith('#,'): + l, lno, g, flag = slurp(l, g, '#,') + if l.startswith('msgid'): + l, lno, g, msgid = slurp(l, g, '"') + if l.startswith('msgstr'): + l, lno, g, msgstr = slurp(l, g, '"') + + if not lno == last and not l.strip() == '': + raise MsgmergeError('parse error: %s:%s.' 
def fuzzy_match(pot, defs):
    '''
    Find the entry in the dict defs whose id is most similar to the
    id of pot, using difflib.

    Entries with an empty translation, the header entry and entries
    whose length differs too much from pot are skipped.  An entry is
    accepted only when its ratio is higher than the best one so far,
    starting from 0.6 - 0.01.  The return value is the best entry, or
    False when no entry qualifies.
    '''
    wanted = pot.get_clean_id()
    matcher = difflib.SequenceMatcher(lambda x: x == ' "', '', wanted)
    wanted_len = len(wanted)
    best_ratio = 0.6 - 0.01
    best = False
    for entry in defs.values():
        # Skip empty translations and the header (empty msgid).
        if entry.str == 'msgstr ""\n' or entry.id == 'msgid ""\n':
            continue
        text = entry.get_clean_id()
        size = len(text)
        # Simple and fast length test first.
        if wanted_len > 2 * size or size > 1.5 * wanted_len:
            continue
        matcher.set_seq1(text)
        if matcher.quick_ratio() >= best_ratio:
            ratio = matcher.ratio()  # This is expensive
            if ratio > best_ratio:
                best_ratio = ratio
                best = entry
    return best
def header(pot, defs):
    '''
    Update date in header entry. Returns the updated header entry.

    pot is the header entry of the pot file; defs is the dict of
    entries of the po file.  The POT-Creation-Date value found in
    pot.str replaces the one in the str of the po header entry (the
    single entry of defs whose id is the empty msgid).

    Raises MsgmergeError when defs does not hold exactly one header
    entry, or when either header lacks a POT-Creation-Date field.
    '''
    try:
        [po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
    except ValueError:
        raise MsgmergeError('Error: did not find header in po file.')

    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
                   re.MULTILINE | re.DOTALL)
    m = r.match(pot.str)
    if not m:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in pot file.')
    new_date = m.group(2)

    def stamp(found):
        # Built by hand instead of through a '\\1...\\3' template, so a
        # backslash in the date cannot be read as a group reference.
        return found.group(1) + new_date + found.group(3)

    updated, count = r.subn(stamp, po.str)
    if not count == 1:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in po file.')
    # Store the result; it used to be thrown away, which left the old
    # date in the merged file.
    po.str = updated
    return po
def cmdline():
    '''
    Parse options and arguments from command line and run merge().

    A usage error (unknown option, --update together with
    --output-file, wrong number of input files) or a MsgmergeError
    raised by merge() prints a message and exits with status 1.
    --version prints the version and exits with status 0; --help is
    handled by help().
    '''
    advice = 'Try `%(name)s --help\' for more information.' % globals()

    def fail(reason):
        # The reason is formatted exactly once; formatting the finished
        # message again broke on any '%' inside it.
        print('%s: %s\n%s' % (name, reason, advice))
        sys.exit(1)

    try:
        # 'suffix=' and 'backup=' take a value, which the loop below
        # stores; without the '=' getopt rejects --suffix=X outright.
        long_opt = ['help', 'version', 'update', 'output-file=',
                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup=']
        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
    except getopt.error:
        fail(sys.exc_info()[1])

    option = Options(cmdline = True)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            help()
        elif opt in ['-V', '--version']:
            print('%s %s' % (name, __version__))
            sys.exit(0)
        elif opt in ['-o', '--output-file']:
            option.outfile = arg
        elif opt in ['-U', '--update']:
            option.update = True
        elif opt in ['-q', '--silent', '--quiet']:
            option.verbose = False
        elif opt in ['-D', '--docstrings']:
            option.docstrings = True
        elif opt in ['--suffix']:
            option.suffix = arg
        elif opt in ['--backup']:
            option.backup = arg

    # Sanity checks
    warn = False
    if option.update and option.outfile:
        warn = '--update and --output-file are mutually exclusive.'
    if len(args) == 0:
        warn = 'no input files given.'
    elif len(args) == 1 or len(args) > 2:
        warn = 'exactly 2 input files required.'
    if warn:
        fail(warn)

    if option.update:
        # Update mode writes back to the po file itself.
        option.outfile = args[0]
    elif not option.outfile:
        option.outfile = '-'

    defs, refs = args

    try:
        merge(defs, refs, option = option)
    except MsgmergeError:
        print('%s: %s' % (name, sys.exc_info()[1]))
        sys.exit(1)
% + (msg[1], iofile)) + return fo + +def backup(infile): + '''Handle backup of files in update mode''' + os.environ.get('VERSION_CONTROL', '') + suffix = os.environ.get('SIMPLE_BACKUP_SUFFIX', '~') + + backup_file = '%s%s' % (infile, suffix) + +def changes(new, old): + return cmp(''.join(old), '\n'.join(new)) + +def write(matches, outfile): + '''Write the list to file ''' + if not outfile == '-': + fd = io(outfile, 'w') + else: + fd = sys.stdout + fd.write('\n'.join(matches)) + +def merge(def_file, ref_file, update = True, outfile = '-', + docstrings = True, suffix = '~', backup = True, + verbose = True, **kwds): + ''' + Merge po file with pot file . If is + set to True then only update if there are changes to the po + file. Set outfile to write updated po file to an another file. Set + to `-\' for writing to standard out. If docstrings is False + docstrings flag will removed. Set verbose to False to suppress + progress indicators. is used to pass options from the + command line interface. + ''' + global option + option = kwds.get('option', Options(update = update, + outfile = outfile, + docstrings = docstrings, + suffix = suffix, + backup = backup, + verbose = verbose)) + def_msgs = parse(def_file, 'msgstr') + ref_msgs = parse(ref_file, 'msgid') + if verbose and not __name__ == '__main__': + print >> sys.stderr, 'Merging %s with %s' % (ref_file, def_file) + updated_lines = match(def_msgs, ref_msgs) + if option.verbose: + print >> sys.stderr, ' done.' + if not option.update: + write(updated_lines, option.outfile) + elif option.update and changes(updated_lines, io(def_file).readlines()): + write(updated_lines, def_file) + +def merge_dir(directory, pot = False, include = [], exclude = [], + verbose = True): + ''' + Tries to merge a directory of po files. Uses simple glob to find + po files and pot file. The parameter can be used to specify + the pot file in the directory. If the list is given only + files in this list is merged. 
Use the list to exclude + files to be merged. This function is only useful if po files and + pot file are in the same directory. Set to get + information when running. + ''' + if directory[-1] == '/': + directory = os.path.dirname(directory) + if pot: + pot = os.path.basename(pot) + else: + pot = glob.glob('%s/*.pot' % directory) + if not pot: + raise MsgmergeError('No pot file found.') + elif len(pot) > 1: + raise MsgmergeError('More than one pot file found: %s.' % pot) + pot = os.path.basename(pot[0]) + + if not include: + pos = glob.glob('%s/*po' % directory) + if not len(pos) > 1: + raise MsgmergeError('No po file(s) found.') + pos = [ os.path.basename(po) for po in pos ] + else: + pos = [ os.path.basename(po) for po in include ] + + for po in exclude: + try: + pos.remove(po) + except ValueError: + pass + format = '%s/%s' + for po in pos: + try: + merge(format % (directory, po), format % (directory, pot), + update = True, verbose = verbose, + outfile = format % (directory, po)) + except MsgmergeError, err: + if verbose: + print >> sys.stderr, '%s Not updated.' % err + else: + print >> sys.stderr, '%s %s not updated.' % (err, po) + +if __name__ == '__main__': + cmdline() diff --git a/scripts/build/pygettext.py b/scripts/build/pygettext.py new file mode 100644 index 00000000000..c20e4320d7a --- /dev/null +++ b/scripts/build/pygettext.py @@ -0,0 +1,672 @@ +#! /usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Originally written by Barry Warsaw +# +# Minimally patched to make it even more xgettext compatible +# by Peter Funk +# +# 2002-11-22 Jürgen Hermann +# Added checks that _() only contains string literals, and +# command line args are resolved to module lists, i.e. you +# can now pass a filename, a module or package name, or a +# directory (including globbing chars, important for Win32). +# Made docstring fit in 80 chars wide displays using pydoc. 
+# + +# for selftesting +try: + import fintl + _ = fintl.gettext +except ImportError: + _ = lambda s: s + +__doc__ = _("""pygettext -- Python equivalent of xgettext(1) + +Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the +internationalization of C programs. Most of these tools are independent of +the programming language and can be used from within Python programs. +Martin von Loewis' work[1] helps considerably in this regard. + +There's one problem though; xgettext is the program that scans source code +looking for message strings, but it groks only C (or C++). Python +introduces a few wrinkles, such as dual quoting characters, triple quoted +strings, and raw strings. xgettext understands none of this. + +Enter pygettext, which uses Python's standard tokenize module to scan +Python source code, generating .pot files identical to what GNU xgettext[2] +generates for C and C++ code. From there, the standard GNU tools can be +used. + +A word about marking Python strings as candidates for translation. GNU +xgettext recognizes the following keywords: gettext, dgettext, dcgettext, +and gettext_noop. But those can be a lot of text to include all over your +code. C and C++ have a trick: they use the C preprocessor. Most +internationalized C source includes a #define for gettext() to _() so that +what has to be written in the source is much less. Thus these are both +translatable strings: + + gettext("Translatable String") + _("Translatable String") + +Python of course has no preprocessor so this doesn't work so well. Thus, +pygettext searches only for _() by default, but see the -k/--keyword flag +below for how to augment this. + + [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html + [2] http://www.gnu.org/software/gettext/gettext.html + +NOTE: pygettext attempts to be option and feature compatible with GNU +xgettext where ever possible. However some options are still missing or are +not fully implemented. 
Also, xgettext's use of command line switches with +option arguments is broken, and in these cases, pygettext just defines +additional switches. + +Usage: pygettext [options] inputfile ... + +Options: + + -a + --extract-all + Extract all strings. + + -d name + --default-domain=name + Rename the default output file from messages.pot to name.pot. + + -E + --escape + Replace non-ASCII characters with octal escape sequences. + + -D + --docstrings + Extract module, class, method, and function docstrings. These do + not need to be wrapped in _() markers, and in fact cannot be for + Python to consider them docstrings. (See also the -X option). + + -h + --help + Print this help message and exit. + + -k word + --keyword=word + Keywords to look for in addition to the default set, which are: + %(DEFAULTKEYWORDS)s + + You can have multiple -k flags on the command line. + + -K + --no-default-keywords + Disable the default set of keywords (see above). Any keywords + explicitly added with the -k/--keyword option are still recognized. + + --no-location + Do not write filename/lineno location comments. + + -n + --add-location + Write filename/lineno location comments indicating where each + extracted string is found in the source. These lines appear before + each msgid. The style of comments is controlled by the -S/--style + option. This is the default. + + -o filename + --output=filename + Rename the default output file from messages.pot to filename. If + filename is `-' then the output is sent to standard out. + + -p dir + --output-dir=dir + Output files will be placed in directory dir. + + -S stylename + --style stylename + Specify which style to use for location comments. Two styles are + supported: + + Solaris # File: filename, line: line-number + GNU #: filename:line + + The style name is case insensitive. GNU style is the default. + + -v + --verbose + Print the names of the files being processed. + + -V + --version + Print the version of pygettext and exit. 
+ + -w columns + --width=columns + Set width of output to columns. + + -x filename + --exclude-file=filename + Specify a file that contains a list of strings that are not be + extracted from the input files. Each string to be excluded must + appear on a line by itself in the file. + + -X filename + --no-docstrings=filename + Specify a file that contains a list of files (one per line) that + should not have their docstrings extracted. This is only useful in + conjunction with the -D option above. + +If `inputfile' is -, standard input is read. +""") + +import os +import imp +import sys +import glob +import time +import getopt +import token +import tokenize +import operator + +from umit.pm.core.const import PM_VERSION + +__version__ = '1.5' + +default_keywords = ['_'] +DEFAULTKEYWORDS = ', '.join(default_keywords) + +EMPTYSTRING = '' + + + +# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's +# there. +pot_header = _('''\ +# PacketManipulator catalog. +# Copyright (C) 2009 Adriano Montero Marques +# Francesco Piccinno , 2009 +# +msgid "" +msgstr "" +"Project-Id-Version: PacketManipulator %(pm_version)s\\n" +"POT-Creation-Date: %(time)s\\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" +"Last-Translator: FULL NAME \\n" +"Language-Team: LANGUAGE \\n" +"MIME-Version: 1.0\\n" +"Content-Type: text/plain; charset=UTF-8\\n" +"Content-Transfer-Encoding: 8bit\\n" +"Generated-By: pygettext.py %(version)s\\n" + +''') + + +def usage(code, msg=''): + print >> sys.stderr, __doc__ % globals() + if msg: + print >> sys.stderr, msg + sys.exit(code) + + + +escapes = [] + +def make_escapes(pass_iso8859): + global escapes + if pass_iso8859: + # Allow iso-8859 characters to pass through so that e.g. 'msgid + # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we + # escape any character outside the 32..126 range. 
+ mod = 128 + else: + mod = 256 + for i in range(256): + if 32 <= (i % mod) <= 126: + escapes.append(chr(i)) + else: + escapes.append("\\%03o" % i) + escapes[ord('\\')] = '\\\\' + escapes[ord('\t')] = '\\t' + escapes[ord('\r')] = '\\r' + escapes[ord('\n')] = '\\n' + escapes[ord('\"')] = '\\"' + + +def escape(s): + global escapes + s = list(s) + for i in range(len(s)): + s[i] = escapes[ord(s[i])] + return EMPTYSTRING.join(s) + + +def safe_eval(s): + # unwrap quotes, safely + return eval(s, {'__builtins__':{}}, {}) + + +def normalize(s): + # This converts the various Python string types into a format that is + # appropriate for .po files, namely much closer to C style. + lines = s.split('\n') + if len(lines) == 1: + s = '"' + escape(s) + '"' + else: + if not lines[-1]: + del lines[-1] + lines[-1] = lines[-1] + '\n' + for i in range(len(lines)): + lines[i] = escape(lines[i]) + lineterm = '\\n"\n"' + s = '""\n"' + lineterm.join(lines) + '"' + return s + + +def containsAny(str, set): + """Check whether 'str' contains ANY of the chars in 'set'""" + return 1 in [c in str for c in set] + + +def _visit_pyfiles(list, dirname, names): + """Helper for getFilesForName().""" + # get extension for python source files + if not globals().has_key('_py_ext'): + global _py_ext + _py_ext = [triple[0] for triple in imp.get_suffixes() + if triple[2] == imp.PY_SOURCE][0] + + # don't recurse into CVS directories + if 'CVS' in names: + names.remove('CVS') + + # add all *.py files to list + list.extend( + [os.path.join(dirname, file) for file in names + if os.path.splitext(file)[1] == _py_ext] + ) + + +def _get_modpkg_path(dotted_name, pathlist=None): + """Get the filesystem path for a module or a package. + + Return the file system path to a file for a module, and to a directory for + a package. Return None if the name is not found, or is a builtin or + extension module. 
+ """ + # split off top-most name + parts = dotted_name.split('.', 1) + + if len(parts) > 1: + # we have a dotted path, import top-level package + try: + file, pathname, description = imp.find_module(parts[0], pathlist) + if file: file.close() + except ImportError: + return None + + # check if it's indeed a package + if description[2] == imp.PKG_DIRECTORY: + # recursively handle the remaining name parts + pathname = _get_modpkg_path(parts[1], [pathname]) + else: + pathname = None + else: + # plain name + try: + file, pathname, description = imp.find_module( + dotted_name, pathlist) + if file: + file.close() + if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: + pathname = None + except ImportError: + pathname = None + + return pathname + + +def getFilesForName(name): + """Get a list of module files for a filename, a module or package name, + or a directory. + """ + if not os.path.exists(name): + # check for glob chars + if containsAny(name, "*?[]"): + files = glob.glob(name) + list = [] + for file in files: + list.extend(getFilesForName(file)) + return list + + # try to find module or package + name = _get_modpkg_path(name) + if not name: + return [] + + if os.path.isdir(name): + # find all python files in directory + list = [] + os.path.walk(name, _visit_pyfiles, list) + return list + elif os.path.exists(name): + # a single file + return [name] + + return [] + + +class TokenEater: + def __init__(self, options): + self.__options = options + self.__messages = {} + self.__state = self.__waiting + self.__data = [] + self.__lineno = -1 + self.__freshmodule = 1 + self.__curfile = None + + def __call__(self, ttype, tstring, stup, etup, line): + # dispatch +## import token +## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ +## 'tstring:', tstring + self.__state(ttype, tstring, stup[0]) + + def __waiting(self, ttype, tstring, lineno): + opts = self.__options + # Do docstring extractions, if enabled + if opts.docstrings and not 
opts.nodocstrings.get(self.__curfile): + # module docstring? + if self.__freshmodule: + if ttype == tokenize.STRING: + self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__freshmodule = 0 + elif ttype not in (tokenize.COMMENT, tokenize.NL): + self.__freshmodule = 0 + return + # class docstring? + if ttype == tokenize.NAME and tstring in ('class', 'def'): + self.__state = self.__suiteseen + return + if ttype == tokenize.NAME and tstring in opts.keywords: + self.__state = self.__keywordseen + + def __suiteseen(self, ttype, tstring, lineno): + # ignore anything until we see the colon + if ttype == tokenize.OP and tstring == ':': + self.__state = self.__suitedocstring + + def __suitedocstring(self, ttype, tstring, lineno): + # ignore any intervening noise + if ttype == tokenize.STRING: + self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__state = self.__waiting + elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, + tokenize.COMMENT): + # there was no class docstring + self.__state = self.__waiting + + def __keywordseen(self, ttype, tstring, lineno): + if ttype == tokenize.OP and tstring == '(': + self.__data = [] + self.__lineno = lineno + self.__state = self.__openseen + else: + self.__state = self.__waiting + + def __openseen(self, ttype, tstring, lineno): + if ttype == tokenize.OP and tstring == ')': + # We've seen the last of the translatable strings. Record the + # line number of the first line of the strings and update the list + # of messages seen. Reset state for the next batch. If there + # were no strings inside _(), then just ignore this entry. 
+ if self.__data: + self.__addentry(EMPTYSTRING.join(self.__data)) + self.__state = self.__waiting + elif ttype == tokenize.STRING: + self.__data.append(safe_eval(tstring)) + elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, + token.NEWLINE, tokenize.NL]: + # warn if we see anything else than STRING or whitespace + print >> sys.stderr, _( + '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' + ) % { + 'token': tstring, + 'file': self.__curfile, + 'lineno': self.__lineno + } + self.__state = self.__waiting + + def __addentry(self, msg, lineno=None, isdocstring=0): + if lineno is None: + lineno = self.__lineno + if not msg in self.__options.toexclude: + entry = (self.__curfile, lineno) + self.__messages.setdefault(msg, {})[entry] = isdocstring + + def set_filename(self, filename): + self.__curfile = filename + self.__freshmodule = 1 + + def write(self, fp): + options = self.__options + timestamp = time.strftime('%Y-%m-%d %H:%M+%Z') + # The time stamp in the header doesn't have the same format as that + # generated by xgettext... + print >> fp, pot_header % {'time': timestamp, 'version': __version__, + 'pm_version': PM_VERSION} + # Sort the entries. First sort each particular entry's keys, then + # sort all the entries by their first item. + reverse = {} + for k, v in self.__messages.items(): + keys = v.keys() + keys.sort() + reverse.setdefault(tuple(keys), []).append((k, v)) + rkeys = reverse.keys() + rkeys.sort() + for rkey in rkeys: + rentries = reverse[rkey] + rentries.sort() + for k, v in rentries: + isdocstring = 0 + # If the entry was gleaned out of a docstring, then add a + # comment stating so. This is to aid translators who may wish + # to skip translating some unimportant docstrings. + if reduce(operator.__add__, v.values()): + isdocstring = 1 + # k is the message string, v is a dictionary-set of (filename, + # lineno) tuples. We want to sort the entries in v first by + # file name and then by line number. 
+ v = v.keys() + v.sort() + if not options.writelocations: + pass + # location comments are different b/w Solaris and GNU: + elif options.locationstyle == options.SOLARIS: + for filename, lineno in v: + d = {'filename': filename, 'lineno': lineno} + print >>fp, _( + '# File: %(filename)s, line: %(lineno)d') % d + elif options.locationstyle == options.GNU: + # fit as many locations on one line, as long as the + # resulting line length doesn't exceeds 'options.width' + locline = '#:' + for filename, lineno in v: + d = {'filename': filename, 'lineno': lineno} + s = _(' %(filename)s:%(lineno)d') % d + if len(locline) + len(s) <= options.width: + locline = locline + s + else: + print >> fp, locline + locline = "#:" + s + if len(locline) > 2: + print >> fp, locline + if isdocstring: + print >> fp, '#, docstring' + print >> fp, 'msgid', normalize(k) + print >> fp, 'msgstr ""\n' + + + +def main(): + global default_keywords + try: + opts, args = getopt.getopt( + sys.argv[1:], + 'ad:DEhk:Kno:p:S:Vvw:x:X:', + ['extract-all', 'default-domain=', 'escape', 'help', + 'keyword=', 'no-default-keywords', + 'add-location', 'no-location', 'output=', 'output-dir=', + 'style=', 'verbose', 'version', 'width=', 'exclude-file=', + 'docstrings', 'no-docstrings', + ]) + except getopt.error, msg: + usage(1, msg) + + # for holding option values + class Options: + # constants + GNU = 1 + SOLARIS = 2 + # defaults + extractall = 0 # FIXME: currently this option has no effect at all. 
+ escape = 0 + keywords = [] + outpath = '' + outfile = 'messages.pot' + writelocations = 1 + locationstyle = GNU + verbose = 0 + width = 78 + excludefilename = '' + docstrings = 0 + nodocstrings = {} + + options = Options() + locations = {'gnu' : options.GNU, + 'solaris' : options.SOLARIS, + } + + # parse options + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + elif opt in ('-a', '--extract-all'): + options.extractall = 1 + elif opt in ('-d', '--default-domain'): + options.outfile = arg + '.pot' + elif opt in ('-E', '--escape'): + options.escape = 1 + elif opt in ('-D', '--docstrings'): + options.docstrings = 1 + elif opt in ('-k', '--keyword'): + options.keywords.append(arg) + elif opt in ('-K', '--no-default-keywords'): + default_keywords = [] + elif opt in ('-n', '--add-location'): + options.writelocations = 1 + elif opt in ('--no-location',): + options.writelocations = 0 + elif opt in ('-S', '--style'): + options.locationstyle = locations.get(arg.lower()) + if options.locationstyle is None: + usage(1, _('Invalid value for --style: %s') % arg) + elif opt in ('-o', '--output'): + options.outfile = arg + elif opt in ('-p', '--output-dir'): + options.outpath = arg + elif opt in ('-v', '--verbose'): + options.verbose = 1 + elif opt in ('-V', '--version'): + print _('pygettext.py (xgettext for Python) %s') % __version__ + sys.exit(0) + elif opt in ('-w', '--width'): + try: + options.width = int(arg) + except ValueError: + usage(1, _('--width argument must be an integer: %s') % arg) + elif opt in ('-x', '--exclude-file'): + options.excludefilename = arg + elif opt in ('-X', '--no-docstrings'): + fp = open(arg) + try: + while 1: + line = fp.readline() + if not line: + break + options.nodocstrings[line[:-1]] = 1 + finally: + fp.close() + + # calculate escapes + make_escapes(options.escape) + + # calculate all keywords + options.keywords.extend(default_keywords) + + # initialize list of strings to exclude + if options.excludefilename: + try: + fp = 
open(options.excludefilename) + options.toexclude = fp.readlines() + fp.close() + except IOError: + print >> sys.stderr, _( + "Can't read --exclude-file: %s") % options.excludefilename + sys.exit(1) + else: + options.toexclude = [] + + # resolve args to module lists + expanded = [] + for arg in args: + if arg == '-': + expanded.append(arg) + else: + expanded.extend(getFilesForName(arg)) + args = expanded + + # slurp through all the files + eater = TokenEater(options) + for filename in args: + if filename == '-': + if options.verbose: + print _('Reading standard input') + fp = sys.stdin + closep = 0 + else: + if options.verbose: + print _('Working on %s') % filename + fp = open(filename) + closep = 1 + try: + eater.set_filename(filename) + try: + tokenize.tokenize(fp.readline, eater) + except tokenize.TokenError, e: + print >> sys.stderr, '%s: %s, line %d, column %d' % ( + e[0], filename, e[1][0], e[1][1]) + finally: + if closep: + fp.close() + + # write the output + if options.outfile == '-': + fp = sys.stdout + closep = 0 + else: + if options.outpath: + options.outfile = os.path.join(options.outpath, options.outfile) + fp = open(options.outfile, 'w') + closep = 1 + try: + eater.write(fp) + finally: + if closep: + fp.close() + + +if __name__ == '__main__': + main() + # some more test strings + _(u'a unicode string') + # this one creates a warning + _('*** Seen unexpected token "%(token)s"') % {'token': 'test'} + _('more' 'than' 'one' 'string')