Placed official version, and removed other files since they only work with parsing Python (nw)

Miodrag Milanovic 2016-02-21 14:45:38 +01:00
parent 78ada55150
commit be7cc43b08
4 changed files with 45 additions and 1875 deletions

View File

@@ -1,490 +0,0 @@
#! /usr/bin/env python
#
# check_po - a gramps tool to check validity of po files
#
# Copyright (C) 2006-2006 Kees Bakker
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# TODO
#
# * Check for HTML text in msgstr when there is none in msgid
# * Check for matching HTML tag/endtag in msgstr
#
# Adapted for Umit by Guilherme Polo, original file:
# https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po
import re
import sys
from optparse import OptionParser
APP = "Umit"
all_total = {}
all_fuzzy = {}
all_untranslated = {}
all_percent_s = {}
all_named_s = {}
all_bnamed_s = {}
all_context = {}
all_coverage = {}
all_template_coverage = {}
def strip_quotes(st):
if len(st) >= 2 and st[0] == '"' and st[len(st)-1] == '"':
st = st.strip()[1:-1]
return st
# This is a base class for all checks
class Check:
def __init__( self ):
self.msgs = []
def diag( self ):
if len( self.msgs ):
print
print self.diag_header
for m in self.msgs:
m.diag()
def summary( self ):
print "%-20s%d" % ( self.summary_text, len(self.msgs) )
class Check_fmt( Check ):
def __init__( self, fmt ):
Check.__init__( self )
self.diag_header = "-------- %s mismatches --------------" % fmt
self.summary_text = "%s mismatches:" % fmt
self.fmt = fmt
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
cnt1 = msgid.count( self.fmt )
cnt2 = msgstr.count( self.fmt )
if cnt1 != cnt2:
self.msgs.append( msg )
class Check_named_fmt( Check ):
# A pattern to find all %()
find_named_fmt_pat = re.compile('% \( \w+ \) \d* \D', re.VERBOSE)
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- %() name mismatches --------------"
self.summary_text = "%() name mismatches:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# Same number of named formats?
fmts1 = self.find_named_fmt_pat.findall( msgid )
fmts2 = self.find_named_fmt_pat.findall( msgstr )
if len( fmts1 ) != len( fmts2 ):
self.msgs.append( msg )
else:
# Do we have the same named formats?
fmts1.sort()
fmts2.sort()
if fmts1 != fmts2:
self.msgs.append( msg )
class Check_missing_sd( Check ):
# A pattern to find %() without s or d
# Here is a command to use for testing
# print re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' )
find_named_fmt_pat2 = re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE)
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
self.summary_text = "%() missing s/d:"
def process( self, msg ):
msgstr = msg.msgstr
fmts = self.find_named_fmt_pat2.findall( msgstr )
for f in fmts:
if not f in ('s', 'd'):
self.msgs.append( msg )
break
class Check_runaway( Check ):
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- Runaway context in translation ---------"
self.summary_text = "Runaway context:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# Runaway context. In the translated part we only want to see
# the translation of the word after the |
if msgid.count('|') > 0 and msgstr.count('|') > 0 and msgid != msgstr:
self.msgs.append( msg )
class Check_xml_chars( Check ):
# Special XML characters
# It is not allowed to have a quote, an ampersand or an angle bracket
xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- unescaped XML special characters ---------"
self.summary_text = "XML special chars:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# XML errors
# Only look at messages in the tips.xml
if msg.is_tips_xml:
if self.xml_chars_pat.search( msgstr ):
self.msgs.append( msg )
class Check_last_char( Check ):
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- last character not identical ---------"
self.summary_text = "Last character:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# Last character of msgid? White space? Period?
if msg.is_fuzzy:
return
msgid_last = msgid[-1:]
msgstr_last = msgstr[-1:]
if msgid_last.isspace() != msgstr_last.isspace():
self.msgs.append( msg )
elif (msgid_last == '.') != (msgstr_last == '.'):
self.msgs.append( msg )
class Check_shortcut_trans( Check ):
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- shortcut key in translation ---------"
self.summary_text = "Shortcut in msgstr:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
if msgid.count('_') == 0 and msgstr.count('_') > 0:
self.msgs.append( msg )
class Msgid:
fuzzy_pat = re.compile( 'fuzzy' )
tips_xml_pat = re.compile( r'tips\.xml' )
def __init__( self, msgnr, lineno ):
self._msgid = []
self._msgstr = []
self.msgid = ''
self.msgstr = ''
self._cmnt = []
self.nr = msgnr
self.lineno = lineno
self.is_fuzzy = 0
self.is_tips_xml = 0
def diag( self ):
if 1:
print
print "msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, self.is_fuzzy and " (fuzzy)" or "" )
sys.stdout.write( ''.join( self._msgid ) )
sys.stdout.write( ''.join( self._msgstr ) )
else:
# Compatible with the old check_po
print "%d '%s' : '%s'" % ( self.lineno, self.msgid, self.msgstr )
def add_msgid( self, line, lineno ):
self._msgid.append( line )
line = re.sub( r'msgid\s+', '', line )
line = line.strip()
if line[0] != '"' or line[-1:] != '"':
print "ERROR at line %d: Missing quote." % lineno
line = strip_quotes( line )
self.msgid += line
def add_msgstr( self, line, lineno ):
self._msgstr.append( line )
line = re.sub( r'msgstr\s+', '', line )
line = line.strip()
if line[0] != '"' or line[-1:] != '"':
print "ERROR at line %d: Missing quote." % lineno
line = strip_quotes( line )
self.msgstr += line
def add_cmnt( self, line ):
self._cmnt.append( line )
if not self.is_fuzzy and self.fuzzy_pat.search( line ):
self.is_fuzzy = 1
if not self.is_tips_xml and self.tips_xml_pat.search( line ):
self.is_tips_xml = 1
def read_msgs( fname ):
empty_pat = re.compile( r'^ \s* $', re.VERBOSE )
comment_pat = re.compile( r'\#', re.VERBOSE )
msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE )
msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE )
str_pat = re.compile( r'"', re.VERBOSE )
old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE )
msgnr = 0 # This is the message number of the next message to read. The first real message is 1.
f = open( fname )
lines = f.readlines()
# parse it like a statemachine
NONE = 0 # Nothing detected, yet
CMNT = 1 # Inside comment part
MSGID = 2 # Inside msgid part
MSGSTR = 3 # Inside msgstr part
STR = 4 # A continuation string
OLD = 5 # An old pattern with #~
state = NONE
msg = None
msgs = []
for ix in range( len(lines) ): # Use line numbers for messages
line = lines[ix]
lineno = ix + 1
m = empty_pat.match( line )
if m:
continue # Empty lines are not interesting
# What's the next state?
if old_pat.match( line ):
next_state = OLD
elif comment_pat.match( line ):
next_state = CMNT
elif msgid_pat.match( line ):
next_state = MSGID
elif msgstr_pat.match( line ):
next_state = MSGSTR
elif str_pat.match( line ):
next_state = STR
else:
print 'WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars()
next_state = NONE
#print "%(state)d->%(next_state)d\t%(line)s" % vars()
if state == NONE:
# expect msgid or comment or old stuff
if next_state == CMNT:
state = CMNT
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_cmnt( line )
elif next_state == MSGID:
state = MSGID
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgid( line, lineno )
elif next_state == MSGSTR:
print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
state = MSGSTR
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgstr( line, lineno )
elif next_state == STR:
print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
elif next_state == OLD:
pass # Just skip
elif state == CMNT:
if next_state == CMNT:
if msg:
msg.add_cmnt( line )
else:
# Note. We may need to do something about these comments
# Skip for now
pass
elif next_state == MSGID:
state = MSGID
if not msg:
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgid( line, lineno )
elif next_state == MSGSTR:
print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
state = MSGSTR
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgstr( line, lineno )
elif next_state == STR:
print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
elif next_state == OLD:
msg = None
pass # Just skip
elif state == MSGID:
if next_state == CMNT:
# Hmmm. A comment here?
print 'WARNING: Unexpected comment at %(fname)s:%(lineno)d' % vars()
elif next_state == MSGID:
raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % vars() )
elif next_state == MSGSTR:
state = MSGSTR
msg.add_msgstr( line, lineno )
elif next_state == STR:
msg.add_msgid( line, lineno )
elif next_state == OLD:
msg = None
pass # Just skip
elif state == MSGSTR:
if next_state == CMNT:
# A comment probably starts a new item
state = CMNT
msg = Msgid( msgnr, lineno )
msgnr += 1
msgs.append( msg )
msg.add_cmnt( line )
elif next_state == MSGID:
state = MSGID
msg = Msgid( msgnr, lineno )
msgnr += 1
msgs.append( msg )
msg.add_msgid( line, lineno )
elif next_state == MSGSTR:
raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % vars() )
elif next_state == STR:
msg.add_msgstr( line, lineno )
elif next_state == OLD:
msg = None
pass # Just skip
else:
raise Exception( 'Unexpected state in po parsing (state = %d)' % state )
# Strip items with just comments. (Can this happen?)
msgs1 = []
for m in msgs:
if not m.msgid and not m.msgstr:
#print "INFO: No msgid or msgstr at %s:%s" % ( fname, m.lineno )
pass
else:
msgs1.append( m )
msgs = msgs1
return msgs
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
nr_fuzzy = 0
nr_untranslated = 0
checks = []
checks.append( Check_fmt( '%s' ) )
checks.append( Check_fmt( '%d' ) )
checks.append( Check_named_fmt() )
checks.append( Check_missing_sd() )
checks.append( Check_runaway() )
checks.append( Check_xml_chars() )
checks.append( Check_last_char() )
checks.append( Check_shortcut_trans() )
for msg in msgs:
msgid = msg.msgid
msgstr = msg.msgstr
#print
#print "msgid: %(msgid)s" % vars()
#print "msgstr: %(msgstr)s" % vars()
if not msgstr:
nr_untranslated += 1
continue
if msg.is_fuzzy:
nr_fuzzy += 1
if options.skip_fuzzy:
continue
for c in checks:
c.process( msg )
nr_msgs = len(msgs)
if nth > 0:
print
print "====================================="
print "%-20s%s" % ( "File:", fname )
print "%-20s%d" % ( "Template total:", nr_templates )
print "%-20s%d" % ( "PO total:", nr_msgs )
print "%-20s%d" % ( "Fuzzy:", nr_fuzzy )
print "%-20s%d" % ( "Untranslated:", nr_untranslated )
for c in checks:
c.summary()
po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
print "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage )
template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
print "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage )
if not options.only_summary:
for c in checks:
c.diag()
def main(args):
if len(sys.argv) < 2:
print "Error: Especify the umit.pot file path"
sys.exit(1)
parser = OptionParser(description="This program validates a PO file for "
"%s." % APP, usage='%prog [options] po-file...' )
parser.add_option("", "--skip-fuzzy",
action="store_true", dest="skip_fuzzy", default=False,
help="skip fuzzies")
parser.add_option("-s", "--only-summary",
action="store_true", dest="only_summary", default=False,
help="only give the summary")
options, args = parser.parse_args()
try:
pot_msgs = read_msgs(sys.argv[1])
nr_templates = len(pot_msgs)
nth = 0
for fname in args:
msgs = read_msgs(fname)
analyze_msgs(options, fname, msgs, nr_templates, nth)
nth += 1
except Exception, e:
print e
if __name__ == "__main__":
main(sys.argv)
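
The removed check_po validators above mostly compare printf-style placeholders between msgid and msgstr. As a rough standalone sketch of the named-placeholder check (simplified regex and names of our own, not code from the removed file):

import re

# Simplified stand-in for Check_named_fmt: flag entries whose %(name)s / %(name)d
# placeholders differ between the original string and its translation.
NAMED_FMT = re.compile(r'%\(\w+\)[sd]')

def named_placeholders_match(msgid, msgstr):
    """True when both strings use the same multiset of named placeholders."""
    return sorted(NAMED_FMT.findall(msgid)) == sorted(NAMED_FMT.findall(msgstr))

print("ok=%s bad=%s" % (
    named_placeholders_match('%(event_name)s: %(place)s', '%(place)s: %(event_name)s'),
    named_placeholders_match('%(event_name)s: %(place)s', '%(event)s: %(place)s')))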

View File

@@ -1,14 +1,6 @@
#! /usr/bin/env python
#!/usr/bin/env python2
# -*- coding: iso-8859-1 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
#
# Changelog: (Guilherme Polo)
# 2008-04-11
# - Support for files with BOM UTF8 mark.
#
# 2008-04-10
# - Support for fuzzy strings in output.
# - Bumped to version 1.1.1
"""Generate binary message catalog from textual translation description.
@@ -24,10 +16,6 @@ Options:
Specify the output file to write to. If omitted, output will go to a
file named filename.mo (based off the input file name).
-f
--use-fuzzy
Use fuzzy entries in output
-h
--help
Print this message and exit.
@@ -35,23 +23,21 @@ Options:
-V
--version
Display version information and exit.
Before using the -f (fuzzy) option, read this:
http://www.finesheer.com:8457/cgi-bin/info2html?(gettext)Fuzzy%20Entries&lang=en
"""
import sys
import os
import sys
import ast
import getopt
import struct
import array
import codecs
__version__ = "1.1.1"
__version__ = "1.1"
MESSAGES = {}
def usage(code, msg=''):
print >> sys.stderr, __doc__
if msg:
@@ -59,13 +45,15 @@ def usage(code, msg=''):
sys.exit(code)
def add(id, str, fuzzy, use_fuzzy):
"Add a translation to the dictionary."
def add(id, str, fuzzy):
"Add a non-fuzzy translation to the dictionary."
global MESSAGES
if (not fuzzy or use_fuzzy) and str:
if not fuzzy and str:
MESSAGES[id] = str
def generate():
"Return the generated output."
global MESSAGES
@@ -108,7 +96,8 @@ def generate():
return output
def make(filename, outfile, use_fuzzy):
def make(filename, outfile):
ID = 1
STR = 2
@@ -122,8 +111,6 @@ def make(filename, outfile, use_fuzzy):
try:
lines = open(infile).readlines()
if lines[0].startswith(codecs.BOM_UTF8):
lines[0] = lines[0][len(codecs.BOM_UTF8):]
except IOError, msg:
print >> sys.stderr, msg
sys.exit(1)
@@ -137,7 +124,7 @@ def make(filename, outfile, use_fuzzy):
lno += 1
# If we get a comment line after a msgstr, this is a new entry
if l[0] == '#' and section == STR:
add(msgid, msgstr, fuzzy, use_fuzzy)
add(msgid, msgstr, fuzzy)
section = None
fuzzy = 0
# Record a fuzzy mark
@@ -147,22 +134,44 @@ def make(filename, outfile, use_fuzzy):
if l[0] == '#':
continue
# Now we are in a msgid section, output previous section
if l.startswith('msgid'):
if l.startswith('msgid') and not l.startswith('msgid_plural'):
if section == STR:
add(msgid, msgstr, fuzzy, use_fuzzy)
add(msgid, msgstr, fuzzy)
section = ID
l = l[5:]
msgid = msgstr = ''
is_plural = False
# This is a message with plural forms
elif l.startswith('msgid_plural'):
if section != ID:
print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
(infile, lno)
sys.exit(1)
l = l[12:]
msgid += '\0' # separator of singular and plural
is_plural = True
# Now we are in a msgstr section
elif l.startswith('msgstr'):
section = STR
l = l[6:]
if l.startswith('msgstr['):
if not is_plural:
print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
(infile, lno)
sys.exit(1)
l = l.split(']', 1)[1]
if msgstr:
msgstr += '\0' # Separator of the various plural forms
else:
if is_plural:
print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\
(infile, lno)
sys.exit(1)
l = l[6:]
# Skip empty lines
l = l.strip()
if not l:
continue
# XXX: Does this always follow Python escape semantics?
l = eval(l)
l = ast.literal_eval(l)
if section == ID:
msgid += l
elif section == STR:
@@ -174,7 +183,7 @@ def make(filename, outfile, use_fuzzy):
sys.exit(1)
# Add last entry
if section == STR:
add(msgid, msgstr, fuzzy, use_fuzzy)
add(msgid, msgstr, fuzzy)
# Compute output
output = generate()
@@ -185,15 +194,15 @@ def make(filename, outfile, use_fuzzy):
print >> sys.stderr, msg
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'hVo:f',
['help', 'version', 'output-file=', 'use-fuzzy'])
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
['help', 'version', 'output-file='])
except getopt.error, msg:
usage(1, msg)
outfile = None
use_fuzzy = False
# parse options
for opt, arg in opts:
if opt in ('-h', '--help'):
@@ -201,8 +210,6 @@ def main():
elif opt in ('-V', '--version'):
print >> sys.stderr, "msgfmt.py", __version__
sys.exit(0)
elif opt in ('-f', '--use-fuzzy'):
use_fuzzy = True
elif opt in ('-o', '--output-file'):
outfile = arg
# do it
@@ -212,7 +219,7 @@ def main():
return
for filename in args:
make(filename, outfile, use_fuzzy)
make(filename, outfile)
if __name__ == '__main__':
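
Two details of the msgfmt.py changes shown above may be worth a standalone illustration: ast.literal_eval unquotes the C-style string literals used in PO files without executing arbitrary code the way eval() could, and plural entries are stored with NUL separators. A small sketch under those assumptions (not part of msgfmt.py itself):

import ast

# A PO line body is a quoted, escaped string literal; literal_eval decodes it safely.
line = '"Hello \\"world\\"\\n"'
print(ast.literal_eval(line))   # -> Hello "world" followed by a newline

# Plural entries: msgid and msgid_plural are joined by '\0', and the indexed
# msgstr[n] forms are joined by '\0' as well, as the comments in make() describe.
key = 'file' + '\0' + 'files'
value = 'Datei' + '\0' + 'Dateien'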

View File

@@ -1,675 +0,0 @@
#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
#
# Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003.
#
'''Merge two Uniforum style .po files together.
This is an implementation (not complete) in Python of the GNU
msgmerge(1) program. It can be used on the command line (or as a Python
module).
Usage: msgmerge.py [OPTIONS] def.po ref.pot
The def.po file is an existing PO file with translations. The ref.pot
file is the last created PO file with up-to-date source references but
old translations, or a PO Template file.
Options:
-U, --update update def.po,
do nothing if def.po is already up to date.
-o, --output-file=FILE write output to file FILE. Output is written to
stdout if set to - or if the option is not present.
-D, --docstrings don\'t remove docstring flag.
-h, --help display help text and exit.
-V, --version display version and exit.
-q, --quiet, --silent suppress progress indicators.
'''
from __future__ import generators
if not __name__ == '__main__':
__doc__ += '''\
When used as module the interesting functions are merge() and
merge_dir().
The merge() function does the same as the command line version, and
the arguments are as follows. The first argument is the def.po file,
then the ref.pot file. The third argument controls whether do work in
update mode or not, then the next argument sets the output file. Set
the next argument to False to remove docstring flags. The last
argument can be used to suppress progress indicators. The default is
to work in update mode with progress indicators.
Example:
merge("def.po", "ref.pot")
merge the files def.po and ref.pot and write output to def.po if
there are any changes.
merge("def.po", "red.pot", docstrings = False, verbose = False,
update = False, outfile = "-")
merge the files def.po and ref.pot and write output to stdout,
remove docstring flag and be quiet.
The merge_dir() function is useful when merging a directory of po
files. The only required argument is the name of the directory with po
files and the pot file. It will use simple glob to find the files. The
second argument can be used to specify the pot file (in the
directory). Third argument is a list of po files (then globbing will
not be used) and the next argument is list of filename to exclude. The
last argument can be used to suppress progress indicators. Docstring
flag will not be removed.
Example:
merge_dir("po")
merge (and update) all po files in directory po with the single pot
file in the same directory.
The module raises the MsgmergeError exception in case of error.
'''
__revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $'
__version__ = '0.1'
name = 'msgmerge.py'
__all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ]
import sys
import re
import string
import getopt
import difflib
import glob
import os.path
import codecs
try:
True, False
except NameError:
True, False = 1, 0
class Msgs:
'''Class to hold information about messages.'''
width = 80
file = ''
def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
self.id = msgid
self.str = msgstr
self.cmt = kwds.get('cmt', '')
self.ref = kwds.get('ref', '')
self.autocmt = kwds.get('autocmt', '')
self.flag = flag
self.entry = entry
self.lno = lno
self.count = 0
def wash(self):
self.id = wash(self.id, width = self.width,
filename = self.file, lno = self.lno)
self.str = wash(self.str, 'msgstr', width = self.width,
filename = self.file, lno = self.lno)
def used(self):
self.count += 1
def get_clean_id(self):
return self.id.replace('msgid "','', 1)
def obsolete(self):
self.width -= len('#~ ')
self.wash()
t = [ '#~ %s\n' % s for s in self.id.splitlines() ]
self.id = ''.join(t)
t = [ '#~ %s\n' % s for s in self.str.splitlines() ]
self.str = ''.join(t)
class Options:
'''Class to hold options'''
def __init__(self, cmdline = False, **kwds):
if not cmdline:
self.update = kwds.get('update', True)
self.outfile = kwds.get('outfile', '-')
self.docstrings = kwds.get('docstrings', True)
self.verbose = kwds.get('verbose', False)
self.suffix = kwds.get('suffix', '~')
self.backup = kwds.get('backup', True)
else:
self.update = False
self.outfile = False
self.docstrings = False
self.verbose = True
self.suffix = '~'
self.backup = True
class MsgmergeError(Exception):
'''Exception class for msgmerge'''
def gen(lines):
'''
Generator which returns a line (with the obsolete prefix removed)
from the list of lines in <lines>, the line number is also
returned.
'''
lno = 0
for l in lines:
lno += 1
yield l.replace('#~ ', '', 1), lno
yield l, lno
def slurp(s, g, sign):
'''
The string returned from iterator <g>\'s next() method is added to
the string <s> if the returned string begins with the string
<sign>. The return value is the first returned string which does not
start with <sign>, the line number, the iterator <g> and the
(possibly) updated string <s>.
'''
l, lno = g.next()
while l.startswith(sign) or (sign == '# ' and l.strip() == '#'):
s += l
l, lno = g.next()
return l, lno, g, s
def splitted_fit(chunk, line, width, break_always, break_after_space):
'''
Check if string <chunk> can be split by a newline to fit into
string <line> with width smaller than <width>. The return value is
a tuple where the first element is the part of chunk which fits
and the second element is the rest of chunk.
'''
ret = '', chunk
l = len(chunk)
for i in range(l - 1, -1, -1):
if chunk[i] in break_always and len(chunk[0:i] + line) <= width:
ret = chunk[0:i], chunk[i:]
break
elif chunk[i] in break_after_space and i and chunk[i-1].strip() == '':
ret = chunk[0:i], chunk[i:]
break
elif chunk[i] == '\\' and len(chunk[i:]) > 1 and chunk[i+1] == '"' \
and len(chunk[0:i] + line) <= width:
ret = chunk[0:i], chunk[i:]
break
return ret
def wrap(msg, width):
'''
Accept a list <msg> of strings to wrap, each string is wrapped to
width <width> and surrounded with a pair of ". The return value is
a string with these wrapped strings joined together with newlines.
'''
if msg.isspace() or not msg:
return '"%s"' % msg
# \ and " is here, but " is special in po files.
break_always = '$%+({['
# XXX what about: « © » ¦ § etc?
break_after_space = '_-=^`~\'<|>&*#@'
enders = '.:,;!?/])}|%-'
extra = string.punctuation
for c in enders:
extra = extra.replace(c, '')
escaped = { 'enders' : re.escape(enders),
'extra' : re.escape(extra) }
regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped
r = re.compile(regex, re.UNICODE)
msg = [ m for m in r.split(msg) if not m == '']
lines = []
line = msg.pop(0)
# Handle \n on end of line
if len(msg) > 1 and msg[-1] == 'n' and len(msg[-2]) > 0 \
and msg[-2][-1] == '\\':
msg[-2] += msg[-1]
msg.pop()
# Do not allow a single \n on a line
if len(msg) > 2 and msg[-1] == '\\n':
msg[-2] += msg[-1]
msg.pop()
for m in msg:
if len(line) > width or len(m) > width or len(line + m) > width:
fit, rest = splitted_fit(m, line, width, break_always,
break_after_space)
line += fit
lines.append(line)
line = rest
else:
line += m
lines.append(line)
lines = [ '"%s"' % l for l in lines ]
return '\n'.join(lines)
def normalize(lines):
'''
Normalize <lines>: e.g "\n\nText\n\n" becomes:
"\n"
"\n"
"Text\n"
"\n"
'''
if 0 < lines.find('\\n') < len(lines) - 3:
if lines[-3:] == '\\n"':
lines = lines[:-3].replace('\\n','\\n"\n"').replace('""\n','') \
+ '\\n"'
else:
lines = lines.replace('\\n','\\n"\n"').replace('""\n','')
return lines
def wash(msg, idx = 'msgid', width = 80, **kwds):
'''
Do washing on the msgstr or msgid fields. Wrap the text to fit in
width <width>. <msg> is a list of lines that makes up the field.
<idx> indicate msgid or msgstr, <width> holds the width. <filename>
and <lno> (line number) is picked up from <kwds>.
Returns the washed field as a string.
'''
msg = normalize(msg)
lines = msg.splitlines()
size = len(lines)
if size > 1 or len(msg) > width:
washed = []
# The first line is special
m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
if not m:
print lines[0]
kwds['lno'] -= size + 1
raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
% kwds)
washed.append(m.group(1))
if m.group(1).endswith(r'\n'):
washed.append('')
i = 0
for line in lines[1:]:
m = re.match('^"(\s*.*)"$', line)
i += 1
if not m:
print line
kwds['lno'] -= size - i + 1
raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
% kwds)
washed[-1] += m.group(1)
if m.group(1).endswith(r'\n'):
washed.append('')
if washed[0] == '':
washed.pop(0)
if washed[-1] == '':
washed.pop()
washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.
# One line or multiline
if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
washed = '%s %s\n' % (idx, washed[0])
else:
washed = '%s ""\n%s\n' % (idx, '\n'.join(washed))
else:
washed = msg
return washed
def parse(filename, entry):
'''
Parse po or pot file with name <filename>. Set the variable
<entry> to msgid/msgstr to indicate pot/po file. The return value
is a dict with msgid (washed) as key and Msgs instances as
values.
'''
lines = io(filename).readlines()
Msgs.file = filename
messages = {}
last = len(lines)
g = gen(lines)
cmt = autocmt = ref = flag = ''
msgid = False
lno = 0
while not lno == last:
l, lno = g.next()
if l.startswith('# '):
l, lno, g, cmt = slurp(l, g, '# ')
if l.startswith('#.'):
l, lno, g, autocmt = slurp(l, g, '#.')
if l.startswith('#:'):
l, lno, g, ref = slurp(l, g, '#:')
if l.startswith('#,'):
l, lno, g, flag = slurp(l, g, '#,')
if l.startswith('msgid'):
l, lno, g, msgid = slurp(l, g, '"')
if l.startswith('msgstr'):
l, lno, g, msgstr = slurp(l, g, '"')
if not lno == last and not l.strip() == '':
raise MsgmergeError('parse error: %s:%s.' % (filename, lno))
if msgid and entry == 'msgstr':
idx = wash(msgid, filename = filename, lno = lno)
messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, cmt = cmt)
msgid = False; msgstr = cmt = autocmt = ref = flag = ''
elif msgid and entry == 'msgid':
idx = wash(msgid, filename = filename, lno = lno)
messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
autocmt = autocmt, ref = ref)
msgid = False; msgstr = cmt = autocmt = ref = flag = ''
for m in messages.values():
m.wash()
return messages
def fuzzy_match(pot, defs):
'''
Try to find the best difflib match (with ratio > 0.6) between
id of Msgs object <pot> and Msgs in the dict <defs>.
Return value is the Msgs object in <defs> with highest ratio,
False is returned if no suitable Msgs is found.
'''
limit = 0.6
l, po = limit - 0.01, False
s = difflib.SequenceMatcher(lambda x: x == ' "', '', pot.get_clean_id())
len2 = len(pot.get_clean_id())
for candidate in defs.values():
if candidate.str == 'msgstr ""\n': # Empty translation
continue
if candidate.id == 'msgid ""\n': # Empty msgid (header)
continue
len1 = len(candidate.get_clean_id())
if len2 > 2 * len1 or len1 > 1.5 * len2: # Simple and fast tests first
continue
s.set_seq1(candidate.get_clean_id())
if s.quick_ratio() < l:
continue
r = s.ratio() # This is expensive
if r > l:
l, po = r, candidate
return po
def flags(po, pot, fuzzy = False, obs = False):
'''
Create flag field from flag field in Msgs objects <po> and
<pot>. When <fuzzy> is true <po>\'s flags are ignored and the
fuzzy flag is added. If <obs> is set then most flags but fuzzy are
removed. If the global variable option.docstrings is set then
docstring flags will not be removed. The return value is a string
which holds the combined flag.
'''
global option
flag = ''
if po.flag or pot.flag or fuzzy:
if not fuzzy:
flag = '%s, %s' % (po.flag.strip(), pot.flag.strip())
else:
flag = '%s, %s' % ('#, fuzzy', pot.flag.strip())
flag = flag.split(', ')
fl = {}
flag = [fl.setdefault(f, f) for f in flag if f not in fl and f]
if not option.docstrings:
try:
flag.remove('docstring')
except ValueError:
pass
if obs:
removes = ['c-format', 'python-format', 'docstring']
for remove in removes:
try:
flag.remove(remove)
except ValueError:
pass
# Put fuzzy first
if 'fuzzy' in flag and not flag.index('fuzzy') == 1:
i = flag.index('fuzzy')
flag[1], flag[i] = flag[i], flag[1]
if len(flag) == 1:
flag = ''
else:
flag = ', '.join(flag) + '\n'
return flag
def add(pot, po, fuzzy = False):
'''
Build a new entry from the Msgs objects <pot> and <po>. If
<fuzzy> is true, <po>\'s flag field is ignored (in
flags()). Returns a multiline string with an up-to-date entry.
'''
msg = []
msg.append(po.cmt)
msg.append(pot.autocmt)
msg.append(pot.ref)
msg.append(flags(po, pot, fuzzy = fuzzy))
msg.append(pot.id)
msg.append(po.str)
return ''.join(msg)
def header(pot, defs):
'''
Update date in header entry. Returns the updated header entry.
'''
try:
[po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
except ValueError:
raise MsgmergeError('Error: did not find header in po file.')
r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
re.MULTILINE | re.DOTALL)
m = r.match(pot.str)
if not m:
raise MsgmergeError(
'Error: did not find POT-Creation-Date field in pot file.')
subs = '\\1%s\\3' % m.group(2)
_, count = r.subn(subs, po.str)
if not count == 1:
raise MsgmergeError(
'Error: did not find POT-Creation-Date field in po file.')
return po
def match(defs, refs):
'''
Try to match Msgs objects in <refs> with Msgs objects in
<defs>. The return value is a list with po entries.
'''
global option
matches = []
empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
deco = [(r.lno, r) for r in refs.values()]
deco.sort()
po = header(deco.pop(0)[1], defs) # Header entry
matches.append(add(empty, po))
po.used()
sorted = [ a[1] for a in deco ]
for pot in sorted:
if option.verbose:
sys.stderr.write('.')
po = defs.get(pot.id, False) # Perfect match
if po:
matches.append(add(pot, po))
po.used(); pot.used()
continue
po = fuzzy_match(pot, defs) # Fuzzy match
if po:
matches.append(add(pot, po, fuzzy = True))
po.used(); pot.used()
continue
matches.append(add(pot, empty)) # No match
obsolete(defs, matches)
return matches
def obsolete(defs, matches):
'''Handle obsolete translations.'''
deco = [ (d.lno, d) for d in defs.values() if
d.count == 0 and not d.str == 'msgstr ""\n' ]
deco.sort()
empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
obs = [ o[1] for o in deco ]
for o in obs:
o.flag = flags(o, empty, obs = True)
o.obsolete()
matches.append('%s%s%s' % (o.flag, o.id, o.str))
def help():
'''Print help text and exit.'''
print __doc__
sys.exit(0)
def cmdline():
'''Parse options and arguments from command line.'''
advice = 'Try `%(name)s --help\' for more information.'
try:
long_opt = ['help', 'version', 'update', 'output-file=',
'quiet', 'silent', 'docstrings', 'suffix', 'backup']
opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
except getopt.error, msg:
print '%s: %s\n%s' % ('%(name)s', msg, advice) % globals()
sys.exit(1)
option = Options(cmdline = True)
for opt, arg in opts:
if opt in ['-h', '--help']:
help()
elif opt in ['-V', '--version']:
print '%(name)s %(__version__)s' % globals()
sys.exit(0)
elif opt in ['-o', '--output-file']:
option.outfile = arg
elif opt in ['-U', '--update']:
option.update = True
elif opt in ['-q', '--silent', '--quiet']:
option.verbose = False
elif opt in ['-D', '--docstrings']:
option.docstrings = True
elif opt in ['--suffix']:
option.suffix = arg
elif opt in ['--backup']:
option.backup = arg
# Sanity checks
warn = False
if option.update and option.outfile:
warn = '--update and --output-file are mutually exclusive.'
if len(args) == 0:
warn = 'no input files given.'
elif len(args) == 1 or len(args) > 2:
warn = 'exactly 2 input files required.'
if warn:
print '%s: %s\n%s' % ('%(name)s', warn, advice) % globals()
sys.exit(1)
if option.update:
option.outfile = args[0]
elif not option.outfile:
option.outfile = '-'
defs, refs = args
try:
merge(defs, refs, option = option)
except MsgmergeError, err:
print '%(name)s: ' % globals() + '%s' % err
sys.exit(1)
def io(iofile, mode = 'rU'):
'''Wrapper around open().'''
try:
fo = open(iofile, mode)
if 'r' in mode and fo.read(3) != codecs.BOM_UTF8:
fo.seek(0)
except IOError, msg:
raise MsgmergeError('error while opening file: %s: %s.' %
(msg[1], iofile))
return fo
def backup(infile):
'''Handle backup of files in update mode'''
os.environ.get('VERSION_CONTROL', '')
suffix = os.environ.get('SIMPLE_BACKUP_SUFFIX', '~')
backup_file = '%s%s' % (infile, suffix)
def changes(new, old):
return cmp(''.join(old), '\n'.join(new))
def write(matches, outfile):
'''Write the list <matches> to file <outfile>'''
if not outfile == '-':
fd = io(outfile, 'w')
else:
fd = sys.stdout
fd.write('\n'.join(matches))
def merge(def_file, ref_file, update = True, outfile = '-',
docstrings = True, suffix = '~', backup = True,
verbose = True, **kwds):
'''
Merge po file <def_file> with pot file <ref_file>. If <update> is
set to True then only update if there are changes to the po
file. Set outfile to write the updated po file to another file. Set
to `-\' for writing to standard out. If docstrings is False
the docstring flag will be removed. Set verbose to False to suppress
progress indicators. <kwds> is used to pass options from the
command line interface.
'''
global option
option = kwds.get('option', Options(update = update,
outfile = outfile,
docstrings = docstrings,
suffix = suffix,
backup = backup,
verbose = verbose))
def_msgs = parse(def_file, 'msgstr')
ref_msgs = parse(ref_file, 'msgid')
if verbose and not __name__ == '__main__':
print >> sys.stderr, 'Merging %s with %s' % (ref_file, def_file)
updated_lines = match(def_msgs, ref_msgs)
if option.verbose:
print >> sys.stderr, ' done.'
if not option.update:
write(updated_lines, option.outfile)
elif option.update and changes(updated_lines, io(def_file).readlines()):
write(updated_lines, def_file)
def merge_dir(directory, pot = False, include = [], exclude = [],
verbose = True):
'''
Tries to merge a directory of po files. Uses simple glob to find
po files and pot file. The parameter <pot> can be used to specify
the pot file in the directory. If the list <include> is given only
files in this list are merged. Use the list <exclude> to exclude
files from being merged. This function is only useful if po files and
pot file are in the same directory. Set <verbose> to get
information when running.
'''
if directory[-1] == '/':
directory = os.path.dirname(directory)
if pot:
pot = os.path.basename(pot)
else:
pot = glob.glob('%s/*.pot' % directory)
if not pot:
raise MsgmergeError('No pot file found.')
elif len(pot) > 1:
raise MsgmergeError('More than one pot file found: %s.' % pot)
pot = os.path.basename(pot[0])
if not include:
pos = glob.glob('%s/*po' % directory)
if not len(pos) > 1:
raise MsgmergeError('No po file(s) found.')
pos = [ os.path.basename(po) for po in pos ]
else:
pos = [ os.path.basename(po) for po in include ]
for po in exclude:
try:
pos.remove(po)
except ValueError:
pass
format = '%s/%s'
for po in pos:
try:
merge(format % (directory, po), format % (directory, pot),
update = True, verbose = verbose,
outfile = format % (directory, po))
except MsgmergeError, err:
if verbose:
print >> sys.stderr, '%s Not updated.' % err
else:
print >> sys.stderr, '%s %s not updated.' % (err, po)
if __name__ == '__main__':
cmdline()
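
The fuzzy_match() routine in the removed msgmerge.py rests on difflib.SequenceMatcher with a 0.6 cutoff and a cheap quick_ratio() pre-filter before the expensive ratio(). A minimal standalone sketch of that idea (function name and test strings are ours, not from the removed module):

import difflib

def best_fuzzy_match(msgid, candidates, limit=0.6):
    """Return the candidate most similar to msgid, or None if nothing beats limit."""
    best, best_ratio = None, limit
    matcher = difflib.SequenceMatcher(None, '', msgid)
    for cand in candidates:
        matcher.set_seq1(cand)
        if matcher.quick_ratio() < best_ratio:   # cheap upper bound on ratio(), skip early
            continue
        ratio = matcher.ratio()                  # expensive, exact similarity
        if ratio > best_ratio:
            best, best_ratio = cand, ratio
    return best

print(best_fuzzy_match('Open the file', ['Open a file', 'Quit', 'Save the file']))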

View File

@@ -1,672 +0,0 @@
#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@zope.com>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# 2002-11-22 Jürgen Hermann <jh@web.de>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#
# for selftesting
try:
import fintl
_ = fintl.gettext
except ImportError:
_ = lambda s: s
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.
Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.
A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:
gettext("Translatable String")
_("Translatable String")
Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext wherever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.
Usage: pygettext [options] inputfile ...
Options:
-a
--extract-all
Extract all strings.
-d name
--default-domain=name
Rename the default output file from messages.pot to name.pot.
-E
--escape
Replace non-ASCII characters with octal escape sequences.
-D
--docstrings
Extract module, class, method, and function docstrings. These do
not need to be wrapped in _() markers, and in fact cannot be for
Python to consider them docstrings. (See also the -X option).
-h
--help
Print this help message and exit.
-k word
--keyword=word
Keywords to look for in addition to the default set, which are:
%(DEFAULTKEYWORDS)s
You can have multiple -k flags on the command line.
-K
--no-default-keywords
Disable the default set of keywords (see above). Any keywords
explicitly added with the -k/--keyword option are still recognized.
--no-location
Do not write filename/lineno location comments.
-n
--add-location
Write filename/lineno location comments indicating where each
extracted string is found in the source. These lines appear before
each msgid. The style of comments is controlled by the -S/--style
option. This is the default.
-o filename
--output=filename
Rename the default output file from messages.pot to filename. If
filename is `-' then the output is sent to standard out.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-S stylename
--style stylename
Specify which style to use for location comments. Two styles are
supported:
Solaris # File: filename, line: line-number
GNU #: filename:line
The style name is case insensitive. GNU style is the default.
-v
--verbose
Print the names of the files being processed.
-V
--version
Print the version of pygettext and exit.
-w columns
--width=columns
Set width of output to columns.
-x filename
--exclude-file=filename
Specify a file that contains a list of strings that are not to be
extracted from the input files. Each string to be excluded must
appear on a line by itself in the file.
-X filename
--no-docstrings=filename
Specify a file that contains a list of files (one per line) that
should not have their docstrings extracted. This is only useful in
conjunction with the -D option above.
If `inputfile' is -, standard input is read.
""")
import os
import imp
import sys
import glob
import time
import getopt
import token
import tokenize
import operator
from umit.pm.core.const import PM_VERSION
__version__ = '1.5'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
EMPTYSTRING = ''
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
pot_header = _('''\
# PacketManipulator catalog.
# Copyright (C) 2009 Adriano Montero Marques
# Francesco Piccinno <stack.box@gmail.com>, 2009
#
msgid ""
msgstr ""
"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: pygettext.py %(version)s\\n"
''')
def usage(code, msg=''):
print >> sys.stderr, __doc__ % globals()
if msg:
print >> sys.stderr, msg
sys.exit(code)
escapes = []
def make_escapes(pass_iso8859):
global escapes
if pass_iso8859:
# Allow iso-8859 characters to pass through so that e.g. 'msgid
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod = 128
else:
mod = 256
for i in range(256):
if 32 <= (i % mod) <= 126:
escapes.append(chr(i))
else:
escapes.append("\\%03o" % i)
escapes[ord('\\')] = '\\\\'
escapes[ord('\t')] = '\\t'
escapes[ord('\r')] = '\\r'
escapes[ord('\n')] = '\\n'
escapes[ord('\"')] = '\\"'
def escape(s):
global escapes
s = list(s)
for i in range(len(s)):
s[i] = escapes[ord(s[i])]
return EMPTYSTRING.join(s)
def safe_eval(s):
# unwrap quotes, safely
return eval(s, {'__builtins__':{}}, {})
def normalize(s):
# This converts the various Python string types into a format that is
# appropriate for .po files, namely much closer to C style.
lines = s.split('\n')
if len(lines) == 1:
s = '"' + escape(s) + '"'
else:
if not lines[-1]:
del lines[-1]
lines[-1] = lines[-1] + '\n'
for i in range(len(lines)):
lines[i] = escape(lines[i])
lineterm = '\\n"\n"'
s = '""\n"' + lineterm.join(lines) + '"'
return s
def containsAny(str, set):
"""Check whether 'str' contains ANY of the chars in 'set'"""
return 1 in [c in str for c in set]
def _visit_pyfiles(list, dirname, names):
"""Helper for getFilesForName()."""
# get extension for python source files
if not globals().has_key('_py_ext'):
global _py_ext
_py_ext = [triple[0] for triple in imp.get_suffixes()
if triple[2] == imp.PY_SOURCE][0]
# don't recurse into CVS directories
if 'CVS' in names:
names.remove('CVS')
# add all *.py files to list
list.extend(
[os.path.join(dirname, file) for file in names
if os.path.splitext(file)[1] == _py_ext]
)
def _get_modpkg_path(dotted_name, pathlist=None):
"""Get the filesystem path for a module or a package.
Return the file system path to a file for a module, and to a directory for
a package. Return None if the name is not found, or is a builtin or
extension module.
"""
# split off top-most name
parts = dotted_name.split('.', 1)
if len(parts) > 1:
# we have a dotted path, import top-level package
try:
file, pathname, description = imp.find_module(parts[0], pathlist)
if file: file.close()
except ImportError:
return None
# check if it's indeed a package
if description[2] == imp.PKG_DIRECTORY:
# recursively handle the remaining name parts
pathname = _get_modpkg_path(parts[1], [pathname])
else:
pathname = None
else:
# plain name
try:
file, pathname, description = imp.find_module(
dotted_name, pathlist)
if file:
file.close()
if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
pathname = None
except ImportError:
pathname = None
return pathname
def getFilesForName(name):
"""Get a list of module files for a filename, a module or package name,
or a directory.
"""
if not os.path.exists(name):
# check for glob chars
if containsAny(name, "*?[]"):
files = glob.glob(name)
list = []
for file in files:
list.extend(getFilesForName(file))
return list
# try to find module or package
name = _get_modpkg_path(name)
if not name:
return []
if os.path.isdir(name):
# find all python files in directory
list = []
os.path.walk(name, _visit_pyfiles, list)
return list
elif os.path.exists(name):
# a single file
return [name]
return []
class TokenEater:
def __init__(self, options):
self.__options = options
self.__messages = {}
self.__state = self.__waiting
self.__data = []
self.__lineno = -1
self.__freshmodule = 1
self.__curfile = None
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
## import token
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
## 'tstring:', tstring
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
opts = self.__options
# Do docstring extractions, if enabled
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
# module docstring?
if self.__freshmodule:
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL):
self.__freshmodule = 0
return
# class docstring?
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen
def __suiteseen(self, ttype, tstring, lineno):
# ignore anything until we see the colon
if ttype == tokenize.OP and tstring == ':':
self.__state = self.__suitedocstring
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
def __keywordseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == '(':
self.__data = []
self.__lineno = lineno
self.__state = self.__openseen
else:
self.__state = self.__waiting
def __openseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == ')':
# We've seen the last of the translatable strings. Record the
# line number of the first line of the strings and update the list
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything other than STRING or whitespace
print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
}
self.__state = self.__waiting
def __addentry(self, msg, lineno=None, isdocstring=0):
if lineno is None:
lineno = self.__lineno
if not msg in self.__options.toexclude:
entry = (self.__curfile, lineno)
self.__messages.setdefault(msg, {})[entry] = isdocstring
def set_filename(self, filename):
self.__curfile = filename
self.__freshmodule = 1
def write(self, fp):
options = self.__options
timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
# The time stamp in the header doesn't have the same format as that
# generated by xgettext...
print >> fp, pot_header % {'time': timestamp, 'version': __version__,
'pm_version': PM_VERSION}
# Sort the entries. First sort each particular entry's keys, then
# sort all the entries by their first item.
reverse = {}
for k, v in self.__messages.items():
keys = v.keys()
keys.sort()
reverse.setdefault(tuple(keys), []).append((k, v))
rkeys = reverse.keys()
rkeys.sort()
for rkey in rkeys:
rentries = reverse[rkey]
rentries.sort()
for k, v in rentries:
isdocstring = 0
# If the entry was gleaned out of a docstring, then add a
# comment stating so. This is to aid translators who may wish
# to skip translating some unimportant docstrings.
if reduce(operator.__add__, v.values()):
isdocstring = 1
# k is the message string, v is a dictionary-set of (filename,
# lineno) tuples. We want to sort the entries in v first by
# file name and then by line number.
v = v.keys()
v.sort()
if not options.writelocations:
pass
# location comments are different b/w Solaris and GNU:
elif options.locationstyle == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
print >>fp, _(
'# File: %(filename)s, line: %(lineno)d') % d
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceed 'options.width'
locline = '#:'
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
s = _(' %(filename)s:%(lineno)d') % d
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print >> fp, locline
locline = "#:" + s
if len(locline) > 2:
print >> fp, locline
if isdocstring:
print >> fp, '#, docstring'
print >> fp, 'msgid', normalize(k)
print >> fp, 'msgstr ""\n'
def main():
global default_keywords
try:
opts, args = getopt.getopt(
sys.argv[1:],
'ad:DEhk:Kno:p:S:Vvw:x:X:',
['extract-all', 'default-domain=', 'escape', 'help',
'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings', 'no-docstrings',
])
except getopt.error, msg:
usage(1, msg)
# for holding option values
class Options:
# constants
GNU = 1
SOLARIS = 2
# defaults
extractall = 0 # FIXME: currently this option has no effect at all.
escape = 0
keywords = []
outpath = ''
outfile = 'messages.pot'
writelocations = 1
locationstyle = GNU
verbose = 0
width = 78
excludefilename = ''
docstrings = 0
nodocstrings = {}
options = Options()
locations = {'gnu' : options.GNU,
'solaris' : options.SOLARIS,
}
# parse options
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-a', '--extract-all'):
options.extractall = 1
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
elif opt in ('-D', '--docstrings'):
options.docstrings = 1
elif opt in ('-k', '--keyword'):
options.keywords.append(arg)
elif opt in ('-K', '--no-default-keywords'):
default_keywords = []
elif opt in ('-n', '--add-location'):
options.writelocations = 1
elif opt in ('--no-location',):
options.writelocations = 0
elif opt in ('-S', '--style'):
options.locationstyle = locations.get(arg.lower())
if options.locationstyle is None:
usage(1, _('Invalid value for --style: %s') % arg)
elif opt in ('-o', '--output'):
options.outfile = arg
elif opt in ('-p', '--output-dir'):
options.outpath = arg
elif opt in ('-v', '--verbose'):
options.verbose = 1
elif opt in ('-V', '--version'):
print _('pygettext.py (xgettext for Python) %s') % __version__
sys.exit(0)
elif opt in ('-w', '--width'):
try:
options.width = int(arg)
except ValueError:
usage(1, _('--width argument must be an integer: %s') % arg)
elif opt in ('-x', '--exclude-file'):
options.excludefilename = arg
elif opt in ('-X', '--no-docstrings'):
fp = open(arg)
try:
while 1:
line = fp.readline()
if not line:
break
options.nodocstrings[line[:-1]] = 1
finally:
fp.close()
# calculate escapes
make_escapes(options.escape)
# calculate all keywords
options.keywords.extend(default_keywords)
# initialize list of strings to exclude
if options.excludefilename:
try:
fp = open(options.excludefilename)
options.toexclude = fp.readlines()
fp.close()
except IOError:
print >> sys.stderr, _(
"Can't read --exclude-file: %s") % options.excludefilename
sys.exit(1)
else:
options.toexclude = []
# resolve args to module lists
expanded = []
for arg in args:
if arg == '-':
expanded.append(arg)
else:
expanded.extend(getFilesForName(arg))
args = expanded
# slurp through all the files
eater = TokenEater(options)
for filename in args:
if filename == '-':
if options.verbose:
print _('Reading standard input')
fp = sys.stdin
closep = 0
else:
if options.verbose:
print _('Working on %s') % filename
fp = open(filename)
closep = 1
try:
eater.set_filename(filename)
try:
tokenize.tokenize(fp.readline, eater)
except tokenize.TokenError, e:
print >> sys.stderr, '%s: %s, line %d, column %d' % (
e[0], filename, e[1][0], e[1][1])
finally:
if closep:
fp.close()
# write the output
if options.outfile == '-':
fp = sys.stdout
closep = 0
else:
if options.outpath:
options.outfile = os.path.join(options.outpath, options.outfile)
fp = open(options.outfile, 'w')
closep = 1
try:
eater.write(fp)
finally:
if closep:
fp.close()
if __name__ == '__main__':
main()
# some more test strings
_(u'a unicode string')
# this one creates a warning
_('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
_('more' 'than' 'one' 'string')
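
For comparison, the heart of pygettext's TokenEater above is a small state machine over the token stream: a keyword NAME, an opening parenthesis, then string literals. A much-reduced sketch of that token-level scan (Python 3 style, hypothetical helper name, not the removed implementation):

import ast
import tokenize
from io import StringIO

SOURCE = 'print(_("Hello, world"))\nlabel = _("Quit")\n'

def extract_gettext_strings(source, keyword='_'):
    """Collect (lineno, text) for literal arguments of keyword(...) calls."""
    found, state = [], 'waiting'
    for tok_type, tok_str, start, _end, _line in tokenize.generate_tokens(StringIO(source).readline):
        if state == 'waiting' and tok_type == tokenize.NAME and tok_str == keyword:
            state = 'keyword'
        elif state == 'keyword':
            state = 'open' if (tok_type == tokenize.OP and tok_str == '(') else 'waiting'
        elif state == 'open':
            if tok_type == tokenize.STRING:
                found.append((start[0], ast.literal_eval(tok_str)))
            state = 'waiting'
    return found

print(extract_gettext_strings(SOURCE))   # [(1, 'Hello, world'), (2, 'Quit')]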