Placed official version, and removed other files since they only work with parsing Python (nw)

Miodrag Milanovic 2016-02-21 14:45:38 +01:00
parent 78ada55150
commit be7cc43b08
4 changed files with 45 additions and 1875 deletions

View File

@@ -1,490 +0,0 @@
#! /usr/bin/env python
#
# check_po - a gramps tool to check validity of po files
#
# Copyright (C) 2006-2006 Kees Bakker
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# TODO
#
# * Check for HTML text in msgstr when there is none in msgid
# * Check for matching HTML tag/endtag in msgstr
#
# Adapted for Umit by Guilherme Polo, original file:
# https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po
import re
import sys
from optparse import OptionParser
APP = "Umit"
all_total = {}
all_fuzzy = {}
all_untranslated = {}
all_percent_s = {}
all_named_s = {}
all_bnamed_s = {}
all_context = {}
all_coverage = {}
all_template_coverage = {}
def strip_quotes(st):
if len(st) >= 2 and st[0] == '"' and st[len(st)-1] == '"':
st = st.strip()[1:-1]
return st
# This is a base class for all checks
class Check:
def __init__( self ):
self.msgs = []
def diag( self ):
if len( self.msgs ):
print
print self.diag_header
for m in self.msgs:
m.diag()
def summary( self ):
print "%-20s%d" % ( self.summary_text, len(self.msgs) )
class Check_fmt( Check ):
def __init__( self, fmt ):
Check.__init__( self )
self.diag_header = "-------- %s mismatches --------------" % fmt
self.summary_text = "%s mismatches:" % fmt
self.fmt = fmt
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
cnt1 = msgid.count( self.fmt )
cnt2 = msgstr.count( self.fmt )
if cnt1 != cnt2:
self.msgs.append( msg )
class Check_named_fmt( Check ):
# A pattern to find all %()
find_named_fmt_pat = re.compile('% \( \w+ \) \d* \D', re.VERBOSE)
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- %() name mismatches --------------"
self.summary_text = "%() name mismatches:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# Same number of named formats?
fmts1 = self.find_named_fmt_pat.findall( msgid )
fmts2 = self.find_named_fmt_pat.findall( msgstr )
if len( fmts1 ) != len( fmts2 ):
self.msgs.append( msg )
else:
# Do we have the same named formats?
fmts1.sort()
fmts2.sort()
if fmts1 != fmts2:
self.msgs.append( msg )
class Check_missing_sd( Check ):
# A pattern to find %() without s or d
# Here is a command to use for testing
# print re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' )
find_named_fmt_pat2 = re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE)
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
self.summary_text = "%() missing s/d:"
def process( self, msg ):
msgstr = msg.msgstr
fmts = self.find_named_fmt_pat2.findall( msgstr )
for f in fmts:
if not f in ('s', 'd'):
self.msgs.append( msg )
break
class Check_runaway( Check ):
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- Runaway context in translation ---------"
self.summary_text = "Runaway context:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# Runaway context. In the translated part we only want to see
# the translation of the word after the |
if msgid.count('|') > 0 and msgstr.count('|') > 0 and msgid != msgstr:
self.msgs.append( msg )
class Check_xml_chars( Check ):
# Special XML characters
# It is not allowed to have a quote, an ampersand or an angle bracket
xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- unescaped XML special characters ---------"
self.summary_text = "XML special chars:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# XML errors
# Only look at messages in the tips.xml
if msg.is_tips_xml:
if self.xml_chars_pat.search( msgstr ):
self.msgs.append( msg )
class Check_last_char( Check ):
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- last character not identical ---------"
self.summary_text = "Last character:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
# Last character of msgid? White space? Period?
if msg.is_fuzzy:
return
msgid_last = msgid[-1:]
msgstr_last = msgstr[-1:]
if msgid_last.isspace() != msgstr_last.isspace():
self.msgs.append( msg )
elif (msgid_last == '.') != (msgstr_last == '.'):
self.msgs.append( msg )
class Check_shortcut_trans( Check ):
def __init__( self ):
Check.__init__( self )
self.diag_header = "-------- shortcut key in translation ---------"
self.summary_text = "Shortcut in msgstr:"
def process( self, msg ):
msgid = msg.msgid
msgstr = msg.msgstr
if msgid.count('_') == 0 and msgstr.count('_') > 0:
self.msgs.append( msg )
class Msgid:
fuzzy_pat = re.compile( 'fuzzy' )
tips_xml_pat = re.compile( r'tips\.xml' )
def __init__( self, msgnr, lineno ):
self._msgid = []
self._msgstr = []
self.msgid = ''
self.msgstr = ''
self._cmnt = []
self.nr = msgnr
self.lineno = lineno
self.is_fuzzy = 0
self.is_tips_xml = 0
def diag( self ):
if 1:
print
print "msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, self.is_fuzzy and " (fuzzy)" or "" )
sys.stdout.write( ''.join( self._msgid ) )
sys.stdout.write( ''.join( self._msgstr ) )
else:
# Compatible with the old check_po
print "%d '%s' : '%s'" % ( self.lineno, self.msgid, self.msgstr )
def add_msgid( self, line, lineno ):
self._msgid.append( line )
line = re.sub( r'msgid\s+', '', line )
line = line.strip()
if line[0] != '"' or line[-1:] != '"':
print "ERROR at line %d: Missing quote." % lineno
line = strip_quotes( line )
self.msgid += line
def add_msgstr( self, line, lineno ):
self._msgstr.append( line )
line = re.sub( r'msgstr\s+', '', line )
line = line.strip()
if line[0] != '"' or line[-1:] != '"':
print "ERROR at line %d: Missing quote." % lineno
line = strip_quotes( line )
self.msgstr += line
def add_cmnt( self, line ):
self._cmnt.append( line )
if not self.is_fuzzy and self.fuzzy_pat.search( line ):
self.is_fuzzy = 1
if not self.is_tips_xml and self.tips_xml_pat.search( line ):
self.is_tips_xml = 1
def read_msgs( fname ):
empty_pat = re.compile( r'^ \s* $', re.VERBOSE )
comment_pat = re.compile( r'\#', re.VERBOSE )
msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE )
msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE )
str_pat = re.compile( r'"', re.VERBOSE )
old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE )
msgnr = 0 # This is the message number of the next message to read. The first real message is 1.
f = open( fname )
lines = f.readlines()
# parse it like a statemachine
NONE = 0 # Nothing detected, yet
CMNT = 1 # Inside comment part
MSGID = 2 # Inside msgid part
MSGSTR = 3 # Inside msgstr part
STR = 4 # A continuation string
OLD = 5 # An old pattern with #~
state = NONE
msg = None
msgs = []
for ix in range( len(lines) ): # Use line numbers for messages
line = lines[ix]
lineno = ix + 1
m = empty_pat.match( line )
if m:
continue # Empty lines are not interesting
# What's the next state?
if old_pat.match( line ):
next_state = OLD
elif comment_pat.match( line ):
next_state = CMNT
elif msgid_pat.match( line ):
next_state = MSGID
elif msgstr_pat.match( line ):
next_state = MSGSTR
elif str_pat.match( line ):
next_state = STR
else:
print 'WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars()
next_state = NONE
#print "%(state)d->%(next_state)d\t%(line)s" % vars()
if state == NONE:
# expect msgid or comment or old stuff
if next_state == CMNT:
state = CMNT
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_cmnt( line )
elif next_state == MSGID:
state = MSGID
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgid( line, lineno )
elif next_state == MSGSTR:
print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
state = MSGSTR
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgstr( line, lineno )
elif next_state == STR:
print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
elif next_state == OLD:
pass # Just skip
elif state == CMNT:
if next_state == CMNT:
if msg:
msg.add_cmnt( line )
else:
# Note. We may need to do something about these comments
# Skip for now
pass
elif next_state == MSGID:
state = MSGID
if not msg:
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgid( line, lineno )
elif next_state == MSGSTR:
print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
state = MSGSTR
msg = Msgid( msgnr, lineno ) # Start with an empty new item
msgnr += 1
msgs.append( msg )
msg.add_msgstr( line, lineno )
elif next_state == STR:
print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
elif next_state == OLD:
msg = None
pass # Just skip
elif state == MSGID:
if next_state == CMNT:
# Hmmm. A comment here?
print 'WARNING: Unexpected comment at %(fname)s:%(lineno)d' % vars()
elif next_state == MSGID:
raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % vars() )
elif next_state == MSGSTR:
state = MSGSTR
msg.add_msgstr( line, lineno )
elif next_state == STR:
msg.add_msgid( line, lineno )
elif next_state == OLD:
msg = None
pass # Just skip
elif state == MSGSTR:
if next_state == CMNT:
# A comment probably starts a new item
state = CMNT
msg = Msgid( msgnr, lineno )
msgnr += 1
msgs.append( msg )
msg.add_cmnt( line )
elif next_state == MSGID:
state = MSGID
msg = Msgid( msgnr, lineno )
msgnr += 1
msgs.append( msg )
msg.add_msgid( line, lineno )
elif next_state == MSGSTR:
raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % vars() )
elif next_state == STR:
msg.add_msgstr( line, lineno )
elif next_state == OLD:
msg = None
pass # Just skip
else:
raise Exception( 'Unexpected state in po parsing (state = %d)' % state )
# Strip items with just comments. (Can this happen?)
msgs1 = []
for m in msgs:
if not m.msgid and not m.msgstr:
#print "INFO: No msgid or msgstr at %s:%s" % ( fname, m.lineno )
pass
else:
msgs1.append( m )
msgs = msgs1
return msgs
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
nr_fuzzy = 0
nr_untranslated = 0
checks = []
checks.append( Check_fmt( '%s' ) )
checks.append( Check_fmt( '%d' ) )
checks.append( Check_named_fmt() )
checks.append( Check_missing_sd() )
checks.append( Check_runaway() )
checks.append( Check_xml_chars() )
checks.append( Check_last_char() )
checks.append( Check_shortcut_trans() )
for msg in msgs:
msgid = msg.msgid
msgstr = msg.msgstr
#print
#print "msgid: %(msgid)s" % vars()
#print "msgstr: %(msgstr)s" % vars()
if not msgstr:
nr_untranslated += 1
continue
if msg.is_fuzzy:
nr_fuzzy += 1
if options.skip_fuzzy:
continue
for c in checks:
c.process( msg )
nr_msgs = len(msgs)
if nth > 0:
print
print "====================================="
print "%-20s%s" % ( "File:", fname )
print "%-20s%d" % ( "Template total:", nr_templates )
print "%-20s%d" % ( "PO total:", nr_msgs )
print "%-20s%d" % ( "Fuzzy:", nr_fuzzy )
print "%-20s%d" % ( "Untranslated:", nr_untranslated )
for c in checks:
c.summary()
po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
print "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage )
template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
print "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage )
if not options.only_summary:
for c in checks:
c.diag()
def main(args):
if len(sys.argv) < 2:
print "Error: Especify the umit.pot file path"
sys.exit(1)
parser = OptionParser(description="This program validates a PO file for "
"%s." % APP, usage='%prog [options] po-file...' )
parser.add_option("", "--skip-fuzzy",
action="store_true", dest="skip_fuzzy", default=False,
help="skip fuzzies")
parser.add_option("-s", "--only-summary",
action="store_true", dest="only_summary", default=False,
help="only give the summary")
options, args = parser.parse_args()
try:
pot_msgs = read_msgs(sys.argv[1])
nr_templates = len(pot_msgs)
nth = 0
for fname in args:
msgs = read_msgs(fname)
analyze_msgs(options, fname, msgs, nr_templates, nth)
nth += 1
except Exception, e:
print e
if __name__ == "__main__":
main(sys.argv)
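
The removed check_po validators above mostly compare printf-style placeholders between msgid and msgstr. As a rough standalone sketch of the named-placeholder check (simplified regex and names of our own, not code from the removed file):

import re

# Simplified stand-in for Check_named_fmt: flag entries whose %(name)s / %(name)d
# placeholders differ between the original string and its translation.
NAMED_FMT = re.compile(r'%\(\w+\)[sd]')

def named_placeholders_match(msgid, msgstr):
    """True when both strings use the same multiset of named placeholders."""
    return sorted(NAMED_FMT.findall(msgid)) == sorted(NAMED_FMT.findall(msgstr))

print("ok=%s bad=%s" % (
    named_placeholders_match('%(event_name)s: %(place)s', '%(place)s: %(event_name)s'),
    named_placeholders_match('%(event_name)s: %(place)s', '%(event)s: %(place)s')))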

View File

@@ -1,14 +1,6 @@
#! /usr/bin/env python
#!/usr/bin/env python2
# -*- coding: iso-8859-1 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
#
# Changelog: (Guilherme Polo)
# 2008-04-11
# - Support for files with BOM UTF8 mark.
#
# 2008-04-10
# - Support for fuzzy strings in output.
# - Bumped to version 1.1.1
"""Generate binary message catalog from textual translation description.
@@ -24,10 +16,6 @@ Options:
Specify the output file to write to. If omitted, output will go to a
file named filename.mo (based off the input file name).
-f
--use-fuzzy
Use fuzzy entries in output
-h
--help
Print this message and exit.
@@ -35,23 +23,21 @@ Options:
-V
--version
Display version information and exit.
Before using the -f (fuzzy) option, read this:
http://www.finesheer.com:8457/cgi-bin/info2html?(gettext)Fuzzy%20Entries&lang=en
"""
import sys
import os
import sys
import ast
import getopt
import struct
import array
import codecs
__version__ = "1.1.1"
__version__ = "1.1"
MESSAGES = {}
def usage(code, msg=''):
print >> sys.stderr, __doc__
if msg:
@@ -59,13 +45,15 @@ def usage(code, msg=''):
sys.exit(code)
def add(id, str, fuzzy, use_fuzzy):
"Add a translation to the dictionary."
def add(id, str, fuzzy):
"Add a non-fuzzy translation to the dictionary."
global MESSAGES
if (not fuzzy or use_fuzzy) and str:
if not fuzzy and str:
MESSAGES[id] = str
def generate():
"Return the generated output."
global MESSAGES
@@ -108,7 +96,8 @@ def generate():
return output
def make(filename, outfile, use_fuzzy):
def make(filename, outfile):
ID = 1
STR = 2
@@ -122,8 +111,6 @@ def make(filename, outfile, use_fuzzy):
try:
lines = open(infile).readlines()
if lines[0].startswith(codecs.BOM_UTF8):
lines[0] = lines[0][len(codecs.BOM_UTF8):]
except IOError, msg:
print >> sys.stderr, msg
sys.exit(1)
@@ -137,7 +124,7 @@ def make(filename, outfile, use_fuzzy):
lno += 1
# If we get a comment line after a msgstr, this is a new entry
if l[0] == '#' and section == STR:
add(msgid, msgstr, fuzzy, use_fuzzy)
add(msgid, msgstr, fuzzy)
section = None
fuzzy = 0
# Record a fuzzy mark
@@ -147,22 +134,44 @@ def make(filename, outfile, use_fuzzy):
if l[0] == '#':
continue
# Now we are in a msgid section, output previous section
if l.startswith('msgid'):
if l.startswith('msgid') and not l.startswith('msgid_plural'):
if section == STR:
add(msgid, msgstr, fuzzy, use_fuzzy)
add(msgid, msgstr, fuzzy)
section = ID
l = l[5:]
msgid = msgstr = ''
is_plural = False
# This is a message with plural forms
elif l.startswith('msgid_plural'):
if section != ID:
print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
(infile, lno)
sys.exit(1)
l = l[12:]
msgid += '\0' # separator of singular and plural
is_plural = True
# Now we are in a msgstr section
elif l.startswith('msgstr'):
section = STR
l = l[6:]
if l.startswith('msgstr['):
if not is_plural:
print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
(infile, lno)
sys.exit(1)
l = l.split(']', 1)[1]
if msgstr:
msgstr += '\0' # Separator of the various plural forms
else:
if is_plural:
print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\
(infile, lno)
sys.exit(1)
l = l[6:]
# Skip empty lines
l = l.strip()
if not l:
continue
# XXX: Does this always follow Python escape semantics?
l = eval(l)
l = ast.literal_eval(l)
if section == ID:
msgid += l
elif section == STR:
@@ -174,7 +183,7 @@ def make(filename, outfile, use_fuzzy):
sys.exit(1)
# Add last entry
if section == STR:
add(msgid, msgstr, fuzzy, use_fuzzy)
add(msgid, msgstr, fuzzy)
# Compute output
output = generate()
@@ -185,15 +194,15 @@ def make(filename, outfile, use_fuzzy):
print >> sys.stderr, msg
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'hVo:f',
['help', 'version', 'output-file=', 'use-fuzzy'])
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
['help', 'version', 'output-file='])
except getopt.error, msg:
usage(1, msg)
outfile = None
use_fuzzy = False
# parse options
for opt, arg in opts:
if opt in ('-h', '--help'):
@@ -201,8 +210,6 @@ def main():
elif opt in ('-V', '--version'):
print >> sys.stderr, "msgfmt.py", __version__
sys.exit(0)
elif opt in ('-f', '--use-fuzzy'):
use_fuzzy = True
elif opt in ('-o', '--output-file'):
outfile = arg
# do it
@@ -212,7 +219,7 @@ def main():
return
for filename in args:
make(filename, outfile, use_fuzzy)
make(filename, outfile)
if __name__ == '__main__':
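
Two details of the msgfmt.py changes shown above may be worth a standalone illustration: ast.literal_eval unquotes the C-style string literals used in PO files without executing arbitrary code the way eval() could, and plural entries are stored with NUL separators. A small sketch under those assumptions (not part of msgfmt.py itself):

import ast

# A PO line body is a quoted, escaped string literal; literal_eval decodes it safely.
line = '"Hello \\"world\\"\\n"'
print(ast.literal_eval(line))   # -> Hello "world" followed by a newline

# Plural entries: msgid and msgid_plural are joined by '\0', and the indexed
# msgstr[n] forms are joined by '\0' as well, as the comments in make() describe.
key = 'file' + '\0' + 'files'
value = 'Datei' + '\0' + 'Dateien'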

View File

@@ -1,675 +0,0 @@
#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
#
# Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003.
#
'''Merge two Uniforum style .po files together.
This is an implementation (not complete) in Python of the GNU
msgmerge(1) program. It can be used on the command line (or as a Python
module).
Usage: msgmerge.py [OPTIONS] def.po ref.pot
The def.po file is an existing PO file with translations. The ref.pot
file is the last created PO file with up-to-date source references but
old translations, or a PO Template file.
Options:
-U, --update update def.po,
do nothing if def.po is already up to date.
-o, --output-file=FILE write output to file FILE. Output is written to
stdout if set to - or if the option is not present.
-D, --docstrings don\'t remove docstring flag.
-h, --help display help text and exit.
-V, --version display version and exit.
-q, --quiet, --silent suppress progress indicators.
'''
from __future__ import generators
if not __name__ == '__main__':
__doc__ += '''\
When used as module the interesting functions are merge() and
merge_dir().
The merge() function does the same as the command line version, and
the arguments are as follows. The first argument is the def.po file,
then the ref.pot file. The third argument controls whether do work in
update mode or not, then the next argument sets the output file. Set
the next argument to False to remove docstring flags. The last
argument can be used to suppress progress indicators. The default is
to work in update mode with progress indicators.
Example:
merge("def.po", "ref.pot")
merge the files def.po and ref.pot and write output to def.po if
there are any changes.
merge("def.po", "red.pot", docstrings = False, verbose = False,
update = False, outfile = "-")
merge the files def.po and ref.pot and write output to stdout,
remove docstring flag and be quiet.
The merge_dir() function is useful when merging a directory of po
files. The only required argument is the name of the directory with po
files and the pot file. It will use simple glob to find the files. The
second argument can be used to specify the pot file (in the
directory). Third argument is a list of po files (then globbing will
not be used) and the next argument is list of filename to exclude. The
last argument can be used to suppress progress indicators. Docstring
flag will not be removed.
Example:
merge_dir("po")
merge (and update) all po files in directory po with the single pot
file in the same directory.
The module raises the MsgmergeError exception in case of error.
'''
__revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $'
__version__ = '0.1'
name = 'msgmerge.py'
__all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ]
import sys
import re
import string
import getopt
import difflib
import glob
import os.path
import codecs
try:
True, False
except NameError:
True, False = 1, 0
class Msgs:
'''Class to hold information about messages.'''
width = 80
file = ''
def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
self.id = msgid
self.str = msgstr
self.cmt = kwds.get('cmt', '')
self.ref = kwds.get('ref', '')
self.autocmt = kwds.get('autocmt', '')
self.flag = flag
self.entry = entry
self.lno = lno
self.count = 0
def wash(self):
self.id = wash(self.id, width = self.width,
filename = self.file, lno = self.lno)
self.str = wash(self.str, 'msgstr', width = self.width,
filename = self.file, lno = self.lno)
def used(self):
self.count += 1
def get_clean_id(self):
return self.id.replace('msgid "','', 1)
def obsolete(self):
self.width -= len('#~ ')
self.wash()
t = [ '#~ %s\n' % s for s in self.id.splitlines() ]
self.id = ''.join(t)
t = [ '#~ %s\n' % s for s in self.str.splitlines() ]
self.str = ''.join(t)
class Options:
'''Class to hold options'''
def __init__(self, cmdline = False, **kwds):
if not cmdline:
self.update = kwds.get('update', True)
self.outfile = kwds.get('outfile', '-')
self.docstrings = kwds.get('docstrings', True)
self.verbose = kwds.get('verbose', False)
self.suffix = kwds.get('suffix', '~')
self.backup = kwds.get('backup', True)
else:
self.update = False
self.outfile = False
self.docstrings = False
self.verbose = True
self.suffix = '~'
self.backup = True
class MsgmergeError(Exception):
'''Exception class for msgmerge'''
def gen(lines):
'''
Generator which returns a line (with the obsolete prefix removed)
from the list of lines in <lines>, the line number is also
returned.
'''
lno = 0
for l in lines:
lno += 1
yield l.replace('#~ ', '', 1), lno
yield l, lno
def slurp(s, g, sign):
'''
The string returned from iterator <g>\'s next() method is added to
the string <s> if the returned string begins with the string
<sign>. The return value is the first returned string which does not
start with <sign>, the line number, the iterator <g> and the
(possibly) updated string <s>.
'''
l, lno = g.next()
while l.startswith(sign) or (sign == '# ' and l.strip() == '#'):
s += l
l, lno = g.next()
return l, lno, g, s
def splitted_fit(chunk, line, width, break_always, break_after_space):
'''
Check if string <chunk> can be split by a newline to fit into
string <line> with width smaller than <width>. The return value is
a tuple where the first element is the part of chunk which fits
and the second element is the rest of chunk.
'''
ret = '', chunk
l = len(chunk)
for i in range(l - 1, -1, -1):
if chunk[i] in break_always and len(chunk[0:i] + line) <= width:
ret = chunk[0:i], chunk[i:]
break
elif chunk[i] in break_after_space and i and chunk[i-1].strip() == '':
ret = chunk[0:i], chunk[i:]
break
elif chunk[i] == '\\' and len(chunk[i:]) > 1 and chunk[i+1] == '"' \
and len(chunk[0:i] + line) <= width:
ret = chunk[0:i], chunk[i:]
break
return ret
def wrap(msg, width):
'''
Accept a list <msg> of strings to wrap, each string is wrapped to
width <width> and surrounded with a pair of ". The return value is
a string with these wrapped strings joined together with newlines.
'''
if msg.isspace() or not msg:
return '"%s"' % msg
# \ and " is here, but " is special in po files.
break_always = '$%+({['
# XXX what about: « © » ¦ § etc?
break_after_space = '_-=^`~\'<|>&*#@'
enders = '.:,;!?/])}|%-'
extra = string.punctuation
for c in enders:
extra = extra.replace(c, '')
escaped = { 'enders' : re.escape(enders),
'extra' : re.escape(extra) }
regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped
r = re.compile(regex, re.UNICODE)
msg = [ m for m in r.split(msg) if not m == '']
lines = []
line = msg.pop(0)
# Handle \n on end of line
if len(msg) > 1 and msg[-1] == 'n' and len(msg[-2]) > 0 \
and msg[-2][-1] == '\\':
msg[-2] += msg[-1]
msg.pop()
# Do not allow a single \n on a line
if len(msg) > 2 and msg[-1] == '\\n':
msg[-2] += msg[-1]
msg.pop()
for m in msg:
if len(line) > width or len(m) > width or len(line + m) > width:
fit, rest = splitted_fit(m, line, width, break_always,
break_after_space)
line += fit
lines.append(line)
line = rest
else:
line += m
lines.append(line)
lines = [ '"%s"' % l for l in lines ]
return '\n'.join(lines)
def normalize(lines):
'''
Normalize <lines>: e.g "\n\nText\n\n" becomes:
"\n"
"\n"
"Text\n"
"\n"
'''
if 0 < lines.find('\\n') < len(lines) - 3:
if lines[-3:] == '\\n"':
lines = lines[:-3].replace('\\n','\\n"\n"').replace('""\n','') \
+ '\\n"'
else:
lines = lines.replace('\\n','\\n"\n"').replace('""\n','')
return lines
def wash(msg, idx = 'msgid', width = 80, **kwds):
'''
Do washing on the msgstr or msgid fields. Wrap the text to fit in
width <width>. <msg> is a list of lines that makes up the field.
<idx> indicate msgid or msgstr, <width> holds the width. <filename>
and <lno> (line number) is picked up from <kwds>.
Returns the washed field as a string.
'''
msg = normalize(msg)
lines = msg.splitlines()
size = len(lines)
if size > 1 or len(msg) > width:
washed = []
# The first line is special
m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
if not m:
print lines[0]
kwds['lno'] -= size + 1
raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
% kwds)
washed.append(m.group(1))
if m.group(1).endswith(r'\n'):
washed.append('')
i = 0
for line in lines[1:]:
m = re.match('^"(\s*.*)"$', line)
i += 1
if not m:
print line
kwds['lno'] -= size - i + 1
raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
% kwds)
washed[-1] += m.group(1)
if m.group(1).endswith(r'\n'):
washed.append('')
if washed[0] == '':
washed.pop(0)
if washed[-1] == '':
washed.pop()
washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.
# One line or multiline
if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
washed = '%s %s\n' % (idx, washed[0])
else:
washed = '%s ""\n%s\n' % (idx, '\n'.join(washed))
else:
washed = msg
return washed
def parse(filename, entry):
'''
Parse po or pot file with name <filename>. Set the variable
<entry> to msgid/msgstr to indicate pot/po file. The return value
is a dict with msgid (washed) as key and Msgs instances as
values.
'''
lines = io(filename).readlines()
Msgs.file = filename
messages = {}
last = len(lines)
g = gen(lines)
cmt = autocmt = ref = flag = ''
msgid = False
lno = 0
while not lno == last:
l, lno = g.next()
if l.startswith('# '):
l, lno, g, cmt = slurp(l, g, '# ')
if l.startswith('#.'):
l, lno, g, autocmt = slurp(l, g, '#.')
if l.startswith('#:'):
l, lno, g, ref = slurp(l, g, '#:')
if l.startswith('#,'):
l, lno, g, flag = slurp(l, g, '#,')
if l.startswith('msgid'):
l, lno, g, msgid = slurp(l, g, '"')
if l.startswith('msgstr'):
l, lno, g, msgstr = slurp(l, g, '"')
if not lno == last and not l.strip() == '':
raise MsgmergeError('parse error: %s:%s.' % (filename, lno))
if msgid and entry == 'msgstr':
idx = wash(msgid, filename = filename, lno = lno)
messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, cmt = cmt)
msgid = False; msgstr = cmt = autocmt = ref = flag = ''
elif msgid and entry == 'msgid':
idx = wash(msgid, filename = filename, lno = lno)
messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
autocmt = autocmt, ref = ref)
msgid = False; msgstr = cmt = autocmt = ref = flag = ''
for m in messages.values():
m.wash()
return messages
def fuzzy_match(pot, defs):
'''
Try to find the best difflib match (with ratio > 0.6) between
id of Msgs object <pot> and Msgs in the dict <defs>.
Return value is the Msgs object in <defs> with highest ratio,
False is returned if no suitable Msgs is found.
'''
limit = 0.6
l, po = limit - 0.01, False
s = difflib.SequenceMatcher(lambda x: x == ' "', '', pot.get_clean_id())
len2 = len(pot.get_clean_id())
for candidate in defs.values():
if candidate.str == 'msgstr ""\n': # Empty translation
continue
if candidate.id == 'msgid ""\n': # Empty msgid (header)
continue
len1 = len(candidate.get_clean_id())
if len2 > 2 * len1 or len1 > 1.5 * len2: # Simple and fast tests first
continue
s.set_seq1(candidate.get_clean_id())
if s.quick_ratio() < l:
continue
r = s.ratio() # This is expensive
if r > l:
l, po = r, candidate
return po
def flags(po, pot, fuzzy = False, obs = False):
'''
Create flag field from flag field in Msgs objects <po> and
<pot>. When <fuzzy> is true <po>\'s flags are ignored and the
fuzzy flag is added. If <obs> is set then most flags but fuzzy are
removed. If the global variable option.docstrings is set then
docstring flags will not be removed. The return value is a string
which holds the combined flag.
'''
global option
flag = ''
if po.flag or pot.flag or fuzzy:
if not fuzzy:
flag = '%s, %s' % (po.flag.strip(), pot.flag.strip())
else:
flag = '%s, %s' % ('#, fuzzy', pot.flag.strip())
flag = flag.split(', ')
fl = {}
flag = [fl.setdefault(f, f) for f in flag if f not in fl and f]
if not option.docstrings:
try:
flag.remove('docstring')
except ValueError:
pass
if obs:
removes = ['c-format', 'python-format', 'docstring']
for remove in removes:
try:
flag.remove(remove)
except ValueError:
pass
# Put fuzzy first
if 'fuzzy' in flag and not flag.index('fuzzy') == 1:
i = flag.index('fuzzy')
flag[1], flag[i] = flag[i], flag[1]
if len(flag) == 1:
flag = ''
else:
flag = ', '.join(flag) + '\n'
return flag
def add(pot, po, fuzzy = False):
'''
Build a new entry from the Msgs objects <pot> and <po>. If
<fuzzy> is true, <po>\'s flag field is ignored (in
flags()). Returns a multiline string with an up-to-date entry.
'''
msg = []
msg.append(po.cmt)
msg.append(pot.autocmt)
msg.append(pot.ref)
msg.append(flags(po, pot, fuzzy = fuzzy))
msg.append(pot.id)
msg.append(po.str)
return ''.join(msg)
def header(pot, defs):
'''
Update date in header entry. Returns the updated header entry.
'''
try:
[po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
except ValueError:
raise MsgmergeError('Error: did not find header in po file.')
r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
re.MULTILINE | re.DOTALL)
m = r.match(pot.str)
if not m:
raise MsgmergeError(
'Error: did not find POT-Creation-Date field in pot file.')
subs = '\\1%s\\3' % m.group(2)
_, count = r.subn(subs, po.str)
if not count == 1:
raise MsgmergeError(
'Error: did not find POT-Creation-Date field in po file.')
return po
def match(defs, refs):
'''
Try to match Msgs objects in <refs> with Msgs objects in
<defs>. The return value is a list with po entries.
'''
global option
matches = []
empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
deco = [(r.lno, r) for r in refs.values()]
deco.sort()
po = header(deco.pop(0)[1], defs) # Header entry
matches.append(add(empty, po))
po.used()
sorted = [ a[1] for a in deco ]
for pot in sorted:
if option.verbose:
sys.stderr.write('.')
po = defs.get(pot.id, False) # Perfect match
if po:
matches.append(add(pot, po))
po.used(); pot.used()
continue
po = fuzzy_match(pot, defs) # Fuzzy match
if po:
matches.append(add(pot, po, fuzzy = True))
po.used(); pot.used()
continue
matches.append(add(pot, empty)) # No match
obsolete(defs, matches)
return matches
def obsolete(defs, matches):
'''Handle obsolete translations.'''
deco = [ (d.lno, d) for d in defs.values() if
d.count == 0 and not d.str == 'msgstr ""\n' ]
deco.sort()
empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
obs = [ o[1] for o in deco ]
for o in obs:
o.flag = flags(o, empty, obs = True)
o.obsolete()
matches.append('%s%s%s' % (o.flag, o.id, o.str))
def help():
'''Print help text and exit.'''
print __doc__
sys.exit(0)
def cmdline():
'''Parse options and arguments from command line.'''
advice = 'Try `%(name)s --help\' for more information.'
try:
long_opt = ['help', 'version', 'update', 'output-file=',
'quiet', 'silent', 'docstrings', 'suffix', 'backup']
opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
except getopt.error, msg:
print '%s: %s\n%s' % ('%(name)s', msg, advice) % globals()
sys.exit(1)
option = Options(cmdline = True)
for opt, arg in opts:
if opt in ['-h', '--help']:
help()
elif opt in ['-V', '--version']:
print '%(name)s %(__version__)s' % globals()
sys.exit(0)
elif opt in ['-o', '--output-file']:
option.outfile = arg
elif opt in ['-U', '--update']:
option.update = True
elif opt in ['-q', '--silent', '--quiet']:
option.verbose = False
elif opt in ['-D', '--docstrings']:
option.docstrings = True
elif opt in ['--suffix']:
option.suffix = arg
elif opt in ['--backup']:
option.backup = arg
# Sanity checks
warn = False
if option.update and option.outfile:
warn = '--update and --output-file are mutually exclusive.'
if len(args) == 0:
warn = 'no input files given.'
elif len(args) == 1 or len(args) > 2:
warn = 'exactly 2 input files required.'
if warn:
print '%s: %s\n%s' % ('%(name)s', warn, advice) % globals()
sys.exit(1)
if option.update:
option.outfile = args[0]
elif not option.outfile:
option.outfile = '-'
defs, refs = args
try:
merge(defs, refs, option = option)
except MsgmergeError, err:
print '%(name)s: ' % globals() + '%s' % err
sys.exit(1)
def io(iofile, mode = 'rU'):
'''Wrapper around open().'''
try:
fo = open(iofile, mode)
if 'r' in mode and fo.read(3) != codecs.BOM_UTF8:
fo.seek(0)
except IOError, msg:
raise MsgmergeError('error while opening file: %s: %s.' %
(msg[1], iofile))
return fo
def backup(infile):
'''Handle backup of files in update mode'''
os.environ.get('VERSION_CONTROL', '')
suffix = os.environ.get('SIMPLE_BACKUP_SUFFIX', '~')
backup_file = '%s%s' % (infile, suffix)
def changes(new, old):
return cmp(''.join(old), '\n'.join(new))
def write(matches, outfile):
'''Write the list <matches> to file <outfile>'''
if not outfile == '-':
fd = io(outfile, 'w')
else:
fd = sys.stdout
fd.write('\n'.join(matches))
def merge(def_file, ref_file, update = True, outfile = '-',
docstrings = True, suffix = '~', backup = True,
verbose = True, **kwds):
'''
Merge po file <def_file> with pot file <ref_file>. If <update> is
set to True then only update if there are changes to the po
file. Set outfile to write the updated po file to another file. Set
to `-\' for writing to standard out. If docstrings is False
the docstring flag will be removed. Set verbose to False to suppress
progress indicators. <kwds> is used to pass options from the
command line interface.
'''
global option
option = kwds.get('option', Options(update = update,
outfile = outfile,
docstrings = docstrings,
suffix = suffix,
backup = backup,
verbose = verbose))
def_msgs = parse(def_file, 'msgstr')
ref_msgs = parse(ref_file, 'msgid')
if verbose and not __name__ == '__main__':
print >> sys.stderr, 'Merging %s with %s' % (ref_file, def_file)
updated_lines = match(def_msgs, ref_msgs)
if option.verbose:
print >> sys.stderr, ' done.'
if not option.update:
write(updated_lines, option.outfile)
elif option.update and changes(updated_lines, io(def_file).readlines()):
write(updated_lines, def_file)
def merge_dir(directory, pot = False, include = [], exclude = [],
verbose = True):
'''
Tries to merge a directory of po files. Uses simple glob to find
po files and pot file. The parameter <pot> can be used to specify
the pot file in the directory. If the list <include> is given only
files in this list are merged. Use the list <exclude> to exclude
files from being merged. This function is only useful if po files and
pot file are in the same directory. Set <verbose> to get
information when running.
'''
if directory[-1] == '/':
directory = os.path.dirname(directory)
if pot:
pot = os.path.basename(pot)
else:
pot = glob.glob('%s/*.pot' % directory)
if not pot:
raise MsgmergeError('No pot file found.')
elif len(pot) > 1:
raise MsgmergeError('More than one pot file found: %s.' % pot)
pot = os.path.basename(pot[0])
if not include:
pos = glob.glob('%s/*po' % directory)
if not len(pos) > 1:
raise MsgmergeError('No po file(s) found.')
pos = [ os.path.basename(po) for po in pos ]
else:
pos = [ os.path.basename(po) for po in include ]
for po in exclude:
try:
pos.remove(po)
except ValueError:
pass
format = '%s/%s'
for po in pos:
try:
merge(format % (directory, po), format % (directory, pot),
update = True, verbose = verbose,
outfile = format % (directory, po))
except MsgmergeError, err:
if verbose:
print >> sys.stderr, '%s Not updated.' % err
else:
print >> sys.stderr, '%s %s not updated.' % (err, po)
if __name__ == '__main__':
cmdline()
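
The fuzzy_match() routine in the removed msgmerge.py rests on difflib.SequenceMatcher with a 0.6 cutoff and a cheap quick_ratio() pre-filter before the expensive ratio(). A minimal standalone sketch of that idea (function name and test strings are ours, not from the removed module):

import difflib

def best_fuzzy_match(msgid, candidates, limit=0.6):
    """Return the candidate most similar to msgid, or None if nothing beats limit."""
    best, best_ratio = None, limit
    matcher = difflib.SequenceMatcher(None, '', msgid)
    for cand in candidates:
        matcher.set_seq1(cand)
        if matcher.quick_ratio() < best_ratio:   # cheap upper bound on ratio(), skip early
            continue
        ratio = matcher.ratio()                  # expensive, exact similarity
        if ratio > best_ratio:
            best, best_ratio = cand, ratio
    return best

print(best_fuzzy_match('Open the file', ['Open a file', 'Quit', 'Save the file']))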

View File

@@ -1,672 +0,0 @@
#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@zope.com>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# 2002-11-22 Jürgen Hermann <jh@web.de>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#
# for selftesting
try:
import fintl
_ = fintl.gettext
except ImportError:
_ = lambda s: s
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.
Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.
A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:
gettext("Translatable String")
_("Translatable String")
Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext wherever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.
Usage: pygettext [options] inputfile ...
Options:
-a
--extract-all
Extract all strings.
-d name
--default-domain=name
Rename the default output file from messages.pot to name.pot.
-E
--escape
Replace non-ASCII characters with octal escape sequences.
-D
--docstrings
Extract module, class, method, and function docstrings. These do
not need to be wrapped in _() markers, and in fact cannot be for
Python to consider them docstrings. (See also the -X option).
-h
--help
Print this help message and exit.
-k word
--keyword=word
Keywords to look for in addition to the default set, which are:
%(DEFAULTKEYWORDS)s
You can have multiple -k flags on the command line.
-K
--no-default-keywords
Disable the default set of keywords (see above). Any keywords
explicitly added with the -k/--keyword option are still recognized.
--no-location
Do not write filename/lineno location comments.
-n
--add-location
Write filename/lineno location comments indicating where each
extracted string is found in the source. These lines appear before
each msgid. The style of comments is controlled by the -S/--style
option. This is the default.
-o filename
--output=filename
Rename the default output file from messages.pot to filename. If
filename is `-' then the output is sent to standard out.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-S stylename
--style stylename
Specify which style to use for location comments. Two styles are
supported:
Solaris # File: filename, line: line-number
GNU #: filename:line
The style name is case insensitive. GNU style is the default.
-v
--verbose
Print the names of the files being processed.
-V
--version
Print the version of pygettext and exit.
-w columns
--width=columns
Set width of output to columns.
-x filename
--exclude-file=filename
Specify a file that contains a list of strings that are not to be
extracted from the input files. Each string to be excluded must
appear on a line by itself in the file.
-X filename
--no-docstrings=filename
Specify a file that contains a list of files (one per line) that
should not have their docstrings extracted. This is only useful in
conjunction with the -D option above.
If `inputfile' is -, standard input is read.
""")
import os
import imp
import sys
import glob
import time
import getopt
import token
import tokenize
import operator
from umit.pm.core.const import PM_VERSION
__version__ = '1.5'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
EMPTYSTRING = ''
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
pot_header = _('''\
# PacketManipulator catalog.
# Copyright (C) 2009 Adriano Montero Marques
# Francesco Piccinno <stack.box@gmail.com>, 2009
#
msgid ""
msgstr ""
"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: pygettext.py %(version)s\\n"
''')
def usage(code, msg=''):
print >> sys.stderr, __doc__ % globals()
if msg:
print >> sys.stderr, msg
sys.exit(code)
escapes = []
def make_escapes(pass_iso8859):
global escapes
if pass_iso8859:
# Allow iso-8859 characters to pass through so that e.g. 'msgid
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod = 128
else:
mod = 256
for i in range(256):
if 32 <= (i % mod) <= 126:
escapes.append(chr(i))
else:
escapes.append("\\%03o" % i)
escapes[ord('\\')] = '\\\\'
escapes[ord('\t')] = '\\t'
escapes[ord('\r')] = '\\r'
escapes[ord('\n')] = '\\n'
escapes[ord('\"')] = '\\"'
def escape(s):
global escapes
s = list(s)
for i in range(len(s)):
s[i] = escapes[ord(s[i])]
return EMPTYSTRING.join(s)
def safe_eval(s):
# unwrap quotes, safely
return eval(s, {'__builtins__':{}}, {})
def normalize(s):
# This converts the various Python string types into a format that is
# appropriate for .po files, namely much closer to C style.
lines = s.split('\n')
if len(lines) == 1:
s = '"' + escape(s) + '"'
else:
if not lines[-1]:
del lines[-1]
lines[-1] = lines[-1] + '\n'
for i in range(len(lines)):
lines[i] = escape(lines[i])
lineterm = '\\n"\n"'
s = '""\n"' + lineterm.join(lines) + '"'
return s
def containsAny(str, set):
"""Check whether 'str' contains ANY of the chars in 'set'"""
return 1 in [c in str for c in set]
def _visit_pyfiles(list, dirname, names):
"""Helper for getFilesForName()."""
# get extension for python source files
if not globals().has_key('_py_ext'):
global _py_ext
_py_ext = [triple[0] for triple in imp.get_suffixes()
if triple[2] == imp.PY_SOURCE][0]
# don't recurse into CVS directories
if 'CVS' in names:
names.remove('CVS')
# add all *.py files to list
list.extend(
[os.path.join(dirname, file) for file in names
if os.path.splitext(file)[1] == _py_ext]
)
def _get_modpkg_path(dotted_name, pathlist=None):
"""Get the filesystem path for a module or a package.
Return the file system path to a file for a module, and to a directory for
a package. Return None if the name is not found, or is a builtin or
extension module.
"""
# split off top-most name
parts = dotted_name.split('.', 1)
if len(parts) > 1:
# we have a dotted path, import top-level package
try:
file, pathname, description = imp.find_module(parts[0], pathlist)
if file: file.close()
except ImportError:
return None
# check if it's indeed a package
if description[2] == imp.PKG_DIRECTORY:
# recursively handle the remaining name parts
pathname = _get_modpkg_path(parts[1], [pathname])
else:
pathname = None
else:
# plain name
try:
file, pathname, description = imp.find_module(
dotted_name, pathlist)
if file:
file.close()
if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
pathname = None
except ImportError:
pathname = None
return pathname
def getFilesForName(name):
"""Get a list of module files for a filename, a module or package name,
or a directory.
"""
if not os.path.exists(name):
# check for glob chars
if containsAny(name, "*?[]"):
files = glob.glob(name)
list = []
for file in files:
list.extend(getFilesForName(file))
return list
# try to find module or package
name = _get_modpkg_path(name)
if not name:
return []
if os.path.isdir(name):
# find all python files in directory
list = []
os.path.walk(name, _visit_pyfiles, list)
return list
elif os.path.exists(name):
# a single file
return [name]
return []
class TokenEater:
def __init__(self, options):
self.__options = options
self.__messages = {}
self.__state = self.__waiting
self.__data = []
self.__lineno = -1
self.__freshmodule = 1
self.__curfile = None
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
## import token
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
## 'tstring:', tstring
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
opts = self.__options
# Do docstring extractions, if enabled
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
# module docstring?
if self.__freshmodule:
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL):
self.__freshmodule = 0
return
# class docstring?
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen
def __suiteseen(self, ttype, tstring, lineno):
# ignore anything until we see the colon
if ttype == tokenize.OP and tstring == ':':
self.__state = self.__suitedocstring
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
def __keywordseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == '(':
self.__data = []
self.__lineno = lineno
self.__state = self.__openseen
else:
self.__state = self.__waiting
def __openseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == ')':
# We've seen the last of the translatable strings. Record the
# line number of the first line of the strings and update the list
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything other than STRING or whitespace
print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
}
self.__state = self.__waiting
def __addentry(self, msg, lineno=None, isdocstring=0):
if lineno is None:
lineno = self.__lineno
if not msg in self.__options.toexclude:
entry = (self.__curfile, lineno)
self.__messages.setdefault(msg, {})[entry] = isdocstring
def set_filename(self, filename):
self.__curfile = filename
self.__freshmodule = 1
def write(self, fp):
options = self.__options
timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
# The time stamp in the header doesn't have the same format as that
# generated by xgettext...
print >> fp, pot_header % {'time': timestamp, 'version': __version__,
'pm_version': PM_VERSION}
# Sort the entries. First sort each particular entry's keys, then
# sort all the entries by their first item.
reverse = {}
for k, v in self.__messages.items():
keys = v.keys()
keys.sort()
reverse.setdefault(tuple(keys), []).append((k, v))
rkeys = reverse.keys()
rkeys.sort()
for rkey in rkeys:
rentries = reverse[rkey]
rentries.sort()
for k, v in rentries:
isdocstring = 0
# If the entry was gleaned out of a docstring, then add a
# comment stating so. This is to aid translators who may wish
# to skip translating some unimportant docstrings.
if reduce(operator.__add__, v.values()):
isdocstring = 1
# k is the message string, v is a dictionary-set of (filename,
# lineno) tuples. We want to sort the entries in v first by
# file name and then by line number.
v = v.keys()
v.sort()
if not options.writelocations:
pass
# location comments are different b/w Solaris and GNU:
elif options.locationstyle == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
print >>fp, _(
'# File: %(filename)s, line: %(lineno)d') % d
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceed 'options.width'
locline = '#:'
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
s = _(' %(filename)s:%(lineno)d') % d
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print >> fp, locline
locline = "#:" + s
if len(locline) > 2:
print >> fp, locline
if isdocstring:
print >> fp, '#, docstring'
print >> fp, 'msgid', normalize(k)
print >> fp, 'msgstr ""\n'
def main():
global default_keywords
try:
opts, args = getopt.getopt(
sys.argv[1:],
'ad:DEhk:Kno:p:S:Vvw:x:X:',
['extract-all', 'default-domain=', 'escape', 'help',
'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings', 'no-docstrings',
])
except getopt.error, msg:
usage(1, msg)
# for holding option values
class Options:
# constants
GNU = 1
SOLARIS = 2
# defaults
extractall = 0 # FIXME: currently this option has no effect at all.
escape = 0
keywords = []
outpath = ''
outfile = 'messages.pot'
writelocations = 1
locationstyle = GNU
verbose = 0
width = 78
excludefilename = ''
docstrings = 0
nodocstrings = {}
options = Options()
locations = {'gnu' : options.GNU,
'solaris' : options.SOLARIS,
}
# parse options
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-a', '--extract-all'):
options.extractall = 1
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
elif opt in ('-D', '--docstrings'):
options.docstrings = 1
elif opt in ('-k', '--keyword'):
options.keywords.append(arg)
elif opt in ('-K', '--no-default-keywords'):
default_keywords = []
elif opt in ('-n', '--add-location'):
options.writelocations = 1
elif opt in ('--no-location',):
options.writelocations = 0
elif opt in ('-S', '--style'):
options.locationstyle = locations.get(arg.lower())
if options.locationstyle is None:
usage(1, _('Invalid value for --style: %s') % arg)
elif opt in ('-o', '--output'):
options.outfile = arg
elif opt in ('-p', '--output-dir'):
options.outpath = arg
elif opt in ('-v', '--verbose'):
options.verbose = 1
elif opt in ('-V', '--version'):
print _('pygettext.py (xgettext for Python) %s') % __version__
sys.exit(0)
elif opt in ('-w', '--width'):
try:
options.width = int(arg)
except ValueError:
usage(1, _('--width argument must be an integer: %s') % arg)
elif opt in ('-x', '--exclude-file'):
options.excludefilename = arg
elif opt in ('-X', '--no-docstrings'):
fp = open(arg)
try:
while 1:
line = fp.readline()
if not line:
break
options.nodocstrings[line[:-1]] = 1
finally:
fp.close()
# calculate escapes
make_escapes(options.escape)
# calculate all keywords
options.keywords.extend(default_keywords)
# initialize list of strings to exclude
if options.excludefilename:
try:
fp = open(options.excludefilename)
options.toexclude = fp.readlines()
fp.close()
except IOError:
print >> sys.stderr, _(
"Can't read --exclude-file: %s") % options.excludefilename
sys.exit(1)
else:
options.toexclude = []
# resolve args to module lists
expanded = []
for arg in args:
if arg == '-':
expanded.append(arg)
else:
expanded.extend(getFilesForName(arg))
args = expanded
# slurp through all the files
eater = TokenEater(options)
for filename in args:
if filename == '-':
if options.verbose:
print _('Reading standard input')
fp = sys.stdin
closep = 0
else:
if options.verbose:
print _('Working on %s') % filename
fp = open(filename)
closep = 1
try:
eater.set_filename(filename)
try:
tokenize.tokenize(fp.readline, eater)
except tokenize.TokenError, e:
print >> sys.stderr, '%s: %s, line %d, column %d' % (
e[0], filename, e[1][0], e[1][1])
finally:
if closep:
fp.close()
# write the output
if options.outfile == '-':
fp = sys.stdout
closep = 0
else:
if options.outpath:
options.outfile = os.path.join(options.outpath, options.outfile)
fp = open(options.outfile, 'w')
closep = 1
try:
eater.write(fp)
finally:
if closep:
fp.close()
if __name__ == '__main__':
main()
# some more test strings
_(u'a unicode string')
# this one creates a warning
_('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
_('more' 'than' 'one' 'string')
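
For comparison, the heart of pygettext's TokenEater above is a small state machine over the token stream: a keyword NAME, an opening parenthesis, then string literals. A much-reduced sketch of that token-level scan (Python 3 style, hypothetical helper name, not the removed implementation):

import ast
import tokenize
from io import StringIO

SOURCE = 'print(_("Hello, world"))\nlabel = _("Quit")\n'

def extract_gettext_strings(source, keyword='_'):
    """Collect (lineno, text) for literal arguments of keyword(...) calls."""
    found, state = [], 'waiting'
    for tok_type, tok_str, start, _end, _line in tokenize.generate_tokens(StringIO(source).readline):
        if state == 'waiting' and tok_type == tokenize.NAME and tok_str == keyword:
            state = 'keyword'
        elif state == 'keyword':
            state = 'open' if (tok_type == tokenize.OP and tok_str == '(') else 'waiting'
        elif state == 'open':
            if tok_type == tokenize.STRING:
                found.append((start[0], ast.literal_eval(tok_str)))
            state = 'waiting'
    return found

print(extract_gettext_strings(SOURCE))   # [(1, 'Hello, world'), (2, 'Quit')]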