mirror of
https://github.com/holub/mame
synced 2025-04-22 08:22:15 +03:00
Placed official version, and removed other files since work only with parsing python (nw)
This commit is contained in:
parent
78ada55150
commit
be7cc43b08
@ -1,490 +0,0 @@
|
||||
#! /usr/bin/env python
|
||||
#
|
||||
# check_po - a gramps tool to check validity of po files
|
||||
#
|
||||
# Copyright (C) 2006-2006 Kees Bakker
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
#
|
||||
# TODO
|
||||
#
|
||||
# * Check for HTML text in msgstr when there is none in msgid
|
||||
# * Check for matching HTML tag/endtag in msgstr
|
||||
#
|
||||
|
||||
# Adapted for Umit by Guilherme Polo, original file:
|
||||
# https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po
|
||||
|
||||
import re
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
|
||||
APP = "Umit"
|
||||
|
||||
all_total = {}
|
||||
all_fuzzy = {}
|
||||
all_untranslated = {}
|
||||
all_percent_s = {}
|
||||
all_named_s = {}
|
||||
all_bnamed_s = {}
|
||||
all_context = {}
|
||||
all_coverage = {}
|
||||
all_template_coverage = {}
|
||||
|
||||
def strip_quotes(st):
    """Return *st* without its enclosing double quotes, if it has them.

    A string shorter than two characters, or one that does not both start
    and end with ``"``, is returned unchanged.
    """
    quoted = len(st) >= 2 and st.startswith('"') and st.endswith('"')
    if not quoted:
        return st
    return st.strip()[1:-1]
|
||||
|
||||
# This is a base class for all checks
|
||||
class Check:
    """Base class for all checks.

    Subclasses set ``diag_header`` and ``summary_text`` and provide a
    ``process( msg )`` method that appends offending messages to
    ``self.msgs``.
    """

    def __init__( self ):
        # Messages that failed this check, in the order they were processed.
        self.msgs = []

    def diag( self ):
        """Print the header followed by every collected message.

        Nothing is printed when no message failed the check.
        """
        if not self.msgs:
            return
        # Single-argument print( ... ) behaves the same on Python 2 and 3;
        # a bare ``print`` would silently do nothing on Python 3.
        print( "" )
        print( self.diag_header )
        for m in self.msgs:
            m.diag()

    def summary( self ):
        """Print one line: the summary label and the number of failures."""
        print( "%-20s%d" % ( self.summary_text, len(self.msgs) ) )
|
||||
|
||||
class Check_fmt( Check ):
    """Check that a literal format string (such as ``%s``) occurs equally
    often in msgid and msgstr."""

    def __init__( self, fmt ):
        Check.__init__( self )
        self.fmt = fmt
        self.summary_text = "%s mismatches:" % fmt
        self.diag_header = "-------- %s mismatches --------------" % fmt

    def process( self, msg ):
        """Record *msg* when the two occurrence counts differ."""
        in_source = msg.msgid.count( self.fmt )
        in_translation = msg.msgstr.count( self.fmt )
        if in_source != in_translation:
            self.msgs.append( msg )
|
||||
|
||||
class Check_named_fmt( Check ):
    """Check that msgid and msgstr use the same named ``%(name)X`` formats."""

    # A pattern to find all %(): the name in parentheses, optional digits and
    # then one non-digit character.  It is a raw string so the backslashes
    # reach the regex engine untouched instead of being treated as (invalid)
    # string escapes.
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    def __init__( self ):
        Check.__init__( self )
        self.diag_header = "-------- %() name mismatches --------------"
        self.summary_text = "%() name mismatches:"

    def process( self, msg ):
        """Record *msg* unless both strings hold the same named formats."""
        # Comparing the sorted lists covers both a differing number of
        # formats and differing names; the order of appearance is ignored.
        fmts1 = sorted( self.find_named_fmt_pat.findall( msg.msgid ) )
        fmts2 = sorted( self.find_named_fmt_pat.findall( msg.msgstr ) )
        if fmts1 != fmts2:
            self.msgs.append( msg )
|
||||
|
||||
class Check_missing_sd( Check ):
    """Check that every named ``%(name)`` format in msgstr ends in s or d."""

    # A pattern to find %() without s or d.  The single group captures the
    # character that follows the optional digits, or the empty string when
    # the text ends there.
    # Here is a command to use for testing
    # print( re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' ) )
    find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

    def __init__( self ):
        Check.__init__( self )
        self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
        self.summary_text = "%() missing s/d:"

    def process( self, msg ):
        """Record *msg* once if any named format lacks an s/d conversion."""
        for conversion in self.find_named_fmt_pat2.findall( msg.msgstr ):
            if conversion not in ('s', 'd'):
                self.msgs.append( msg )
                break
|
||||
|
||||
class Check_runaway( Check ):
    """Check for a context prefix ("context|word") kept in the translation."""

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "Runaway context:"
        self.diag_header = "-------- Runaway context in translation ---------"

    def process( self, msg ):
        """Record *msg* when both strings contain '|' and they differ."""
        source, translation = msg.msgid, msg.msgstr

        # Runaway context: the translated part should only hold the
        # translation of the word after the |, not the context itself.
        if source == translation:
            return
        if '|' in source and '|' in translation:
            self.msgs.append( msg )
|
||||
|
||||
class Check_xml_chars( Check ):
    """Check tips.xml translations for unescaped XML special characters."""

    # Special XML characters
    # Flags a '>' that follows a non-word character, any double quote, and
    # any '&' that does not start one of the entities quot, nbsp, gt or amp.
    # NOTE(review): '&lt;' is not in the accepted entity list - confirm that
    # this is intended.
    xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "XML special chars:"
        self.diag_header = "-------- unescaped XML special characters ---------"

    def process( self, msg ):
        """Record *msg* when it comes from tips.xml and msgstr is flagged."""
        translation = msg.msgstr

        # XML errors
        # Only messages marked as coming from tips.xml are looked at.
        if not msg.is_tips_xml:
            return
        if self.xml_chars_pat.search( translation ):
            self.msgs.append( msg )
|
||||
|
||||
class Check_last_char( Check ):
    """Check that msgid and msgstr agree on their final character class."""

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "Last character:"
        self.diag_header = "-------- last character not identical ---------"

    def process( self, msg ):
        """Record a non-fuzzy *msg* whose strings end differently.

        "Differently" means exactly one of them ends in white space, or
        exactly one of them ends in a period.
        """
        if msg.is_fuzzy:
            return

        # Slices rather than indexing, so an empty string yields ''.
        source_end = msg.msgid[-1:]
        translation_end = msg.msgstr[-1:]

        space_differs = source_end.isspace() != translation_end.isspace()
        period_differs = (source_end == '.') != (translation_end == '.')
        if space_differs or period_differs:
            self.msgs.append( msg )
|
||||
|
||||
class Check_shortcut_trans( Check ):
    """Check for an underscore in msgstr when msgid has none."""

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "Shortcut in msgstr:"
        self.diag_header = "-------- shortcut key in translation ---------"

    def process( self, msg ):
        """Record *msg* when only the translation contains '_'."""
        source_has_key = '_' in msg.msgid
        translation_has_key = '_' in msg.msgstr
        if translation_has_key and not source_has_key:
            self.msgs.append( msg )
|
||||
|
||||
class Msgid:
    """One entry of a PO file: raw lines plus the accumulated texts.

    ``msgid`` and ``msgstr`` hold the concatenated, unquoted text;
    ``_msgid``, ``_msgstr`` and ``_cmnt`` keep the raw input lines.
    """

    fuzzy_pat = re.compile( 'fuzzy' )
    tips_xml_pat = re.compile( r'tips\.xml' )
    # Anchored at the start of the line so that only the leading keyword is
    # removed, never the same word occurring inside the quoted text.
    _msgid_kw_pat = re.compile( r'^\s*msgid\s+' )
    _msgstr_kw_pat = re.compile( r'^\s*msgstr\s+' )

    def __init__( self, msgnr, lineno ):
        self._msgid = []
        self._msgstr = []
        self.msgid = ''
        self.msgstr = ''
        self._cmnt = []
        self.nr = msgnr
        self.lineno = lineno
        self.is_fuzzy = 0
        self.is_tips_xml = 0

    def diag( self ):
        """Print the entry number, line number and the raw msgid/msgstr."""
        # Single-argument print( ... ) behaves the same on Python 2 and 3.
        print( "" )
        fuzzy_mark = " (fuzzy)" if self.is_fuzzy else ""
        print( "msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, fuzzy_mark ) )
        sys.stdout.write( ''.join( self._msgid ) )
        sys.stdout.write( ''.join( self._msgstr ) )

    def _unquoted( self, keyword_pat, line, lineno ):
        # Drop the leading keyword (if any) and the enclosing quotes of one
        # msgid/msgstr/continuation line; report a missing quote.
        line = keyword_pat.sub( '', line, 1 ).strip()
        # Slices instead of line[0] so an empty line is reported, not raised.
        if line[:1] != '"' or line[-1:] != '"':
            print( "ERROR at line %d: Missing quote." % lineno )
        return strip_quotes( line )

    def add_msgid( self, line, lineno ):
        """Append a msgid line (or continuation string) to this entry."""
        self._msgid.append( line )
        self.msgid += self._unquoted( self._msgid_kw_pat, line, lineno )

    def add_msgstr( self, line, lineno ):
        """Append a msgstr line (or continuation string) to this entry."""
        self._msgstr.append( line )
        self.msgstr += self._unquoted( self._msgstr_kw_pat, line, lineno )

    def add_cmnt( self, line ):
        """Store a comment line and update the fuzzy / tips.xml flags."""
        self._cmnt.append( line )
        if not self.is_fuzzy and self.fuzzy_pat.search( line ):
            self.is_fuzzy = 1
        if not self.is_tips_xml and self.tips_xml_pat.search( line ):
            self.is_tips_xml = 1
|
||||
|
||||
def read_msgs( fname ):
    """Parse the PO/POT file *fname* and return a list of Msgid objects.

    Each non-empty line is classified (obsolete "#~", comment, msgid,
    msgstr, continuation string) and fed through a small state machine that
    groups the lines into entries.  Warnings about unexpected input are
    printed to standard output.  Entries that end up with neither msgid nor
    msgstr text are dropped from the result.

    Raises Exception when a msgid line directly follows a msgid section or
    a msgstr line directly follows a msgstr section.
    """
    empty_pat = re.compile( r'^ \s* $', re.VERBOSE )
    comment_pat = re.compile( r'\#', re.VERBOSE )
    msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE )
    msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE )
    str_pat = re.compile( r'"', re.VERBOSE )
    old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE )

    # The with-block closes the file even when reading fails.
    with open( fname ) as f:
        lines = f.readlines()

    # parse it like a statemachine
    NONE = 0    # Nothing detected, yet
    CMNT = 1    # Inside comment part
    MSGID = 2   # Inside msgid part
    MSGSTR = 3  # Inside msgstr part
    STR = 4     # A continuation string
    OLD = 5     # An old pattern with #~

    msgs = []

    def new_msg( lineno ):
        # Start an empty new item.  Message numbers are handed out in
        # creation order starting at 0, so the next number always equals the
        # number of items created so far.
        item = Msgid( len( msgs ), lineno )
        msgs.append( item )
        return item

    def warn( what, lineno ):
        print( 'WARNING: %s at %s:%d' % ( what, fname, lineno ) )

    state = NONE
    msg = None

    for lineno, line in enumerate( lines, 1 ):  # Use line numbers for messages
        if empty_pat.match( line ):
            continue  # Empty lines are not interesting

        # What's the next state?  "#~" must be tested before the plain
        # comment pattern because it starts with '#' as well.
        if old_pat.match( line ):
            next_state = OLD
        elif comment_pat.match( line ):
            next_state = CMNT
        elif msgid_pat.match( line ):
            next_state = MSGID
        elif msgstr_pat.match( line ):
            next_state = MSGSTR
        elif str_pat.match( line ):
            next_state = STR
        else:
            warn( 'Unexpected input', lineno )
            next_state = NONE

        if state == NONE:
            # expect msgid or comment or old stuff
            if next_state == CMNT:
                state = CMNT
                msg = new_msg( lineno )
                msg.add_cmnt( line )
            elif next_state == MSGID:
                state = MSGID
                msg = new_msg( lineno )
                msg.add_msgid( line, lineno )
            elif next_state == MSGSTR:
                warn( 'Wild msgstr', lineno )
                state = MSGSTR
                msg = new_msg( lineno )
                msg.add_msgstr( line, lineno )
            elif next_state == STR:
                warn( 'Wild string', lineno )
            # OLD lines are just skipped here.

        elif state == CMNT:
            if next_state == CMNT:
                # Comments that follow obsolete material have no item to
                # attach to (msg is None) and are skipped for now.
                if msg:
                    msg.add_cmnt( line )
            elif next_state == MSGID:
                state = MSGID
                if not msg:
                    msg = new_msg( lineno )
                msg.add_msgid( line, lineno )
            elif next_state == MSGSTR:
                warn( 'Wild msgstr', lineno )
                state = MSGSTR
                msg = new_msg( lineno )
                msg.add_msgstr( line, lineno )
            elif next_state == STR:
                warn( 'Wild string', lineno )
            elif next_state == OLD:
                msg = None

        elif state == MSGID:
            if next_state == CMNT:
                # Hmmm. A comment here?
                warn( 'Unexpted comment', lineno )
            elif next_state == MSGID:
                raise Exception( 'Unexpected msgid at %s:%d' % ( fname, lineno ) )
            elif next_state == MSGSTR:
                state = MSGSTR
                msg.add_msgstr( line, lineno )
            elif next_state == STR:
                msg.add_msgid( line, lineno )
            elif next_state == OLD:
                msg = None

        elif state == MSGSTR:
            if next_state == CMNT:
                # A comment probably starts a new item
                state = CMNT
                msg = new_msg( lineno )
                msg.add_cmnt( line )
            elif next_state == MSGID:
                state = MSGID
                msg = new_msg( lineno )
                msg.add_msgid( line, lineno )
            elif next_state == MSGSTR:
                raise Exception( 'Unexpected msgstr at %s:%d' % ( fname, lineno ) )
            elif next_state == STR:
                msg.add_msgstr( line, lineno )
            elif next_state == OLD:
                msg = None

        else:
            raise Exception( 'Unexpected state in po parsing (state = %d)' % state )

    # Strip items with just comments. (Can this happen?)
    return [ m for m in msgs if m.msgid or m.msgstr ]
|
||||
|
||||
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
    """Run every check over *msgs* and print a report for *fname*.

    options       object with the boolean attributes ``skip_fuzzy`` and
                  ``only_summary``.
    fname         file name shown in the report.
    msgs          list of parsed entries (objects with msgid, msgstr and
                  is_fuzzy).
    nr_templates  number of entries in the template; when None the two
                  template lines of the report are left out.
    nth           index of this file in the run; a separator is printed
                  before every file but the first.

    Untranslated entries are counted and not checked.  Fuzzy entries are
    counted, and checked only when ``options.skip_fuzzy`` is false.
    """
    nr_fuzzy = 0
    nr_untranslated = 0

    checks = [
        Check_fmt( '%s' ),
        Check_fmt( '%d' ),
        Check_named_fmt(),
        Check_missing_sd(),
        Check_runaway(),
        Check_xml_chars(),
        Check_last_char(),
        Check_shortcut_trans(),
    ]

    for msg in msgs:
        if not msg.msgstr:
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            if options.skip_fuzzy:
                continue

        for c in checks:
            c.process( msg )

    nr_msgs = len(msgs)
    # Single-argument print( ... ) behaves the same on Python 2 and 3.
    if nth > 0:
        print( "" )
        print( "=====================================" )
    print( "%-20s%s" % ( "File:", fname ) )
    if nr_templates is not None:
        print( "%-20s%d" % ( "Template total:", nr_templates ) )
    print( "%-20s%d" % ( "PO total:", nr_msgs ) )
    print( "%-20s%d" % ( "Fuzzy:", nr_fuzzy ) )
    print( "%-20s%d" % ( "Untranslated:", nr_untranslated ) )

    for c in checks:
        c.summary()

    # An empty PO file has nothing translated; report 0% instead of
    # dividing by zero.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print( "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage ) )

    if nr_templates is not None:
        # Same guard for an empty template.
        if nr_templates:
            template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
        else:
            template_coverage = 0.0
        print( "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ) )

    if not options.only_summary:
        for c in checks:
            c.diag()
|
||||
|
||||
def main(args):
    """Command line entry point.

    *args* is the full argument vector including the program name, i.e.
    ``sys.argv``.  The first positional argument is read as the template
    (.pot) file to obtain the template entry count; every positional
    argument is then analyzed and reported.

    Exits with status 1 when no file is given.  Any exception raised while
    reading or analyzing is printed, and the function then returns.
    """
    parser = OptionParser(description="This program validates a PO file for "
                          "%s." % APP, usage='%prog [options] po-file...' )

    parser.add_option("--skip-fuzzy",
                      action="store_true", dest="skip_fuzzy", default=False,
                      help="skip fuzzies")

    parser.add_option("-s", "--only-summary",
                      action="store_true", dest="only_summary", default=False,
                      help="only give the summary")

    options, files = parser.parse_args(args[1:])

    # Checked after option parsing so that an option flag is never mistaken
    # for the template path.
    if not files:
        print("Error: Especify the umit.pot file path")
        sys.exit(1)

    try:
        pot_msgs = read_msgs(files[0])
        nr_templates = len(pot_msgs)
        # NOTE(review): the template itself is analyzed as the first file,
        # as before - confirm whether it should be left out of the loop.
        for nth, fname in enumerate(files):
            msgs = read_msgs(fname)
            analyze_msgs(options, fname, msgs, nr_templates, nth)

    except Exception as e:
        # Top-level boundary: report the problem instead of a traceback.
        print(e)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
@ -1,14 +1,6 @@
|
||||
#! /usr/bin/env python
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
|
||||
#
|
||||
# Changelog: (Guilherme Polo)
|
||||
# 2008-04-11
|
||||
# - Support for files with BOM UTF8 mark.
|
||||
#
|
||||
# 2008-04-10
|
||||
# - Support for fuzzy strings in output.
|
||||
# - Bumped to version 1.1.1
|
||||
|
||||
"""Generate binary message catalog from textual translation description.
|
||||
|
||||
@ -24,10 +16,6 @@ Options:
|
||||
Specify the output file to write to. If omitted, output will go to a
|
||||
file named filename.mo (based off the input file name).
|
||||
|
||||
-f
|
||||
--use-fuzzy
|
||||
Use fuzzy entries in output
|
||||
|
||||
-h
|
||||
--help
|
||||
Print this message and exit.
|
||||
@ -35,23 +23,21 @@ Options:
|
||||
-V
|
||||
--version
|
||||
Display version information and exit.
|
||||
|
||||
Before using the -f (fuzzy) option, read this:
|
||||
http://www.finesheer.com:8457/cgi-bin/info2html?(gettext)Fuzzy%20Entries&lang=en
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import sys
|
||||
import ast
|
||||
import getopt
|
||||
import struct
|
||||
import array
|
||||
import codecs
|
||||
|
||||
__version__ = "1.1.1"
|
||||
__version__ = "1.1"
|
||||
|
||||
MESSAGES = {}
|
||||
|
||||
|
||||
|
||||
def usage(code, msg=''):
|
||||
print >> sys.stderr, __doc__
|
||||
if msg:
|
||||
@ -59,13 +45,15 @@ def usage(code, msg=''):
|
||||
sys.exit(code)
|
||||
|
||||
|
||||
def add(id, str, fuzzy, use_fuzzy):
|
||||
"Add a translation to the dictionary."
|
||||
|
||||
def add(id, str, fuzzy):
|
||||
"Add a non-fuzzy translation to the dictionary."
|
||||
global MESSAGES
|
||||
if (not fuzzy or use_fuzzy) and str:
|
||||
if not fuzzy and str:
|
||||
MESSAGES[id] = str
|
||||
|
||||
|
||||
|
||||
def generate():
|
||||
"Return the generated output."
|
||||
global MESSAGES
|
||||
@ -108,7 +96,8 @@ def generate():
|
||||
return output
|
||||
|
||||
|
||||
def make(filename, outfile, use_fuzzy):
|
||||
|
||||
def make(filename, outfile):
|
||||
ID = 1
|
||||
STR = 2
|
||||
|
||||
@ -122,8 +111,6 @@ def make(filename, outfile, use_fuzzy):
|
||||
|
||||
try:
|
||||
lines = open(infile).readlines()
|
||||
if lines[0].startswith(codecs.BOM_UTF8):
|
||||
lines[0] = lines[0][len(codecs.BOM_UTF8):]
|
||||
except IOError, msg:
|
||||
print >> sys.stderr, msg
|
||||
sys.exit(1)
|
||||
@ -137,7 +124,7 @@ def make(filename, outfile, use_fuzzy):
|
||||
lno += 1
|
||||
# If we get a comment line after a msgstr, this is a new entry
|
||||
if l[0] == '#' and section == STR:
|
||||
add(msgid, msgstr, fuzzy, use_fuzzy)
|
||||
add(msgid, msgstr, fuzzy)
|
||||
section = None
|
||||
fuzzy = 0
|
||||
# Record a fuzzy mark
|
||||
@ -147,22 +134,44 @@ def make(filename, outfile, use_fuzzy):
|
||||
if l[0] == '#':
|
||||
continue
|
||||
# Now we are in a msgid section, output previous section
|
||||
if l.startswith('msgid'):
|
||||
if l.startswith('msgid') and not l.startswith('msgid_plural'):
|
||||
if section == STR:
|
||||
add(msgid, msgstr, fuzzy, use_fuzzy)
|
||||
add(msgid, msgstr, fuzzy)
|
||||
section = ID
|
||||
l = l[5:]
|
||||
msgid = msgstr = ''
|
||||
is_plural = False
|
||||
# This is a message with plural forms
|
||||
elif l.startswith('msgid_plural'):
|
||||
if section != ID:
|
||||
print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
|
||||
(infile, lno)
|
||||
sys.exit(1)
|
||||
l = l[12:]
|
||||
msgid += '\0' # separator of singular and plural
|
||||
is_plural = True
|
||||
# Now we are in a msgstr section
|
||||
elif l.startswith('msgstr'):
|
||||
section = STR
|
||||
l = l[6:]
|
||||
if l.startswith('msgstr['):
|
||||
if not is_plural:
|
||||
print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
|
||||
(infile, lno)
|
||||
sys.exit(1)
|
||||
l = l.split(']', 1)[1]
|
||||
if msgstr:
|
||||
msgstr += '\0' # Separator of the various plural forms
|
||||
else:
|
||||
if is_plural:
|
||||
print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\
|
||||
(infile, lno)
|
||||
sys.exit(1)
|
||||
l = l[6:]
|
||||
# Skip empty lines
|
||||
l = l.strip()
|
||||
if not l:
|
||||
continue
|
||||
# XXX: Does this always follow Python escape semantics?
|
||||
l = eval(l)
|
||||
l = ast.literal_eval(l)
|
||||
if section == ID:
|
||||
msgid += l
|
||||
elif section == STR:
|
||||
@ -174,7 +183,7 @@ def make(filename, outfile, use_fuzzy):
|
||||
sys.exit(1)
|
||||
# Add last entry
|
||||
if section == STR:
|
||||
add(msgid, msgstr, fuzzy, use_fuzzy)
|
||||
add(msgid, msgstr, fuzzy)
|
||||
|
||||
# Compute output
|
||||
output = generate()
|
||||
@ -185,15 +194,15 @@ def make(filename, outfile, use_fuzzy):
|
||||
print >> sys.stderr, msg
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'hVo:f',
|
||||
['help', 'version', 'output-file=', 'use-fuzzy'])
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
|
||||
['help', 'version', 'output-file='])
|
||||
except getopt.error, msg:
|
||||
usage(1, msg)
|
||||
|
||||
outfile = None
|
||||
use_fuzzy = False
|
||||
# parse options
|
||||
for opt, arg in opts:
|
||||
if opt in ('-h', '--help'):
|
||||
@ -201,8 +210,6 @@ def main():
|
||||
elif opt in ('-V', '--version'):
|
||||
print >> sys.stderr, "msgfmt.py", __version__
|
||||
sys.exit(0)
|
||||
elif opt in ('-f', '--use-fuzzy'):
|
||||
use_fuzzy = True
|
||||
elif opt in ('-o', '--output-file'):
|
||||
outfile = arg
|
||||
# do it
|
||||
@ -212,7 +219,7 @@ def main():
|
||||
return
|
||||
|
||||
for filename in args:
|
||||
make(filename, outfile, use_fuzzy)
|
||||
make(filename, outfile)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,675 +0,0 @@
|
||||
#! /usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
#
|
||||
# Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003.
|
||||
#
|
||||
'''Merge two Uniforum style .po files together.
|
||||
|
||||
This is a implementation (not complete) in Python of the GNU
|
||||
msgmerge(1) program. It can be used on the command line (or as a Python
|
||||
module).
|
||||
|
||||
Usage: msgmerge.py [OPTIONS] def.po ref.pot
|
||||
|
||||
The def.po file is an existing PO file with translations. The ref.pot
|
||||
file is the last created PO file with up-to-date source references but
|
||||
old translations, or a PO Template file.
|
||||
|
||||
Options:
|
||||
-U, --update update def.po,
|
||||
do nothing if def.po is already up to date.
|
||||
-o, --output-file=FILE write output to file FILE. Output is written to
|
||||
stdout if set to - or if the option is not present.
|
||||
-D, --docstrings don\'t remove docstring flag.
|
||||
-h, --help display help text and exit.
|
||||
-V, --version display version and exit.
|
||||
-q, --quiet, --silent suppress progress indicators.
|
||||
'''
|
||||
from __future__ import generators
|
||||
|
||||
if not __name__ == '__main__':
|
||||
__doc__ += '''\
|
||||
|
||||
When used as module the interesting functions are merge() and
|
||||
merge_dir().
|
||||
|
||||
The merge() function does the same as the command line version, and
|
||||
the arguments are as follows. The first argument is the def.po file,
|
||||
then the ref.pot file. The third argument controls whether do work in
|
||||
update mode or not, then the next argument sets the output file. Set
|
||||
the next argument to False to remove docstring flags. The last
|
||||
argument can be used to suppress progress indicators. The default is
|
||||
to work in update mode with progress indicators.
|
||||
|
||||
Example:
|
||||
merge("def.po", "ref.pot")
|
||||
merge the files def.po and ref.pot and write output to def.po if
|
||||
there are any changes.
|
||||
merge("def.po", "red.pot", docstrings = False, verbose = False,
|
||||
update = False, outfile = "-")
|
||||
merge the files def.po and ref.pot and write output to stdout,
|
||||
remove docstring flag and be quiet.
|
||||
|
||||
The merge_dir() function is useful when merging a directory of po
|
||||
files. The only required argument is the name of the directory with po
|
||||
files and the pot file. It will use simple glob to find the files. The
|
||||
second argument can be used to specify the pot file (in the
|
||||
directory). Third argument is a list of po files (then globbing will
|
||||
not be used) and the next argument is list of filename to exclude. The
|
||||
last argument can be used to suppress progress indicators. Docstring
|
||||
flag will not be removed.
|
||||
|
||||
Example:
|
||||
merge_dir("po")
|
||||
merge (and update) all po files in directory po with the single pot
|
||||
file in the same directory.
|
||||
|
||||
The module raises the MsgmergeError exception in case of error.
|
||||
'''
|
||||
__revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $'
|
||||
__version__ = '0.1'
|
||||
name = 'msgmerge.py'
|
||||
|
||||
__all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ]
|
||||
|
||||
import sys
|
||||
import re
|
||||
import string
|
||||
import getopt
|
||||
import difflib
|
||||
import glob
|
||||
import os.path
|
||||
import codecs
|
||||
|
||||
try:
|
||||
True, False
|
||||
except NameError:
|
||||
True, False = 1, 0
|
||||
|
||||
class Msgs:
    '''Holds one message: its msgid/msgstr text and related bookkeeping.'''
    width = 80
    file = ''

    def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
        self.id = msgid
        self.str = msgstr
        # Optional comment parts default to the empty string.
        self.cmt = kwds.get('cmt', '')
        self.ref = kwds.get('ref', '')
        self.autocmt = kwds.get('autocmt', '')
        self.flag = flag
        self.entry = entry
        self.lno = lno
        self.count = 0

    def wash(self):
        '''Re-wrap the id and str fields through the module-level wash().'''
        self.id = wash(self.id, width = self.width,
                       filename = self.file, lno = self.lno)
        self.str = wash(self.str, 'msgstr', width = self.width,
                        filename = self.file, lno = self.lno)

    def used(self):
        '''Count one more use of this message.'''
        self.count += 1

    def get_clean_id(self):
        '''Return the id with its first 'msgid "' occurrence removed.'''
        return self.id.replace('msgid "', '', 1)

    def obsolete(self):
        '''Wash the message, then prefix every line with the "#~ " marker.'''
        marker = '#~ '
        # Make room for the marker before wrapping.
        self.width -= len(marker)
        self.wash()
        self.id = ''.join([ '#~ %s\n' % part for part in self.id.splitlines() ])
        self.str = ''.join([ '#~ %s\n' % part for part in self.str.splitlines() ])
|
||||
|
||||
class Options:
    '''Holds the option values used by a merge run.'''
    def __init__(self, cmdline = False, **kwds):
        if cmdline:
            # Command line use: fixed starting values, keywords are ignored.
            self.update = False
            self.outfile = False
            self.docstrings = False
            self.verbose = True
            self.suffix = '~'
            self.backup = True
            return

        # Module use: each option may be overridden by keyword.
        self.update = kwds.get('update', True)
        self.outfile = kwds.get('outfile', '-')
        self.docstrings = kwds.get('docstrings', True)
        self.verbose = kwds.get('verbose', False)
        self.suffix = kwds.get('suffix', '~')
        self.backup = kwds.get('backup', True)
|
||||
|
||||
class MsgmergeError(Exception):
    '''Error raised by the msgmerge module.'''
|
||||
|
||||
def gen(lines):
    '''
    Generator over <lines> yielding (line, line number) pairs, where
    the first "#~ " (obsolete prefix) of each line has been removed
    and numbering starts at 1. After the last line, one extra pair
    with the unmodified last line and its number is yielded.
    '''
    count = 0
    for raw in lines:
        count += 1
        cleaned = raw.replace('#~ ', '', 1)
        yield cleaned, count
    yield raw, count
|
||||
|
||||
def slurp(s, g, sign):
    '''
    Consume (line, line number) pairs from the iterator <g> and append
    each line to the string <s> for as long as the line starts with
    the string <sign>. When <sign> is "# ", a line consisting of a
    lone "#" is appended as well.

    The return value is the first line which does not qualify, its
    line number, the iterator <g> and the (possibly) extended string
    <s>. The exception raised when <g> is exhausted is not caught.
    '''
    # The builtin next() works on Python 2.6+ and 3; the .next() method
    # exists on Python 2 iterators only.
    l, lno = next(g)
    while l.startswith(sign) or (sign == '# ' and l.strip() == '#'):
        s += l
        l, lno = next(g)
    return l, lno, g, s
|
||||
|
||||
def splitted_fit(chunk, line, width, break_always, break_after_space):
    '''
    Look for the rightmost position where string <chunk> may be split
    so that its head can be appended to string <line>. The return
    value is a tuple (head, rest); head is empty and rest is the
    whole chunk when no split position is found.

    A position qualifies when its character is in <break_always> and
    head plus <line> is at most <width> long, or when its character is
    in <break_after_space>, is not the first one and follows white
    space, or when it starts an escaped quote (\\") and head plus
    <line> is at most <width> long.
    '''
    for pos in reversed(range(len(chunk))):
        char = chunk[pos]
        head, rest = chunk[:pos], chunk[pos:]
        fits = len(head + line) <= width

        if char in break_always and fits:
            return head, rest
        if char in break_after_space and pos and chunk[pos - 1].strip() == '':
            return head, rest
        if char == '\\' and len(rest) > 1 and chunk[pos + 1] == '"' and fits:
            return head, rest
    return '', chunk
|
||||
|
||||
def wrap(msg, width):
    '''
    Wrap the string <msg> into pieces of roughly <width> characters
    and surround each piece with a pair of ". The return value is a
    string with these quoted pieces joined together with newlines. An
    empty or all-white-space <msg> is returned quoted, without
    wrapping.
    '''
    if msg.isspace() or not msg:
        return '"%s"' % msg

    # \ and " is here, but " is special in po files.
    break_always = '$%+({['
    # XXX what about: « © » ¦ § etc?
    break_after_space = '_-=^`~\'<|>&*#@'
    enders = '.:,;!?/])}|%-'
    # All punctuation which does not end a word.
    extra = ''.join([ c for c in string.punctuation if c not in enders ])
    escaped = { 'enders' : re.escape(enders),
                'extra' : re.escape(extra) }
    regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped
    splitter = re.compile(regex, re.UNICODE)
    pieces = [ p for p in splitter.split(msg) if p != '' ]

    current = pieces.pop(0)

    # Handle \n on end of line
    if len(pieces) > 1 and pieces[-1] == 'n' and len(pieces[-2]) > 0 \
           and pieces[-2][-1] == '\\':
        pieces[-2] += pieces[-1]
        pieces.pop()
    # Do not allow a single \n on a line
    if len(pieces) > 2 and pieces[-1] == '\\n':
        pieces[-2] += pieces[-1]
        pieces.pop()

    finished = []
    for piece in pieces:
        too_long = (len(current) > width or len(piece) > width
                    or len(current + piece) > width)
        if not too_long:
            current += piece
            continue
        fit, rest = splitted_fit(piece, current, width, break_always,
                                 break_after_space)
        finished.append(current + fit)
        current = rest
    finished.append(current)

    return '\n'.join([ '"%s"' % text for text in finished ])
|
||||
|
||||
def normalize(lines):
    '''
    Normalize <lines> so that every escaped newline ends its own
    quoted string: e.g "\\n\\nText\\n\\n" becomes:
    "\\n"
    "\\n"
    "Text\\n"
    "\\n"
    Nothing is changed unless the first escaped newline lies after
    the first character and more than three characters before the
    end.
    '''
    newline = '\\n'
    first = lines.find(newline)
    if not 0 < first < len(lines) - 3:
        return lines

    closing = newline + '"'
    if lines[-3:] == closing:
        # Keep the final \n" together; only split inside the text.
        body, tail = lines[:-3], closing
    else:
        body, tail = lines, ''
    return body.replace(newline, '\\n"\n"').replace('""\n', '') + tail
|
||||
|
||||
def wash(msg, idx = 'msgid', width = 80, **kwds):
    '''
    Do washing on the msgstr or msgid fields. Wrap the text to fit in
    width <width>. <msg> is the raw text of the field (one or more po
    lines in a single string). <idx> indicates msgid or msgstr,
    <width> holds the width. <filename> and <lno> (line number) are
    picked up from <kwds>; they are only used for the error message.
    Returns the washed field as a string. Raises MsgmergeError when a
    line of the field is not a quoted po string.
    '''
    msg = normalize(msg)
    lines = msg.splitlines()
    size = len(lines)
    if size <= 1 and len(msg) <= width:
        # A single line that already fits: nothing to do.
        return msg

    # The first line is special: it carries the <idx> keyword.
    m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
    if not m:
        print(lines[0])
        kwds['lno'] -= size + 1
        raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
                            % kwds)
    washed = [m.group(1)]
    if m.group(1).endswith(r'\n'):
        washed.append('')

    # Continuation lines are glued onto the current chunk; a chunk is
    # closed each time a line ends with the \n escape.
    for i, line in enumerate(lines[1:], 1):
        m = re.match(r'^"(\s*.*)"$', line)
        if not m:
            print(line)
            kwds['lno'] -= size - i + 1
            raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
                                % kwds)
        washed[-1] += m.group(1)
        if m.group(1).endswith(r'\n'):
            washed.append('')

    # Drop the empty chunks produced by a leading '<idx> ""' line and
    # by a trailing \n escape.
    if washed[0] == '':
        washed.pop(0)
    if washed and washed[-1] == '':
        washed.pop()
    if not washed:
        # The field was spread over several lines but is entirely empty.
        return '%s ""\n' % idx

    washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.

    # One line or multiline
    if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
        return '%s %s\n' % (idx, washed[0])
    return '%s ""\n%s\n' % (idx, '\n'.join(washed))
|
||||
|
||||
def parse(filename, entry):
    '''
    Parse po or pot file with name <filename>. Set the variable
    <entry> to msgid/msgstr to indicate pot/po file. The return value
    is a dict with msgid (washed) as key and Msgs instances as
    values. Raises MsgmergeError on a line that belongs to no known
    field.
    '''
    fo = io(filename)
    try:
        lines = fo.readlines()
    finally:
        fo.close()
    Msgs.file = filename
    messages = {}
    last = len(lines)
    g = gen(lines)
    # msgstr is initialised too, so an entry that has a msgid but no
    # msgstr line does not hit an unbound local below.
    msgstr = cmt = autocmt = ref = flag = ''
    msgid = False
    lno = 0
    while not lno == last:
        l, lno = next(g)
        # Each slurp() call consumes one field and hands back the first
        # line after it, so the checks below must stay in file order.
        if l.startswith('# '):
            l, lno, g, cmt = slurp(l, g, '# ')
        if l.startswith('#.'):
            l, lno, g, autocmt = slurp(l, g, '#.')
        if l.startswith('#:'):
            l, lno, g, ref = slurp(l, g, '#:')
        if l.startswith('#,'):
            l, lno, g, flag = slurp(l, g, '#,')
        if l.startswith('msgid'):
            l, lno, g, msgid = slurp(l, g, '"')
        if l.startswith('msgstr'):
            l, lno, g, msgstr = slurp(l, g, '"')

        # Whatever is left must be the blank line between two entries.
        if not lno == last and not l.strip() == '':
            raise MsgmergeError('parse error: %s:%s.' % (filename, lno))

        if msgid and entry in ('msgstr', 'msgid'):
            idx = wash(msgid, filename = filename, lno = lno)
            if entry == 'msgstr':
                # po file: keep the translator comments.
                extra = {'cmt': cmt}
            else:
                # pot file: keep the extracted comments and references.
                extra = {'autocmt': autocmt, 'ref': ref}
            messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, **extra)
            msgid = False
            msgstr = cmt = autocmt = ref = flag = ''

    for m in messages.values():
        m.wash()
    return messages
|
||||
|
||||
def fuzzy_match(pot, defs):
    '''
    Look through the Msgs objects of the dict <defs> for the one whose
    cleaned id is most similar (difflib ratio of at least 0.59) to the
    cleaned id of the Msgs object <pot>. Entries without a translation
    and the header entry are never candidates.
    Returns the best Msgs object, or False if none is similar enough.
    '''
    limit = 0.6
    best_ratio, best = limit - 0.01, False
    wanted = pot.get_clean_id()
    wanted_len = len(wanted)
    matcher = difflib.SequenceMatcher(lambda x: x == ' "', '', wanted)
    for candidate in defs.values():
        if candidate.str == 'msgstr ""\n': # Empty translation
            continue
        if candidate.id == 'msgid ""\n': # Empty msgid (header)
            continue
        clean = candidate.get_clean_id()
        size = len(clean)
        # Cheap length test first: very different sizes cannot match.
        if wanted_len > 2 * size or size > 1.5 * wanted_len:
            continue
        matcher.set_seq1(clean)
        # quick_ratio() is an upper bound, so it can rule a candidate out
        # before the expensive ratio() call.
        if matcher.quick_ratio() < best_ratio:
            continue
        ratio = matcher.ratio()
        if ratio > best_ratio:
            best_ratio, best = ratio, candidate
    return best
|
||||
|
||||
def flags(po, pot, fuzzy = False, obs = False):
    '''
    Build the flag line ("#, ...") of an entry from the flag fields of
    the Msgs objects <po> and <pot>. When <fuzzy> is true the flags of
    <po> are ignored and the fuzzy flag is added instead. When <obs> is
    set the c-format, python-format and docstring flags are removed.
    The docstring flag is also removed unless the global
    option.docstrings is set. Returns the combined flag line, or an
    empty string when no flag is left.
    '''
    if not (po.flag or pot.flag or fuzzy):
        return ''

    if fuzzy:
        lead = '#, fuzzy'
    else:
        lead = po.flag.strip()
    combined = '%s, %s' % (lead, pot.flag.strip())

    # Keep the first occurrence of every non-empty part, in order.
    parts = []
    for part in combined.split(', '):
        if part and part not in parts:
            parts.append(part)

    unwanted = []
    if not option.docstrings:
        unwanted.append('docstring')
    if obs:
        unwanted.extend(['c-format', 'python-format', 'docstring'])
    for name in unwanted:
        if name in parts:
            parts.remove(name)

    # Put fuzzy first, directly after the leading '#'.
    if 'fuzzy' in parts:
        at = parts.index('fuzzy')
        if at != 1:
            parts[1], parts[at] = parts[at], parts[1]

    # Only the '#' marker left: there is no flag to write.
    if len(parts) == 1:
        return ''
    return ', '.join(parts) + '\n'
|
||||
|
||||
def add(pot, po, fuzzy = False):
    '''
    Build a new entry from the Msgs objects <pot> and <po>: translator
    comment and msgstr come from <po>; extracted comment, references
    and msgid come from <pot>; the flag line is built by flags(). If
    <fuzzy> is true, the flag field of <po> is ignored (in flags()).
    Returns the up to date entry as a multiline string.
    '''
    return ''.join([
        po.cmt,
        pot.autocmt,
        pot.ref,
        flags(po, pot, fuzzy = fuzzy),
        pot.id,
        po.str,
    ])
|
||||
|
||||
def header(pot, defs):
    '''
    Update date in header entry. <pot> is the header Msgs object of
    the pot file, <defs> the dict of Msgs objects of the po file. The
    POT-Creation-Date value of <pot> is copied into the header entry
    of <defs>. Returns the updated header entry. Raises MsgmergeError
    if <defs> does not hold exactly one header or if either header
    lacks the POT-Creation-Date field.
    '''
    headers = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
    if len(headers) != 1:
        raise MsgmergeError('Error: did not find header in po file.')
    po = headers[0]

    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
                   re.MULTILINE | re.DOTALL)
    m = r.match(pot.str)
    if not m:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in pot file.')
    creation_date = m.group(2)

    # A replacement function is used instead of a '\1...\3' template:
    # the date starts with digits, which a template would read as part
    # of the group reference / an octal escape.
    def replace_date(mo):
        return mo.group(1) + creation_date + mo.group(3)

    updated, count = r.subn(replace_date, po.str)
    if not count == 1:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in po file.')
    po.str = updated
    return po
|
||||
|
||||
def match(defs, refs):
    '''
    Try to match Msgs objects in <refs> (pot file) with Msgs objects
    in <defs> (po file). The first entry of <refs> by line number is
    taken to be the header. Every other entry is looked up exactly
    first, then by fuzzy_match(); entries without any match get an
    empty translation. Obsolete translations are appended last.
    The return value is a list with po entries.
    '''
    matches = []
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')

    # Process the pot entries in file order.
    by_line = sorted([(r.lno, r) for r in refs.values()])
    head = header(by_line.pop(0)[1], defs) # Header entry
    matches.append(add(empty, head))
    head.used()

    for pot in [ pair[1] for pair in by_line ]:
        if option.verbose:
            sys.stderr.write('.')
        is_fuzzy = False
        po = defs.get(pot.id, False) # Perfect match
        if not po:
            po = fuzzy_match(pot, defs) # Fuzzy match
            is_fuzzy = True
        if po:
            matches.append(add(pot, po, fuzzy = is_fuzzy))
            po.used()
            pot.used()
        else:
            matches.append(add(pot, empty)) # No match

    obsolete(defs, matches)
    return matches
|
||||
|
||||
def obsolete(defs, matches):
    '''
    Handle obsolete translations: every translated Msgs object in
    <defs> whose use count is still 0 is marked obsolete and appended
    (flag line, msgid, msgstr) to the list <matches>, in file order.
    '''
    unused = [ (d.lno, d) for d in defs.values()
               if d.count == 0 and not d.str == 'msgstr ""\n' ]
    unused.sort()
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    for lno, entry in unused:
        entry.flag = flags(entry, empty, obs = True)
        entry.obsolete()
        matches.append('%s%s%s' % (entry.flag, entry.id, entry.str))
|
||||
|
||||
def help():
    '''Print the module docstring as help text and exit with status 0.'''
    print(__doc__)
    sys.exit(0)
|
||||
|
||||
def cmdline():
    '''
    Parse options and arguments from command line and run merge().
    Exactly two file arguments are required; they are passed to merge()
    as def_file and ref_file in that order. Exits with status 1 on
    usage errors and on MsgmergeError.
    '''
    advice = 'Try `%(name)s --help\' for more information.'
    try:
        # --suffix and --backup take a value, hence the trailing '='.
        long_opt = ['help', 'version', 'update', 'output-file=',
                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup=']
        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
    except getopt.error as msg:
        print('%s: %s\n%s' % ('%(name)s', msg, advice) % globals())
        sys.exit(1)

    option = Options(cmdline = True)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            help()
        elif opt in ['-V', '--version']:
            print('%(name)s %(__version__)s' % globals())
            sys.exit(0)
        elif opt in ['-o', '--output-file']:
            option.outfile = arg
        elif opt in ['-U', '--update']:
            option.update = True
        elif opt in ['-q', '--silent', '--quiet']:
            option.verbose = False
        elif opt in ['-D', '--docstrings']:
            option.docstrings = True
        elif opt in ['--suffix']:
            option.suffix = arg
        elif opt in ['--backup']:
            option.backup = arg

    # Sanity checks; a wrong argument count takes precedence over the
    # option conflict in the message shown.
    warn = False
    if option.update and option.outfile:
        warn = '--update and --output-file are mutually exclusive.'
    if len(args) == 0:
        warn = 'no input files given.'
    elif len(args) != 2:
        warn = 'exactly 2 input files required.'
    if warn:
        print('%s: %s\n%s' % ('%(name)s', warn, advice) % globals())
        sys.exit(1)

    if option.update:
        # In update mode the po file itself is rewritten.
        option.outfile = args[0]
    elif not option.outfile:
        option.outfile = '-'

    defs, refs = args

    try:
        merge(defs, refs, option = option)
    except MsgmergeError as err:
        print('%(name)s: ' % globals() + '%s' % err)
        sys.exit(1)
|
||||
|
||||
def io(iofile, mode = 'rU'):
    '''
    Wrapper around open(). Opens <iofile> with <mode> and returns the
    file object. In read modes the first three bytes are probed: the
    file position is left after them when they equal the UTF-8 byte
    order mark and is rewound to the start otherwise. Raises
    MsgmergeError when the file cannot be opened or read.
    '''
    try:
        fo = open(iofile, mode)
        if 'r' in mode and fo.read(3) != codecs.BOM_UTF8:
            fo.seek(0)
    except IOError as msg:
        raise MsgmergeError('error while opening file: %s: %s.' %
                            (msg.strerror, iofile))
    return fo
|
||||
|
||||
def backup(infile):
    '''
    Handle backup of files in update mode.

    NOTE(review): as written this only computes the backup file name
    (<infile> plus the suffix from SIMPLE_BACKUP_SUFFIX, default "~")
    and discards it; no file is copied and nothing is returned.
    '''
    environ = os.environ
    # VERSION_CONTROL is looked up but its value is not used.
    environ.get('VERSION_CONTROL', '')
    suffix = environ.get('SIMPLE_BACKUP_SUFFIX', '~')

    backup_file = '%s%s' % (infile, suffix)
|
||||
|
||||
def changes(new, old):
    '''
    Compare the freshly merged entries <new> (joined with newlines)
    with the lines <old> of the existing file (joined as they are).
    Returns 0 when both texts are equal, otherwise -1 or 1 depending
    on whether the old text sorts before or after the new one.
    '''
    before = ''.join(old)
    after = '\n'.join(new)
    # Three-way comparison written without cmp(), which only exists on
    # Python 2.
    return (before > after) - (before < after)
|
||||
|
||||
def write(matches, outfile):
    '''
    Write the list <matches>, joined with newlines, to file <outfile>.
    When <outfile> is "-" the text goes to standard out; otherwise the
    file is opened through io() and closed again once written.
    '''
    text = '\n'.join(matches)
    if outfile == '-':
        sys.stdout.write(text)
        return
    fd = io(outfile, 'w')
    try:
        fd.write(text)
    finally:
        # Closing makes sure the data is flushed to disk.
        fd.close()
|
||||
|
||||
def merge(def_file, ref_file, update = True, outfile = '-',
          docstrings = True, suffix = '~', backup = True,
          verbose = True, **kwds):
    '''
    Merge po file <def_file> with pot file <ref_file> . If <update> is
    set to True then only update if there are changes to the po
    file. Set outfile to write updated po file to an another file. Set
    to `-\' for writing to standard out. If docstrings is False
    docstrings flag will removed. Set verbose to False to suppress
    progress indicators. <kwds> is used to pass options from the
    command line interface: when it holds an "option" object, that
    object replaces the options built from the other arguments.
    '''
    global option
    option = kwds.get('option', Options(update = update,
                                        outfile = outfile,
                                        docstrings = docstrings,
                                        suffix = suffix,
                                        backup = backup,
                                        verbose = verbose))
    def_msgs = parse(def_file, 'msgstr')
    ref_msgs = parse(ref_file, 'msgid')
    if verbose and not __name__ == '__main__':
        sys.stderr.write('Merging %s with %s\n' % (ref_file, def_file))
    updated_lines = match(def_msgs, ref_msgs)
    if option.verbose:
        sys.stderr.write(' done.\n')

    if not option.update:
        write(updated_lines, option.outfile)
        return

    # Update mode: rewrite the po file only when the merge changed it.
    old = io(def_file)
    try:
        old_lines = old.readlines()
    finally:
        old.close()
    if changes(updated_lines, old_lines):
        write(updated_lines, def_file)
|
||||
|
||||
def merge_dir(directory, pot = False, include = [], exclude = [],
              verbose = True):
    '''
    Tries to merge a directory of po files. Uses simple glob to find
    po files and pot file. The parameter <pot> can be used to specify
    the pot file in the directory. If the list <include> is given only
    files in this list are merged. Use the list <exclude> to exclude
    files from being merged. This function is only useful if po files
    and pot file are in the same directory. Set <verbose> to get
    information when running.
    Raises MsgmergeError when no pot file, more than one pot file or
    no po file is found. A MsgmergeError raised while merging a single
    po file is reported on standard error and the file is skipped.
    '''
    if directory[-1] == '/':
        directory = os.path.dirname(directory)

    if pot:
        pot = os.path.basename(pot)
    else:
        found = glob.glob('%s/*.pot' % directory)
        if not found:
            raise MsgmergeError('No pot file found.')
        elif len(found) > 1:
            raise MsgmergeError('More than one pot file found: %s.' % found)
        pot = os.path.basename(found[0])

    if not include:
        pos = glob.glob('%s/*po' % directory)
        # A single po file is a perfectly valid directory content.
        if not pos:
            raise MsgmergeError('No po file(s) found.')
        pos = [ os.path.basename(po) for po in pos ]
    else:
        pos = [ os.path.basename(po) for po in include ]

    for po in exclude:
        try:
            pos.remove(po)
        except ValueError:
            # Excluding a file that is not there is not an error.
            pass

    pattern = '%s/%s'
    for po in pos:
        try:
            merge(pattern % (directory, po), pattern % (directory, pot),
                  update = True, verbose = verbose,
                  outfile = pattern % (directory, po))
        except MsgmergeError as err:
            if verbose:
                sys.stderr.write('%s Not updated.\n' % err)
            else:
                sys.stderr.write('%s %s not updated.\n' % (err, po))
|
||||
|
||||
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    cmdline()
|
@ -1,672 +0,0 @@
|
||||
#! /usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Originally written by Barry Warsaw <barry@zope.com>
|
||||
#
|
||||
# Minimally patched to make it even more xgettext compatible
|
||||
# by Peter Funk <pf@artcom-gmbh.de>
|
||||
#
|
||||
# 2002-11-22 Jürgen Hermann <jh@web.de>
|
||||
# Added checks that _() only contains string literals, and
|
||||
# command line args are resolved to module lists, i.e. you
|
||||
# can now pass a filename, a module or package name, or a
|
||||
# directory (including globbing chars, important for Win32).
|
||||
# Made docstring fit in 80 chars wide displays using pydoc.
|
||||
#
|
||||
|
||||
# for selftesting
|
||||
# Use fintl's gettext when the module is available; otherwise fall back
# to an identity function so that _() markers still work.
try:
    import fintl
except ImportError:
    def _(s):
        return s
else:
    _ = fintl.gettext
|
||||
|
||||
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
||||
|
||||
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
||||
internationalization of C programs. Most of these tools are independent of
|
||||
the programming language and can be used from within Python programs.
|
||||
Martin von Loewis' work[1] helps considerably in this regard.
|
||||
|
||||
There's one problem though; xgettext is the program that scans source code
|
||||
looking for message strings, but it groks only C (or C++). Python
|
||||
introduces a few wrinkles, such as dual quoting characters, triple quoted
|
||||
strings, and raw strings. xgettext understands none of this.
|
||||
|
||||
Enter pygettext, which uses Python's standard tokenize module to scan
|
||||
Python source code, generating .pot files identical to what GNU xgettext[2]
|
||||
generates for C and C++ code. From there, the standard GNU tools can be
|
||||
used.
|
||||
|
||||
A word about marking Python strings as candidates for translation. GNU
|
||||
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
||||
and gettext_noop. But those can be a lot of text to include all over your
|
||||
code. C and C++ have a trick: they use the C preprocessor. Most
|
||||
internationalized C source includes a #define for gettext() to _() so that
|
||||
what has to be written in the source is much less. Thus these are both
|
||||
translatable strings:
|
||||
|
||||
gettext("Translatable String")
|
||||
_("Translatable String")
|
||||
|
||||
Python of course has no preprocessor so this doesn't work so well. Thus,
|
||||
pygettext searches only for _() by default, but see the -k/--keyword flag
|
||||
below for how to augment this.
|
||||
|
||||
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
||||
[2] http://www.gnu.org/software/gettext/gettext.html
|
||||
|
||||
NOTE: pygettext attempts to be option and feature compatible with GNU
|
||||
xgettext where ever possible. However some options are still missing or are
|
||||
not fully implemented. Also, xgettext's use of command line switches with
|
||||
option arguments is broken, and in these cases, pygettext just defines
|
||||
additional switches.
|
||||
|
||||
Usage: pygettext [options] inputfile ...
|
||||
|
||||
Options:
|
||||
|
||||
-a
|
||||
--extract-all
|
||||
Extract all strings.
|
||||
|
||||
-d name
|
||||
--default-domain=name
|
||||
Rename the default output file from messages.pot to name.pot.
|
||||
|
||||
-E
|
||||
--escape
|
||||
Replace non-ASCII characters with octal escape sequences.
|
||||
|
||||
-D
|
||||
--docstrings
|
||||
Extract module, class, method, and function docstrings. These do
|
||||
not need to be wrapped in _() markers, and in fact cannot be for
|
||||
Python to consider them docstrings. (See also the -X option).
|
||||
|
||||
-h
|
||||
--help
|
||||
Print this help message and exit.
|
||||
|
||||
-k word
|
||||
--keyword=word
|
||||
Keywords to look for in addition to the default set, which are:
|
||||
%(DEFAULTKEYWORDS)s
|
||||
|
||||
You can have multiple -k flags on the command line.
|
||||
|
||||
-K
|
||||
--no-default-keywords
|
||||
Disable the default set of keywords (see above). Any keywords
|
||||
explicitly added with the -k/--keyword option are still recognized.
|
||||
|
||||
--no-location
|
||||
Do not write filename/lineno location comments.
|
||||
|
||||
-n
|
||||
--add-location
|
||||
Write filename/lineno location comments indicating where each
|
||||
extracted string is found in the source. These lines appear before
|
||||
each msgid. The style of comments is controlled by the -S/--style
|
||||
option. This is the default.
|
||||
|
||||
-o filename
|
||||
--output=filename
|
||||
Rename the default output file from messages.pot to filename. If
|
||||
filename is `-' then the output is sent to standard out.
|
||||
|
||||
-p dir
|
||||
--output-dir=dir
|
||||
Output files will be placed in directory dir.
|
||||
|
||||
-S stylename
|
||||
--style stylename
|
||||
Specify which style to use for location comments. Two styles are
|
||||
supported:
|
||||
|
||||
Solaris # File: filename, line: line-number
|
||||
GNU #: filename:line
|
||||
|
||||
The style name is case insensitive. GNU style is the default.
|
||||
|
||||
-v
|
||||
--verbose
|
||||
Print the names of the files being processed.
|
||||
|
||||
-V
|
||||
--version
|
||||
Print the version of pygettext and exit.
|
||||
|
||||
-w columns
|
||||
--width=columns
|
||||
Set width of output to columns.
|
||||
|
||||
-x filename
|
||||
--exclude-file=filename
|
||||
Specify a file that contains a list of strings that are not be
|
||||
extracted from the input files. Each string to be excluded must
|
||||
appear on a line by itself in the file.
|
||||
|
||||
-X filename
|
||||
--no-docstrings=filename
|
||||
Specify a file that contains a list of files (one per line) that
|
||||
should not have their docstrings extracted. This is only useful in
|
||||
conjunction with the -D option above.
|
||||
|
||||
If `inputfile' is -, standard input is read.
|
||||
""")
|
||||
|
||||
import os
|
||||
import imp
|
||||
import sys
|
||||
import glob
|
||||
import time
|
||||
import getopt
|
||||
import token
|
||||
import tokenize
|
||||
import operator
|
||||
|
||||
from umit.pm.core.const import PM_VERSION
|
||||
|
||||
__version__ = '1.5'
|
||||
|
||||
default_keywords = ['_']
|
||||
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
||||
|
||||
EMPTYSTRING = ''
|
||||
|
||||
|
||||
|
||||
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
|
||||
# there.
|
||||
pot_header = _('''\
|
||||
# PacketManipulator catalog.
|
||||
# Copyright (C) 2009 Adriano Montero Marques
|
||||
# Francesco Piccinno <stack.box@gmail.com>, 2009
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
|
||||
"POT-Creation-Date: %(time)s\\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\\n"
|
||||
"MIME-Version: 1.0\\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\\n"
|
||||
"Content-Transfer-Encoding: 8bit\\n"
|
||||
"Generated-By: pygettext.py %(version)s\\n"
|
||||
|
||||
''')
|
||||
|
||||
|
||||
def usage(code, msg=''):
    # Write the module docstring (with the module globals filled into
    # its % placeholders) and the optional message <msg> to standard
    # error, then exit with status <code>.
    err = sys.stderr
    err.write('%s\n' % (__doc__ % globals(),))
    if msg:
        err.write('%s\n' % (msg,))
    sys.exit(code)
|
||||
|
||||
|
||||
|
||||
escapes = []

def make_escapes(pass_iso8859):
    # (Re)build the global table 'escapes', which maps every character
    # code 0..255 to the text written for it in a .pot file. The table
    # is replaced in place, so calling this again switches mode instead
    # of appending a second set of 256 entries.
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
        # escape any character outside the 32..126 range.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    # These characters get their C-style escape instead of an octal one.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes[:] = table
|
||||
|
||||
|
||||
def escape(s):
    # Replace every character of s by its entry in the global 'escapes'
    # table (filled by make_escapes()) and return the joined result.
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
|
||||
|
||||
|
||||
def safe_eval(s):
    # unwrap quotes: evaluate the source text of a string literal and
    # return its value. Builtins are emptied and no names are provided.
    # NOTE(review): this is still eval(); an empty __builtins__ is not a
    # real sandbox, so s should only ever be a STRING token produced by
    # the tokenizer - confirm before feeding it anything else.
    no_builtins = {'__builtins__': {}}
    no_locals = {}
    return eval(s, no_builtins, no_locals)
|
||||
|
||||
|
||||
def normalize(s):
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style: a single
    # line becomes one quoted string, a multi-line text becomes an empty
    # string followed by one quoted string per line.
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s) + '"'

    if not lines[-1]:
        # The text ended with a newline: drop the empty tail and put
        # the newline back onto the last real line.
        del lines[-1]
        lines[-1] = lines[-1] + '\n'
    escaped = [escape(line) for line in lines]
    lineterm = '\\n"\n"'
    return '""\n"' + lineterm.join(escaped) + '"'
|
||||
|
||||
|
||||
def containsAny(str, set):
    """Return True if 'str' contains at least one of the chars in 'set'."""
    hits = [char in str for char in set]
    return any(hits)
|
||||
|
||||
|
||||
def _visit_pyfiles(list, dirname, names):
    """Helper for getFilesForName().

    Callback in the form expected by os.path.walk(): appends to 'list'
    the path of every entry of 'names' (the entries of 'dirname') that
    has the python source extension. 'CVS' is removed from 'names' in
    place so that the walk does not descend into it.
    """
    global _py_ext
    # get extension for python source files (computed once, then cached
    # in the module global _py_ext)
    if '_py_ext' not in globals():
        _py_ext = [triple[0] for triple in imp.get_suffixes()
                   if triple[2] == imp.PY_SOURCE][0]

    # don't recurse into CVS directories
    if 'CVS' in names:
        names.remove('CVS')

    # add all *.py files to list
    list.extend(
        [os.path.join(dirname, entry) for entry in names
         if os.path.splitext(entry)[1] == _py_ext]
        )
|
||||
|
||||
|
||||
def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    Return the file system path to a file for a module, and to a directory for
    a package. Return None if the name is not found, or is a builtin or
    extension module. 'pathlist' is the search path handed to
    imp.find_module(); None means the default search path.
    """
    # split off top-most name
    head, dot, rest = dotted_name.partition('.')
    try:
        handle, pathname, description = imp.find_module(head, pathlist)
    except ImportError:
        return None
    if handle:
        handle.close()
    kind = description[2]

    if dot:
        # we have a dotted path: the top-level name must be a package,
        # the remaining name parts are looked up inside it
        if kind != imp.PKG_DIRECTORY:
            return None
        return _get_modpkg_path(rest, [pathname])

    # plain name
    if kind not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
        return None
    return pathname
|
||||
|
||||
|
||||
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is first tried as a glob pattern
    and then as a dotted module/package name. An empty list is returned
    when nothing matches.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            collected = []
            for match in glob.glob(name):
                collected.extend(getFilesForName(match))
            return collected

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        pyfiles = []
        os.path.walk(name, _visit_pyfiles, pyfiles)
        return pyfiles

    if os.path.exists(name):
        # a single file
        return [name]

    return []
|
||||
|
||||
|
||||
class TokenEater:
    """State machine fed with tokenizer tokens that collects messages.

    An instance is called once per token (see __call__). It records the
    strings found inside calls to the keywords in options.keywords and,
    if options.docstrings is set, module/class/function docstrings.
    write() dumps everything collected as a .pot catalog.
    """

    def __init__(self, options):
        self.__options = options
        # message -> {(filename, lineno): isdocstring}
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch to the current state; only the start row is needed
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            warning = _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            sys.stderr.write(warning + '\n')
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        # Start a new input file: the next string token may be its
        # module docstring.
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        # Write the catalog header and all collected messages to the
        # file object fp.
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        fp.write(pot_header % {'time': timestamp, 'version': __version__,
                               'pm_version': PM_VERSION} + '\n')
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            reverse.setdefault(tuple(sorted(v.keys())), []).append((k, v))
        for rkey in sorted(reverse.keys()):
            # Messages are unique, so ordering by message text alone
            # gives the same order as ordering the (message, dict) pairs.
            for k, v in sorted(reverse[rkey], key=operator.itemgetter(0)):
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = 0
                if any(v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        fp.write(_(
                            '# File: %(filename)s, line: %(lineno)d') % d
                            + '\n')
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceeds 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            fp.write(locline + '\n')
                            locline = "#:" + s
                    if len(locline) > 2:
                        fp.write(locline + '\n')
                if isdocstring:
                    fp.write('#, docstring\n')
                fp.write('msgid ' + normalize(k) + '\n')
                fp.write('msgstr ""\n\n')
|
||||
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: extract marked strings and write a .pot file.

    Reads options and input names from ``sys.argv[1:]``, feeds every input
    through a ``TokenEater`` via ``tokenize``, then has the eater write the
    collected messages to the chosen output ('-' means stdout).

    Side effects: may rebind the module-level ``default_keywords`` (-K),
    reads the files named by -x / -X and the inputs, writes the output file,
    and prints progress/diagnostics.  Exits via ``sys.exit`` for --version
    and for an unreadable --exclude-file.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # '--no-docstrings' takes a file name just like '-X' ('X:' above);
             # without the trailing '=' getopt rejects '--no-docstrings=FILE'
             # and a bare '--no-docstrings' would reach open('') below.
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        # NOTE(review): usage() is defined elsewhere in this file; the code
        # below relies on it not returning when called with a non-zero code.
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    # maps the (lower-cased) --style argument to a location-style constant
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            # One entry per line.  Strip only the line terminator: slicing
            # off the last character unconditionally would truncate the final
            # entry of a file that does not end with a newline.
            fp = open(arg)
            try:
                for line in fp:
                    options.nodocstrings[line.rstrip('\n')] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            try:
                options.toexclude = fp.readlines()
            finally:
                # close the handle even when reading fails part-way
                fp.close()
        except IOError:
            sys.stderr.write(_(
                "Can't read --exclude-file: %s") % options.excludefilename
                + '\n')
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                # Python 2 callback form: the eater is called once per token.
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError as e:
                # args are (message, (line, column)); report and move on to
                # the next input instead of aborting the whole run
                sys.stderr.write('%s: %s, line %d, column %d\n' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]))
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
|
||||
|
||||
|
||||
# Script entry point: run the extractor, then evaluate a few sample _() calls.
if __name__ == '__main__':
    main()
    # some more test strings
    # NOTE(review): these look like sample input for running the extractor
    # over this very file; their results are discarded -- confirm intent.
    _(u'a unicode string')
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    # adjacent string literals forming a single argument
    _('more' 'than' 'one' 'string')
|
Loading…
Reference in New Issue
Block a user