veekun_pokedex/bin/poupdate

369 lines
14 KiB
Text
Raw Normal View History

#! /usr/bin/env python
# Encoding: UTF-8
u"""Creation and loading of GNU Gettext language files.
poupdate [options] [file1.csv file2.csv ...]
Use this script to
- Create .pot files (in pokedex/i18n/)
- Update the .po files (in pokedex/i18n/<lang>)
- Update the pokedex .csv files in (pokedex/data/csv/translations)
To make pos for a new language, make sure it is in the database, make
a directory for it in pokedex/i18n/, and run this.
You can also give one or more translation CSVs as arguments.
These are in the same format as veekun's main database CSVs, for example
pokedex/data/csv/ability_prose.csv. Be sure to set the correct language
ID (which implies the language must be in the database).
Also be sure to have the correct column order: first an appropriately named
foreign key, then local_language_id, and then the text columns.
"""
# Everything related to Gettext files, and the CLI interface, is here.
# General message handling and CSV I/O is in the pokedex library.
# Notes on how we use PO format:
# The source information is stored in the occurrences fields, using
# "table_name.column_name" for file and object ID for line number. This is used
# as a message key, instead of the source string. So it's important not to
# discard location information. It also means "obsolete" and "fuzzy" mean
# pretty much the same in our context.
#
# Also note that a pot file is just a po file with all strings untranslated.
# So some functions here will work on either.
#
# Gettext context (msgctxt) is written to the files so that tools don't merge
# unrelated strings together. It is ignored when reading the PO files.
# Also of note, "polib" means "(do) kiss!" in Czech.
import os
import re
import sys
from datetime import datetime
from optparse import OptionParser
from collections import defaultdict
import pkg_resources
from pokedex.db import tables, translations
from pokedex.defaults import get_default_csv_dir
try:
import polib
except ImportError:
if __name__ == '__main__':
exit('This utility needs polib installed.\n$ pip install polib')
raise
# Flag set on PO entries whose numbers were replaced by "{num}"
number_replacement_flag = '-pokedex-number-replacement'

# Where the pot files, and the per-language po directories, are kept
default_gettext_directory = pkg_resources.resource_filename('pokedex', 'i18n')

# Class name -> mapped class. The second pass is applied after the first, so
# the name of every translation class ends up pointing to the class it
# translates.
mapped_class_dict = {}
for cls in tables.mapped_classes:
    mapped_class_dict[cls.__name__] = cls
for cls in tables.mapped_classes:
    mapped_class_dict.update(
        (translation_class.__name__, cls)
        for translation_class in cls.translation_classes)
class PokedexPot(polib.POFile):
    """A pot/po file that merges entries which differ only in numbers.

    `seen_entries` maps (msgctxt, msgid with numbers replaced by "{num}")
    to the first entry that was appended under that key.
    """

    def __init__(self, name):
        super(PokedexPot, self).__init__()
        creation_date = datetime.now().isoformat()
        self.metadata = {
            'Project-Id-Version': 'pokedex-%s 0.1' % name,
            'Report-Msgid-Bugs-To': 'encukou@gmail.com',
            'POT-Creation-Date': creation_date,
            'PO-Revision-Date': 'YEAR-MO-DA HO:MI+ZONE',
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=utf-8',
            'Content-Transfer-Encoding': '8bit',
            'Generated-By': "The pokedex",
        }
        self.seen_entries = {}

    def append(self, entry):
        """Add an entry, merging it with an earlier one where possible.

        Two entries are merged if they share the msgctxt and their msgids
        are equal once all numbers are replaced by "{num}". So "Route 1",
        "Route 2", etc. collapse into a single "Route {num}". Several
        numbers may be replaced at once, as in "{num}--{num} different Unown
        caught".

        Entries without any numbers are merged the same way (e.g. "Has no
        overworld effect" appears quite a few times in AbilityChangelog).
        """
        normalized_msgid = translations.number_re.sub('{num}', entry.msgid)
        key = (entry.msgctxt, normalized_msgid)

        if key not in self.seen_entries:
            # First entry of its kind: remember it and store it untouched.
            self.seen_entries[key] = entry
            self += [entry]
            return

        merged = self.seen_entries[key]
        merged.occurrences += entry.occurrences
        # The stored entry is only rewritten once a second match shows up.
        # So we get "Route {num}", but "Porygon2" stays as it is because
        # there's no Porygon3.
        merged.msgid = normalized_msgid
        merged.msgstr = translations.number_re.sub('{num}', merged.msgstr)
        had_numbers = normalized_msgid != entry.msgid
        if had_numbers and number_replacement_flag not in merged.flags:
            merged.flags.append(number_replacement_flag)
class PotDict(dict):
    """A dict of pot files that creates missing ones on demand

    Looking up a name that is not present stores a new PokedexPot for that
    name and returns it.
    """

    def __missing__(self, name):
        return self.setdefault(name, PokedexPot(name))
def yield_po_messages(pos):
    """Combine the messages of all given .po files

    `pos` is a mapping whose values are po files. Each file's messages are
    added, as one iterator, to a single translations.Merge object, which is
    returned.
    """
    combined = translations.Merge()
    for pofile in pos.values():
        file_messages = _yield_one_po_messages(pofile, combined)
        combined.add_iterator(file_messages)
    return combined
def entry_sort_key(entry):
    """Return the sort key of a PO entry, based on its first occurrence

    The first occurrence holds "ClassName.column_name" in its file slot and
    the object ID in its line slot.

    Returns a (class name, ID, column name, fuzzy) tuple. The class name is
    the `__name__` of whatever `mapped_class_dict` holds for the class named
    in the occurrence. If the class is not known, a warning is printed and
    an empty string is used for the class name.

    Entries with no occurrences, or whose first occurrence has no line, get
    an empty tuple. The empty tuple sorts before every other key, which is
    where the previously returned None ended up on Python 2. Unlike None,
    it can also be compared to the other keys on Python 3.
    """
    no_location = ()
    try:
        cls_col, line = entry.occurrences[0]
    except IndexError:
        return no_location
    if not line:
        return no_location

    classname, col = cls_col.split('.')
    fuzzy = entry.obsolete or 'fuzzy' in entry.flags
    try:
        cls = mapped_class_dict[classname]
    except KeyError:
        # Renamed table?
        print('Warning: Unknown class %s' % classname)
        return '', int(line), col, fuzzy
    return cls.__name__, int(line), col, fuzzy
def _yield_one_po_messages(pofile, merger):
    """Yield messages from one po file

    Messages in our po files are ordered by the first occurrence.
    The occurrences of a single message are also ordered.
    So the first message of each entry is yielded directly, and the
    remaining ones are handed to `merger` as an extra iterator.

    Entries without a translation, and occurrences without an ID, are
    skipped.
    """
    for entry in sorted(pofile, key=entry_sort_key):
        if not entry.msgstr:
            continue
        is_fuzzy = (entry.obsolete or 'fuzzy' in entry.flags)
        numbers_replaced = number_replacement_flag in entry.flags

        messages = []
        for cls_colname, object_id in entry.occurrences:
            if not object_id:
                continue
            clsname, colname = cls_colname.split('.')
            cls = mapped_class_dict[clsname]
            messages.append(translations.Message(
                cls.__name__,
                int(object_id),
                colname,
                entry.msgstr,
                source=entry.msgid,
                number_replacement=numbers_replaced,
                origin='PO file',
                fuzzy=is_fuzzy,
            ))

        if not messages:
            continue
        first, rest = messages[0], messages[1:]
        if rest:
            # Spawn extra iterators before yielding
            merger.add_iterator(rest)
        yield first
def create_pots(source, *translation_streams):
    """Build a dictionary of pot/po files from an iterator of Messages

    The translation streams, if any, are merged with the source. Every
    source message becomes an entry in the file named by its `pot`
    attribute. An entry that got a translation is marked fuzzy unless the
    match was exact.

    Pass translation streams to get po files; pass none to get pot files.
    """
    unused_messages = []
    pots = PotDict()
    merged = translations.merge_translations(
        source, *translation_streams, unused=unused_messages.append)

    for source_message, sourcehash, translated, exact in merged:
        location = '.'.join((source_message.cls, source_message.colname))
        entry = polib.POEntry(
            msgid=source_message.string,
            occurrences=[(location, source_message.id)],
            msgctxt=location,
        )
        if translated:
            entry.msgstr = translated
            if not exact:
                entry.flags.append('fuzzy')
        pots[source_message.pot].append(entry)

    # NOTE(review): an obsolete entry is built for every unused message, but
    # it is never added to any pot, so these entries are discarded. Confirm
    # which file they were meant to go to.
    for message in unused_messages:
        location = '.'.join((message.cls, message.colname))
        entry = polib.POEntry(
            msgid=message.source or '???',
            occurrences=[(location, message.id)],
            msgctxt=location,
            obsolete=True,
        )
    return pots
def save_pots(pots, gettext_directory=default_gettext_directory):
    """Write each pot file to <gettext_directory>/pokedex-<name>.pot

    `pots` is a name -> pot file mapping.
    """
    for name, pot in pots.items():
        filename = 'pokedex-%s.pot' % name
        pot.save(os.path.join(gettext_directory, filename))
def save_pos(pos, lang, gettext_directory=default_gettext_directory):
    """Write each po file to <gettext_directory>/<lang>/pokedex-<name>.po

    `pos` is a name -> po file mapping.
    """
    for name, po in pos.items():
        filename = 'pokedex-%s.po' % name
        po.save(os.path.join(gettext_directory, lang, filename))
def read_pots(directory=default_gettext_directory, extension='.pot'):
    """Load every file in `directory` that has the given extension

    Works on pos or pots. Returns a dict that maps each file's name, with
    the extension removed, to the pofile loaded from it.
    """
    matching = [
        filename for filename in os.listdir(directory)
        if os.path.splitext(filename)[1] == extension
    ]
    return dict(
        (
            os.path.splitext(filename)[0],
            polib.pofile(os.path.join(directory, filename)),
        )
        for filename in matching
    )
def all_langs(gettext_directory=default_gettext_directory):
    """Return the names of all subdirectories of the gettext directory"""
    langs = []
    for name in os.listdir(gettext_directory):
        if os.path.isdir(os.path.join(gettext_directory, name)):
            langs.append(name)
    return langs
def merge_pos(transl, lang, language_directory):
    """Build updated po files for the given language

    The streams handed to create_pots are, in this order: the source
    messages, the official translations from the database, the messages of
    the existing .po files in `language_directory`, and the current
    translation CSV.

    Returns a name -> pofile dict
    """
    source = transl.source
    official = transl.official_messages(lang)
    from_po_files = yield_po_messages(pos=read_pots(language_directory, '.po'))
    from_csv = transl.yield_target_messages(lang)
    return create_pots(source, official, from_po_files, from_csv)
def bar(fraction, size, done_char='=', split_char='|', notdone_char='-'):
    """Build an ASCII art progress bar

    The bar has `size - 1` cells filled with `done_char` or `notdone_char`
    in proportion to `fraction`, with one `split_char` between the two
    runs. When `fraction` equals 1, the split is drawn with `done_char`.
    """
    cells = size - 1
    if fraction == 1:
        separator = done_char
    else:
        separator = split_char
    filled = int(round(cells * fraction))
    pieces = [done_char] * filled + [separator] + [notdone_char] * (cells - filled)
    return ''.join(pieces)
def print_stats(pos):
    """Print out some fun stats about a set of po files

    One line is printed per po file, followed by a "Total" line. Each line
    shows the number of translated entries, the number of all entries, the
    percentage translated, and a progress bar.

    `pos` is a name -> pofile mapping. A po file without entries, or an
    empty mapping, is reported as 0% translated instead of raising
    ZeroDivisionError.
    """
    template = u"{0:>10}: {1:4}/{2:4} {3:6.2f}% [{4}]"

    def print_line(label, num_translated, num_entries):
        # Format and print one line of the report
        if num_entries:
            fraction_translated = 1. * num_translated / num_entries
        else:
            # Nothing to translate; don't divide by zero
            fraction_translated = 0.
        print(template.format(
            label,
            num_translated,
            num_entries,
            100 * fraction_translated,
            bar(fraction_translated, 47),
        ).encode('utf-8'))

    total_translated = 0
    total = 0
    for name, po in pos.items():
        num_translated = len(po.translated_entries())
        total_translated += num_translated
        total += len(po)
        print_line(name, num_translated, len(po))
    print_line('Total', total_translated, total)
# Command-line entry point: parse the options, then update the pot files,
# the po files and the translation CSVs, unless switched off by an option.
if __name__ == '__main__':
# The module docstring is passed to OptionParser as the usage text
parser = OptionParser(__doc__)
parser.add_option('-l', '--langs', dest='langs',
help="List of languages to handle, separated by commas (example: -l 'en,de,ja') (default: all in gettext directory)")
parser.add_option('-P', '--no-pots', dest='pots', action='store_false', default=True,
help='Do not create POT files (templates)')
parser.add_option('-p', '--no-pos', dest='pos', action='store_false', default=True,
help='Do not update PO files (message catalogs)')
parser.add_option('-c', '--no-csv', dest='csv', action='store_false', default=True,
help='Do not update pokedex translations files')
parser.add_option('-d', '--directory', dest='directory',
help='Veekun data directory')
parser.add_option('-L', '--source-language', dest='source_lang',
help="Source language identifier (default: 'en')")
parser.add_option('-g', '--gettext-dir', dest='gettext_directory', default=default_gettext_directory,
help='Gettext directory (default: pokedex/i18n/)')
parser.add_option('-q', '--quiet', dest='verbose', default=True, action='store_false',
help="Don't print what's going on")
# Positional arguments are the paths of translation CSVs to merge in
options, arguments = parser.parse_args()
# The Translations object is built from the parsed options
transl = translations.Translations.from_parsed_options(options)
gettext_directory = options.gettext_directory
# Templates are created from the source messages alone, without any
# translation streams
if options.pots:
if options.verbose:
print 'Creating pots in', gettext_directory
save_pots(create_pots(transl.source), gettext_directory=gettext_directory)
if options.pos or options.csv:
# Merge in CSV files from command line
# csv_streams holds one translations.Merge per language identifier
csv_streams = defaultdict(translations.Merge)
for argument in arguments:
# Add each message in its own stream, to sort them.
file = open(argument, 'rb')
# The with statement closes the file after its messages were read
with file:
for message in translations.yield_guessed_csv_messages(file):
# The message's language ID is looked up in transl.language_identifiers
lang = transl.language_identifiers[message.language_id]
csv_streams[lang].add_iterator([message])
# streams maps a language to the list of message streams that is later
# passed to transl.write_translations for that language
streams = defaultdict(list)
for lang, stream in csv_streams.items():
streams[lang].append(stream)
# Merge in the PO files
# Without --langs, every subdirectory of the gettext directory is handled
if options.langs:
langs = options.langs.split(',')
else:
langs = all_langs(gettext_directory)
for lang in langs:
language_directory = os.path.join(gettext_directory, lang)
if options.verbose:
print 'Merging translations for %s in %s' % (lang, language_directory)
pos = merge_pos(transl, lang, language_directory)
if options.pos:
if options.verbose:
print 'Writing POs for %s' % lang
save_pos(pos, lang, gettext_directory=gettext_directory)
if options.verbose:
print_stats(pos)
# The messages of the merged po files become one more stream for this
# language
streams[lang].append(yield_po_messages(pos))
if options.csv:
for lang, lang_streams in streams.items():
if options.verbose:
print "Merging %s translation stream/s for '%s'" % (len(lang_streams), lang)
# The existing target messages are read into a list before
# write_translations is called; presumably so that the CSV is not read
# while it is being rewritten -- TODO confirm
existing_messages = list(transl.yield_target_messages(lang))
lang_streams.append(existing_messages)
transl.write_translations(lang, *lang_streams)