mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
369 lines
14 KiB
Text
369 lines
14 KiB
Text
|
#! /usr/bin/env python
|
||
|
# Encoding: UTF-8
|
||
|
|
||
|
u"""Creation and loading of GNU Gettext language files.
|
||
|
|
||
|
poupdate [options] [file1.csv file2.csv ...]
|
||
|
|
||
|
Use this script to
|
||
|
- Create .pot files (in pokedex/i18n/)
|
||
|
- Update the .po files (in pokedex/i18n/<lang>)
|
||
|
- Update the pokedex .csv files in (pokedex/data/csv/translations)
|
||
|
|
||
|
To make pos for a new language, make sure it is in the database, make
|
||
|
a directory for it in pokedex/i18n/, and run this.
|
||
|
|
||
|
You can also give one or more translation CSVs as arguments.
|
||
|
These are in the same format as veekun's main database CSVs, for example
|
||
|
pokedex/data/csv/ability_prose.csv. Be sure to set the correct language
|
||
|
ID (which implies the language must be in the database).
|
||
|
Also be sure to have the correct column order: first an appropriately named
|
||
|
foreign key, then local_language_id, and then the text columns.
|
||
|
|
||
|
"""
|
||
|
|
||
|
# Everything related to Gettext files, and the CLI interface, is here.
|
||
|
# General message handling and CSV I/O is in the pokedex library.
|
||
|
|
||
|
# Notes on how we use PO format:
|
||
|
# The source information is stored in the occurrences fields, using
|
||
|
# "table_name.column_name" for file and object ID for line number. This is used
|
||
|
# as a message key, instead of the source string. So it's important not to
|
||
|
# discard location information. It also means "obsolete" and "fuzzy" mean
|
||
|
# pretty much the same in our context.
|
||
|
#
|
||
|
# Also note that a pot file is just a po file with all strings untranslated.
|
||
|
# So some functions here will work on either.
|
||
|
#
|
||
|
# Gettext context (msgctxt) is written to the files so that tools don't merge
|
||
|
# unrelated strings together. It is ignored when reading the PO files.
|
||
|
|
||
|
# Also of note, "polib" means "(do) kiss!" in Czech.
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
import sys
|
||
|
from datetime import datetime
|
||
|
from optparse import OptionParser
|
||
|
from collections import defaultdict
|
||
|
|
||
|
import pkg_resources
|
||
|
|
||
|
from pokedex.db import tables, translations
|
||
|
from pokedex.defaults import get_default_csv_dir
|
||
|
|
||
|
try:
|
||
|
import polib
|
||
|
except ImportError:
|
||
|
if __name__ == '__main__':
|
||
|
exit('This utility needs polib installed.\n$ pip install polib')
|
||
|
raise
|
||
|
|
||
|
# Flag added to PO entries whose numbers were replaced by "{num}".
number_replacement_flag = '-pokedex-number-replacement'

# The 'i18n' resource directory of the installed pokedex package.
default_gettext_directory = pkg_resources.resource_filename('pokedex', 'i18n')

# Maps the name of every mapped class, and the name of each of its
# translation classes, to the mapped class itself.
mapped_class_dict = {}
for cls in tables.mapped_classes:
    mapped_class_dict[cls.__name__] = cls
for cls in tables.mapped_classes:
    for translation_cls in cls.translation_classes:
        mapped_class_dict[translation_cls.__name__] = cls
|
||
|
|
||
|
class PokedexPot(polib.POFile):
    """A PO/POT catalog that merges entries differing only in numbers."""

    def __init__(self, name):
        """Create an empty catalog; `name` ends up in Project-Id-Version."""
        super(PokedexPot, self).__init__()
        self.metadata = {
            'Project-Id-Version': 'pokedex-%s 0.1' % name,
            'Report-Msgid-Bugs-To': 'encukou@gmail.com',
            'POT-Creation-Date': datetime.now().isoformat(),
            'PO-Revision-Date': 'YEAR-MO-DA HO:MI+ZONE',
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=utf-8',
            'Content-Transfer-Encoding': '8bit',
            'Generated-By': "The pokedex",
        }
        # (msgctxt, msgid with numbers replaced) -> first entry appended
        # under that key.
        self.seen_entries = {}

    def append(self, entry):
        """Append an entry. POEntries that only differ in numbers are merged.

        For example "Route 1", "Route 2", etc. are replaced by a single
        "Route {num}".

        Multiple numbers might be replaced, for example in "{num}--{num}
        different Unown caught"

        Entries without numbers are merged as well (e.g. "Has no overworld
        effect" appears quite a few times in AbilityChangelog)
        """
        number_re = translations.number_re
        placeholder_id = number_re.sub('{num}', entry.msgid)
        key = (entry.msgctxt, placeholder_id)

        if key not in self.seen_entries:
            # First entry of its kind: remember it and store it verbatim.
            self.seen_entries[key] = entry
            self += [entry]
            return

        shared = self.seen_entries[key]
        shared.occurrences += entry.occurrences
        # Only now is the actual entry replaced. So we get
        # "Route {num}", but "Porygon2" because there's no Porygon3.
        shared.msgid = placeholder_id
        shared.msgstr = number_re.sub('{num}', shared.msgstr)
        had_numbers = placeholder_id != entry.msgid
        if had_numbers and number_replacement_flag not in shared.flags:
            shared.flags.append(number_replacement_flag)
|
||
|
|
||
|
class PotDict(dict):
    """A dict that creates a PokedexPot for any name looked up but missing."""

    def __missing__(self, name):
        # Store the new catalog first so later lookups return the same object.
        self[name] = PokedexPot(name)
        return self[name]
|
||
|
|
||
|
def yield_po_messages(pos):
    """Return a translations.Merge fed with the messages of all given .po files

    `pos` is a dict whose values are the po files to read.
    """
    combined = translations.Merge()
    for pofile in pos.values():
        # The per-file generator also receives the merger, so it can register
        # additional iterators while it runs.
        per_file_messages = _yield_one_po_messages(pofile, combined)
        combined.add_iterator(per_file_messages)
    return combined
|
||
|
|
||
|
def entry_sort_key(entry):
    """Return the sort key of a PO entry, based on its first occurrence.

    The key is a (class name, object id, column name, fuzzy) tuple, where the
    class name comes from mapped_class_dict. If the class is unknown, a
    warning is printed and an empty class name is used instead.

    Returns None when the entry has no occurrences, or when its first
    occurrence has no line (object id) set.
    """
    try:
        cls_col, line = entry.occurrences[0]
    except IndexError:
        return None
    if not line:
        return None

    classname, col = cls_col.split('.')
    fuzzy = entry.obsolete or 'fuzzy' in entry.flags
    try:
        sort_name = mapped_class_dict[classname].__name__
    except KeyError:
        # Renamed table?
        print('Warning: Unknown class %s' % classname)
        sort_name = ''
    return sort_name, int(line), col, fuzzy
|
||
|
|
||
|
def _yield_one_po_messages(pofile, merger):
    # Yield messages from one po file
    #
    # Messages in our po files are ordered by the first occurrence.
    # The occurrences of a single message are also ordered.
    # So just merge all the subsequences as we go
    #
    # Entries without a msgstr, and occurrences without an object id, are
    # skipped.
    #
    # NOTE(review): a class name missing from mapped_class_dict raises
    # KeyError here, whereas entry_sort_key only prints a warning for it.
    for entry in sorted(pofile, key=entry_sort_key):
        if not entry.msgstr:
            continue
        fuzzy = (entry.obsolete or 'fuzzy' in entry.flags)
        number_replacement = number_replacement_flag in entry.flags
        messages = []
        for cls_colname, object_id in entry.occurrences:
            if not object_id:
                continue
            clsname, colname = cls_colname.split('.')
            messages.append(translations.Message(
                mapped_class_dict[clsname].__name__,
                int(object_id),
                colname,
                entry.msgstr,
                source=entry.msgid,
                number_replacement=number_replacement,
                origin='PO file',
                fuzzy=fuzzy,
            ))
        if messages[1:]:
            # Spawn extra iterators before yielding
            merger.add_iterator(messages[1:])
        if messages:
            yield messages[0]
|
||
|
|
||
|
def create_pots(source, *translation_streams):
    """Convert an iterator of Messages to a dictionary of pot/po files

    If translations are given, they're merged, and any exact matches are put
    in the po file. Give some for po files, don't give any for pot files.

    Translations that are present but not exact are marked 'fuzzy'.

    Returns a PotDict mapping pot names to PokedexPot objects.
    """
    obsolete = []
    pots = PotDict()
    merged = translations.merge_translations(
        source, *translation_streams, unused=obsolete.append)
    # The loop variable is deliberately not called `source`, so the parameter
    # is not rebound while iterating.
    for message, sourcehash, string, exact in merged:
        ctxt = '.'.join((message.cls, message.colname))
        entry = polib.POEntry(
            msgid=message.string,
            occurrences=[(ctxt, message.id)],
            msgctxt=ctxt,
        )
        if string:
            entry.msgstr = string
            if not exact:
                entry.flags.append('fuzzy')
        pots[message.pot].append(entry)

    # NOTE(review): the obsolete entries built below are never added to any
    # pot, so unused translations are currently dropped from the output.
    # Left as-is because the target pot for such a message is not evident
    # here -- confirm the intended behaviour before changing it.
    for message in obsolete:
        ctxt = '.'.join((message.cls, message.colname))
        entry = polib.POEntry(
            msgid=message.source or '???',
            occurrences=[(ctxt, message.id)],
            msgctxt=ctxt,
            obsolete=True,
        )
    return pots
|
||
|
|
||
|
def save_pots(pots, gettext_directory=default_gettext_directory):
    """Write each pot to <gettext_directory>/pokedex-<name>.pot."""
    for name in pots:
        filename = 'pokedex-%s.pot' % name
        pots[name].save(os.path.join(gettext_directory, filename))
|
||
|
|
||
|
def save_pos(pos, lang, gettext_directory=default_gettext_directory):
    """Write each po to <gettext_directory>/<lang>/pokedex-<name>.po."""
    for name in pos:
        filename = 'pokedex-%s.po' % name
        pos[name].save(os.path.join(gettext_directory, lang, filename))
|
||
|
|
||
|
def read_pots(directory=default_gettext_directory, extension='.pot'):
    """Load every file in `directory` that has the given extension as a pofile

    Works on pos or pots.

    Returns a dict mapping each file's name without extension to its pofile.
    """
    loaded = {}
    for filename in os.listdir(directory):
        basename, ext = os.path.splitext(filename)
        if ext != extension:
            continue
        loaded[basename] = polib.pofile(os.path.join(directory, filename))
    return loaded
|
||
|
|
||
|
def all_langs(gettext_directory=default_gettext_directory):
    """Return the names of all subdirectories of the gettext directory."""
    langs = []
    for name in os.listdir(gettext_directory):
        if os.path.isdir(os.path.join(gettext_directory, name)):
            langs.append(name)
    return langs
|
||
|
|
||
|
def merge_pos(transl, lang, language_directory):
    """Update all po files for the given language

    Takes into account the source, the official translations from the database,
    the existing PO files, and the current translation CSV, in that order.

    Returns a name -> pofile dict
    """
    # The streams are built in the same order they are handed to create_pots.
    source = transl.source
    official = transl.official_messages(lang)
    existing_pos = read_pots(language_directory, '.po')
    from_pos = yield_po_messages(pos=existing_pos)
    from_csv = transl.yield_target_messages(lang)
    return create_pots(source, official, from_pos, from_csv)
|
||
|
|
||
|
def bar(fraction, size, done_char='=', split_char='|', notdone_char='-'):
    """Build an ASCII art progress bar

    The bar is `size` characters wide: the completed part, one split
    character, then the remaining part. When `fraction` equals 1 the split
    character is drawn as a done character.
    """
    width = size - 1
    completed = int(round(width * fraction))
    middle = done_char if fraction == 1 else split_char
    return done_char * completed + middle + notdone_char * (width - completed)
|
||
|
|
||
|
def print_stats(pos):
    """Print out some fun stats about a set of po files

    One line is printed per po file (name, translated/total entry counts,
    percentage and a progress bar), followed by a 'Total' line.

    A po file without entries, or an empty `pos`, is reported as 0%
    translated instead of failing with a ZeroDivisionError.
    """
    template = u"{0:>10}: {1:4}/{2:4} {3:6.2f}% [{4}]"

    def report(label, translated, count):
        # Guard the division: an empty catalog has nothing to translate.
        fraction = 1. * translated / count if count else 0.
        line = template.format(
            label,
            translated,
            count,
            100 * fraction,
            bar(fraction, 47),
        )
        print(line.encode('utf-8'))

    total_translated = 0
    total = 0
    for name, po in pos.items():
        num_translated = len(po.translated_entries())
        total_translated += num_translated
        total += len(po)
        report(name, num_translated, len(po))
    report('Total', total_translated, total)
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Command-line interface; the module docstring doubles as the usage text.
    parser = OptionParser(__doc__)

    parser.add_option(
        '-l', '--langs', dest='langs',
        help="List of languages to handle, separated by commas (example: -l 'en,de,ja') (default: all in gettext directory)")
    parser.add_option(
        '-P', '--no-pots', dest='pots', action='store_false', default=True,
        help='Do not create POT files (templates)')
    parser.add_option(
        '-p', '--no-pos', dest='pos', action='store_false', default=True,
        help='Do not update PO files (message catalogs)')

    parser.add_option(
        '-c', '--no-csv', dest='csv', action='store_false', default=True,
        help='Do not update pokedex translations files')

    parser.add_option(
        '-d', '--directory', dest='directory',
        help='Veekun data directory')
    parser.add_option(
        '-L', '--source-language', dest='source_lang',
        help="Source language identifier (default: 'en')")

    parser.add_option(
        '-g', '--gettext-dir', dest='gettext_directory',
        default=default_gettext_directory,
        help='Gettext directory (default: pokedex/i18n/)')

    parser.add_option(
        '-q', '--quiet', dest='verbose', default=True, action='store_false',
        help="Don't print what's going on")

    options, arguments = parser.parse_args()

    transl = translations.Translations.from_parsed_options(options)

    gettext_directory = options.gettext_directory
    verbose = options.verbose

    # Step 1: regenerate the templates from the source strings.
    if options.pots:
        if verbose:
            print('%s %s' % ('Creating pots in', gettext_directory))
        source_pots = create_pots(transl.source)
        save_pots(source_pots, gettext_directory=gettext_directory)

    if options.pos or options.csv:
        # Merge in CSV files from command line
        csv_streams = defaultdict(translations.Merge)
        for argument in arguments:
            # Add each message in its own stream, to sort them.
            with open(argument, 'rb') as csv_file:
                for message in translations.yield_guessed_csv_messages(csv_file):
                    lang = transl.language_identifiers[message.language_id]
                    csv_streams[lang].add_iterator([message])
        streams = defaultdict(list)
        for lang, stream in csv_streams.items():
            streams[lang].append(stream)

        # Merge in the PO files
        if options.langs:
            langs = options.langs.split(',')
        else:
            langs = all_langs(gettext_directory)

        for lang in langs:
            language_directory = os.path.join(gettext_directory, lang)
            if verbose:
                print('Merging translations for %s in %s' % (lang, language_directory))
            pos = merge_pos(transl, lang, language_directory)

            if options.pos:
                if verbose:
                    print('Writing POs for %s' % lang)
                save_pos(pos, lang, gettext_directory=gettext_directory)

                if verbose:
                    print_stats(pos)

            streams[lang].append(yield_po_messages(pos))

        # Step 3: write the merged translations back to the CSV files.
        if options.csv:
            for lang, lang_streams in streams.items():
                if verbose:
                    print("Merging %s translation stream/s for '%s'" % (len(lang_streams), lang))
                # The existing target messages are appended as the last stream.
                existing_messages = list(transl.yield_target_messages(lang))
                lang_streams.append(existing_messages)
                transl.write_translations(lang, *lang_streams)
|