Merge remote-tracking branch 'origin/encukou'

2024-08-20 18:16:34 +00:00 · 2011-03-29 08:06:34 -07:00 · 2011-03-29 08:06:34 -07:00 · 4445305e7c
commit 4445305e7c
parent c91da22989 c25db1d2cf
4 changed files with 298 additions and 165 deletions
--- a/pokedex/init.py
+++ b/pokedex/init.py
@ -122,6 +122,8 @@ def command_load(*args):
    parser = get_parser(verbose=True)
    parser.add_option('-d', '--directory', dest='directory', default=None)
    parser.add_option('-D', '--drop-tables', dest='drop_tables', default=False, action='store_true')
    parser.add_option('-S', '--safe', dest='safe', default=False, action='store_true',
        help="Do not use backend-specific optimalizations.")
    options, tables = parser.parse_args(list(args))
    if not options.engine_uri:
@ -138,7 +140,7 @@ def command_load(*args):
                                  drop_tables=options.drop_tables,
                                  tables=tables,
                                  verbose=options.verbose,
-                                  safe=False)
+                                  safe=options.safe)
 def command_reindex(*args):
    parser = get_parser(verbose=True)
--- a/pokedex/db/load.py
+++ b/pokedex/db/load.py
@ -140,12 +140,16 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
    # Drop all tables if requested
    if drop_tables:
        print_start('Dropping tables')
-        for table in reversed(table_objs):
+        for n, table in enumerate(reversed(table_objs)):
            table.drop(checkfirst=True)
            print_status('%s/%s' % (n, len(table_objs)))
        print_done()
-    for table in table_objs:
+    print_start('Creating tables')
    for n, table in enumerate(table_objs):
        table.create()
        print_status('%s/%s' % (n, len(table_objs)))
    print_done()
    connection = session.connection()
    # Okay, run through the tables and actually load the data now
@ -168,6 +172,36 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
        reader = csv.reader(csvfile, lineterminator='\n')
        column_names = [unicode(column) for column in reader.next()]
        if not safe and session.connection().dialect.name == 'postgresql':
            """
            Postgres' CSV dialect is nearly the same as ours, except that it
            treats completely empty values as NULL, and empty quoted
            strings ("") as empty strings.
            Pokedex dump does not quote empty strings. So, both empty strings
            and NULLs are read in as NULL.
            For an empty string in a NOT NULL column, the load will fail, and
            load will fall back to the cross-backend row-by-row loading. And in
            nullable columns, we already load empty strings as NULL.
            """
            session.commit()
            not_null_cols = [c for c in column_names if not table_obj.c[c].nullable]
            if not_null_cols:
                force_not_null = 'FORCE NOT NULL ' + ','.join('"%s"' % c for c in not_null_cols)
            else:
                force_not_null = ''
            command = "COPY {table_name} ({columns}) FROM '{csvpath}' CSV HEADER {force_not_null}"
            session.connection().execute(
                    command.format(
                            table_name=table_name,
                            csvpath=csvpath,
                            columns=','.join('"%s"' % c for c in column_names),
                            force_not_null=force_not_null,
                        )
                )
            session.commit()
            print_done()
            continue
        # Self-referential tables may contain rows with foreign keys of other
        # rows in the same table that do not yet exist.  Pull these out and add
        # them to the session last
--- a/pokedex/roomaji.py
+++ b/pokedex/roomaji.py
@ -1,173 +1,244 @@
 # encoding: utf8
-"""Provides `romanize()` for romanizing simple Japanese text."""
+"""Provides `romanize()` for romanizing simple Japanese text.
-_roomaji_kana = {
+Also provides available romanizers in a dictionary keyed by language identifier.
-    # Hiragana
+"""
    u'あ': 'a',     u'い': 'i',     u'う': 'u',     u'え': 'e',     u'お': 'o',
    u'か': 'ka',    u'き': 'ki',    u'く': 'ku',    u'け': 'ke',    u'こ': 'ko',
    u'さ': 'sa',    u'し': 'shi',   u'す': 'su',    u'せ': 'se',    u'そ': 'so',
    u'た': 'ta',    u'ち': 'chi',   u'つ': 'tsu',   u'て': 'te',    u'と': 'to',
    u'な': 'na',    u'に': 'ni',    u'ぬ': 'nu',    u'ね': 'ne',    u'の': 'no',
    u'は': 'ha',    u'ひ': 'hi',    u'ふ': 'fu',    u'へ': 'he',    u'ほ': 'ho',
    u'ま': 'ma',    u'み': 'mi',    u'む': 'mu',    u'め': 'me',    u'も': 'mo',
    u'や': 'ya',                    u'ゆ': 'yu',                    u'よ': 'yo',
    u'ら': 'ra',    u'り': 'ri',    u'る': 'ru',    u'れ': 're',    u'ろ': 'ro',
    u'わ': 'wa',    u'ゐ': 'wi',                    u'ゑ': 'we',    u'を': 'wo',
                                                                    u'ん': 'n',
    u'が': 'ga',    u'ぎ': 'gi',    u'ぐ': 'gu',    u'げ': 'ge',    u'ご': 'go',
    u'ざ': 'za',    u'じ': 'ji',    u'ず': 'zu',    u'ぜ': 'ze',    u'ぞ': 'zo',
    u'だ': 'da',    u'ぢ': 'ji',    u'づ': 'dzu',   u'で': 'de',    u'ど': 'do',
    u'ば': 'ba',    u'び': 'bi',    u'ぶ': 'bu',    u'べ': 'be',    u'ぼ': 'bo',
    u'ぱ': 'pa',    u'ぴ': 'pi',    u'ぷ': 'pu',    u'ぺ': 'pe',    u'ぽ': 'po',
-    # Katakana
+class Romanizer(object):
-    u'ア': 'a',     u'イ': 'i',     u'ウ': 'u',     u'エ': 'e',     u'オ': 'o',
+    def __init__(self, parent=None, **tables):
-    u'カ': 'ka',    u'キ': 'ki',    u'ク': 'ku',    u'ケ': 'ke',    u'コ': 'ko',
+        """Create a Romanizer
    u'サ': 'sa',    u'シ': 'shi',   u'ス': 'su',    u'セ': 'se',    u'ソ': 'so',
    u'タ': 'ta',    u'チ': 'chi',   u'ツ': 'tsu',   u'テ': 'te',    u'ト': 'to',
    u'ナ': 'na',    u'ニ': 'ni',    u'ヌ': 'nu',    u'ネ': 'ne',    u'ノ': 'no',
    u'ハ': 'ha',    u'ヒ': 'hi',    u'フ': 'fu',    u'ヘ': 'he',    u'ホ': 'ho',
    u'マ': 'ma',    u'ミ': 'mi',    u'ム': 'mu',    u'メ': 'me',    u'モ': 'mo',
    u'ヤ': 'ya',                    u'ユ': 'yu',                    u'ヨ': 'yo',
    u'ラ': 'ra',    u'リ': 'ri',    u'ル': 'ru',    u'レ': 're',    u'ロ': 'ro',
    u'ワ': 'wa',    u'ヰ': 'wi',                    u'ヱ': 'we',    u'ヲ': 'wo',
                                                                    u'ン': 'n',
    u'ガ': 'ga',    u'ギ': 'gi',    u'グ': 'gu',    u'ゲ': 'ge',    u'ゴ': 'go',
    u'ザ': 'za',    u'ジ': 'ji',    u'ズ': 'zu',    u'ゼ': 'ze',    u'ゾ': 'zo',
    u'ダ': 'da',    u'ヂ': 'ji',    u'ヅ': 'dzu',   u'デ': 'de',    u'ド': 'do',
    u'バ': 'ba',    u'ビ': 'bi',    u'ブ': 'bu',    u'ベ': 'be',    u'ボ': 'bo',
    u'パ': 'pa',    u'ピ': 'pi',    u'プ': 'pu',    u'ペ': 'pe',    u'ポ': 'po',
                                    u'ヴ': 'vu',
 }
-_roomaji_youon = {
+        parent: A Romanizer to base this one on
-    # Hiragana
+        tables: Dicts that become the object's attributes. If a parent is given,
-    u'ゃ': 'ya',                    u'ゅ': 'yu',                    u'ょ': 'yo',
+            its tables are used, and updated with the given ones
-
+        """
-    # Katakana
+        self.parent = parent
-    u'ャ': 'ya',                    u'ュ': 'yu',                    u'ョ': 'yo',
+        if parent:
-}
+            self.tables = parent.tables
-
+            for name, table in tables.items():
-# XXX If romanize() ever handles hiragana, it will need to make sure that the
+                # Take a copy -- don't want to clobber the parent's tables
-# preceding character was a katakana
+                self.tables[name] = dict(self.tables[name])
-# This does not include every small kana combination, but should include every
+                self.tables[name].update(table)
 # one used in a Pokémon name.  An exhaustive list would be..  very long
 _roomaji_small_kana = {
    u'ァ': 'a',     u'ィ': 'i',     u'ゥ': 'u',     u'ェ': 'e',     u'ォ': 'o',
 }
 _roomaji_small_kana_combos = {
    # These are, by the way, fairly arbitrary.  "shi xi" to mean "sy" is
    # particularly weird, but it seems to be what GF intends
    # Simple vowel replacement
                    u'ウィ': 'wi',  u'ウゥ': 'wu',  u'ウェ': 'we',  u'ウォ': 'wo',
    u'ヴァ': 'va',  u'ヴィ': 'vi',                  u'ヴェ': 've',  u'ヴォ': 'vo',
                                                    u'チェ': 'che',
                                                    u'シェ': 'she',
                                                    u'ジェ': 'je',
    u'テァ': 'tha', u'ティ': 'ti',  u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
    u'デァ': 'dha', u'ディ': 'di',  u'デゥ': 'dhu', u'デェ': 'dye', u'デォ': 'dho',
    u'ファ': 'fa',  u'フィ': 'fi',  u'ホゥ': 'hu',  u'フェ': 'fe',  u'フォ': 'fo',
    # Not so much
    u'シィ': 'sy',
    u'ミィ': 'my',
    u'ビィ': 'by',
    u'ピィ': 'py',
 }
 def romanize(string):
    """Converts a string of kana to roomaji."""
    vowels = ['a', 'e', 'i', 'o', 'u', 'y']
    characters = []
    last_kana = None  # Used for ー; っ or ッ; ん or ン
    last_char = None  # Used for small kana combos
    for char in string:
        # Full-width Latin
        if 0xff01 <= ord(char) <= 0xff5e:
            if last_kana == 'sokuon':
                raise ValueError("Sokuon cannot precede Latin characters.")
            # XXX Real Unicode decomposition would be nicer
            char = chr(ord(char) - 0xff01 + 0x21)
            characters.append(char)
            last_kana = None
        # Small vowel kana
        elif char in _roomaji_small_kana:
            combo = last_char + char
            if combo in _roomaji_small_kana_combos:
                characters[-1] = _roomaji_small_kana_combos[combo]
            else:
                # If we don't know what it is...  act dumb and treat it as a
                # full-size vowel.  Better than bailing, and seems to occur a
                # lot, e.g. ピィ is "pii"
                characters.append(_roomaji_small_kana[char])
            last_kana = _roomaji_small_kana[char]
        # Youon
        elif char in _roomaji_youon:
            if not last_kana or last_kana[-1] != 'i' or last_kana == 'i':
                raise ValueError("Youon must follow an -i sound.")
            # Drop the -i and append the ya/yu/yo sound
            new_sound = _roomaji_youon[char]
            if last_kana in ['chi', 'shi', 'ji']:
                # Strip the y-
                new_char = last_kana[:-1] + new_sound[1:]
            else:
                new_char = last_kana[:-1] + new_sound
            characters[-1] = new_char
            last_kana = new_char
        # Sokuon
        elif char in (u'っ', u'ッ'):
            # Remember it and double the consonant next time around
            last_kana = 'sokuon'
        # Extended vowel or n
        elif char == u'ー':
            if last_kana[-1] not in vowels:
                raise ValueError(u"'ー' must follow by a vowel.")
            characters.append(last_kana[-1])
            last_kana = None
        # Regular ol' kana
        elif char in _roomaji_kana:
            kana = _roomaji_kana[char]
            if last_kana == 'sokuon':
                if kana[0] in vowels:
                    raise ValueError("Sokuon cannot precede a vowel.")
                characters.append(kana[0])
            elif last_kana == 'n' and kana[0] in vowels:
                characters.append("'")
            characters.append(kana)
            last_kana = kana
        # Not Japanese?
        else:
-            if last_kana == 'sokuon':
+            self.tables = tables
                raise ValueError("Sokuon must be followed by another kana.")
-            characters.append(char)
+        for name, table in self.tables.items():
            setattr(self, name, table)
-            last_kana = None
+    def romanize(self, string):
        """Convert a string of kana to roomaji."""
-        last_char = char
+        vowels = ['a', 'e', 'i', 'o', 'u', 'y']
        characters = []
        last_kana = None  # Used for ー; っ or ッ; ん or ン
        last_char = None  # Used for small kana combos
        for char in string:
            # Full-width Latin
            if 0xff01 <= ord(char) <= 0xff5e:
                if last_kana == 'sokuon':
                    raise ValueError("Sokuon cannot precede Latin characters.")
                # XXX Real Unicode decomposition would be nicer
                char = chr(ord(char) - 0xff01 + 0x21)
                characters.append(char)
                last_kana = None
            # Small vowel kana
            elif char in self.roomaji_small_kana:
                combo = last_char + char
                if combo in self.roomaji_small_kana_combos:
                    characters[-1] = self.roomaji_small_kana_combos[combo]
                else:
                    # If we don't know what it is...  act dumb and treat it as a
                    # full-size vowel.  Better than bailing, and seems to occur a
                    # lot, e.g. ピィ is "pii"
                    characters.append(self.roomaji_small_kana[char])
                last_kana = self.roomaji_small_kana[char]
            # Youon
            elif char in self.roomaji_youon:
                if not last_kana or last_kana[-1] != 'i' or last_kana == 'i':
                    raise ValueError("Youon must follow an -i sound.")
                # Drop the -i and append the ya/yu/yo sound
                new_sound = self.roomaji_youon[char]
                if last_kana in self.y_drop:
                    # Strip the y-
                    new_char = self.y_drop[last_kana] + new_sound[1:]
                else:
                    new_char = last_kana[:-1] + new_sound
                characters[-1] = new_char
                last_kana = new_char
            # Sokuon
            elif char in (u'っ', u'ッ'):
                # Remember it and double the consonant next time around
                last_kana = 'sokuon'
            # Extended vowel or n
            elif char == u'ー':
                if last_kana[-1] not in vowels:
                    raise ValueError(u"'ー' must follow by a vowel.")
                if last_kana[-1] in self.lengthened_vowels:
                    characters[-1] = characters[-1][:-1]
                    characters.append(self.lengthened_vowels[last_kana[-1]])
                else:
                    characters.append(last_kana[-1])
                last_kana = None
            # Regular ol' kana
            elif char in self.roomaji_kana:
                kana = self.roomaji_kana[char]
                if last_kana == 'sokuon':
                    if kana[0] in vowels:
                        raise ValueError("Sokuon cannot precede a vowel.")
                    characters.append(kana[0])
                elif last_kana == 'n' and kana[0] in vowels:
                    characters.append("'")
                # Special characters for doubled kana
                if kana[0] in self.lengthened_vowels and characters and kana == characters[-1][-1]:
                    kana = self.lengthened_vowels[kana[0]]
                    characters[-1] = characters[-1][:-1]
                characters.append(kana)
                last_kana = kana
            # Not Japanese?
            else:
                if last_kana == 'sokuon':
                    raise ValueError("Sokuon must be followed by another kana.")
                characters.append(char)
                last_kana = None
            last_char = char
-    if last_kana == 'sokuon':
+        if last_kana == 'sokuon':
-        raise ValueError("Sokuon cannot be the last character.")
+            raise ValueError("Sokuon cannot be the last character.")
-    return unicode(''.join(characters))
+        return unicode(''.join(characters))
 # Registry of the available romanizers, keyed by language identifier.
 romanizers = dict()
 # English: the base set of tables.  The Czech romanizer further down is
 # created with parent=romanizers['en'] and only overrides what differs.
 romanizers['en'] = Romanizer(
    # Full-size kana -> roomaji syllable
    roomaji_kana={
        # Hiragana
        u'あ': 'a',     u'い': 'i',     u'う': 'u',     u'え': 'e',     u'お': 'o',
        u'か': 'ka',    u'き': 'ki',    u'く': 'ku',    u'け': 'ke',    u'こ': 'ko',
        u'さ': 'sa',    u'し': 'shi',   u'す': 'su',    u'せ': 'se',    u'そ': 'so',
        u'た': 'ta',    u'ち': 'chi',   u'つ': 'tsu',   u'て': 'te',    u'と': 'to',
        u'な': 'na',    u'に': 'ni',    u'ぬ': 'nu',    u'ね': 'ne',    u'の': 'no',
        u'は': 'ha',    u'ひ': 'hi',    u'ふ': 'fu',    u'へ': 'he',    u'ほ': 'ho',
        u'ま': 'ma',    u'み': 'mi',    u'む': 'mu',    u'め': 'me',    u'も': 'mo',
        u'や': 'ya',                    u'ゆ': 'yu',                    u'よ': 'yo',
        u'ら': 'ra',    u'り': 'ri',    u'る': 'ru',    u'れ': 're',    u'ろ': 'ro',
        u'わ': 'wa',    u'ゐ': 'wi',                    u'ゑ': 'we',    u'を': 'wo',
                                                                        u'ん': 'n',
        u'が': 'ga',    u'ぎ': 'gi',    u'ぐ': 'gu',    u'げ': 'ge',    u'ご': 'go',
        u'ざ': 'za',    u'じ': 'ji',    u'ず': 'zu',    u'ぜ': 'ze',    u'ぞ': 'zo',
        u'だ': 'da',    u'ぢ': 'ji',    u'づ': 'dzu',   u'で': 'de',    u'ど': 'do',
        u'ば': 'ba',    u'び': 'bi',    u'ぶ': 'bu',    u'べ': 'be',    u'ぼ': 'bo',
        u'ぱ': 'pa',    u'ぴ': 'pi',    u'ぷ': 'pu',    u'ぺ': 'pe',    u'ぽ': 'po',
        # Katakana
        u'ア': 'a',     u'イ': 'i',     u'ウ': 'u',     u'エ': 'e',     u'オ': 'o',
        u'カ': 'ka',    u'キ': 'ki',    u'ク': 'ku',    u'ケ': 'ke',    u'コ': 'ko',
        u'サ': 'sa',    u'シ': 'shi',   u'ス': 'su',    u'セ': 'se',    u'ソ': 'so',
        u'タ': 'ta',    u'チ': 'chi',   u'ツ': 'tsu',   u'テ': 'te',    u'ト': 'to',
        u'ナ': 'na',    u'ニ': 'ni',    u'ヌ': 'nu',    u'ネ': 'ne',    u'ノ': 'no',
        u'ハ': 'ha',    u'ヒ': 'hi',    u'フ': 'fu',    u'ヘ': 'he',    u'ホ': 'ho',
        u'マ': 'ma',    u'ミ': 'mi',    u'ム': 'mu',    u'メ': 'me',    u'モ': 'mo',
        u'ヤ': 'ya',                    u'ユ': 'yu',                    u'ヨ': 'yo',
        u'ラ': 'ra',    u'リ': 'ri',    u'ル': 'ru',    u'レ': 're',    u'ロ': 'ro',
        u'ワ': 'wa',    u'ヰ': 'wi',                    u'ヱ': 'we',    u'ヲ': 'wo',
                                                                        u'ン': 'n',
        u'ガ': 'ga',    u'ギ': 'gi',    u'グ': 'gu',    u'ゲ': 'ge',    u'ゴ': 'go',
        u'ザ': 'za',    u'ジ': 'ji',    u'ズ': 'zu',    u'ゼ': 'ze',    u'ゾ': 'zo',
        u'ダ': 'da',    u'ヂ': 'ji',    u'ヅ': 'dzu',   u'デ': 'de',    u'ド': 'do',
        u'バ': 'ba',    u'ビ': 'bi',    u'ブ': 'bu',    u'ベ': 'be',    u'ボ': 'bo',
        u'パ': 'pa',    u'ピ': 'pi',    u'プ': 'pu',    u'ペ': 'pe',    u'ポ': 'po',
                                        u'ヴ': 'vu',
    },
    # Small ya/yu/yo.  romanize() merges these into the preceding -i syllable
    # and raises ValueError if there is none.
    roomaji_youon={
        # Hiragana
        u'ゃ': 'ya',                    u'ゅ': 'yu',                    u'ょ': 'yo',
        # Katakana
        u'ャ': 'ya',                    u'ュ': 'yu',                    u'ョ': 'yo',
    },
    # XXX If romanize() ever handles hiragana, it will need to make sure that the
    # preceding character was a katakana
    # This does not include every small kana combination, but should include every
    # one used in a Pokémon name.  An exhaustive list would be..  very long
    # Small vowel kana: written as a plain vowel when the pair formed with the
    # previous character is not listed in roomaji_small_kana_combos.
    roomaji_small_kana={
        u'ァ': 'a',     u'ィ': 'i',     u'ゥ': 'u',     u'ェ': 'e',     u'ォ': 'o',
    },
    # Two-character sequences (kana + small vowel) whose romanization replaces
    # the romanization already emitted for the first character.
    roomaji_small_kana_combos={
        # These are, by the way, fairly arbitrary.  "shi xi" to mean "sy" is
        # particularly weird, but it seems to be what GF intends
        # Simple vowel replacement
                        u'ウィ': 'wi',  u'ウゥ': 'wu',  u'ウェ': 'we',  u'ウォ': 'wo',
        u'ヴァ': 'va',  u'ヴィ': 'vi',                  u'ヴェ': 've',  u'ヴォ': 'vo',
                                                        u'チェ': 'che',
                                                        u'シェ': 'she',
                                                        u'ジェ': 'je',
        u'テァ': 'tha', u'ティ': 'ti',  u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
        u'デァ': 'dha', u'ディ': 'di',  u'デゥ': 'dhu', u'デェ': 'dye', u'デォ': 'dho',
        u'ファ': 'fa',  u'フィ': 'fi',  u'ホゥ': 'hu',  u'フェ': 'fe',  u'フォ': 'fo',
        # Not so much
        u'シィ': 'sy',
        u'ミィ': 'my',
        u'ビィ': 'by',
        u'ピィ': 'py',
    },
    # Empty for English: a lengthened vowel is written by repeating the vowel
    # rather than by substituting a special character.
    lengthened_vowels={},
    # -i syllables whose youon form drops the "y": the value is the stem that
    # gets the youon's vowel appended, e.g. chi + ya -> "ch" + "a".
    y_drop={'chi': 'ch', 'shi': 'sh', 'ji': 'j'},
 )
 # Czech: starts from the English tables and replaces only the entries whose
 # Czech spelling differs.
 romanizers['cs'] = Romanizer(
    parent=romanizers['en'],
    # Full-size kana that are spelled differently in Czech
    roomaji_kana={
        # Hiragana
        u'し': u'ši',
        u'ち': u'či',
        u'つ': u'cu',
        u'や': u'ja',
        u'ゆ': u'ju',
        u'よ': u'jo',
        u'じ': u'dži',
        u'ぢ': u'dži',
        # Katakana
        u'シ': u'ši',
        u'チ': u'či',
        u'ツ': u'cu',
        u'ヤ': u'ja',
        u'ユ': u'ju',
        u'ヨ': 'jo',
        u'ジ': u'dži',
        u'ヂ': u'dži',
    },
    # Small ya/yu/yo are written with "j" instead of "y"
    roomaji_youon={
        # Hiragana
        u'ゃ': 'ja',
        u'ゅ': 'ju',
        u'ょ': 'jo',
        # Katakana
        u'ャ': 'ja',
        u'ュ': 'ju',
        u'ョ': 'jo',
    },
    # Kana + small vowel pairs with a Czech-specific spelling
    roomaji_small_kana_combos={
        u'チェ': u'če',
        u'シェ': u'še',
        u'ジェ': u'dže',
        u'テェ': u'tje',
        u'デェ': u'dje',
        u'シィ': u'sí',
        u'ミィ': u'mí',
        u'ビィ': u'bí',
        u'ピィ': u'pí',
    },
    # A lengthened vowel becomes the accented vowel instead of being doubled
    lengthened_vowels={
        'a': u'á',
        'e': u'é',
        'i': u'í',
        'o': u'ó',
        'u': u'ú',
    },
    # Stems used when a youon follows one of these -i syllables; the youon's
    # leading letter is dropped and only its vowel is appended
    y_drop={
        u'či': u'č',
        u'ši': u'š',
        u'dži': u'dž',
        u'ni': u'ňj',
    },
 )
 def romanize(string, lang='en'):
    """Convert a string of kana to roomaji.

    string: the kana text to romanize.
    lang: key into the module-level `romanizers` dict.  A language with no
        registered romanizer falls back to the English ('en') one.

    Returns whatever the selected romanizer's romanize() returns.
    """
    # The fallback must be the English Romanizer object itself.  Passing the
    # key 'en' as the default would hand back a plain string, which has no
    # romanize() method, so unknown languages would fail with AttributeError.
    romanizer = romanizers.get(lang, romanizers['en'])
    # Romanize away!
    return romanizer.romanize(string)
--- a/pokedex/tests/test_roomaji.py
+++ b/pokedex/tests/test_roomaji.py
@ -12,6 +12,7 @@ def test_roomaji():
        # Elongated vowel
        (u'イーブイ',           'iibui'),
        (u'ホーホー',           'hoohoo'),
        (u'ピカチュウ',         u'pikachuu'),
        # Combined characters
        (u'ニャース',           'nyaasu'),
@ -28,3 +29,28 @@ def test_roomaji():
    for kana, roomaji in tests:
        result = pokedex.roomaji.romanize(kana)
        assert_equal(result, roomaji, u"'%s' romanizes correctly" % roomaji)
 def test_roomaji_cs():
    """Check the Czech ('cs') romanization of a handful of kana strings."""
    # Each entry is (kana input, expected Czech romanization)
    cases = (
        (u'ヤミカラス', u'jamikarasu'),

        # Elongated vowel
        (u'イーブイ', u'íbui'),
        (u'ホーホー', u'hóhó'),
        (u'ピカチュウ', u'pikačú'),

        # Combined characters
        (u'ニャース', u'ňjásu'),
        (u'ジャ', u'dža'),
        (u'ぎゃくてん', u'gjakuten'),
        (u'ウェザーボール', u'wezábóru'),

        # Special katakana combinations
        (u'ラティアス', u'ratiasu'),
        (u'ウィー', u'wí'),
        (u'セレビィ', u'serebí'),
    )

    for source, expected in cases:
        message = u"'%s' romanizes correctly for Czech" % expected
        actual = pokedex.roomaji.romanize(source, 'cs')
        assert_equal(actual, expected, message)