Added hiragana support to roomaji.py. #100

This commit is contained in:
Eevee 2009-09-28 21:29:01 -07:00
parent c66fe4042c
commit 28be0b3c60
2 changed files with 54 additions and 1 deletions

View file

@ -2,6 +2,25 @@
"""Provides `romanize()` for romanizing simple Japanese text.""" """Provides `romanize()` for romanizing simple Japanese text."""
_roomaji_kana = { _roomaji_kana = {
# Hiragana
u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o',
u'': 'ka', u'': 'ki', u'': 'ku', u'': 'ke', u'': 'ko',
u'': 'sa', u'': 'shi', u'': 'su', u'': 'se', u'': 'so',
u'': 'ta', u'': 'chi', u'': 'tsu', u'': 'te', u'': 'to',
u'': 'na', u'': 'ni', u'': 'nu', u'': 'ne', u'': 'no',
u'': 'ha', u'': 'hi', u'': 'fu', u'': 'he', u'': 'ho',
u'': 'ma', u'': 'mi', u'': 'mu', u'': 'me', u'': 'mo',
u'': 'ya', u'': 'yu', u'': 'yo',
u'': 'ra', u'': 'ri', u'': 'ru', u'': 're', u'': 'ro',
u'': 'wa', u'': 'wi', u'': 'we', u'': 'wo',
u'': 'n',
u'': 'ga', u'': 'gi', u'': 'gu', u'': 'ge', u'': 'go',
u'': 'za', u'': 'ji', u'': 'zu', u'': 'ze', u'': 'zo',
u'': 'da', u'': 'ji', u'': 'dzu', u'': 'de', u'': 'do',
u'': 'ba', u'': 'bi', u'': 'bu', u'': 'be', u'': 'bo',
u'': 'pa', u'': 'pi', u'': 'pu', u'': 'pe', u'': 'po',
# Katakana
u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o', u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o',
u'': 'ka', u'': 'ki', u'': 'ku', u'': 'ke', u'': 'ko', u'': 'ka', u'': 'ki', u'': 'ku', u'': 'ke', u'': 'ko',
u'': 'sa', u'': 'shi', u'': 'su', u'': 'se', u'': 'so', u'': 'sa', u'': 'shi', u'': 'su', u'': 'se', u'': 'so',
@ -21,8 +40,11 @@ _roomaji_kana = {
} }
# Maps the small ya/yu/yo kana to their romanized sounds.  romanize() tests
# membership in this table in its youon branch, so every key must be a
# distinct single character.
_roomaji_youon = {
    # Hiragana
    u'ゃ': 'ya', u'ゅ': 'yu', u'ょ': 'yo',

    # Katakana
    u'ャ': 'ya', u'ュ': 'yu', u'ョ': 'yo',
}
# XXX If romanize() ever handles hiragana, it will need to make sure that the # XXX If romanize() ever handles hiragana, it will need to make sure that the
@ -33,6 +55,7 @@ _roomaji_small_kana = {
u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o', u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o',
} }
_roomaji_small_kana_combos = { _roomaji_small_kana_combos = {
u'ウィ': 'wi',
u'チェ': 'che', u'チェ': 'che',
u'シェ': 'she', u'シェ': 'she',
u'テァ': 'tha', u'ティ': 'ti', u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho', u'テァ': 'tha', u'ティ': 'ti', u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
@ -71,6 +94,8 @@ def romanize(string):
# lot, e.g. ピィ is "pii" # lot, e.g. ピィ is "pii"
characters.append(_roomaji_small_kana[char]) characters.append(_roomaji_small_kana[char])
last_kana = _roomaji_small_kana[char]
# Youon # Youon
elif char in _roomaji_youon: elif char in _roomaji_youon:
if not last_kana or last_kana[-1] != 'i' or last_kana == 'i': if not last_kana or last_kana[-1] != 'i' or last_kana == 'i':

View file

@ -0,0 +1,28 @@
# encoding: utf8
from nose.tools import *
import unittest
import pokedex.roomaji
def test_roomaji():
    # Table-driven check of pokedex.roomaji.romanize(): each entry pairs a
    # kana input with the romanization expected for it.
    cases = (
        (u'ヤミカラス', 'yamikarasu'),

        # Elongated vowel
        (u'イーブイ', 'iibui'),
        (u'ホーホー', 'hoohoo'),

        # Combined characters
        (u'ニャース', 'nyaasu'),
        (u'ジャ', 'ja'),
        (u'ぎゃくてん', 'gyakuten'),

        # Special katakana combinations
        (u'ラティアス', 'ratiasu'),
        (u'ウィー', 'wii'),
    )

    romanize = pokedex.roomaji.romanize
    for source, expected in cases:
        # The third argument is the message attached to the comparison.
        message = u"'%s' romanizes correctly" % expected
        assert_equal(romanize(source), expected, message)