mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Added hiragana support to roomaji.py. #100
This commit is contained in:
parent
c66fe4042c
commit
28be0b3c60
2 changed files with 54 additions and 1 deletions
|
@ -2,6 +2,25 @@
|
||||||
"""Provides `romanize()` for romanizing simple Japanese text."""
|
"""Provides `romanize()` for romanizing simple Japanese text."""
|
||||||
|
|
||||||
_roomaji_kana = {
|
_roomaji_kana = {
|
||||||
|
# Hiragana
|
||||||
|
u'あ': 'a', u'い': 'i', u'う': 'u', u'え': 'e', u'お': 'o',
|
||||||
|
u'か': 'ka', u'き': 'ki', u'く': 'ku', u'け': 'ke', u'こ': 'ko',
|
||||||
|
u'さ': 'sa', u'し': 'shi', u'す': 'su', u'せ': 'se', u'そ': 'so',
|
||||||
|
u'た': 'ta', u'ち': 'chi', u'つ': 'tsu', u'て': 'te', u'と': 'to',
|
||||||
|
u'な': 'na', u'に': 'ni', u'ぬ': 'nu', u'ね': 'ne', u'の': 'no',
|
||||||
|
u'は': 'ha', u'ひ': 'hi', u'ふ': 'fu', u'へ': 'he', u'ほ': 'ho',
|
||||||
|
u'ま': 'ma', u'み': 'mi', u'む': 'mu', u'め': 'me', u'も': 'mo',
|
||||||
|
u'や': 'ya', u'ゆ': 'yu', u'よ': 'yo',
|
||||||
|
u'ら': 'ra', u'り': 'ri', u'る': 'ru', u'れ': 're', u'ろ': 'ro',
|
||||||
|
u'わ': 'wa', u'ゐ': 'wi', u'ゑ': 'we', u'を': 'wo',
|
||||||
|
u'ん': 'n',
|
||||||
|
u'が': 'ga', u'ぎ': 'gi', u'ぐ': 'gu', u'げ': 'ge', u'ご': 'go',
|
||||||
|
u'ざ': 'za', u'じ': 'ji', u'ず': 'zu', u'ぜ': 'ze', u'ぞ': 'zo',
|
||||||
|
u'だ': 'da', u'ぢ': 'ji', u'づ': 'dzu', u'で': 'de', u'ど': 'do',
|
||||||
|
u'ば': 'ba', u'び': 'bi', u'ぶ': 'bu', u'べ': 'be', u'ぼ': 'bo',
|
||||||
|
u'ぱ': 'pa', u'ぴ': 'pi', u'ぷ': 'pu', u'ぺ': 'pe', u'ぽ': 'po',
|
||||||
|
|
||||||
|
# Katakana
|
||||||
u'ア': 'a', u'イ': 'i', u'ウ': 'u', u'エ': 'e', u'オ': 'o',
|
u'ア': 'a', u'イ': 'i', u'ウ': 'u', u'エ': 'e', u'オ': 'o',
|
||||||
u'カ': 'ka', u'キ': 'ki', u'ク': 'ku', u'ケ': 'ke', u'コ': 'ko',
|
u'カ': 'ka', u'キ': 'ki', u'ク': 'ku', u'ケ': 'ke', u'コ': 'ko',
|
||||||
u'サ': 'sa', u'シ': 'shi', u'ス': 'su', u'セ': 'se', u'ソ': 'so',
|
u'サ': 'sa', u'シ': 'shi', u'ス': 'su', u'セ': 'se', u'ソ': 'so',
|
||||||
|
@ -21,8 +40,11 @@ _roomaji_kana = {
|
||||||
}
|
}
|
||||||
|
|
||||||
_roomaji_youon = {
|
_roomaji_youon = {
|
||||||
|
# Hiragana
|
||||||
|
u'ゃ': 'ya', u'ゅ': 'yu', u'ょ': 'yo',
|
||||||
|
|
||||||
|
# Katakana
|
||||||
u'ャ': 'ya', u'ュ': 'yu', u'ョ': 'yo',
|
u'ャ': 'ya', u'ュ': 'yu', u'ョ': 'yo',
|
||||||
#u'ゃ': 'ya', u'ゅ': 'yu', u'ょ': 'yo',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# XXX If romanize() ever handles hiragana, it will need to make sure that the
|
# XXX If romanize() ever handles hiragana, it will need to make sure that the
|
||||||
|
@ -33,6 +55,7 @@ _roomaji_small_kana = {
|
||||||
u'ァ': 'a', u'ィ': 'i', u'ゥ': 'u', u'ェ': 'e', u'ォ': 'o',
|
u'ァ': 'a', u'ィ': 'i', u'ゥ': 'u', u'ェ': 'e', u'ォ': 'o',
|
||||||
}
|
}
|
||||||
_roomaji_small_kana_combos = {
|
_roomaji_small_kana_combos = {
|
||||||
|
u'ウィ': 'wi',
|
||||||
u'チェ': 'che',
|
u'チェ': 'che',
|
||||||
u'シェ': 'she',
|
u'シェ': 'she',
|
||||||
u'テァ': 'tha', u'ティ': 'ti', u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
|
u'テァ': 'tha', u'ティ': 'ti', u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
|
||||||
|
@ -71,6 +94,8 @@ def romanize(string):
|
||||||
# lot, e.g. ピィ is "pii"
|
# lot, e.g. ピィ is "pii"
|
||||||
characters.append(_roomaji_small_kana[char])
|
characters.append(_roomaji_small_kana[char])
|
||||||
|
|
||||||
|
last_kana = _roomaji_small_kana[char]
|
||||||
|
|
||||||
# Youon
|
# Youon
|
||||||
elif char in _roomaji_youon:
|
elif char in _roomaji_youon:
|
||||||
if not last_kana or last_kana[-1] != 'i' or last_kana == 'i':
|
if not last_kana or last_kana[-1] != 'i' or last_kana == 'i':
|
||||||
|
|
28
pokedex/tests/test_roomaji.py
Normal file
28
pokedex/tests/test_roomaji.py
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# encoding: utf8
|
||||||
|
from nose.tools import *
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import pokedex.roomaji
|
||||||
|
|
||||||
|
|
||||||
|
def test_roomaji():
    """Check that `romanize()` yields the expected roomaji for sample kana.

    Each case pairs a kana string with the romanization expected from
    `pokedex.roomaji.romanize()`.  The cases are grouped by the feature
    they exercise: plain kana, elongated vowels, youon combinations
    (katakana and hiragana), and special small-kana combinations.
    """
    tests = [
        (u'ヤミカラス', 'yamikarasu'),

        # Elongated vowel
        (u'イーブイ', 'iibui'),
        (u'ホーホー', 'hoohoo'),

        # Combined characters
        (u'ニャース', 'nyaasu'),
        (u'ジャ', 'ja'),
        (u'ぎゃくてん', 'gyakuten'),

        # Special katakana combinations
        (u'ラティアス', 'ratiasu'),
        (u'ウィー', 'wii'),
    ]

    for kana, roomaji in tests:
        result = pokedex.roomaji.romanize(kana)
        # The third argument is the message shown if the comparison fails.
        assert_equal(result, roomaji, u"'%s' romanizes correctly" % roomaji)
|
Loading…
Reference in a new issue