Fix handling of some fullwidth Latin characters in romanization.

Including fullwidth 0, which was actually a problem.
This commit is contained in:
Zhorken 2010-09-24 04:18:20 -04:00
parent 2ab807a647
commit 77c7cb9952

View file

@@ -86,11 +86,12 @@ def romanize(string):
last_char = None # Used for small kana combos
for char in string:
# Full-width Latin
if ord(char) >= 0xff11 and ord(char) <= 0xff5e:
if 0xff01 <= ord(char) <= 0xff5e:
if last_kana == 'sokuon':
raise ValueError("Sokuon cannot precede Latin characters.")
char = chr(ord(char) - 0xff11 + 0x31)
# XXX Real Unicode decomposition would be nicer
char = chr(ord(char) - 0xff01 + 0x21)
characters.append(char)
last_kana = None