From d17a772b4f25f2f2d4be175998009c842ca64dc7 Mon Sep 17 00:00:00 2001 From: skylar Date: Thu, 23 Aug 2018 02:01:58 -0400 Subject: [PATCH] Resolve some Python 3 encoding issues on Windows (#244) When you open a file in Python 3, it defaults to using the system charset to encode the file, which is typically UTF-8 on Linux systems (good) but Windows-1252 on Windows (bad). We need to add explicit encoding=utf-8 arguments to open() calls when we open CSV files for reading or writing. To complicate matters, the csv module works only with byte strings in Python 2, and only with unicode strings in Python 3, so we can't just blindly use `io.open` everywhere. --- pokedex/db/load.py | 7 +++++-- pokedex/db/translations.py | 13 ++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pokedex/db/load.py b/pokedex/db/load.py index 6e43429..f34ee46 100644 --- a/pokedex/db/load.py +++ b/pokedex/db/load.py @@ -210,7 +210,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s try: csvpath = "%s/%s.csv" % (directory, table_name) - csvfile = open(csvpath, 'r') + if six.PY2: + csvfile = open(csvpath, 'r') + else: + csvfile = open(csvpath, 'r', encoding="utf8") except IOError: # File doesn't exist; don't load anything! print_done('missing?') @@ -416,7 +419,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None): # CSV module only works with bytes on 2 and only works with text on 3! 
if six.PY3: - writer = csv.writer(open(filename, 'w', newline=''), lineterminator='\n') + writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n') columns = [col.name for col in table.columns] else: writer = csv.writer(open(filename, 'wb'), lineterminator='\n') diff --git a/pokedex/db/translations.py b/pokedex/db/translations.py index 606a66d..8f55473 100755 --- a/pokedex/db/translations.py +++ b/pokedex/db/translations.py @@ -262,11 +262,15 @@ class Translations(object): def reader_for_class(self, cls, reader_class=csv.reader): tablename = cls.__table__.name csvpath = os.path.join(self.csv_directory, tablename + '.csv') - return reader_class(open(csvpath, 'r'), lineterminator='\n') + if six.PY2: + read = open(csvpath, 'r') + else: + read = open(csvpath, 'r', encoding='utf-8') + return reader_class(read, lineterminator='\n') def writer_for_lang(self, lang): csvpath = os.path.join(self.translation_directory, '%s.csv' % lang) - return csv.writer(io.open(csvpath, 'w', newline=''), lineterminator='\n') + return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n') def yield_source_messages(self, language_id=None): """Yield all messages from source CSV files @@ -307,7 +311,10 @@ class Translations(object): """ path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang) try: - file = open(path, 'r') + if six.PY2: + file = open(path, 'r') + else: + file = open(path, 'r', encoding="utf8") except IOError: return () return yield_translation_csv_messages(file)