Resolve some Python 3 encoding issues on Windows (#244)

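When you open a file in text mode in Python 3 without an explicit encoding, it defaults to the system's locale encoding to decode and encode the file's contents. That is typically UTF-8 on Linux systems (good) but a legacy ANSI code page on Windows, such as Windows-1252 or Windows-1251 depending on the locale (bad).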

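We need to add explicit `encoding="utf-8"` arguments to open() calls when we open CSV files for reading or writing. To complicate matters, the csv module works only with byte strings in Python 2, and only with unicode strings in Python 3, so we can't just blindly use `io.open` everywhere. Python 2's built-in open() also does not accept an `encoding` argument, so the read paths branch on `six.PY2` and pass the encoding only on Python 3.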
This commit is contained in:
skylar 2018-08-23 02:01:58 -04:00 committed by Andrew Ekstedt
parent 51af10b995
commit d17a772b4f
2 changed files with 15 additions and 5 deletions

View file

@ -210,7 +210,10 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s
try: try:
csvpath = "%s/%s.csv" % (directory, table_name) csvpath = "%s/%s.csv" % (directory, table_name)
if six.PY2:
csvfile = open(csvpath, 'r') csvfile = open(csvpath, 'r')
else:
csvfile = open(csvpath, 'r', encoding="utf8")
except IOError: except IOError:
# File doesn't exist; don't load anything! # File doesn't exist; don't load anything!
print_done('missing?') print_done('missing?')
@ -416,7 +419,7 @@ def dump(session, tables=[], directory=None, verbose=False, langs=None):
# CSV module only works with bytes on 2 and only works with text on 3! # CSV module only works with bytes on 2 and only works with text on 3!
if six.PY3: if six.PY3:
writer = csv.writer(open(filename, 'w', newline=''), lineterminator='\n') writer = csv.writer(open(filename, 'w', newline='', encoding="utf8"), lineterminator='\n')
columns = [col.name for col in table.columns] columns = [col.name for col in table.columns]
else: else:
writer = csv.writer(open(filename, 'wb'), lineterminator='\n') writer = csv.writer(open(filename, 'wb'), lineterminator='\n')

View file

@ -262,11 +262,15 @@ class Translations(object):
def reader_for_class(self, cls, reader_class=csv.reader): def reader_for_class(self, cls, reader_class=csv.reader):
tablename = cls.__table__.name tablename = cls.__table__.name
csvpath = os.path.join(self.csv_directory, tablename + '.csv') csvpath = os.path.join(self.csv_directory, tablename + '.csv')
return reader_class(open(csvpath, 'r'), lineterminator='\n') if six.PY2:
read = open(csvpath, 'r')
else:
read = open(csvpath, 'r', encoding='utf-8')
return reader_class(read, lineterminator='\n')
def writer_for_lang(self, lang): def writer_for_lang(self, lang):
csvpath = os.path.join(self.translation_directory, '%s.csv' % lang) csvpath = os.path.join(self.translation_directory, '%s.csv' % lang)
return csv.writer(io.open(csvpath, 'w', newline=''), lineterminator='\n') return csv.writer(io.open(csvpath, 'w', newline='', encoding="utf8"), lineterminator='\n')
def yield_source_messages(self, language_id=None): def yield_source_messages(self, language_id=None):
"""Yield all messages from source CSV files """Yield all messages from source CSV files
@ -307,7 +311,10 @@ class Translations(object):
""" """
path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang) path = os.path.join(self.csv_directory, 'translations', '%s.csv' % lang)
try: try:
if six.PY2:
file = open(path, 'r') file = open(path, 'r')
else:
file = open(path, 'r', encoding="utf8")
except IOError: except IOError:
return () return ()
return yield_translation_csv_messages(file) return yield_translation_csv_messages(file)