Make edit-csv-as-yaml a bit more usable

- editing multiple files at once
- adding/reordering columns
- leaving out empty values
- writing int values without quotes (and they don't need quotes when edited)
Petr Viktorin 2011-09-24 22:04:12 +03:00
parent 6f8a0bdc2e
commit 9a2f5093d8
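
To make the new behaviour concrete: the intermediate YAML handed to $EDITOR for a small CSV would now look roughly like this (hypothetical file name and data; empty cells are left out of each row mapping and integer-looking cells are dumped without quotes):

    - name: pokemon.csv
      column_names:
      - id
      - identifier
      - height
      rows:
      - id: 1
        identifier: bulbasaur
        height: 7
      - id: 2
        identifier: ivysaur

Each edited file becomes one entry in this list, keyed by 'name', so several CSVs can be round-tripped in a single editor session and written back to their own files.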


@@ -3,7 +3,7 @@
 for you to fiddle with it, then convert back to csv and replace the original
 file.
-Run me as: $0 some_file.csv
+Run me as: $0 some_file.csv [other_file.csv ...]
 The editor used is $EDITOR, of course.
@@ -17,6 +17,7 @@ import os
 import subprocess
 import sys
 import tempfile
+import shlex
 
 try:
     import yaml
@@ -47,8 +48,10 @@ yaml.add_representer(
 )
 ### Do actual work!
-infilename, = sys.argv[1:]
-data = []
-with open(infilename) as infile:
-    reader = csv.reader(infile, lineterminator='\n')
+infilenames = sys.argv[1:]
+all_data = []
+for infilename in infilenames:
+    data = []
+    with open(infilename) as infile:
+        reader = csv.reader(infile, lineterminator='\n')
@@ -58,10 +61,25 @@ with open(infilename) as infile:
-    for row in reader:
-        datum = odict()
-        for col, value in zip(column_names, row):
-            datum[col] = value.decode('utf-8')
-        data.append(datum)
+        for row in reader:
+            datum = odict()
+            for col, value in zip(column_names, row):
+                # Skip empty values
+                if value:
+                    datum[col] = value.decode('utf-8')
+                    try:
+                        # Numbers to numbers
+                        if unicode(int(value)) == value:
+                            datum[col] = int(value)
+                    except ValueError:
+                        pass
+            data.append(datum)
+
+    file_info = odict((
+        ('name', infilename),
+        ('column_names', column_names),
+        ('rows', data),
+    ))
+    all_data.append(file_info)
 
 # Monkeypatch yaml to use > syntax for multiline text; easier to edit
 from yaml.emitter import Emitter
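
Pulled out of the hunk above for clarity, the new per-cell handling amounts to roughly this standalone Python 2 sketch (a paraphrase, not the script's code verbatim):

    # Skip empty strings entirely, and turn values that survive a round trip
    # through int() into real ints, so yaml dumps them without quotes.
    def convert_cell(value):
        if not value:
            return None                        # empty cells are simply left out
        cell = value.decode('utf-8')
        try:
            # Numbers to numbers; '007' != u'7', so zero-padded values stay strings
            if unicode(int(value)) == value:
                return int(value)
        except ValueError:
            pass
        return cell

    assert convert_cell('25') == 25
    assert convert_cell('007') == u'007'
    assert convert_cell('') is None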
@@ -76,7 +94,7 @@ Emitter.choose_scalar_style = new_choose_scalar_style
 
 # Write to a tempfile
 with tempfile.NamedTemporaryFile(suffix='.yml') as tmp:
-    yaml.safe_dump(data, tmp,
+    yaml.safe_dump(all_data, tmp,
         default_flow_style=False,
         allow_unicode=True,
         indent=4,
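
The body of that monkeypatch is outside the changed lines, but judging from the comment and the Emitter.choose_scalar_style = new_choose_scalar_style assignment in the hunk header above, it plausibly follows this pattern (a guess at the idea, not the commit's actual code):

    from yaml.emitter import Emitter

    _original_choose_scalar_style = Emitter.choose_scalar_style

    def new_choose_scalar_style(self):
        # Prefer the folded '>' block style for multi-line strings, which is
        # much easier to edit by hand; defer to PyYAML's default otherwise.
        if self.analysis is None:
            self.analysis = self.analyze_scalar(self.event.value)
        if self.analysis.multiline:
            return '>'
        return _original_choose_scalar_style(self)

    Emitter.choose_scalar_style = new_choose_scalar_style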
@@ -85,17 +103,21 @@ with tempfile.NamedTemporaryFile(suffix='.yml') as tmp:
     error_line = ''  # used on errors
     while True:
-        args = [os.environ['EDITOR'], tmp.name]
-        if 'vim' in os.environ['EDITOR']:
+        editor = shlex.split(os.environ['EDITOR'])
+        args = editor + [tmp.name]
+        if 'vim' in editor[0]:
             # vim has an arg for jumping to a line:
             args.append("+{0}".format(error_line))
+        elif 'kate' in editor[0]:
+            # so does kate!
+            args.append("-l {0}".format(error_line))
 
         # Run the user's editor and wait for it to close
         subprocess.Popen(args).wait()
 
         tmp.seek(0)
         try:
-            new_data = yaml.safe_load(tmp)
+            all_new_data = yaml.safe_load(tmp)
             break
         except yaml.YAMLError as e:
             if hasattr(e, 'problem_mark'):
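
The switch to shlex.split matters for $EDITOR values that include flags, which the old code passed to exec as a single bogus program name; the error_line tacked on as "+N" (vim) or "-l N" (kate) reopens the file at the spot of a YAML parse error. A rough illustration, with hypothetical values:

    import os
    import shlex

    os.environ['EDITOR'] = 'emacsclient -t'        # hypothetical setting
    editor = shlex.split(os.environ['EDITOR'])     # ['emacsclient', '-t']
    args = editor + ['/tmp/tmp1234.yml']           # hypothetical temp file path
    if 'vim' in editor[0]:
        args.append('+{0}'.format(12))             # vim: jump to the error line
    elif 'kate' in editor[0]:
        args.append('-l {0}'.format(12))           # kate: same idea, different flag
    print args                                     # ['emacsclient', '-t', '/tmp/tmp1234.yml']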
@@ -111,11 +133,15 @@ with tempfile.NamedTemporaryFile(suffix='.yml') as tmp:
             print "Press Enter to try again, or I guess ctrl-c to bail."
             raw_input()
 
-with open(infilename, 'wb') as outfile:
-    writer = csv.writer(outfile, lineterminator='\n')
-    writer.writerow([ column.encode('utf8') for column in column_names ])
-    for datum in new_data:
-        writer.writerow([
-            datum[column].encode('utf8') for column in column_names
-        ])
+for dct in all_new_data:
+    filename = dct['name']
+    new_data = dct['rows']
+    column_names = dct['column_names']
+    with open(filename, 'wb') as outfile:
+        writer = csv.writer(outfile, lineterminator='\n')
+        writer.writerow([ column.encode('utf8') for column in column_names ])
+        for datum in new_data:
+            writer.writerow([
+                unicode(datum.get(column, '')).encode('utf-8') for column in column_names
+            ])
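
On the way back to CSV, keys that were left out of a row mapping become empty cells again and ints are stringified, so the round trip stays lossless. A minimal Python 2 sketch of that row-serialisation expression, with hypothetical data:

    column_names = [u'id', u'identifier', u'height']   # hypothetical columns
    datum = {u'id': 2, u'identifier': u'ivysaur'}       # 'height' was empty, so it is absent
    row = [unicode(datum.get(column, '')).encode('utf-8')
           for column in column_names]
    print row                                           # ['2', 'ivysaur', '']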