From 9a2f5093d87f81ce087c167f63c91ba447005ee2 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Sat, 24 Sep 2011 22:04:12 +0300 Subject: [PATCH] Make edit-csv-as-yaml a bit more usable - editing multiple files - adding/reordering columns - leaves out empty values - int values don't need quotes & are written without them --- bin/edit-csv-as-yaml | 72 ++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/bin/edit-csv-as-yaml b/bin/edit-csv-as-yaml index d028dab..96d521c 100755 --- a/bin/edit-csv-as-yaml +++ b/bin/edit-csv-as-yaml @@ -3,7 +3,7 @@ for you to fiddle with it, then convert back to csv and replace the original file. -Run me as: $0 some_file.csv +Run me as: $0 some_file.csv [other_file.csv ...] The editor used is $EDITOR, of course. @@ -17,6 +17,7 @@ import os import subprocess import sys import tempfile +import shlex try: import yaml @@ -47,20 +48,37 @@ yaml.add_representer( ) ### Do actual work! -infilename, = sys.argv[1:] +infilenames = sys.argv[1:] -data = [] -with open(infilename) as infile: - reader = csv.reader(infile, lineterminator='\n') - column_names = [unicode(column) for column in next(reader)] +all_data = [] +for infilename in infilenames: + data = [] + with open(infilename) as infile: + reader = csv.reader(infile, lineterminator='\n') + column_names = [unicode(column) for column in next(reader)] - # Read data... - for row in reader: - datum = odict() - for col, value in zip(column_names, row): - datum[col] = value.decode('utf-8') + # Read data... + for row in reader: + datum = odict() + for col, value in zip(column_names, row): + # Skip empty values + if value: + datum[col] = value.decode('utf-8') + try: + # Numbers to numbers + if unicode(int(value)) == value: + datum[col] = int(value) + except ValueError: + pass - data.append(datum) + data.append(datum) + + file_info = odict(( + ('name', infilename), + ('column_names', column_names), + ('rows', data), + )) + all_data.append(file_info) # Monkeypatch yaml to use > syntax for multiline text; easier to edit @@ -76,7 +94,7 @@ Emitter.choose_scalar_style = new_choose_scalar_style # Write to a tempfile with tempfile.NamedTemporaryFile(suffix='.yml') as tmp: - yaml.safe_dump(data, tmp, + yaml.safe_dump(all_data, tmp, default_flow_style=False, allow_unicode=True, indent=4, @@ -85,17 +103,21 @@ with tempfile.NamedTemporaryFile(suffix='.yml') as tmp: error_line = '' # used on errors while True: - args = [os.environ['EDITOR'], tmp.name] - if 'vim' in os.environ['EDITOR']: + editor = shlex.split(os.environ['EDITOR']) + args = editor + [tmp.name] + if 'vim' in editor[0]: # vim has an arg for jumping to a line: args.append("+{0}".format(error_line)) + elif 'kate' in editor[0]: + # so does kate! + args.append("-l {0}".format(error_line)) # Run the user's editor and wait for it to close subprocess.Popen(args).wait() tmp.seek(0) try: - new_data = yaml.safe_load(tmp) + all_new_data = yaml.safe_load(tmp) break except yaml.YAMLError as e: if hasattr(e, 'problem_mark'): @@ -111,11 +133,15 @@ with tempfile.NamedTemporaryFile(suffix='.yml') as tmp: print "Press Enter to try again, or I guess ctrl-c to bail." raw_input() -with open(infilename, 'wb') as outfile: - writer = csv.writer(outfile, lineterminator='\n') - writer.writerow([ column.encode('utf8') for column in column_names ]) +for dct in all_new_data: + filename = dct['name'] + new_data = dct['rows'] + column_names = dct['column_names'] + with open(filename, 'wb') as outfile: + writer = csv.writer(outfile, lineterminator='\n') + writer.writerow([ column.encode('utf8') for column in column_names ]) - for datum in new_data: - writer.writerow([ - datum[column].encode('utf8') for column in column_names - ]) + for datum in new_data: + writer.writerow([ + unicode(datum.get(column, '')).encode('utf-8') for column in column_names + ])