Make edit-csv-as-yaml a bit more usable

- supports editing multiple files
- supports adding/reordering columns
- leaves out empty values
- int values don't need quotes and are written without them
2024-08-20 18:16:34 +00:00 · 2011-09-24 22:04:12 +03:00 · 2011-09-24 22:04:12 +03:00 · f72b6a8fce
commit f72b6a8fce
parent b96b5aa956
1 changed file with 49 additions and 23 deletions
--- a/bin/edit-csv-as-yaml
+++ b/bin/edit-csv-as-yaml
@@ -3,7 +3,7 @@
 for you to fiddle with it, then convert back to csv and replace the original
 file.
-Run me as: $0 some_file.csv
+Run me as: $0 some_file.csv [other_file.csv ...]
 The editor used is $EDITOR, of course.
@@ -17,6 +17,7 @@ import os
 import subprocess
 import sys
 import tempfile
 import shlex
 try:
    import yaml
@@ -47,10 +48,12 @@ yaml.add_representer(
 )
 ### Do actual work!
-infilename, = sys.argv[1:]
+infilenames = sys.argv[1:]
-data = []
+all_data = []
-with open(infilename) as infile:
+for infilename in infilenames:
    data = []
    with open(infilename) as infile:
        reader = csv.reader(infile, lineterminator='\n')
        column_names = [unicode(column) for column in next(reader)]
@@ -58,10 +61,25 @@ with open(infilename) as infile:
        for row in reader:
            datum = odict()
            for col, value in zip(column_names, row):
                # Skip empty values
                if value:
                    datum[col] = value.decode('utf-8')
                    try:
                        # Numbers to numbers
                        if unicode(int(value)) == value:
                            datum[col] = int(value)
                    except ValueError:
                        pass
            data.append(datum)
        file_info = odict((
                ('name', infilename),
                ('column_names', column_names),
                ('rows', data),
            ))
    all_data.append(file_info)
 # Monkeypatch yaml to use > syntax for multiline text; easier to edit
 from yaml.emitter import Emitter
@@ -76,7 +94,7 @@ Emitter.choose_scalar_style = new_choose_scalar_style
 # Write to a tempfile
 with tempfile.NamedTemporaryFile(suffix='.yml') as tmp:
-    yaml.safe_dump(data, tmp,
+    yaml.safe_dump(all_data, tmp,
        default_flow_style=False,
        allow_unicode=True,
        indent=4,
@@ -85,17 +103,21 @@ with tempfile.NamedTemporaryFile(suffix='.yml') as tmp:
    error_line = ''  # used on errors
    while True:
-        args = [os.environ['EDITOR'], tmp.name]
+        editor = shlex.split(os.environ['EDITOR'])
-        if 'vim' in os.environ['EDITOR']:
+        args = editor + [tmp.name]
        if 'vim' in editor[0]:
            # vim has an arg for jumping to a line:
            args.append("+{0}".format(error_line))
        elif 'kate' in editor[0]:
            # so does kate!
            args.append("-l {0}".format(error_line))
        # Run the user's editor and wait for it to close
        subprocess.Popen(args).wait()
        tmp.seek(0)
        try:
-            new_data = yaml.safe_load(tmp)
+            all_new_data = yaml.safe_load(tmp)
            break
        except yaml.YAMLError as e:
            if hasattr(e, 'problem_mark'):
@@ -111,11 +133,15 @@ with tempfile.NamedTemporaryFile(suffix='.yml') as tmp:
            print "Press Enter to try again, or I guess ctrl-c to bail."
            raw_input()
-with open(infilename, 'wb') as outfile:
+for dct in all_new_data:
    filename = dct['name']
    new_data = dct['rows']
    column_names = dct['column_names']
    with open(filename, 'wb') as outfile:
        writer = csv.writer(outfile, lineterminator='\n')
        writer.writerow([ column.encode('utf8') for column in column_names ])
        for datum in new_data:
            writer.writerow([
-            datum[column].encode('utf8') for column in column_names
+                unicode(datum.get(column, '')).encode('utf-8') for column in column_names
            ])