veekun_pokedex/scripts/disambiguate-location-identifiers.py
Petr Viktorin ae5225c932 Automatically disambiguate location identifiers
See the script docstring for what was done

Victory Roads and Halls of Origin should probably be hand-tuned later
2011-04-12 07:23:11 +03:00

80 lines
No EOL
2.7 KiB
Python

# Encoding: UTF-8
"""Automatically disambiguate location identifiers
This is an unmaintained one-shot script, only included in the repo for reference.
Disambiguates identifiers that aren't unique, Routes and Sea Routes, and
generic names like 'villa' or 'game corner' that could appear in future
generations again.
It does this by prepending the region name and, if that is not enough, by
appending numbers.
"""
import sys
import re
from collections import defaultdict
from pokedex.db import connect, tables
# Identifiers of plain routes and sea routes, e.g. 'route-1' or
# 'sea-route-134'; these are always flagged for disambiguation.
ambiguous_re = re.compile(r'^(sea-)?route-\d+$')

# Generic place names that are flagged for disambiguation even when they are
# currently unique.
ambiguous_set = set((
    'foreign-building',
    'game-corner',
    'global-terminal',
    'lighthouse',
    'restaurant',
    'flower-shop',
    'cycle-shop',
    'cafe',
    'shopping-mall',
    'villa',
))
def main(*argv):
    """Disambiguate location identifiers in the pokedex database.

    Groups every ``tables.Location`` row by its identifier and flags a group
    when it holds more than one location, when the identifier matches
    ``ambiguous_re``, or when it is listed in ``ambiguous_set``.  Each flagged
    location gets its region's identifier prepended
    (``<region>-<identifier>``); locations that still share a name within one
    region (or that have no region) additionally get ``-1``, ``-2``, ...
    appended, in order of location id.

    One report line per identifier is written to stdout: the number of
    locations, a ``*`` marker for flagged groups, the old identifier and,
    for flagged groups, the resulting identifiers.

    Arguments:
        *argv: command-line arguments without the script name.  Pending
            changes are committed only when the first argument is
            ``--commit``; otherwise the run just reports.
    """
    write = sys.stdout.write
    session = connect()

    # Group locations by their current identifier.  Ordering by id makes the
    # numbering of same-named locations follow their ids.
    location_dict = defaultdict(list)
    query = session.query(tables.Location).order_by(tables.Location.id)
    for location in query:
        location_dict[location.identifier].append(location)

    changes = False
    for identifier, locations in sorted(location_dict.items()):
        disambiguate = any((
            len(locations) > 1,
            ambiguous_re.match(identifier),
            identifier in ambiguous_set,
        ))
        # ' *'[disambiguate] yields '*' for flagged groups, ' ' otherwise.
        write('%s %s %s' % (len(locations), ' *'[disambiguate], identifier))
        if disambiguate:
            # NOTE(review): this separator is an empty string in the file as
            # given, so it only produces a second space between the old and
            # new identifiers; a non-ASCII marker may have been lost here --
            # confirm against the repository.
            write(' ')
            write(u''.encode('utf-8'))

            # Split the group by region; region-less locations share the
            # None key.
            by_region = defaultdict(list)
            for location in locations:
                if location.region:
                    by_region[location.region.identifier].append(location)
                else:
                    by_region[None].append(location)

            # Sort regions (region-less first) so the report order does not
            # depend on dict ordering.
            region_groups = sorted(
                by_region.items(), key=lambda item: item[0] or '')
            for region_identifier, region_locations in region_groups:
                if region_identifier:
                    new_identifier = '%s-%s' % (region_identifier, identifier)
                else:
                    # No region to prepend
                    new_identifier = identifier

                if len(region_locations) == 1:
                    # The region was enough
                    renames = [(region_locations[0], new_identifier)]
                else:
                    # Need to number the locations :(
                    renames = [
                        (member, '%s-%s' % (new_identifier, number))
                        for number, member
                        in enumerate(region_locations, start=1)
                    ]

                for location, final_identifier in renames:
                    write(' %s' % final_identifier)
                    # Only a real rename counts as a change; a lone
                    # region-less location keeps its identifier and must not
                    # trigger the "--commit" prompt on every run.
                    if location.identifier != final_identifier:
                        location.identifier = final_identifier
                        changes = True
        write('\n')

    if not changes:
        write('No changes needed\n')
    elif argv and argv[0] == '--commit':
        session.commit()
        write('Committed\n')
    else:
        write('Run with --commit to commit changes\n')
if __name__ == '__main__':
    # Hand main() the command-line arguments without the script name.
    cli_arguments = sys.argv[1:]
    main(*cli_arguments)