# Encoding: UTF-8
"""Rewrite markdown links from [Label]{category:thing} to []{category:thing}

A link keeps its label only when the label differs from the name of the thing
it points to; such links are reported on stdout so they can be reviewed.

There was a version of this script that rewrote stuff from an even earlier
format. Git log should find it without problems.

This is an unmaintained one-shot script, only included in the repo for
reference.
"""

from functools import partial
import sys
import re

from sqlalchemy.orm.exc import MultipleResultsFound
from sqlalchemy.sql.expression import func

from pokedex.db import connect, tables, util

# Text type used to coerce column values: `unicode` on Python 2, `str` on 3.
try:
    text_type = unicode
except NameError:
    text_type = str

# Matches a single "expected" character.  Not referenced by any code in this
# script.  Written as an escaped u"" literal (same pattern text as the former
# raw-unicode literal) so the file parses on both Python 2 and Python 3.
sanity_re = re.compile(
    u"^[-A-Za-z0-9 é\\[\\]{}.%':;,×/()\\\"|–`—!*♂♀\\\\]$")

# RE that matches anything that might look like a link
fuzzy_link_re = re.compile(r"""
    \[
        [^]]+
    \]?
    \{
        [^}]+
    \}""", re.VERBOSE)

# Very specific RE that matches links that appear in source Markdown strings
strict_link_re = re.compile(r"""
            \[
                (?P<label> [-A-Za-z 0-9'.]{,30} )
            \]
            \{
                (?P<category> [a-z]{,20} )
                :
                (?P<target> [-a-z 0-9]{,40} )
            \}
        """, re.VERBOSE)

# Format of the resulting links
result_link_re = re.compile(r"""
            ^
            \[
                (?P<label> [^]]* )
            \]
            \{
                (?P<category> [a-z]+ )
                :
                (?P<target> [-a-z0-9]+ )
            \}
            $
        """, re.VERBOSE)

# Only rows whose local_language_id equals this value are rewritten.
english_id = 9

# Whole-link substitutions that bypass the automatic rewriting entirely.
manual_replacements = {
    '[Pewter Museum of Science]{location:pewter-city}':
        'the Museum of Science in {location:pewter-city}',
    '[Oreburgh Mining Museum]{location:mining-museum}':
        '{location:mining-museum} in {location:oreburgh-city}',
}

# Link category -> name of the mapped class in `pokedex.db.tables` that the
# link target is looked up in.  The 'mechanic' category has no table and is
# handled separately in get_replacement().
category_table_names = {
    'item': 'Item',
    'ability': 'Ability',
    'move': 'Move',
    'type': 'Type',
    'pokemon': 'Pokemon',
    'location': 'Location',
}


def is_md_col(column):
    """Return True if `column` is marked as markdown in its `info` dict."""
    return column.info.get('format') == 'markdown'


def _print_link_diagnostics(entire_text, matchobj):
    """Print the full source text and the offending link before an error."""
    print('')
    print(repr(entire_text))
    print(repr(matchobj.group(0)))


def get_replacement(session, entire_text, context, matchobj):
    """Return the replacement text for one link matched by `strict_link_re`.

    Intended to be used (via functools.partial) as the callable passed to
    `strict_link_re.sub`.

    :param session: database session handed to `util.get` for lookups
    :param entire_text: the complete string being rewritten; only printed
        as diagnostics when a lookup fails
    :param context: short description of where the text came from; printed
        when a link keeps a label that differs from the target's name
    :param matchobj: match object with `label`, `category` and `target`
        groups
    :raises ValueError: if the link's category is not supported
    :raises AssertionError: if a generated link does not match
        `result_link_re`
    """
    whole_link = matchobj.group(0)
    if whole_link in manual_replacements:
        # Hand-written replacements are free-form prose, so they are returned
        # as-is and are not checked against result_link_re.
        return manual_replacements[whole_link]

    label = matchobj.group('label')
    category = matchobj.group('category')
    # An empty target means "same as the label".
    target = matchobj.group('target') or label

    if category == 'mechanic':
        # Mechanics are not looked up; just normalize the target.
        target = target.lower().replace(' ', '-')
        wanted_label = ''
    else:
        try:
            table = getattr(tables, category_table_names[category])
        except KeyError:
            _print_link_diagnostics(entire_text, matchobj)
            raise ValueError('Category %s not implemented' % category)
        try:
            wanted_label = util.get(session, table, target).name
        except Exception:
            # Show which text/link caused the failure, then propagate it.
            _print_link_diagnostics(entire_text, matchobj)
            raise

    if wanted_label.lower() == label.lower():
        # The label only repeats the target's name, so drop it.
        result = "[]{%s:%s}" % (category, target)
    else:
        result = "[%s]{%s:%s}" % (label, category, target)
        if wanted_label:
            # Report custom labels so they can be reviewed by hand.
            print('')
            print(context)
            print('%-40s %s != %s' % (whole_link, label, wanted_label))

    assert result_link_re.match(result), result
    return result


def main(argv):
    """Rewrite links in every English markdown translation column.

    Changes are committed only when the first element of `argv` is
    '--commit'; otherwise the session is left uncommitted and a hint is
    printed.

    :param argv: command-line arguments, without the program name
    """
    session = connect()

    for cls in tables.mapped_classes:
        for translation_class in cls.translation_classes:
            md_columns = [
                column for column in translation_class.__table__.c
                if is_md_col(column)
            ]
            if not md_columns:
                continue

            for row in session.query(translation_class):
                if row.local_language_id != english_id:
                    continue

                for column in md_columns:
                    markdown = getattr(row, column.name)
                    if not markdown:
                        continue
                    text = text_type(markdown)

                    # Make sure everything that remotely looks like a link
                    # is one
                    links = fuzzy_link_re.findall(text)
                    if not links:
                        continue
                    for link in links:
                        strict_matches = strict_link_re.findall(link)
                        assert strict_matches, (strict_matches, [link])

                    # Do the replacement
                    context = '%s %s %s' % (
                        translation_class.__name__,
                        row.foreign_id,
                        column.name,
                    )
                    replaced = strict_link_re.sub(
                        partial(get_replacement, session, text, context),
                        text,
                    )
                    setattr(row, column.name, replaced)

    if argv and argv[0] == '--commit':
        session.commit()
        print('Committed')
    else:
        print('Run with --commit to commit changes')


if __name__ == '__main__':
    main(sys.argv[1:])