veekun_pokedex/scripts/markdown-identifiers.py

169 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Encoding: UTF-8
"""Rewrite markdown links from [Label]{category:thing} to just {category:thing}
There was a version of this script that rewrote stuff from an even earlier
format. Git log should find it without problems.
This is an unmaintained one-shot script, only included in the repo for
reference.
"""
from functools import partial
import sys
import re
from sqlalchemy.orm.exc import MultipleResultsFound
from sqlalchemy.sql.expression import func
from pokedex.db import connect, tables, util
# Character whitelist: ASCII letters, digits, space, a set of punctuation
# characters, plus é, ×, —, ♂ and ♀.  The class sits between ^ and $ with no
# quantifier, so the pattern matches exactly one character from the set.
# The ur"" prefix is Python 2 syntax.
# NOTE(review): nothing in the visible part of this file references sanity_re;
# presumably a leftover from the earlier version of the script -- confirm.
sanity_re = re.compile(ur"^[-A-Za-z0-9 é\[\]{}.%':;,×/()\"|`—!*♂♀\\]$")
# RE that matches anything that might look like a link:
# "[", one or more characters other than "]", an optional "]", then "{",
# one or more characters other than "}", and a closing "}".
# Compiled with re.VERBOSE, so the line breaks inside the pattern are layout
# only.  main() uses it to find link-like spans and then requires each span to
# contain a strict_link_re match.
fuzzy_link_re = re.compile(r"""
\[
[^]]+
\]?
\{
[^}]+
\}""", re.VERBOSE)
# Very specific RE that matches links that appear in source Markdown strings,
# i.e. [label]{category:target}.  Named groups:
#   label    - up to 30 characters: ASCII letters, digits, space, "-", "'", "."
#   category - up to 20 lowercase ASCII letters
#   target   - up to 40 characters: lowercase ASCII letters, digits, space, "-"
# Every group is quantified with {,N}, so each of them may also be empty.
# The pattern is not anchored; this file uses it with findall() and sub() to
# locate links inside longer text.
strict_link_re = re.compile(r"""
\[
(?P<label>
[-A-Za-z 0-9'.]{,30}
)
\]
\{
(?P<category>
[a-z]{,20}
)
:
(?P<target>
[-a-z 0-9]{,40}
)
\}
""", re.VERBOSE)
# Format of the resulting links.  The whole string must be
# [label]{category:target}, where label is any run of characters other than
# "]" (possibly empty), category is one or more lowercase ASCII letters, and
# target is one or more lowercase ASCII letters, digits or hyphens (no spaces).
# get_replacement() checks the links it builds against this pattern.
result_link_re = re.compile(r"""
^
\[
(?P<label>
[^]]*
)
\]
\{
(?P<category>
[a-z]+
)
:
(?P<target>
[-a-z0-9]+
)
\}
$
""", re.VERBOSE)
# Value of local_language_id that main() requires of a translation row; rows
# with any other value are skipped.
# NOTE(review): presumably the database ID of the English language -- confirm
# against the languages table.
english_id = 9
# Hand-written replacements, keyed by the exact text of a matched link.
# get_replacement() looks the matched text up here first and, on a hit, uses
# the mapped string instead of generating a link.
manual_replacements = {
    '[Pewter Museum of Science]{location:pewter-city}':
        'the Museum of Science in {location:pewter-city}',
    '[Oreburgh Mining Museum]{location:mining-museum}':
        '{location:mining-museum} in {location:oreburgh-city}',
}
def is_md_col(column):
    """Tell whether *column* is tagged as markdown.

    A column counts as markdown when its ``info`` mapping has a ``'format'``
    entry equal to ``'markdown'``; a missing entry yields False.
    """
    declared_format = column.info.get('format')
    return declared_format == 'markdown'
# Maps a link category to the name of the attribute on ``tables`` that holds
# the linked objects.  The attribute is resolved with getattr() at call time.
_CATEGORY_TABLE_NAMES = {
    'item': 'Item',
    'ability': 'Ability',
    'move': 'Move',
    'type': 'Type',
    'pokemon': 'Pokemon',
    'location': 'Location',
}


def _print_link_context(entire_text, matchobj):
    """Print a blank line, then the repr of the whole text and of the link."""
    # Single-argument parenthesised prints produce the same output under
    # Python 2's print statement as the bare form.
    print('')
    print(repr(entire_text))
    print(repr(matchobj.group(0)))


def get_replacement(session, entire_text, context, matchobj):
    """Return the replacement text for one link matched by strict_link_re.

    main() binds the first three arguments with functools.partial and passes
    the result to strict_link_re.sub() as the replacement callback.

    Arguments:
    session -- passed through to util.get() for the lookup
    entire_text -- the full text being rewritten; only printed for diagnostics
    context -- string printed when a link's label differs from the name of
        the object it points to
    matchobj -- regex match providing the 'label', 'category' and 'target'
        groups

    Returns the string that should replace the matched link:
    - the entry from manual_replacements, if the matched text is a key there;
    - otherwise "[]{category:target}" when the label equals the looked-up
      object's name case-insensitively, else "[label]{category:target}".
    For the 'mechanic' category nothing is looked up; the target is
    lower-cased and its spaces are replaced with hyphens.

    Raises ValueError for a category that is neither 'mechanic' nor one of the
    known tables, AssertionError when a generated link does not match
    result_link_re, and re-raises whatever util.get() (or reading ``.name``)
    raises after printing the offending text.
    """
    label = matchobj.group('label')
    category = matchobj.group('category')
    # An empty target falls back to the label text.
    target = matchobj.group('target') or label
    original_link = matchobj.group(0)

    # NOTE(review): the indentation of this block was lost in the source; this
    # assumes the format check below applies only to generated links, since
    # the manual replacement texts cannot match result_link_re.
    manual = manual_replacements.get(original_link)
    if manual is not None:
        return manual

    if category == 'mechanic':
        target = target.lower().replace(' ', '-')
        # No database object to compare against.
        wanted_label = ''
    else:
        table_name = _CATEGORY_TABLE_NAMES.get(category)
        if table_name is None:
            _print_link_context(entire_text, matchobj)
            raise ValueError('Category %s not implemented' % category)
        table = getattr(tables, table_name)
        try:
            wanted_label = util.get(session, table, target).name
        except Exception:
            # Show which text and link failed, then let the error propagate.
            _print_link_context(entire_text, matchobj)
            raise

    if wanted_label.lower() == label.lower():
        result = "[]{%s:%s}" % (category, target)
    else:
        result = "[%s]{%s:%s}" % (label, category, target)
        if wanted_label:
            # Report a label that differs from the object's name.  The link
            # and the comparison share one line, separated by a space, which
            # is what a trailing-comma print followed by a second print emits.
            print('')
            print(context)
            print('%-40s %s != %s' % (original_link, label, wanted_label))

    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    if not result_link_re.match(result):
        raise AssertionError(result)
    return result
def _rewrite_column(session, translation_class, row, column):
    """Rewrite the links in one markdown column of one row, in place.

    Does nothing when the column value is empty or contains nothing that
    fuzzy_link_re recognises.  Raises AssertionError when a link-like span
    contains no strict_link_re match.
    """
    markdown = getattr(row, column.name)
    if not markdown:
        return
    # unicode() is the Python 2 builtin; this script targets Python 2.
    text = unicode(markdown)

    # Make sure everything that remotely looks like a link is one
    links = fuzzy_link_re.findall(text)
    if not links:
        return
    for link in links:
        strict_matches = strict_link_re.findall(link)
        # Explicit raise rather than ``assert`` so the check survives
        # ``python -O``.
        if not strict_matches:
            raise AssertionError((strict_matches, [link]))

    # Do the replacement
    context = '%s %s %s' % (
        translation_class.__name__, row.foreign_id, column.name)
    replaced = strict_link_re.sub(
        partial(get_replacement, session, text, context),
        text,
    )
    setattr(row, column.name, replaced)


def main(argv):
    """Rewrite links in the markdown columns of every matching translation row.

    Walks the translation classes of every class in tables.mapped_classes,
    picks the columns that is_md_col() accepts, and rewrites each such column
    on every row whose local_language_id equals english_id.

    argv is the list of command-line arguments without the program name.  The
    session is committed only when its first element is '--commit'; otherwise
    a hint is printed and nothing is committed.
    """
    session = connect()
    for cls in tables.mapped_classes:
        for translation_class in cls.translation_classes:
            md_columns = [
                column for column in translation_class.__table__.c
                if is_md_col(column)
            ]
            if not md_columns:
                continue
            for row in session.query(translation_class):
                if row.local_language_id != english_id:
                    continue
                for column in md_columns:
                    _rewrite_column(session, translation_class, row, column)

    if argv and argv[0] == '--commit':
        session.commit()
        print('Committed')
    else:
        print('Run with --commit to commit changes')
if __name__ == '__main__':
    # Run only when executed as a script; hand main() the command-line
    # arguments without the program name.
    cli_args = sys.argv[1:]
    main(cli_args)