veekun_pokedex/pokedex/extract/rby.py

1915 lines
57 KiB
Python
Raw Normal View History

"""Extract all the juicy details from a Gen I game.
This was a pain in the ass! Thank you SO MUCH to:
pokered
pokeyellow
vixie
http://www.pastraiser.com/cpu/gameboy/gameboy_opcodes.html
"""
# TODO fix that docstring
# TODO note terminology somewhere: id, index, identifier
from collections import OrderedDict
import hashlib
import io
import logging
from pathlib import Path
import sys
from camel import Camel
from classtools import reify
from construct import *
from pokedex.extract.lib.gbz80 import find_code
import pokedex.schema as schema
# TODO set this up to colorcode and use {} formatting
log = logging.getLogger(__name__)
# Known official games, the languages they were released in, and hashes of
# their contents
# Nested lookup: game identifier -> language code -> list of MD5 hex digests
# of complete cartridge images.  A language maps to several digests when more
# than one revision of that release is listed.
GAME_RELEASE_MD5SUMS = {
    # Set 0: Original Red/Green, only released in Japan
    'jp-red': {
        'ja': [
            '912d4f77d118390a2e2c42b2016a19d4',  # original
            '4c44844f8d5aa3305a0cf2c95cf96333',  # revision A
        ],
    },
    'jp-green': {
        'ja': [
            'e30ffbab1f239f09b226477d84db1368',  # original
            '16ddd8897092936fbc0e286c6a6b23a2',  # revision A
        ],
    },
    # Set 1: Blue in Japan, split into Red and Blue worldwide
    'jp-blue': {
        'ja': ['c1adf0a77809ac91d905a4828888a2f0'],
    },
    'ww-red': {
        'de': ['8ed0e8d45a81ca34de625d930148a512'],
        'en': ['3d45c1ee9abd5738df46d2bdda8b57dc'],
        'es': ['463c241c8721ab1d1da17c91de9f8a32'],
        'fr': ['669700657cb06ed09371cdbdef69e8a3'],
        'it': ['6468fb0652dde30eb968a44f17c686f1'],
    },
    'ww-blue': {
        'de': ['a1ec7f07c7b4251d5fafc50622d546f8'],
        'en': ['50927e843568814f7ed45ec4f944bd8b'],
        'es': ['6e7663f908334724548a66fc9c386002'],
        'fr': ['35c8154c81abb2ab850689fd28a03515'],
        'it': ['ebe0742b472b3e80a9c6749f06181073'],
    },
    # Set 2: Yellow, pretty much the same everywhere
    # TODO is that true?
    # TODO missing other languages
    'yellow': {
        'en': ['d9290db87b1f0a23b89f99ee4469e34b'],
        'ja': [
            'aa13e886a47fd473da63b7d5ddf2828d',  # original
            '96c1f411671b6e1761cf31884dde0dbb',  # revision A
            '5d9c071cf6eb5f3a697bbcd9311b4d04',  # revision B
        ],
    }
}
# Same, but rearranged to md5 => (game, language)
# Reverse lookup built from GAME_RELEASE_MD5SUMS: each known MD5 hex digest
# maps to the (game, language) pair it was listed under.
GAME_RELEASE_MD5SUM_INDEX = {
    digest: (title, lang)
    for title, digests_by_language in GAME_RELEASE_MD5SUMS.items()
    for lang, digests in digests_by_language.items()
    for digest in digests
}
# ------------------------------------------------------------------------------
# Game structure stuff
#
# A lot of this was made much, much easier by the work done on pokered:
# https://github.com/pret/pokered
# Thank y'all so much!
# TODO possibly some of this should be in a shared place, not this file
# Growth-rate byte as stored in the game -> veekun identifier.  Only the four
# codes listed here are mapped.
GROWTH_RATES = {
    code: 'growth-rate.' + slug
    for code, slug in (
        (0, 'medium'),
        (3, 'medium-slow'),
        (4, 'fast'),
        (5, 'slow'),
    )
}
# Evolution-trigger byte as stored in the game -> veekun identifier.
EVOLUTION_TRIGGERS = {
    code: 'evolution-trigger.' + slug
    for code, slug in enumerate(('level-up', 'use-item', 'trade'), start=1)
}
# TODO these are loci, not enums, so hardcoding all their identifiers here
# makes me nonspecifically uncomfortable
# Number (1-151) -> veekun identifier.  The slugs are listed ten per row in
# numeric order, so a row's first entry is number 1, 11, 21, ... and the
# enumerate() below assigns the keys.
POKEMON_IDENTIFIERS = {
    number: 'pokemon.' + slug
    for number, slug in enumerate((
        # 1-10
        'bulbasaur', 'ivysaur', 'venusaur', 'charmander', 'charmeleon',
        'charizard', 'squirtle', 'wartortle', 'blastoise', 'caterpie',
        # 11-20
        'metapod', 'butterfree', 'weedle', 'kakuna', 'beedrill',
        'pidgey', 'pidgeotto', 'pidgeot', 'rattata', 'raticate',
        # 21-30
        'spearow', 'fearow', 'ekans', 'arbok', 'pikachu',
        'raichu', 'sandshrew', 'sandslash', 'nidoran-f', 'nidorina',
        # 31-40
        'nidoqueen', 'nidoran-m', 'nidorino', 'nidoking', 'clefairy',
        'clefable', 'vulpix', 'ninetales', 'jigglypuff', 'wigglytuff',
        # 41-50
        'zubat', 'golbat', 'oddish', 'gloom', 'vileplume',
        'paras', 'parasect', 'venonat', 'venomoth', 'diglett',
        # 51-60
        'dugtrio', 'meowth', 'persian', 'psyduck', 'golduck',
        'mankey', 'primeape', 'growlithe', 'arcanine', 'poliwag',
        # 61-70
        'poliwhirl', 'poliwrath', 'abra', 'kadabra', 'alakazam',
        'machop', 'machoke', 'machamp', 'bellsprout', 'weepinbell',
        # 71-80
        'victreebel', 'tentacool', 'tentacruel', 'geodude', 'graveler',
        'golem', 'ponyta', 'rapidash', 'slowpoke', 'slowbro',
        # 81-90
        'magnemite', 'magneton', 'farfetchd', 'doduo', 'dodrio',
        'seel', 'dewgong', 'grimer', 'muk', 'shellder',
        # 91-100
        'cloyster', 'gastly', 'haunter', 'gengar', 'onix',
        'drowzee', 'hypno', 'krabby', 'kingler', 'voltorb',
        # 101-110
        'electrode', 'exeggcute', 'exeggutor', 'cubone', 'marowak',
        'hitmonlee', 'hitmonchan', 'lickitung', 'koffing', 'weezing',
        # 111-120
        'rhyhorn', 'rhydon', 'chansey', 'tangela', 'kangaskhan',
        'horsea', 'seadra', 'goldeen', 'seaking', 'staryu',
        # 121-130
        'starmie', 'mr-mime', 'scyther', 'jynx', 'electabuzz',
        'magmar', 'pinsir', 'tauros', 'magikarp', 'gyarados',
        # 131-140
        'lapras', 'ditto', 'eevee', 'vaporeon', 'jolteon',
        'flareon', 'porygon', 'omanyte', 'omastar', 'kabuto',
        # 141-150
        'kabutops', 'aerodactyl', 'snorlax', 'articuno', 'zapdos',
        'moltres', 'dratini', 'dragonair', 'dragonite', 'mewtwo',
        # 151
        'mew',
    ), start=1)
}
# Type byte as stored in the game -> veekun identifier.  The codes are not
# contiguous, so each one is spelled out next to its slug.
TYPE_IDENTIFIERS = {
    code: 'type.' + slug
    for code, slug in (
        (0, 'normal'),
        (1, 'fighting'),
        (2, 'flying'),
        (3, 'poison'),
        (4, 'ground'),
        (5, 'rock'),
        # 6 would be 'bird'; it is deliberately left unmapped
        (7, 'bug'),
        (8, 'ghost'),
        (9, 'steel'),
        (20, 'fire'),
        (21, 'water'),
        (22, 'grass'),
        (23, 'electric'),
        (24, 'psychic'),
        (25, 'ice'),
        (26, 'dragon'),
        (27, 'dark'),
    )
}
# Move byte -> veekun identifier.  The slugs are listed ten per row in numeric
# order, so a row's first entry is move 1, 11, 21, ... and enumerate() assigns
# the keys.
MOVE_IDENTIFIERS = {
    # TODO stupid hack for initial moveset
    0: '--',
    **{
        number: 'move.' + slug
        for number, slug in enumerate((
            # 1-10
            'pound', 'karate-chop', 'double-slap', 'comet-punch', 'mega-punch',
            'pay-day', 'fire-punch', 'ice-punch', 'thunder-punch', 'scratch',
            # 11-20
            'vice-grip', 'guillotine', 'razor-wind', 'swords-dance', 'cut',
            'gust', 'wing-attack', 'whirlwind', 'fly', 'bind',
            # 21-30
            'slam', 'vine-whip', 'stomp', 'double-kick', 'mega-kick',
            'jump-kick', 'rolling-kick', 'sand-attack', 'headbutt', 'horn-attack',
            # 31-40
            'fury-attack', 'horn-drill', 'tackle', 'body-slam', 'wrap',
            'take-down', 'thrash', 'double-edge', 'tail-whip', 'poison-sting',
            # 41-50
            'twineedle', 'pin-missile', 'leer', 'bite', 'growl',
            'roar', 'sing', 'supersonic', 'sonic-boom', 'disable',
            # 51-60
            'acid', 'ember', 'flamethrower', 'mist', 'water-gun',
            'hydro-pump', 'surf', 'ice-beam', 'blizzard', 'psybeam',
            # 61-70
            'bubble-beam', 'aurora-beam', 'hyper-beam', 'peck', 'drill-peck',
            'submission', 'low-kick', 'counter', 'seismic-toss', 'strength',
            # 71-80
            'absorb', 'mega-drain', 'leech-seed', 'growth', 'razor-leaf',
            'solar-beam', 'poison-powder', 'stun-spore', 'sleep-powder', 'petal-dance',
            # 81-90
            'string-shot', 'dragon-rage', 'fire-spin', 'thunder-shock', 'thunderbolt',
            'thunder-wave', 'thunder', 'rock-throw', 'earthquake', 'fissure',
            # 91-100
            'dig', 'toxic', 'confusion', 'psychic', 'hypnosis',
            'meditate', 'agility', 'quick-attack', 'rage', 'teleport',
            # 101-110
            'night-shade', 'mimic', 'screech', 'double-team', 'recover',
            'harden', 'minimize', 'smokescreen', 'confuse-ray', 'withdraw',
            # 111-120
            'defense-curl', 'barrier', 'light-screen', 'haze', 'reflect',
            'focus-energy', 'bide', 'metronome', 'mirror-move', 'self-destruct',
            # 121-130
            'egg-bomb', 'lick', 'smog', 'sludge', 'bone-club',
            'fire-blast', 'waterfall', 'clamp', 'swift', 'skull-bash',
            # 131-140
            'spike-cannon', 'constrict', 'amnesia', 'kinesis', 'soft-boiled',
            'high-jump-kick', 'glare', 'dream-eater', 'poison-gas', 'barrage',
            # 141-150
            'leech-life', 'lovely-kiss', 'sky-attack', 'transform', 'bubble',
            'dizzy-punch', 'spore', 'flash', 'psywave', 'splash',
            # 151-160
            'acid-armor', 'crabhammer', 'explosion', 'fury-swipes', 'bonemerang',
            'rest', 'rock-slide', 'hyper-fang', 'sharpen', 'conversion',
            # 161-165
            'tri-attack', 'super-fang', 'slash', 'substitute', 'struggle',
        ), start=1)
    },
}
def unbank(*args):
    """Turn a banked Game Boy address, XX:YYYY, into a flat file offset.

    The Game Boy works in terms of banks internally, and so does pokered, so
    banked addresses are kept as-is throughout this file and converted here.

    How the scheme works:

    - XX is the bank number and YYYY an address; each bank is 0x4000 bytes.
    - In bank 00, YYYY is already the real address and lies in
      [0x0000, 0x4000).
    - In every other bank, YYYY lies in [0x4000, 0x8000) and the banks follow
      one another in order: bank 01 needs no adjustment, bank 02 adds 0x4000,
      and so on.

    Call with two ints (bank, address) or with a single 'XX:YYYY' hex string.
    An address outside the range allowed for its bank trips an assertion.
    """
    if len(args) == 1:
        # Single-argument form: split the 'XX:YYYY' string into hex fields
        bank_text, addr_text = args[0].split(':')
        bank_no = int(bank_text, 16)
        offset = int(addr_text, 16)
    else:
        bank_no, offset = args

    if not bank_no:
        # Bank 00 is mapped at its own address
        assert 0 <= offset < 0x4000
        return offset

    assert 0x4000 <= offset < 0x8000
    return offset + (bank_no - 1) * 0x4000
def bank(addr):
    """Split a flat file offset into a (bank, address) pair.

    This is the inverse of `unbank`: offsets below 0x4000 belong to bank 0
    unchanged, and anything else is reported as an address in
    [0x4000, 0x8000) within its bank.
    """
    if addr < 0x4000:
        return 0, addr
    bank_no, offset = divmod(addr, 0x4000)
    return bank_no, offset + 0x4000
# Byte value -> replacement text, used to decode English game text.
#
# NOTE(review): several keys appear more than once in this literal (0x54,
# 0x80-0xBF, 0xE0 and 0xE3).  Python keeps the LAST value given for a
# duplicated key, so the earlier "" entries for those codes have no effect and
# 0x54 decodes as "POKé", not "#".
# NOTE(review): a large number of codes map to the empty string.  It is not
# clear from this file whether that is intentional or whether the original
# glyphs were lost at some point -- confirm against the upstream source.
EN_TEXT_MAP = {
    # Sort of faux movement macros
    0x00: "",  # "Start text"?
    0x4E: "\n",  # Move to next line
    0x49: "\f",  # Start a new Pokédex page
    0x5F: ".",  # End of Pokédex entry, adds a period
    0x05: "",
    0x06: "",
    0x07: "",
    0x08: "",
    0x09: "",
    0x0A: "",
    0x0B: "",
    0x0C: "",
    0x0D: "",
    0x0E: "",
    0x0F: "",
    0x10: "",
    0x11: "",
    0x12: "",
    0x13: "",
    0x19: "",
    0x1A: "",
    0x1B: "",
    0x1C: "",
    0x26: "",
    0x27: "",
    0x28: "",
    0x29: "",
    0x2A: "",
    0x2B: "",
    0x2C: "",
    0x2D: "",
    0x2E: "",
    0x2F: "",
    0x30: "",
    0x31: "",
    0x32: "",
    0x33: "",
    0x34: "",
    0x3A: "",
    0x3B: "",
    0x3C: "",
    0x3D: "",
    0x3E: "",
    0x40: "",
    0x41: "",
    0x42: "",
    0x43: "",
    0x44: "",
    0x45: "",
    0x46: "",
    0x47: "",
    0x48: "",
    # First pass over 0x80-0xE3; most of these are overridden further down
    0x80: "",
    0x81: "",
    0x82: "",
    0x83: "",
    0x84: "",
    0x85: "",
    0x86: "",
    0x87: "",
    0x88: "",
    0x89: "",
    0x8A: "",
    0x8B: "",
    0x8C: "",
    0x8D: "",
    0x8E: "",
    0x8F: "",
    0x90: "",
    0x91: "",
    0x92: "",
    0x93: "",
    0x94: "",
    0x95: "",
    0x96: "",
    0x97: "",
    0x98: "",
    0x99: "",
    0x9A: "",
    0x9B: "",
    0x9C: "",
    0x9D: "",
    0x9E: "",
    0x9F: "",
    0xA0: "",
    0xA1: "",
    0xA2: "",
    0xA3: "",
    0xA4: "",
    0xA5: "",
    0xA6: "",
    0xA7: "",
    0xA8: "",
    0xA9: "",
    0xAA: "",
    0xAB: "",
    0xAC: "",
    0xAD: "",
    0xAE: "",
    0xAF: "",
    0xB0: "",
    0xB1: "",
    0xB2: "",
    0xB3: "",
    0xB4: "",
    0xB5: "",
    0xB6: "",
    0xB7: "",
    0xB8: "",
    0xB9: "",
    0xBA: "",
    0xBB: "",
    0xBC: "",
    0xBD: "",
    0xBE: "",
    0xBF: "",
    0xC0: "",
    0xC1: "",
    0xC2: "",
    0xC3: "",
    0xC4: "",
    0xC5: "",
    0xC6: "",
    0xC7: "",
    0xC8: "",
    0xC9: "",
    0xCA: "",
    0xCB: "",
    0xCC: "",
    0xCD: "",
    0xCE: "",
    0xCF: "",
    0xD0: "",
    0xD1: "",
    0xD2: "",
    0xD3: "",
    0xD4: "",
    0xD5: "",
    0xD6: "",
    0xD7: "",
    0xD8: "",
    0xD9: "",
    0xDA: "",
    0xDB: "",
    0xDC: "",
    0xDD: "",
    0xDE: "",
    0xDF: "",
    0xE0: "",
    0xE1: "",
    0xE2: "",
    0xE3: "",
    0x50: "@",
    0x54: "#",
    0x54: "POKé",  # duplicate key: this value replaces the "#" above
    0x75: "",
    0x79: "",
    0x7A: "",
    0x7B: "",
    0x7C: "",
    0x7D: "",
    0x7E: "",
    0x74: "",
    0x7F: " ",
    # Second pass: these replace the "" values given to 0x80-0xBF above
    0x80: "A",
    0x81: "B",
    0x82: "C",
    0x83: "D",
    0x84: "E",
    0x85: "F",
    0x86: "G",
    0x87: "H",
    0x88: "I",
    0x89: "J",
    0x8A: "K",
    0x8B: "L",
    0x8C: "M",
    0x8D: "N",
    0x8E: "O",
    0x8F: "P",
    0x90: "Q",
    0x91: "R",
    0x92: "S",
    0x93: "T",
    0x94: "U",
    0x95: "V",
    0x96: "W",
    0x97: "X",
    0x98: "Y",
    0x99: "Z",
    0x9A: "(",
    0x9B: ")",
    0x9C: ":",
    0x9D: ";",
    0x9E: "[",
    0x9F: "]",
    0xA0: "a",
    0xA1: "b",
    0xA2: "c",
    0xA3: "d",
    0xA4: "e",
    0xA5: "f",
    0xA6: "g",
    0xA7: "h",
    0xA8: "i",
    0xA9: "j",
    0xAA: "k",
    0xAB: "l",
    0xAC: "m",
    0xAD: "n",
    0xAE: "o",
    0xAF: "p",
    0xB0: "q",
    0xB1: "r",
    0xB2: "s",
    0xB3: "t",
    0xB4: "u",
    0xB5: "v",
    0xB6: "w",
    0xB7: "x",
    0xB8: "y",
    0xB9: "z",
    0xBA: "é",
    # Single codes that expand to an apostrophe plus a letter
    0xBB: "'d",
    0xBC: "'l",
    0xBD: "'s",
    0xBE: "'t",
    0xBF: "'v",
    0xE0: "'",
    0xE3: "-",
    0xE4: "'r",
    0xE5: "'m",
    0xE6: "?",
    0xE7: "!",
    0xE8: ".",
    0xED: "",
    0xEF: "",
    0xF0: "¥",
    0xF1: "×",
    0xF3: "/",
    0xF4: ",",
    0xF5: "",
    0xF6: "0",
    0xF7: "1",
    0xF8: "2",
    0xF9: "3",
    0xFA: "4",
    0xFB: "5",
    0xFC: "6",
    0xFD: "7",
    0xFE: "8",
    0xFF: "9",
}
# Byte value -> replacement text, used to decode Japanese game text.  Starts
# as a copy of EN_TEXT_MAP and then overrides the codes listed below.
#
# NOTE(review): every override here is the empty string, which also blanks
# the Latin letters EN_TEXT_MAP assigns to 0x80-0xBF.  That looks like the
# original glyphs may have been lost from this file -- confirm against the
# upstream source before relying on Japanese decoding.
JA_CHARMAP = {
    **EN_TEXT_MAP,
    0x05: "",
    0x06: "",
    0x07: "",
    0x08: "",
    0x09: "",
    0x0A: "",
    0x0B: "",
    0x0C: "",
    0x0D: "",
    0x0E: "",
    0x0F: "",
    0x10: "",
    0x11: "",
    0x12: "",
    0x13: "",
    0x19: "",
    0x1A: "",
    0x1B: "",
    0x1C: "",
    0x26: "",
    0x27: "",
    0x28: "",
    0x29: "",
    0x2A: "",
    0x2B: "",
    0x2C: "",
    0x2D: "",
    0x2E: "",
    0x2F: "",
    0x30: "",
    0x31: "",
    0x32: "",
    0x33: "",
    0x34: "",
    0x3A: "",
    0x3B: "",
    0x3C: "",
    0x3D: "",
    0x3E: "",
    0x40: "",
    0x41: "",
    0x42: "",
    0x43: "",
    0x44: "",
    0x45: "",
    0x46: "",
    0x47: "",
    0x48: "",
    0x80: "",
    0x81: "",
    0x82: "",
    0x83: "",
    0x84: "",
    0x85: "",
    0x86: "",
    0x87: "",
    0x88: "",
    0x89: "",
    0x8A: "",
    0x8B: "",
    0x8C: "",
    0x8D: "",
    0x8E: "",
    0x8F: "",
    0x90: "",
    0x91: "",
    0x92: "",
    0x93: "",
    0x94: "",
    0x95: "",
    0x96: "",
    0x97: "",
    0x98: "",
    0x99: "",
    0x9A: "",
    0x9B: "",
    0x9C: "",
    0x9D: "",
    0x9E: "",
    0x9F: "",
    0xA0: "",
    0xA1: "",
    0xA2: "",
    0xA3: "",
    0xA4: "",
    0xA5: "",
    0xA6: "",
    0xA7: "",
    0xA8: "",
    0xA9: "",
    0xAA: "",
    0xAB: "",
    0xAC: "",
    0xAD: "",
    0xAE: "",
    0xAF: "",
    0xB0: "",
    0xB1: "",
    0xB2: "",
    0xB3: "",
    0xB4: "",
    0xB5: "",
    0xB6: "",
    0xB7: "",
    0xB8: "",
    0xB9: "",
    0xBA: "",
    0xBB: "",
    0xBC: "",
    0xBD: "",
    0xBE: "",
    0xBF: "",
    0xC0: "",
    0xC1: "",
    0xC2: "",
    0xC3: "",
    0xC4: "",
    0xC5: "",
    0xC6: "",
    0xC7: "",
    0xC8: "",
    0xC9: "",
    0xCA: "",
    0xCB: "",
    0xCC: "",
    0xCD: "",
    0xCE: "",
    0xCF: "",
    0xD0: "",
    0xD1: "",
    0xD2: "",
    0xD3: "",
    0xD4: "",
    0xD5: "",
    0xD6: "",
    0xD7: "",
    0xD8: "",
    0xD9: "",
    0xDA: "",
    0xDB: "",
    0xDC: "",
    0xDD: "",
    0xDE: "",
    0xDF: "",
    0xE0: "",
    0xE1: "",
    0xE2: "",
    0xE3: "",
    0xE9: "",
}
# Give every byte value that JA_CHARMAP does not already cover an explicit
# placeholder, so the table has an entry for all 256 codes.
for n in range(0x100):
    JA_CHARMAP.setdefault(n, '<EFBFBD>')
# ty, tachyon
# Byte value -> replacement text, used to decode German and French game text.
# The list is positional: enumerate() turns each entry's position into its
# byte value, eight entries per line and sixteen per "# 0xNX" group, so
# entries must never be inserted or removed -- only replaced in place.
# "<EFBFBD>" is the placeholder used for codes with no known meaning.
# NOTE(review): 0x50 is "" here but "@" in the English and Spanish/Italian
# tables -- confirm which is intended.
DE_FR_TEXT_MAP = dict(enumerate([
    # 0x0X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x1X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x2X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x3X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x4X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x5X
    "", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x6X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x7X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", " ",
    # 0x8X
    "A", "B", "C", "D", "E", "F", "G", "H",
    "I", "J", "K", "L", "M", "N", "O", "P",
    # 0x9X
    "Q", "R", "S", "T", "U", "V", "W", "X",
    "Y", "Z", "(", ")", ":", ";", "[", "]",
    # 0xAX
    "a", "b", "c", "d", "e", "f", "g", "h",
    "i", "j", "k", "l", "m", "n", "o", "p",
    # 0xBX
    "q", "r", "s", "t", "u", "v", "w", "x",
    "y", "z", "à", "è", "é", "ù", "ß", "ç",
    # 0xCX
    "Ä", "Ö", "Ü", "ä", "ö", "ü", "ë", "ï",
    "â", "ô", "û", "ê", "î", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0xDX
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "cʼ", "dʼ", "jʼ", "lʼ",
    "mʼ", "nʼ", "pʼ", "sʼ", "ʼs", "tʼ", "uʼ", "yʼ",
    # 0xEX
    "'", "P\u200dk", "M\u200dn", "-", "¿", "¡", "?", "!",
    ".", "", "", "", "", "", "", "",
    # 0xFX
    "$", "×", ".", "/", ",", "", "0", "1",
    "2", "3", "4", "5", "6", "7", "8", "9",
]))
# Overlay the control codes and the "POKé" abbreviation on top of the
# positional table, replacing whatever the list held at those positions.
DE_FR_TEXT_MAP.update({
    0x00: "",  # "Start text"?
    0x4E: "\n",  # Move to next line
    0x49: "\f",  # Start a new Pokédex page
    0x5F: ".",  # End of Pokédex entry, adds a period
    0x54: "POKé",
})
# Byte value -> replacement text, used to decode Spanish and Italian game
# text.  The list is positional: enumerate() turns each entry's position into
# its byte value, eight entries per line and sixteen per "# 0xNX" group, so
# entries must never be inserted or removed -- only replaced in place.
# "<EFBFBD>" is the placeholder used for codes with no known meaning.
ES_IT_CHARMAP = dict(enumerate([
    # 0x0X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x1X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x2X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x3X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x4X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x5X
    "@", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x6X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    # 0x7X
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>",
    "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", " ",
    # 0x8X
    "A", "B", "C", "D", "E", "F", "G", "H",
    "I", "J", "K", "L", "M", "N", "O", "P",
    # 0x9X
    "Q", "R", "S", "T", "U", "V", "W", "X",
    "Y", "Z", "(", ")", ":", ";", "[", "]",
    # 0xAX
    "a", "b", "c", "d", "e", "f", "g", "h",
    "i", "j", "k", "l", "m", "n", "o", "p",
    # 0xBX
    "q", "r", "s", "t", "u", "v", "w", "x",
    "y", "z", "à", "è", "é", "ù", "À", "Á",
    # 0xCX
    "Ä", "Ö", "Ü", "ä", "ö", "ü", "È", "É",
    "Ì", "Í", "Ñ", "Ò", "Ó", "Ù", "Ú", "á",
    # 0xDX
    "ì", "í", "ñ", "ò", "ó", "ú", "º", "&",
    "ʼd", "ʼl", "ʼm", "ʼr", "ʼs", "ʼt", "ʼv", " ",
    # 0xEX
    "'", "P\u200dk", "M\u200dn", "-", "¿", "¡", "?", "!",
    ".", "", "", "", "", "", "", "",
    # 0xFX
    "$", "×", ".", "/", ",", "", "0", "1",
    "2", "3", "4", "5", "6", "7", "8", "9"
]))
# Overlay the control codes and the "POKé" abbreviation on top of the
# positional table, replacing whatever the list held at those positions.
ES_IT_CHARMAP.update({
    0x00: "",  # "Start text"?
    0x4E: "\n",  # Move to next line
    0x49: "\f",  # Start a new Pokédex page
    0x5F: ".",  # End of Pokédex entry, adds a period
    0x54: "POKé",
})
class PokemonString:
    """Text still in the games' own byte encoding.

    `raw` holds the undecoded bytes; call `decrypt` with a language code to
    turn them into a regular `str`.
    """

    def __init__(self, raw):
        self.raw = raw

    def decrypt(self, language):
        """Decode the raw bytes with the character table for `language`.

        `language` is one of 'ja', 'en', 'es', 'it', 'de' or 'fr'; any other
        value raises ValueError.  Bytes missing from the chosen table are
        rendered with the placeholder string.
        """
        if language in ('de', 'fr'):
            table = DE_FR_TEXT_MAP
        elif language in ('es', 'it'):
            table = ES_IT_CHARMAP
        elif language == 'en':
            table = EN_TEXT_MAP
        elif language == 'ja':
            table = JA_CHARMAP
        else:
            raise ValueError("Not a known language: {!r}".format(language))

        pieces = [table.get(byte, '<EFBFBD>') for byte in self.raw]
        return ''.join(pieces)
class PokemonCString(Adapter):
    """Construct adapter that parses game text into a `PokemonString`.

    Without `length` the field is read up to the terminator byte; with
    `length` it is a fixed-width field padded with that same byte.  Building
    (encoding) is not supported.
    """

    def __init__(self, name, length=None):
        # No matter which charmap, the "end of string" character is always
        # the byte b'P' (0x50)
        if length is not None:
            inner = String(name, length, padchar=b'P')
        else:
            inner = CString(name, terminators=b'P')
        super().__init__(inner)

    def _encode(self, obj, context):
        raise NotImplementedError

    def _decode(self, obj, context):
        # Hand back the raw bytes wrapped, so the caller decides the language
        return PokemonString(obj)
class NullTerminatedArray(Subconstruct):
    """Parse `subcon` repeatedly until the next byte in the stream is zero.

    Before each element one byte is peeked; a zero ends the array and is then
    consumed, anything else is left in place for `subcon` to parse.  The
    result is a `ListContainer` of the parsed elements.  Building is not
    supported.
    """
    # One-byte lookahead used to spot the terminator; the field name is a
    # throwaway.
    _peeker = Peek(ULInt8('___'))
    __slots__ = ()

    def __init__(self, subcon):
        super().__init__(subcon)
        self._clear_flag(self.FLAG_COPY_CONTEXT)
        self._set_flag(self.FLAG_DYNAMIC)

    def _parse(self, stream, context):
        """Return a ListContainer of elements read up to the zero byte."""
        from construct.lib import ListContainer
        obj = ListContainer()
        orig_context = context
        while True:
            nextbyte = self._peeker.parse_stream(stream)
            if nextbyte == 0:
                break
            # Give each element its own copy of the context when the subcon
            # asks for one.
            if self.subcon.conflags & self.FLAG_COPY_CONTEXT:
                context = orig_context.__copy__()
            # TODO what if we hit the end of the stream
            obj.append(self.subcon._parse(stream, context))
        # Consume the trailing zero
        stream.read(1)
        return obj

    def _build(self, obj, stream, context):
        raise NotImplementedError

    # TODO ???
    #def _sizeof(self, context):
def IdentEnum(subcon, mapping):
    """Wrap `subcon` in an Enum whose names are the values of `mapping`.

    `mapping` goes from raw code to identifier (like the *_IDENTIFIERS tables
    in this file); Enum wants the reverse, identifier=code, so the mapping is
    inverted before being passed on as keyword arguments.
    """
    codes_by_identifier = {}
    for code, identifier in mapping.items():
        codes_by_identifier[identifier] = code
    return Enum(subcon, **codes_by_identifier)
# Game Boy header, at 0x0100
# http://gbdev.gg8.se/wiki/articles/The_Cartridge_Header
# TODO hey! i wish i had a little cli entry point that would spit this out for a game. and do other stuff like scan for likely pokemon text or graphics. that would be really cool in fact. maybe put this in a gb module and make that exist sometime.
# Layout of the cartridge header.  Fields are parsed in the order listed, so
# the order here is the on-disk order.
game_boy_header_struct = Struct(
    'game_boy_header',
    # Entry point for the game; generally contains a jump to 0x0150
    String('entry_point', 4),
    # Nintendo logo; must be exactly this or booting will not continue.
    # Parsing fails if these 48 bytes differ.
    Const(
        String('nintendo_logo', 48),
        bytes.fromhex("""
CE ED 66 66 CC 0D 00 0B 03 73 00 83 00 0C 00 0D
00 08 11 1F 88 89 00 0E DC CC 6E E6 DD DD D9 99
BB BB 67 63 6E 0E EC CC DD DC 99 9F BB B9 33 3E
""".replace('\n', '')),
    ),
    # Title is NUL-padded to 11 bytes
    String('title', 11, padchar=b'\x00'),
    String('manufacturer_code', 4),
    ULInt8('cgb_flag'),
    String('new_licensee_code', 2),
    ULInt8('sgb_flag'),  # 3 for super game boy support
    ULInt8('cartridge_type'),
    ULInt8('rom_size'),
    ULInt8('ram_size'),
    ULInt8('region_code'),  # 0 for japan, 1 for not japan
    ULInt8('old_licensee_code'),  # 0x33 means to use licensee_code
    ULInt8('game_version'),
    ULInt8('header_checksum'),
    # Note: this is the only big-endian field in the header struct
    UBInt16('cart_checksum'),
)
# The mother lode — Pokémon base stats
# One base-stats record.  Fields are parsed in the order listed; the enum
# fields come back as identifiers from the lookup tables rather than raw
# numbers.
pokemon_struct = Struct(
    'pokemon',
    ULInt8('pokedex_number'),
    ULInt8('base_hp'),
    ULInt8('base_attack'),
    ULInt8('base_defense'),
    ULInt8('base_speed'),
    ULInt8('base_special'),
    IdentEnum(ULInt8('type1'), TYPE_IDENTIFIERS),
    IdentEnum(ULInt8('type2'), TYPE_IDENTIFIERS),
    ULInt8('catch_rate'),
    ULInt8('base_experience'),
    # TODO ???? "sprite dimensions"
    ULInt8('_sprite_dimensions'),
    ULInt16('front_sprite_pointer'),
    ULInt16('back_sprite_pointer'),
    # Always four slots; unused ones hold move 0, which MOVE_IDENTIFIERS maps
    # to '--'.
    # TODO somehow rig this to discard trailing zeroes; there's a paddedstring that does it
    Array(4, IdentEnum(ULInt8('initial_moveset'), MOVE_IDENTIFIERS)),
    IdentEnum(ULInt8('growth_rate'), GROWTH_RATES),
    # Seven bytes (56 bits) of machine flags, read as one byte-swapped integer.
    # TODO argh, this is a single huge integer; i want an array, but then i lose the byteswapping!
    Bitwise(
        BitField('machines', 7 * 8, swapped=True),
    ),
    # One trailing byte that is skipped
    Padding(1),
)
# Evolutions and level-up moves: two zero-terminated lists, one after the
# other.
evos_moves_struct = Struct(
    'evos_moves',
    # First list: evolutions.  Each entry starts with a trigger byte, and the
    # trigger decides which argument bytes follow.
    NullTerminatedArray(
        Struct(
            'evolutions',
            IdentEnum(ULInt8('evo_trigger'), EVOLUTION_TRIGGERS),
            # Embedded, so the argument fields are merged into the enclosing
            # 'evolutions' entry.
            Embedded(Switch(
                'evo_arguments',
                lambda ctx: ctx.evo_trigger, {
                    'evolution-trigger.level-up': Struct(
                        '---',
                        ULInt8('evo_level'),
                    ),
                    'evolution-trigger.use-item': Struct(
                        '---',
                        # TODO item enum too wow!
                        ULInt8('evo_item'),
                        # TODO ??? always seems to be 1
                        ULInt8('evo_level'),
                    ),
                    # TODO ??? always seems to be 1 here too
                    'evolution-trigger.trade': Struct(
                        '---',
                        ULInt8('evo_level'),
                    ),
                },
            )),
            # TODO alas, the species here is a number, because it's an internal
            # id and we switch those back using data from the game...
            ULInt8('evo_species'),
        ),
    ),
    # Second list: (level, move) pairs learned by levelling up.
    NullTerminatedArray(
        Struct(
            'level_up_moves',
            ULInt8('level'),
            IdentEnum(ULInt8('move'), MOVE_IDENTIFIERS),
            # Looks at the following byte without consuming it
            Peek(ULInt8('_end')),
        ),
    ),
)
# One entry of the evolutions/moves pointer table: a 16-bit banked address,
# followed through to the evos_moves_struct it points at.
evos_moves_pointer = Struct(
    'xxx',
    ULInt16('offset'),
    # Convert the banked address to a file offset, assuming bank 0x0E.
    # TODO hardcoded as the same bank, ugh
    Pointer(lambda ctx: ctx.offset + (0xE - 1) * 0x4000, evos_moves_struct),
)
# One Pokédex entry: species name, height and weight, then a far pointer to
# the flavor text, which is followed and parsed as well.
pokedex_flavor_struct = Struct(
    'pokedex_flavor',
    PokemonCString('species'),
    # TODO Units differ by release: some games store metric values and
    # others (apparently only the US ones) store imperial.  Only the metric
    # layout is parsed here; the imperial one is kept below for reference.
    #ULInt8('height_feet'),
    #ULInt8('height_inches'),
    #ULInt16('weight_pounds'),
    ULInt8('height_decimeters'),
    ULInt16('weight_hectograms'),
    # This appears to technically be a string containing a single macro, for
    # "load other string from this address", but it always takes this same form
    # so there's no need to actually evaluate it.
    Const(ULInt8('macro'), 0x17),  # 0x17 is the "far" macro
    ULInt16('address'),
    ULInt8('bank'),
    Const(ULInt8('nul'), 0x50),  # faux nul marking the end of the string
    # Turn the bank:address pair just read into a file offset and read the
    # flavor text there.
    Pointer(
        lambda ctx: ctx.address + (ctx.bank - 1) * 0x4000,
        PokemonCString('flavor_text'),
    ),
)
# TODO this works very awkwardly as a struct
# One entry of the Pokédex pointer table: a 16-bit banked address, with
# on-demand access to the pokedex_flavor_struct it points at.
pokedex_flavor_pointer = Struct(
    'xxx',
    ULInt16('offset'),
    # Convert the banked address to a file offset, assuming bank 0x10.
    # TODO hardcoded 0x10, same bank
    # TODO this has to be on-demand because missingno's struct is actually bogus!
    OnDemandPointer(lambda ctx: ctx.offset + (0x10 - 1) * 0x4000, pokedex_flavor_struct),
)
class CartDetectionError(Exception):
    """Raised when an expected landmark cannot be located in a cart image."""
    pass
class RBYCart:
NUM_POKEMON = 151
NUM_MOVES = 165
NUM_MACHINES = 55
def __init__(self, path):
    """Read the cart image at `path` and work out what it is.

    `path` must offer `.open('rb')` and `.name`.  The whole file is read into
    memory as `self.data`, with `self.stream` as a seekable view of the same
    bytes.  May raise CartDetectionError from address detection.
    """
    with path.open('rb') as f:
        self.data = f.read()
    self.stream = io.BytesIO(self.data)
    self.path = path
    # Scrape these first; language detection relies on examining text
    self.addrs = self.detect_addresses()
    self.game, self.language = self.detect_game()
    # And snag this before anything else happens; prevents some silly
    # problems where a reified property seeks, then tries to read this, and
    # it ends up seeking again.  The bare attribute access is deliberate: it
    # is evaluated only for its side effect.
    self.max_pokemon_index
def detect_addresses(self):
    """The addresses of some important landmarks can vary between versions
    and languages.  Attempt to detect them automatically.

    Return a dict of raw file offsets.  The keys are the names used in the
    pokered project.

    Raises CartDetectionError when a landmark cannot be found, and a bare
    RuntimeError when a matched address does not line up with the position
    of the code that referenced it.
    """
    # The ideal approach is to find some assembly code that appears just
    # before the data of interest.  It's pretty hacky, but since
    # translators (and even modders) would have little reason to rearrange
    # functions or inject new ones in these odd places, it ought to work
    # well enough.  And it's better than ferreting out and hard-coding
    # piles of addresses.
    # The only hard part is that assembly code that contains an address
    # won't work, since that address will also vary per game.
    # Each of the landmarks used here appears in every official cartridge
    # exactly once.
    addresses = {}
    # This is an entire function used by the Pokédex and which immediately
    # precedes all the flavor text.
    asm_DrawTileLine = bytes.fromhex('c5d5 7019 0d20 fbd1 c1c9')
    try:
        idx = self.data.index(asm_DrawTileLine)
    except ValueError:
        raise CartDetectionError("Can't find flavor text pointers")
    addresses['PokedexEntryPointers'] = idx + len(asm_DrawTileLine)
    # This is a helper function for figuring out moves, followed by another
    # 5-byte function, then the table of evolutions and moves.
    asm_WriteMonMoves_ShiftMoveData = bytes.fromhex('0e03 131a 220d 20fa c9')
    try:
        idx = self.data.index(asm_WriteMonMoves_ShiftMoveData)
    except ValueError:
        raise CartDetectionError("Can't find evolution and moveset table")
    # The extra 5 skips the intervening 5-byte function mentioned above
    addresses['EvosMovesPointerTable'] = idx + len(asm_WriteMonMoves_ShiftMoveData) + 5
    # Several lists of names are accessed by a single function, which looks
    # through a list of pointers to find the right set of names to use.
    # That's great news for me: I can just grab all of those delicious
    # pointers at once.  Here's an excerpt from GetName.
    match = find_code(self.data, '''
inc d
;.skip
ld hl, #NamePointers
add hl,de
ld a,[hl+]
ldh [$96],a
ld a,[hl]
ldh [$95],a
ldh a,[$95]
ld h,a
ldh a,[$96]
ld l,a
ld a,[#wd0b5]
ld b,a
ld c,0
;.nextName
ld d,h
ld e,l
;.nextChar
ld a,[hl+]
cp $50 ; terminator @, encoded
''')
    if not match:
        raise CartDetectionError("Can't find name array")
    rem, inputs = match
    start = inputs['NamePointers']
    # Seven 16-bit pointers, i.e. 14 bytes, starting at NamePointers
    name_pointers = Array(7, ULInt16('dummy')).parse(
        self.data[start:start + 14])
    # One downside to the Game Boy memory structure is that banks are
    # not stored anywhere near their corresponding addresses.  Most
    # bank numbers are hardcoded here, but Pokémon names are in a different
    # bank in Japanese games, so we've gotta scrape the bank too...
    match = find_code(self.data, '''
;GetMonName::
push hl
ldh a,[#H_LOADEDROMBANK]
push af
ld a,#BANK_MonsterNames
ldh [#H_LOADEDROMBANK],a
ld [#MBC1RomBank],a
ld a,[#wd11e]
dec a
ld hl,#MonsterNames
''',
        H_LOADEDROMBANK=0xB8,  # full address is $FFB8; ldh adds the $FF
        MBC1RomBank=0x2000,
        MonsterNames=name_pointers[0]
    )
    if not match:
        raise CartDetectionError("Can't find Pokémon names")
    rem, inputs = match
    addresses['MonsterNames'] = unbank(
        inputs['BANK_MonsterNames'], name_pointers[0])
    addresses['MoveNames'] = unbank(0x2C, name_pointers[1])
    # 2: UnusedNames (unused, obviously)
    addresses['ItemNames'] = unbank(0x01, name_pointers[3])
    # 4: wPartyMonOT (only useful while the game is running)
    # 5: wEnemyMonOT (only useful while the game is running)
    addresses['TrainerNames'] = unbank(0x0E, name_pointers[6])
    # Finding TMs is a bit harder.  They come right after a function for
    # looking up a TM number, which is very short and very full of
    # addresses, so it is matched as a code pattern with placeholders.
    # `wd11e` is some address used all over the game for passing arguments
    # around, which unfortunately also differs from language to language.
    # In English it is, unsurprisingly, 0xD11E.
    # `TechnicalMachines` is the address we're looking for, which should
    # immediately follow what this matches.
    match = find_code(self.data, '''
ld a, [#wd11e]
dec a
ld hl, #TechnicalMachines
ld b, $0
ld c, a
add hl, bc
ld a, [hl]
ld [#wd11e], a
ret
''')
    if match:
        rem, inputs = match
        # TODO this should mayybe also check that the address immediately follows this code
        matched_addr = inputs['TechnicalMachines']
        tentative_addr = rem.end()
        # Remember, addresses don't include the bank!
        _, banked_addr = bank(tentative_addr)
        if matched_addr == banked_addr:
            # Keep the scraped wd11e address; the later matches pass it in
            asm_wd11e_addr = inputs['wd11e']
            addresses['TechnicalMachines'] = tentative_addr
        else:
            raise RuntimeError
        # TODO should there really be more than one match?
    else:
        raise CartDetectionError("Can't find technical machines list")
    # Pokédex order is similarly tricky.  Much like the above, this
    # function converts a Pokémon's game index to its national dex number.
    # These are almost immediately after the Pokédex entries themselves,
    # but this actually seems easier than figuring out where a table of
    # pointers ends.
    match = find_code(self.data, '''
push bc
push hl
ld a, [#wd11e]
dec a
ld hl, #PokedexOrder
ld b, 0
ld c, a
add hl, bc
ld a, [hl]
ld [#wd11e], a
pop hl
pop bc
ret
''', wd11e=asm_wd11e_addr)
    if match:
        rem, inputs = match
        matched_addr = inputs['PokedexOrder']
        tentative_addr = rem.end()
        # Remember, addresses don't include the bank!
        _, banked_addr = bank(tentative_addr)
        if matched_addr == banked_addr:
            addresses['PokedexOrder'] = tentative_addr
        else:
            raise RuntimeError
    else:
        raise CartDetectionError("Can't find Pokédex order")
    # Ah, but then, we have base stats.  These don't have code nearby;
    # they're just stuck immediately after moves.  Except in R/G, where
    # they appear /before/ moves!  And we don't know what version we're
    # running yet, because the addresses detected in this method are used
    # for language detection.  Hmm.
    # Here's plan B: look for the function that /loads/ base stats, and
    # scrape the address out of it.  This function is a bit hairy; I've had
    # to expand some of pokered's macros and rewrite the jumps to something
    # that the rudimentary code matcher can understand.
    match = find_code(self.data, '''
ldh a, [#H_LOADEDROMBANK]
push af
ld a, #BANK_BaseStats
ldh [#H_LOADEDROMBANK], a
ld [#MBC1RomBank], a
push bc
push de
push hl
ld a, [#wd11e]
push af
ld a,[#wd0b5]
ld [#wd11e],a
ld de,#FossilKabutopsPic
ld b,$66 ; size of Kabutops fossil and Ghost sprites
cp #FOSSIL_KABUTOPS ; Kabutops fossil
jr z,#specialID1
ld de,#GhostPic
cp #MON_GHOST ; Ghost
jr z,#specialID2
ld de,#FossilAerodactylPic
ld b,$77 ; size of Aerodactyl fossil sprite
cp #FOSSIL_AERODACTYL ; Aerodactyl fossil
jr z,#specialID3
cp #MEW
jr z,#mew
ld a, #IndexToPokedexPredef
call #IndexToPokedex ; convert pokemon ID in [wd11e] to pokedex number
ld a,[#wd11e]
dec a
ld bc, #MonBaseStatsLength
ld hl, #BaseStats
call #AddNTimes
ld de, #wMonHeader
ld bc, #MonBaseStatsLength
call #CopyData
jr #done1
;.specialID
ld hl, #wMonHSpriteDim
ld [hl], b ; write sprite dimensions
inc hl
ld [hl], e ; write front sprite pointer
inc hl
ld [hl], d
jr #done2
;.mew
ld hl, #MewBaseStats
ld de, #wMonHeader
ld bc, #MonBaseStatsLength
ld a, #BANK_MewBaseStats
call #FarCopyData
''',
        # These are constants; I left them in the above code for clarity
        H_LOADEDROMBANK=0xB8,  # full address is $FFB8; ldh adds the $FF
        MBC1RomBank=0x2000,
        # This was scraped previously
        wd11e=asm_wd11e_addr,
    )
    if match:
        rem, inputs = match
        addresses['BaseStats'] = unbank(inputs['BANK_BaseStats'], inputs['BaseStats'])
        addresses['MewBaseStats'] = unbank(inputs['BANK_MewBaseStats'], inputs['MewBaseStats'])
    else:
        raise CartDetectionError("Can't find base stats")
    return addresses
def detect_game(self):
    """Given a cart image, return the game and language.

    This is the high-level interface: it prints what it found to stdout and
    terminates the process with ``sys.exit(1)`` when either the game or the
    language cannot be determined.  Its two helpers,
    ``detect_game_checksum`` and ``detect_game_heuristic``, do neither.

    Returns:
        A ``(game, language)`` tuple.  Each element comes from the checksum
        lookup when that succeeded, otherwise from the heuristic.
    """
    # TODO raise, don't print to stdout

    # We have checksums for each of the games, but we also want to support
    # a heuristic so this same code can be used for trimmed carts,
    # bootlegs, fan hacks, corrupted carts, and other interesting variants.
    # Try both, and warn if they don't agree.
    game_c, language_c = self.detect_game_checksum()
    game_h, language_h = self.detect_game_heuristic()
    game = game_c or game_h
    language = language_c or language_h

    if not (game and language):
        print("Can't figure out what game {filename} is! ".format(
            filename=self.path.name), end='')
        if game:
            # TODO should probably be a way to override this
            print("It seems to be {}, but I can't figure out the language.".format(game))
        elif language:
            print("It seems to use {} text, but I can't figure out the version.".format(language))
        else:
            print("Nothing about it is familiar to me.")
        print("Bailing, sorry :(")
        sys.exit(1)

    print("Detected {filename} as {game}, {language}".format(
        filename=self.path.name, game=game, language=language))

    # Warn about a potentially bad checksum.  The standard logging module
    # interpolates its arguments with %-formatting, so the placeholders
    # must be %s rather than {}.
    if not game_c or not language_c:
        log.warning(
            "Hmm. I don't recognize the checksum for %s, but I'll "
            "continue anyway.",
            self.path.name)
    elif game_c != game_h or language_c != language_h:
        log.warning(
            "This is very surprising. The checksum indicates that this "
            "game should be %s, %s, but I detected it as %s, %s. Probably "
            "my fault, not yours. Continuing anyway.",
            game_c, language_c, game_h, language_h)

    return game, language
def detect_game_checksum(self):
    """Look up the MD5 digest of the whole cart image among known releases.

    Returns whatever GAME_RELEASE_MD5SUM_INDEX stores for the digest (the
    caller unpacks it as ``(game, language)``), or ``(None, None)`` when the
    digest is not in the index.
    """
    digest = hashlib.md5(self.data).hexdigest()
    unrecognized = (None, None)
    return GAME_RELEASE_MD5SUM_INDEX.get(digest, unrecognized)
def detect_game_heuristic(self):
    """Guess the game and language by inspecting the cart's contents.

    Returns a ``(game, language)`` tuple.  Either element is None when it
    could not be worked out; an unexpected ROM size yields ``(None, None)``.
    """
    # Fun story: nothing /officially/ distinguishes the games.  The header
    # has a Japan/not-Japan flag, but it does not hold a reliable value on
    # the copies seen so far, and there is no other documented difference.
    # It looks like the text was changed, the game reassembled, and that
    # was that.  So take it one step at a time, starting with the title,
    # which the cartridge header gives us for free.
    self.stream.seek(0x100)
    header = game_boy_header_struct.parse_stream(self.stream)

    # The tail end of the title field was later repurposed, so the last
    # letters of e.g. POKEMON YELLOW land in the manufacturer code.  Glue
    # them back on.
    title = header.title + header.manufacturer_code.rstrip(b'\x00')
    known_titles = (
        (b'POKEMON RED', 'red'),
        (b'POKEMON GREEN', 'green'),
        (b'POKEMON BLUE', 'blue'),
        (b'POKEMON YELLOW', 'yellow'),
    )
    version = next(
        (name for known_title, name in known_titles if title == known_title),
        None)

    # "red" is ambiguous: it may be the Japan-only Red of Red/Green, or the
    # worldwide Red of Red/Blue (based on Japanese Blue).  Red and Green are
    # the only games on a half-megabyte cartridge, so ROM size settles it.
    rom_size = header.rom_size
    if rom_size == 4:
        # 512K -> Red/Green; no other game is this size
        game = {'red': 'jp-red', 'green': 'jp-green'}.get(version)
    elif rom_size == 5:
        # 1M -> Red/Blue/Yellow.  There was no Green this big, and which
        # Blue this is can't be known until the language is.
        game = {'red': 'ww-red', 'yellow': 'yellow'}.get(version)
    else:
        # ???
        return None, None

    # Japanese Red and Green can only be in Japanese, so we're done.
    if game in ('jp-red', 'jp-green'):
        return game, 'ja'

    # Otherwise the only way to be sure is to find some text and see what
    # language it's in.  Item 0 is MASTER BALL; the first item whose name
    # differs in every language is item 4, TOWN MAP, so skip four names and
    # keep the fifth.
    self.stream.seek(self.addrs['ItemNames'])
    item_name_struct = PokemonCString('dummy')
    for _ in range(4):
        item_name_struct.parse_stream(self.stream)
    town_map = item_name_struct.parse_stream(self.stream)

    town_map_by_language = (
        ('de', 'KARTE'),
        ('en', 'TOWN MAP'),
        ('es', 'MAPA PUEBLO'),
        ('fr', 'CARTE'),
        ('it', 'MAPPA CITTÀ'),
        ('ja', 'タウンマップ'),
    )
    # TODO raise probably, instead of falling back to None
    language = next(
        (
            code
            for code, expected_name in town_map_by_language
            if town_map.decrypt(code) == expected_name
        ),
        None)

    # Blue is a special case, remember: the language picks the release.
    if game is None and version == 'blue' and language is not None:
        game = 'jp-blue' if language == 'ja' else 'ww-blue'

    # And done!
    return game, language
### From here it's all reified properties that extract on demand
@reify
def pokedex_order(self):
    """Map internal Pokémon indices to the more familiar Pokédex order.

    Keys are 1-based internal indices.  Values are ONE LESS THAN the
    Pokédex number stored in the table, so they can index zero-based lists.

    Raises:
        KeyError: a dex number repeats or lies outside 1..NUM_POKEMON.
        AssertionError: the bytes read ran out before every dex number
            was seen.
    """
    # Gen 1 keeps its own internal numbering, so fetch the conversion table.
    self.stream.seek(self.addrs['PokedexOrder'])

    # The table's exact length isn't known, only that it is longer than
    # the number of Pokémon because of MISSINGNO gaps.  Entries are single
    # bytes, so read a generous chunk and stop once every valid dex number
    # has turned up.
    table = self.stream.read(256)
    pending = set(range(1, self.NUM_POKEMON + 1))
    mapping = {}
    for internal_index, dex_number in enumerate(table, start=1):
        if dex_number != 0:
            mapping[internal_index] = dex_number - 1
            pending.remove(dex_number)
            if not pending:
                break

    assert not pending
    return mapping
@reify
def max_pokemon_index(self):
    """Largest internal Pokémon index that appears in `pokedex_order`.

    Not every index from 0 up to this value is a real Pokémon; many are
    Missingno.  Only the keys of `pokedex_order` are legit.
    """
    valid_indices = self.pokedex_order
    return max(valid_indices)
@reify
def pokemon_names(self):
    """List of Pokémon names, in Pokédex order.

    The list has NUM_POKEMON slots; a slot stays None if no internal index
    maps to that Pokédex position.
    """
    # Resolve the lazily computed properties BEFORE seeking.  Computing
    # pokedex_order seeks and reads self.stream, so touching it (directly
    # or via max_pokemon_index) after the seek below would leave the
    # stream at the wrong offset for the name table.
    dex_order = self.pokedex_order
    max_index = self.max_pokemon_index

    # TODO i don't like this, but they don't have explicit terminators...
    name_length = 5 if self.language == 'ja' else 10

    names = [None] * self.NUM_POKEMON
    self.stream.seek(self.addrs['MonsterNames'])
    raw_names = Array(
        max_index, PokemonCString('...', name_length),
    ).parse_stream(self.stream)

    for index, raw_name in enumerate(raw_names, start=1):
        try:
            dex_id = dex_order[index]
        except KeyError:
            # Not a real Pokémon (a Missingno slot)
            continue
        names[dex_id] = raw_name.decrypt(self.language)

    return names
@reify
def machine_moves(self):
    """List of move identifiers corresponding to TMs/HMs.

    Parses NUM_MACHINES single-byte entries starting at the
    TechnicalMachines address, each translated through MOVE_IDENTIFIERS.
    """
    self.stream.seek(self.addrs['TechnicalMachines'])
    machine_table = Array(
        self.NUM_MACHINES,
        IdentEnum(ULInt8('move'), MOVE_IDENTIFIERS),
    )
    return machine_table.parse_stream(self.stream)
@reify
def pokemon_records(self):
    """List of pokemon_structs.

    The first NUM_POKEMON - 1 entries are parsed from the BaseStats table;
    the final entry is parsed from the separate MewBaseStats address.
    """
    self.stream.seek(self.addrs['BaseStats'])
    main_table = Array(self.NUM_POKEMON - 1, pokemon_struct)
    records = main_table.parse_stream(self.stream)

    # Mew's data is, awkwardly, stored separately
    self.stream.seek(self.addrs['MewBaseStats'])
    mew_record = pokemon_struct.parse_stream(self.stream)
    records.append(mew_record)

    return records
@reify
def pokemon_evos_and_moves(self):
    """List of evos_moves_structs, including both evolutions and level-up
    moves, in Pokédex order.

    The list has NUM_POKEMON slots; a slot stays None if no internal index
    maps to that Pokédex position.
    """
    # Resolve the lazily computed properties BEFORE seeking.  Computing
    # pokedex_order seeks and reads self.stream, so touching it (directly
    # or via max_pokemon_index) after the seek below would leave the
    # stream at the wrong offset for the pointer table.
    dex_order = self.pokedex_order
    max_index = self.max_pokemon_index

    ret = [None] * self.NUM_POKEMON
    self.stream.seek(self.addrs['EvosMovesPointerTable'])
    pointers = Array(max_index, evos_moves_pointer).parse_stream(self.stream)

    for index, pointer in enumerate(pointers, start=1):
        try:
            dex_id = dex_order[index]
        except KeyError:
            # Not a real Pokémon (a Missingno slot)
            continue
        ret[dex_id] = pointer.evos_moves

    return ret
@reify
def pokedex_entries(self):
    """List of pokedex_flavor_structs, in Pokédex order.

    The list has NUM_POKEMON slots; a slot stays None if no internal index
    maps to that Pokédex position.
    """
    # Resolve the lazily computed properties BEFORE seeking.  Computing
    # pokedex_order seeks and reads self.stream, so touching it (directly
    # or via max_pokemon_index) after the seek below would leave the
    # stream at the wrong offset for the pointer table.
    dex_order = self.pokedex_order
    max_index = self.max_pokemon_index

    ret = [None] * self.NUM_POKEMON
    self.stream.seek(self.addrs['PokedexEntryPointers'])
    pointers = Array(max_index, pokedex_flavor_pointer).parse_stream(self.stream)

    for index, pointer in enumerate(pointers, start=1):
        try:
            dex_id = dex_order[index]
        except KeyError:
            # Not a real Pokémon (a Missingno slot)
            continue
        ret[dex_id] = pointer.pokedex_flavor.value

    # TODO: an earlier draft copied height_decimeters, weight_hectograms,
    # species and flavor_text from each entry onto the matching base-stat
    # record right here, but it used names that do not exist in this scope,
    # so it could never run.  Whoever consumes these entries still has to
    # decrypt species/flavor_text with the cart's language, and deal with
    # imperial units (that draft mentioned height_feet, height_inches and
    # weight_pounds fields -- confirm against the struct definition).
    return ret
@reify
def move_names(self):
    """Move names as parsed from the MoveNames table.

    Returns NUM_MOVES parsed PokemonCString values; no decrypt step is
    applied here.
    """
    self.stream.seek(self.addrs['MoveNames'])
    name_table = Array(NUM_MOVES, PokemonCString('move_name'))
    return name_table.parse_stream(self.stream)
class RBYLoader:
    """Holds a group of carts that are meant to be loaded together."""

    def __init__(self, *carts):
        """Remember the given carts, as a tuple, in ``self.carts``."""
        # TODO require all the same game
        self.carts = carts

    def load(self):
        """Placeholder: does nothing yet and returns None."""
        return None
# TODO would be slick to convert this to a construct... construct
def bitfield_to_machines(bits, machine_moves):
    """Return the moves whose bit is set in a machine-compatibility bitfield.

    The least significant bit of ``bits`` corresponds to the first entry of
    ``machine_moves``, the next bit to the second entry, and so on.  Bits
    beyond the end of ``machine_moves`` are ignored.
    """
    return [
        move
        for position, move in enumerate(machine_moves)
        if (bits >> position) & 0x1
    ]
class WriterWrapper:
    """Proxy that forwards attribute reads and writes to a wrapped object.

    ``locus`` (the wrapped object) and ``language`` are stored on the wrapper
    itself.  Every other attribute is read from, and assigned to, ``locus``.
    """

    # Attributes that belong to the wrapper rather than to the wrapped object
    _OWN_ATTRS = ('locus', 'language')

    def __init__(self, locus, language):
        self.locus = locus
        self.language = language

    def __setattr__(self, key, value):
        # The wrapper's own attributes must bypass the forwarding below;
        # otherwise assigning self.locus would need self.locus to already
        # exist and recurse forever through __getattr__.
        if key in self._OWN_ATTRS:
            object.__setattr__(self, key, value)
            return
        # TODO finish this...
        # 1. disallow reassigning an existing attr with a value
        setattr(self.locus, key, value)

    def __getattr__(self, key):
        # Only called when normal lookup fails.  If one of our own
        # attributes is missing (e.g. __init__ never ran), fail cleanly
        # instead of recursing via self.locus.
        if key in self._OWN_ATTRS:
            raise AttributeError(key)
        return getattr(self.locus, key)
def main(root):
    """Extract Pokémon data from the carts named on the command line.

    Every path in ``sys.argv[1:]`` is loaded as an ``RBYCart``.  Data from
    all carts is merged into one ``schema.Pokemon`` per species, and the
    result is dumped as YAML to ``<root>/<game of the first cart>/pokemon.yaml``.

    Args:
        root: ``pathlib.Path`` of the directory under which the per-game
            output directory is created.

    Exits the process with an error message if no carts were given.
    """
    # TODO does this need to take arguments? or like, sprite mode i guess
    carts = [RBYCart(Path(filename)) for filename in sys.argv[1:]]
    if not carts:
        # Without this, carts[0] below dies with a bare IndexError
        sys.exit("No cart images given; pass one or more ROM paths as arguments.")

    output_dir = root / carts[0].game
    # parents=True: the root directory itself may not exist yet
    output_dir.mkdir(parents=True, exist_ok=True)

    # TODO hand the carts to an RBYLoader once that class does something
    pokemons = OrderedDict(
        (POKEMON_IDENTIFIERS[dex_id + 1], schema.Pokemon())
        for dex_id in range(carts[0].NUM_POKEMON)
    )

    for cart in carts:
        for dex_id in range(cart.NUM_POKEMON):
            pokemon = pokemons[POKEMON_IDENTIFIERS[dex_id + 1]]
            # TODO route writes through WriterWrapper once it is finished
            writer = pokemon

            # TODO LOLLLL
            if 'name' not in writer.__dict__:
                writer.name = {}
            writer.name[cart.language] = cart.pokemon_names[dex_id]

            record = cart.pokemon_records[dex_id]
            evos_and_moves = cart.pokemon_evos_and_moves[dex_id]

            # TODO put this in construct
            # A single-typed Pokémon stores the same type twice
            types = [record.type1]
            if record.type1 != record.type2:
                types.append(record.type2)
            writer.types = types

            writer.base_stats = {
                'hp': record.base_hp,
                'attack': record.base_attack,
                'defense': record.base_defense,
                'speed': record.base_speed,
                'special': record.base_special,
            }
            writer.growth_rate = record.growth_rate
            writer.base_experience = record.base_experience
            # TODO also emit pokedex_numbers (kanto=record.pokedex_number)

            # Starting moves are stored with the Pokémon; other level-up
            # moves are stored with evolutions
            level_up_moves = [
                {1: move}
                for move in record.initial_moveset
                # TODO UGH
                if move != '--'
            ]
            level_up_moves.extend(
                {level_up_move.level: level_up_move.move}
                for level_up_move in evos_and_moves.level_up_moves
            )

            # TODO LOLLLL
            if 'moves' not in writer.__dict__:
                writer.moves = {}
            writer.moves['level-up'] = level_up_moves
            writer.moves['machines'] = bitfield_to_machines(
                record.machines, cart.machine_moves)

            # Evolution
            # TODO alas, the species here is a number, because it's an
            # internal id and we switch those back using data from the game...
            # TODO insert the item trigger!
            writer.evolutions = [
                {
                    'into': POKEMON_IDENTIFIERS[
                        cart.pokedex_order[evo_datum.evo_species] + 1],
                    'trigger': evo_datum.evo_trigger,
                    'minimum-level': evo_datum.evo_level,
                }
                for evo_datum in evos_and_moves.evolutions
            ]

    # Names may be Japanese, so don't depend on the locale's default encoding
    with (output_dir / 'pokemon.yaml').open('w', encoding='utf-8') as f:
        f.write(Camel([schema.POKEDEX_TYPES]).dump(pokemons))
# Script entry point: only runs when this file is executed directly.
if __name__ == '__main__':
    # TODO yeah fix this up
    # The output root is hard-coded as a relative path; main() itself reads
    # the ROM paths from sys.argv.
    main(Path('pokedex/data'))