Checkpoint some Sun/Moon progress

The bulk of the data is in the same format as ORAS, so most of the
changes were just tracking down where files moved to.

The code was a mess, and is still a mess.  Oh, well.

- Made pretty good progress on dumping ORAS encounters; remaining work
  largely boils down to figuring out names for individual zones.

- Hacked the CLIM decoder to also work with SUMO's slightly modified box
  sprite format, FLIM.

- Added a Nintendo-flavored ETC1 decoder; this is the format used for
  Pokédex sprites in SUMO.

- Cleaned up sprite dumping a wee bit.
This commit is contained in:
Eevee (Lexy Munroe) 2016-12-07 06:29:44 -08:00
parent 7f7cca6c58
commit b12166648e
4 changed files with 817 additions and 104 deletions

View file

@ -5,7 +5,7 @@ import construct as c
clim_header_struct = c.Struct(
'clim_header',
c.Magic(b'CLIM'),
c.Magic(b'FLIM'), # TODO 'FLIM' in SUMO
c.Const(c.ULInt16('endianness'), 0xfeff),
c.Const(c.ULInt16('header_length'), 0x14),
c.ULInt32('version'),
@ -18,24 +18,26 @@ imag_header_struct = c.Struct(
c.Const(c.ULInt32('section_length'), 0x10),
c.ULInt16('width'),
c.ULInt16('height'),
c.Enum(
c.ULInt32('format'),
L8=0,
A8=1,
LA4=2,
LA8=3,
HILO8=4,
RGB565=5,
RGB8=6,
RGBA5551=7,
RGBA4=8,
RGBA8=9,
ETC1=10,
ETC1A4=11,
L4=12,
A4=13,
#ETC1=19,
)
# TODO this seems to have been expanded into several things in SUMO
#c.Enum(
# c.ULInt32('format'),
# L8=0,
# A8=1,
# LA4=2,
# LA8=3,
# HILO8=4,
# RGB565=5,
# RGB8=6,
# RGBA5551=7,
# RGBA4=8,
# RGBA8=9,
# ETC1=10,
# ETC1A4=11,
# L4=12,
# A4=13,
# #ETC1=19,
#)
)
@ -97,7 +99,7 @@ def uncuddle_paletted_pixels(palette, data):
return data
def untile_pixels(raw_pixels, width, height):
def untile_pixels(raw_pixels, width, height, *, is_flim):
"""Unscramble pixels into plain old rows.
The pixels are arranged in 8×8 tiles, and each tile is a third-
@ -111,6 +113,7 @@ def untile_pixels(raw_pixels, width, height):
stored_height = 2 ** math.ceil(math.log(height) / math.log(2))
num_pixels = stored_width * stored_height
tile_width = stored_width // 8
tile_height = stored_height // 8
pixels = [
[None for x in range(width)]
@ -124,8 +127,15 @@ def untile_pixels(raw_pixels, width, height):
# Find the coordinates of the top-left corner of the current tile.
# n.b. The image is eight tiles wide, and each tile is 8×8 pixels.
tile_num = n // 64
tile_y = tile_num // tile_width * 8
tile_x = tile_num % tile_width * 8
if is_flim:
# The FLIM format seems to pseudo-rotate the entire image to the
# right, so tiles start in the bottom left and go up
tile_y = (tile_height - 1 - (tile_num % tile_height)) * 8
tile_x = tile_num // tile_height * 8
else:
# CLIM has the more conventional right-then-down order
tile_y = tile_num // tile_width * 8
tile_x = tile_num % tile_width * 8
# Determine the pixel's coordinates within the tile
# http://en.wikipedia.org/wiki/Z-order_curve#Coordinate_values
@ -142,6 +152,10 @@ def untile_pixels(raw_pixels, width, height):
(within_tile & 0b100000) >> 3
)
if is_flim:
# Individual tiles are also rotated. Unrotate them
sub_x, sub_y = sub_y, 7 - sub_x
# Add up the pixel's coordinates within the whole image
x = tile_x + sub_x
y = tile_y + sub_y
@ -153,7 +167,19 @@ def untile_pixels(raw_pixels, width, height):
def decode_clim(data):
file_format = data[-40:-36]
if file_format == b'CLIM':
is_flim = False
elif file_format == b'FLIM':
is_flim = True
else:
raise ValueError("Unknown image format {}".format(file_format))
imag_header = imag_header_struct.parse(data[-20:])
if is_flim:
# TODO SUMO hack; not sure how to get format out of this header
imag_header.format = 'RGBA5551'
if imag_header.format not in COLOR_DECODERS:
raise ValueError(
"don't know how to decode {} pixels".format(imag_header.format))
@ -174,5 +200,6 @@ def decode_clim(data):
scrambled_pixels,
imag_header.width,
imag_header.height,
is_flim=is_flim,
)
return imag_header.width, imag_header.height, color_depth, palette, pixels

111
pokedex/extract/lib/etc1.py Normal file
View file

@ -0,0 +1,111 @@
"""Parse ETC1, a terrible micro block-based image compression format.
Please enjoy the docs.
https://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
"""
import io
import itertools
# Signed value of a 3-bit two's-complement field, indexed by its raw bits.
# Used for the per-channel delta of the second base color in differential mode.
three_bit_twos_complement = [0, 1, 2, 3, -4, -3, -2, -1]

# ETC1 intensity modifier tables, selected by a 3-bit table codeword.  Each
# row is indexed by a pixel's 2-bit index (2 * high bit + low bit).
etc1_modifier_tables = [
    (2, 8, -2, -8),
    (5, 17, -5, -17),
    (9, 29, -9, -29),
    (13, 42, -13, -42),
    (18, 60, -18, -60),
    (24, 80, -24, -80),
    (33, 106, -33, -106),
    (47, 183, -47, -183),
]


def _alpha_nybbles(alpha_bytes):
    """Yield one 8-bit alpha value per packed 4-bit value, low nybble first."""
    for byte in alpha_bytes:
        # Multiplying by 17 (0x11) replicates the nybble into both halves of
        # the byte, so 0x0 -> 0x00 and 0xf -> 0xff (fully opaque).
        yield (byte & 0xf) * 17
        yield (byte >> 4) * 17


def _decode_etc1_block(row, outpixels, originx, originy):
    """Decode one 16-byte alpha + ETC1 block into a 4×4 patch of outpixels.

    ``row`` is 8 bytes of packed 4-bit alpha followed by the 8-byte ETC1 block
    stored least-significant byte first: bytes 8-9 are the low pixel-index
    bits, 10-11 the high pixel-index bits, 12 the control byte, and 13/14/15
    the blue/green/red base color bytes.
    """
    alpha = _alpha_nybbles(row[:8])
    control = row[12]
    diffbit = control & 2
    flipbit = control & 1
    lopixelbits = int.from_bytes(row[8:10], 'little')
    hipixelbits = int.from_bytes(row[10:12], 'little')

    # Build both base colors channel by channel, in (red, green, blue) order
    base1 = []
    base2 = []
    for byte in (row[15], row[14], row[13]):
        if diffbit:
            # Differential mode: a 5-bit color plus a signed 3-bit delta
            first = byte >> 3
            second = max(0, first + three_bit_twos_complement[byte & 0x7])
            base1.append((first << 3) | (first >> 2))
            base2.append((second << 3) | (second >> 2))
        else:
            # Individual mode: two independent 4-bit colors
            first = byte >> 4
            second = byte & 0xf
            base1.append((first << 4) | first)
            base2.append((second << 4) | second)

    table1 = etc1_modifier_tables[control >> 5]
    table2 = etc1_modifier_tables[(control >> 2) & 0x7]

    # Pixels (and their alpha nybbles) are stored column by column
    for c in range(4):
        for r in range(4):
            if flipbit:
                # Horizontal split: top two rows vs. bottom two rows
                in_first_half = r < 2
            else:
                # Vertical split: left two columns vs. right two columns
                in_first_half = c < 2
            if in_first_half:
                table, base = table1, base1
            else:
                table, base = table2, base2

            pixelbit = c * 4 + r
            idx = (
                2 * ((hipixelbits >> pixelbit) & 1)
                + ((lopixelbits >> pixelbit) & 1)
            )
            mod = table[idx]
            color = tuple(min(255, max(0, b + mod)) for b in base)
            outpixels[originy + r][originx + c] = color + (next(alpha),)


def decode_etc1(data):
    """Decode a 128×128 Nintendo-flavored ETC1 image with 4-bit alpha.

    The first 0x80 bytes of ``data`` are skipped without being interpreted.
    The rest is read as 8×8 tiles in row-major order; each tile is four
    16-byte blocks (top-left, top-right, bottom-left, bottom-right), each
    covering 4×4 pixels.

    Returns a tuple ``(width, height, 4, None, pixels)``, where ``pixels`` is
    a list of rows and every pixel is an ``(r, g, b, a)`` tuple of 0-255
    values.  Alpha is expanded from four bits by bit replication, so a fully
    opaque pixel has alpha 255.

    Raises RuntimeError if ``data`` ends before every block has been read.
    """
    # TODO sizes are hardcoded here
    width = height = 128
    f = io.BytesIO(data)
    f.read(0x80)

    outpixels = [[None] * width for _ in range(height)]
    for blocky in range(0, height, 8):
        for blockx in range(0, width, 8):
            for z in range(4):
                row = f.read(16)
                if len(row) < 16:
                    raise RuntimeError(
                        "ETC1 data ended early: expected a 16-byte block, "
                        "got {} bytes".format(len(row)))
                # Odd z is the right half of the tile; z >= 2 is the bottom
                _decode_etc1_block(
                    row, outpixels,
                    blockx + 4 * (z & 1),
                    blocky + 4 * (z >> 1),
                )

    return width, height, 4, None, outpixels

View file

@ -4,6 +4,7 @@ filesystem.
Based on code by Zhorken: https://github.com/Zhorken/pokemon-x-y-icons
and Kaphotics: https://github.com/kwsch/GARCTool
"""
from collections import Counter
from io import BytesIO
from pathlib import Path
import struct
@ -27,13 +28,15 @@ def count_bits(n):
garc_header_struct = c.Struct(
'garc_header',
c.Magic(b'CRAG'),
c.Const(c.ULInt32('header_size'), 0x1c),
c.ULInt32('header_size'), # 28 in XY, 36 in SUMO
c.Const(c.ULInt16('byte_order'), 0xfeff),
c.Const(c.ULInt16('mystery1'), 0x0400),
c.Const(c.ULInt32('chunks_ct'), 4),
c.ULInt16('mystery1'), # 0x0400 in XY, 0x0600 in SUMO
#c.Const(c.ULInt32('chunks_ct'), 4),
c.ULInt32('chunks_ct'),
c.ULInt32('data_offset'),
c.ULInt32('garc_length'),
c.ULInt32('last_length'),
c.Field('unknown_sumo_stuff', lambda ctx: ctx.header_size - 28),
)
fato_header_struct = c.Struct(
'fato_header',
@ -76,8 +79,7 @@ class GARCFile(_ContainerFile):
while bits:
if bits & 1:
start, end, length = struct.unpack('<3L', stream.read(12))
assert end - 4 < start + length <= end
slices.append((garc_header.data_offset + start, length))
slices.append((garc_header.data_offset + start, end - start))
bits >>= 1
self.slices.append(GARCEntry(stream, slices))
@ -245,19 +247,95 @@ def main(args):
args.cb(args)
def detect_subfile_type(subfile):
    """Guess what kind of data a subfile holds from its first 16 bytes.

    Returns the leading four bytes as a string if they are all ASCII
    alphanumerics (e.g. 'CLIM'), else the leading two bytes if those are
    (e.g. 'PC'), else 'gen 6 text' if the two little-endian lengths at
    offsets 4 and 12 add up to the subfile's total length, else None.

    ``subfile`` must support ``peek(n)`` and ``len()``.
    """
    peeked = subfile.peek(16)
    tag = peeked[0:4]

    # Try the four-byte magic (CLIM) first, then the two-byte one (PC)
    for candidate in (tag, tag[:2]):
        if candidate.isalnum():
            return candidate.decode('ascii')

    # Too short to carry the text header fields checked below
    if len(peeked) < 16:
        return None

    # Encrypted X/Y text?
    text_length = int.from_bytes(peeked[4:8], 'little')
    header_length = int.from_bytes(peeked[12:16], 'little')
    if text_length + header_length == len(subfile):
        return 'gen 6 text'

    return None
def do_inspect(args):
root = Path(args.path)
if root.is_dir():
for path in sorted(root.glob('**/*')):
if path.is_dir():
continue
shortname = str(path.relative_to(root))
if len(shortname) > 12:
shortname = '...' + shortname[-9:]
stat = path.stat()
print("{:>12s} {:>10d} ".format(shortname, stat.st_size), end='')
if stat.st_size == 0:
print("empty file")
continue
with path.open('rb') as f:
try:
garc = GARCFile(f)
except Exception as exc:
print("{}: {}".format(type(exc).__name__, exc))
continue
total_subfiles = 0
magic_ctr = Counter()
size_ctr = Counter()
for i, topfile in enumerate(garc):
for j, subfile in enumerate(topfile):
total_subfiles += 1
size_ctr[len(subfile)] += 1
magic_ctr[detect_subfile_type(subfile)] += 1
print("{} subfiles".format(total_subfiles), end='')
if total_subfiles > len(garc):
print(" (some nested)")
else:
print()
cutoff = max(total_subfiles // 10, 2)
for magic, ct in magic_ctr.most_common():
if ct < cutoff:
break
print(" " * 24, "{:4d} x {:>9s}".format(ct, magic or 'unknown'))
for size, ct in size_ctr.most_common():
if ct < cutoff:
break
print(" " * 24, "{:4d} x {:9d}".format(ct, size))
return
with open(args.path, 'rb') as f:
garc = GARCFile(f)
for i, topfile in enumerate(garc):
print("File #{}, {} entr{}".format(
i, len(topfile), 'y' if len(topfile) == 1 else 'ies'))
for j, subfile in enumerate(topfile):
print(' ', j, len(subfile), end='')
if subfile.peek(2) == b'PC':
print("{:4d}/{:<4d} {:7d}B".format(i, j, len(subfile)), end='')
magic = detect_subfile_type(subfile)
if magic == 'PC':
print(" -- appears to be a PC file (generic container)")
pcfile = PokemonContainerFile(subfile)
for k, entry in enumerate(pcfile):
print(' ', repr(entry.read(50)))
elif magic == 'gen 6 text':
# TODO turn this into a generator so it doesn't have to
# parse the whole thing? need length though
texts = decrypt_xy_text(subfile.read())
print(" -- X/Y text, {} entries: {!r}".format(len(texts), texts[:5]), texts[-5:])
else:
print('', repr(subfile.read(50)))

View file

@ -22,11 +22,14 @@ import yaml
from .lib.garc import GARCFile, decrypt_xy_text
from .lib.text import merge_japanese_texts
# TODO auto-detect rom vs romfs vs... whatever
# TODO fix some hardcoding in here
# TODO finish converting garc parsing to use construct, if possible, i think (i would not miss substream)
# way way more sprite work in here...
# TODO would be nice to have meaningful names for the file structure instead of sprinkling hardcoded ones throughout
CANON_LANGUAGES = ('ja', 'en', 'fr', 'it', 'de', 'es', 'ko')
ORAS_SCRIPT_FILES = {
@ -39,6 +42,18 @@ ORAS_SCRIPT_FILES = {
'es': 'rom/a/0/7/7',
'ko': 'rom/a/0/7/8',
}
# Sun/Moon counterpart of ORAS_SCRIPT_FILES: maps a language key to the path
# (relative to the extraction root) of the GARC holding that language's text.
# Japanese is stored twice, as kana and as kanji; extract_data merges the two
# into a single 'ja' entry.
# NOTE(review): the two zh-* keys have no counterpart in CANON_LANGUAGES --
# confirm how downstream code is meant to treat them.
SUMO_SCRIPT_FILES = {
'ja-kana': 'rom/a/0/3/0',
'ja-kanji': 'rom/a/0/3/1',
'en': 'rom/a/0/3/2',
'fr': 'rom/a/0/3/3',
'it': 'rom/a/0/3/4',
'de': 'rom/a/0/3/5',
'es': 'rom/a/0/3/6',
'ko': 'rom/a/0/3/7',
'zh-simplified': 'rom/a/0/3/8',
'zh-traditional': 'rom/a/0/3/9',
}
ORAS_SCRIPT_ENTRIES = {
'form-names': 5,
# TODO these might be backwards, i'm just guessing
@ -53,13 +68,61 @@ ORAS_SCRIPT_ENTRIES = {
'ability-flavor': 36,
'ability-names': 37,
'nature-names': 51,
# Note that these place names come in pairs, in order to support X/Y's
# routes, which had both numbers and traditional street names
# TODO oughta rip those too!
'zone-names': 90,
'species-names': 98,
# 113: item names, with macros to branch for pluralization
# 114: copy of item names, but with "PP" in latin in korean (?!)
# 115: item names in plural (maybe interesting?)
'item-names': 116, # singular
'item-flavor': 117,
}
# Sun/Moon counterpart of ORAS_SCRIPT_ENTRIES: maps the name we give a text
# table to the index of the GARC entry that holds it.  extract_data reads
# garc[index][0] from each language's script file and decrypts it.
# Several comments below were carried over from the ORAS table and may not
# have been re-verified against Sun/Moon.
SUMO_SCRIPT_ENTRIES = {
# 2: bag pockets
# 81: ribbons
'form-names': 114,
# TODO a lot of these are missing
'species-flavor-sun': 119,
'species-flavor-moon': 120,
'move-contest-flavor': 109,
'move-names': 113,
# TODO 19 is z-move names
# Note: table 15 is also a list of move names, but with a few at the end
# missing? XY leftovers?
'move-flavor': 112,
'type-names': 107,
'ability-flavor': 97,
'ability-names': 96,
'nature-names': 87,
# Note that these place names come in pairs, in order to support X/Y's
# routes, which had both numbers and traditional street names
# TODO oughta rip those too!
'zone-names': 67,
# NOTE: 67 through 70 could be zone names, but could also be "where caught"
# names for Pokémon
'species-names': 55,
'pokemon-height-flavor': 115,
'genus-names': 116,
'pokemon-weight-flavor': 117,
'trainer-class-names': 106,
'berry-names': 65,
# 49 might be pokédex colors? or maybe clothing colors
# 38: item names, with macros to branch for pluralization
# NOTE(review): the next line says 114, but 114 is 'form-names' above; it
# looks like a leftover ORAS index -- confirm the Sun/Moon table number
# 114: copy of item names, but with "PP" in latin in korean (?!)
# 37: item names in plural (maybe interesting?)
'item-names': 36, # singular
'item-flavor': 35,
}
# The first element in each list is the name of the BASE form -- if it's not
# None, the base form will be saved under two filenames
ORAS_EXTRA_SPRITE_NAMES = {
# Cosplay Pikachu
25: (None, 'rockstar', 'belle', 'popstar', 'phd', 'libre', 'cosplay'),
25: (None, 'rock-star', 'belle', 'pop-star', 'phd', 'libre', 'cosplay'),
# Unown
201: tuple('abcdefghijklmnopqrstuvwxyz') + ('exclamation', 'question'),
# Castform
@ -179,14 +242,16 @@ pokemon_struct = Struct(
ULInt32('tutors'),
ULInt16('mystery1'),
ULInt16('mystery2'),
ULInt32('bp_tutors1'),
ULInt32('bp_tutors2'),
ULInt32('bp_tutors3'),
ULInt32('bp_tutors4'),
ULInt32('bp_tutors1'), # unused in sumo
ULInt32('bp_tutors2'), # unused in sumo
ULInt32('bp_tutors3'), # unused in sumo
ULInt32('bp_tutors4'), # sumo: big numbers for pikachu, eevee, snorlax, mew, starter evos, couple others?? maybe special z-move item?
# TODO sumo is four bytes longer, not sure why, find out if those bytes are anything and a better way to express them
OptionalGreedyRange(Magic(b'\x00')),
)
pokemon_mega_evolutions_struct = Array(
3,
2, # NOTE: 3 for XY/ORAS, but i don't think the third is ever populated?
Struct(
'pokemon_mega_evolutions',
ULInt16('number'),
@ -205,6 +270,16 @@ egg_moves_struct = Struct(
),
)
# Sun/Moon layout of an egg move list: a 16-bit 'first_form_id', then a 16-bit
# count followed by that many 16-bit move ids.
# NOTE(review): this rebinds the name egg_moves_struct, so any earlier
# definition of it in this module is no longer reachable -- the extra leading
# field is SUMO-only, so ORAS data presumably cannot be parsed with this.
egg_moves_struct = Struct(
'egg_moves',
ULInt16('first_form_id'), # TODO SUMO ONLY
ULInt16('count'),
Array(
# Length of the array is whatever 'count' was just parsed as
lambda ctx: ctx.count,
ULInt16('moveids'),
),
)
level_up_moves_struct = OptionalGreedyRange(
Struct(
'level_up_pair',
@ -273,6 +348,236 @@ pokemon_sprite_struct = Struct(
ULInt16('right_count'),
)
# One encounter slot: a 16-bit Pokémon id and a one-byte min and max level
# (four bytes in total).
# NOTE(review): the disabled encounter-dumping code in extract_data treats the
# low 9 bits of pokemon_id (& 0x1ff) as the species and the remaining upper
# bits (>> 9) as form data, which is 7 bits rather than the 5 mentioned in the
# TODO below -- confirm which is right.
encounter_struct = Struct(
'encounter',
# TODO top 5 bits are form stuff
ULInt16('pokemon_id'),
ULInt8('min_level'),
ULInt8('max_level'),
)
# The encounter table for one zone: nine one-byte rates (one per encounter
# method), five zero bytes, 61 encounter slots, and two trailing zero bytes.
# The rate field names must stay in sync with the method names in
# ORAS_ENCOUNTER_SLOTS, since the dumping code looks each one up as
# method.replace('-', '_') + '_rate'.
encounter_table_struct = Struct(
'encounter_table',
ULInt8('walk_rate'),
ULInt8('long_grass_rate'),
ULInt8('hidden_rate'),
ULInt8('surf_rate'),
ULInt8('rock_smash_rate'),
ULInt8('old_rod_rate'),
ULInt8('good_rod_rate'),
ULInt8('super_rod_rate'),
ULInt8('horde_rate'),
# Parsing fails unless these five bytes are all zero
Magic(b'\x00' * 5),
# 61 matches the total slot count in ORAS_ENCOUNTER_SLOTS:
# 12 + 12 + 3 + 5 + 5 + 3 + 3 + 3 + (3 hordes * 5) = 61
Array(61, encounter_struct),
Magic(b'\x00' * 2),
)
# How the flat list of encounter slots in encounter_table_struct is divided up
# between encounter methods.  Order matters: the dumping code walks this list
# and consumes slots sequentially.  Each entry is (method, chances), with one
# element of chances per slot; an element is either a bare number or, for
# hordes, a (chance, group size) tuple that consumes 'group size' slots.
# NOTE(review): the numbers in each row sum to 100, so they are presumably
# percentages -- confirm.
ORAS_ENCOUNTER_SLOTS = [
('walk', (10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 4, 1)),
('long-grass', (10, 10, 10, 10, 10, 10, 10, 10, 10, 5, 4, 1)),
('hidden', (60, 35, 5)), # TODO guessing here!
('surf', (50, 30, 15, 4, 1)),
('rock-smash', (50, 30, 15, 4, 1)),
('old-rod', (60, 35, 5)),
('good-rod', (60, 35, 5)),
('super-rod', (60, 35, 5)),
('horde', ((60, 5), (35, 5), (5, 5))),
]
# The only thing really linking ORAS zones together is that they share the same
# overall location/place name, so use the index of that name as a key to match
# to an extant location.
# Keys are indexes into the 'zone-names' text table; values are existing veekun
# location identifiers.  Every key is even because place names come in pairs
# (name, alternate name), so each place takes up two consecutive entries.
# Commented-out entries just record which name lives at that index; they have
# no identifier assigned here, so the dumping code falls back to generating
# one from the English name.
ORAS_ZONE_NAME_INDEX_TO_VEEKUN_LOCATION = {
#170: Littleroot Town
#172: Oldale Town
174: 'dewford-town',
#176: Lavaridge Town
#178: Fallarbor Town
#180: Verdanturf Town
#182: Pacifidlog Town
184: 'petalburg-city',
186: 'slateport-city',
#188: Mauville City
#190: Rustboro City
#192: Fortree City
194: 'lilycove-city',
196: 'mossdeep-city',
198: 'sootopolis-city',
200: 'ever-grande-city',
#202: Pokémon League
204: 'hoenn-route-101',
206: 'hoenn-route-102',
208: 'hoenn-route-103',
210: 'hoenn-route-104',
212: 'hoenn-route-105',
214: 'hoenn-route-106',
216: 'hoenn-route-107',
218: 'hoenn-route-108',
220: 'hoenn-route-109',
222: 'hoenn-route-110',
224: 'hoenn-route-111',
226: 'hoenn-route-112',
228: 'hoenn-route-113',
230: 'hoenn-route-114',
232: 'hoenn-route-115',
234: 'hoenn-route-116',
236: 'hoenn-route-117',
238: 'hoenn-route-118',
240: 'hoenn-route-119',
242: 'hoenn-route-120',
244: 'hoenn-route-121',
246: 'hoenn-route-122',
248: 'hoenn-route-123',
250: 'hoenn-route-124',
252: 'hoenn-route-125',
254: 'hoenn-route-126',
256: 'hoenn-route-127',
258: 'hoenn-route-128',
260: 'hoenn-route-129',
262: 'hoenn-route-130',
264: 'hoenn-route-131',
266: 'hoenn-route-132',
268: 'hoenn-route-133',
270: 'hoenn-route-134',
272: 'meteor-falls',
274: 'rusturf-tunnel',
#276: ???
#278: Desert Ruins
280: 'granite-cave',
282: 'petalburg-woods',
#284: Mt. Chimney
286: 'jagged-pass',
288: 'fiery-path',
290: 'mt-pyre',
#292: Team Aqua Hideout
294: 'seafloor-cavern',
296: 'cave-of-origin',
298: 'hoenn-victory-road',
300: 'shoal-cave',
302: 'new-mauville',
#304: Sea Mauville
#306: Island Cave
#308: Ancient Tomb
#310: Sealed Chamber
#312: Scorched Slab
#314: Team Magma Hideout
316: 'sky-pillar',
#318: Battle Resort
#320: Southern Island
# TODO is this "abandoned-ship" from rse?
#322: S.S. Tidal
324: 'hoenn-safari-zone',
#326: Mirage Forest
#328: Mirage Cave
#330: Mirage Island
#332: Mirage Mountain
#334: Trackless Forest
#336: Pathless Plain
#338: Nameless Cavern
#340: Fabled Cave
#342: Gnarled Den
#344: Crescent Isle
#346: Secret Islet
#348: Soaring in the sky
#350: Secret Shore
#352: Secret Meadow
#354: Secret Base
}
# Names for individual zones within a place that has more than one zone.
# Keys are zone indexes (the position of the zone's record in the zone-data
# GARC); values are used as the zone's sub-identifier.  The dumping code falls
# back to "oras-unknown-<index>" for a zone that isn't listed here.
# Entries are grouped by place rather than sorted by index.
# TODO wait, in the yaml thing, where do the fanon names for these go?
ORAS_ZONE_INDEX_TO_VEEKUN_AREA = {
# TODO oops i should be actually mapping these to areas in rse. many of
# them aren't split the same way, though. uh oh. if we make areas a more
# first-class thing, then... how do we deal with this? e.g. route 104 is
# two zones in oras but only one zone in rse. it's easy enough to fudge
# that with encounters, but what do you do about events etc?
26: 'hoenn-route-104--north',
27: 'hoenn-route-104--south',
# TODO should i, maybe, indicate the type of terrain an area has...?
30: 'hoenn-route-107',
64: 'hoenn-route-107--underwater',
# NOTE: split from rse
38: 'hoenn-route-112--north', # route 111 side
39: 'hoenn-route-112--south', # lavaridge town side
35: 'hoenn-route-111',
# NOTE: split from rse
37: 'hoenn-route-111--desert',
48: 'hoenn-route-120',
# NOTE: new
49: 'hoenn-route-120--tomb-area',
53: 'hoenn-route-124',
65: 'hoenn-route-124--underwater',
55: 'hoenn-route-126',
66: 'hoenn-route-126--underwater',
57: 'hoenn-route-128',
# NOTE: new
68: 'hoenn-route-128--underwater',
58: 'hoenn-route-129',
# NOTE: new
69: 'hoenn-route-129--underwater',
59: 'hoenn-route-130',
# NOTE: new
70: 'hoenn-route-130--underwater',
71: 'meteor-falls',
74: 'meteor-falls--backsmall-room', # TODO this name is dumb
# NOTE: indistinguishable
72: 'meteor-falls--back',
73: 'meteor-falls--b1f',
78: 'granite-cave--1f',
79: 'granite-cave--b1f',
80: 'granite-cave--b2f',
# NOTE: indistinguishable
86: 'mt-pyre--1f',
87: 'mt-pyre--2f',
88: 'mt-pyre--3f',
89: 'mt-pyre--4f',
90: 'mt-pyre--outside',
# NOTE: indistinguishable; split from rse
91: 'mt-pyre--summit-south',
533: 'mt-pyre--summit-north',
# NOTE: many sets of these are indistinguishable; ALL split from rse
99: 'seafloor-cavern--entrance',
100: 'seafloor-cavern--room-1',
101: 'seafloor-cavern--room-2',
102: 'seafloor-cavern--room-5',
103: 'seafloor-cavern--room-6',
104: 'seafloor-cavern--room-3',
105: 'seafloor-cavern--room-7',
106: 'seafloor-cavern--room-4',
107: 'seafloor-cavern--room-8',
108: 'seafloor-cavern--room-9',
109: 'seafloor-cavern--room-10',
# NOTE: indistinguishable
112: 'cave-of-origin--entrance',
113: 'cave-of-origin--1f',
114: 'cave-of-origin--b1f',
115: 'cave-of-origin--b2f',
116: 'cave-of-origin--b3f',
# NOTE: new? rse had this room but had no encounters in it
452: 'cave-of-origin--b4f',
# NOTE: indistinguishable
123: 'hoenn-victory-road--entrance', # NOTE: new
124: 'hoenn-victory-road--1f',
125: 'hoenn-victory-road--b1f',
# NOTE: new; rse had b2f instead
126: 'hoenn-victory-road--2f',
}
# There are 63 tutor move bits in use, but only 60 move tutors -- the moves
# appear to be largely inherited from B2W2 but these are just not exposed in
# ORAS
@ -290,6 +595,11 @@ ORAS_NORMAL_MOVE_TUTORS = (
)
# TODO ripe for being put in the pokedex codebase itself
def make_identifier(english_name):
    """Turn an English name into a lowercase, hyphen-separated identifier.

    Every run of periods and/or spaces becomes a single hyphen, and hyphens
    produced at either end are dropped, so "Mr. Mime" becomes "mr-mime" and
    "Mime Jr." becomes "mime-jr" rather than "mime-jr-".
    """
    # TODO do nidoran too
    return re.sub(r'[. ]+', '-', english_name.lower()).strip('-')
@contextmanager
def read_garc(path):
with path.open('rb') as f:
@ -345,18 +655,22 @@ def extract_data(root, out):
# -------------------------------------------------------------------------
# Names and flavor text
texts = {}
for lang, fn in ORAS_SCRIPT_FILES.items():
#for lang, fn in ORAS_SCRIPT_FILES.items():
for lang, fn in SUMO_SCRIPT_FILES.items():
texts[lang] = {}
with read_garc(root / fn) as garc:
for entryname, entryid in ORAS_SCRIPT_ENTRIES.items():
#for entryname, entryid in ORAS_SCRIPT_ENTRIES.items():
for entryname, entryid in SUMO_SCRIPT_ENTRIES.items():
entry = garc[entryid][0]
texts[lang][entryname] = decrypt_xy_text(entry.read())
# Japanese text is special! It's written in both kanji and kana, and we
# want to combine them
texts['ja'] = {}
for entryname in ORAS_SCRIPT_ENTRIES:
#for entryname in ORAS_SCRIPT_ENTRIES:
for entryname in SUMO_SCRIPT_ENTRIES:
kanjis = texts['ja-kanji'][entryname]
kanas = texts['ja-kana'][entryname]
# But not if they're names of things.
@ -364,11 +678,10 @@ def extract_data(root, out):
# case, what do we do? we want to ultimately put these in urls and
# whatnot, right, but we don't want furigana there :S do we need a
# separate "identifier" field /per language/?)
if entryname.endswith('names'):
assert kanjis == kanas
assert len(kanas) == len(kanjis)
if kanjis == kanas:
texts['ja'][entryname] = kanjis
else:
assert len(kanas) == len(kanjis)
texts['ja'][entryname] = [
merge_japanese_texts(kanji, kana)
for (kanji, kana) in zip(kanjis, kanas)
@ -377,21 +690,23 @@ def extract_data(root, out):
del texts['ja-kana']
identifiers = {}
identifiers['species'] = [
# TODO better identifier creation, to be determined later, but surely
# want to lose . and '
# TODO handling forms here is awkward since the form names are
# contained in the personal struct
((species_name or '') + '-' + form_name).lower().replace(' ', '-')
for (species_name, form_name) in itertools.zip_longest(
identifiers['species'] = []
for i, (species_name, form_name) in enumerate(itertools.zip_longest(
texts['en']['species-names'],
texts['en']['form-names'],
)
]
)):
if species_name:
ident = make_identifier(species_name)
else:
# TODO proooooobably fix this
ident = 'uhhhhh'
#print("??????", i, species_name, form_name)
if form_name:
ident = ident + '-' + make_identifier(form_name)
# TODO hold up, how are these /species/ identifiers?
identifiers['species'].append(ident)
identifiers['move'] = [
# TODO better identifier creation, to be determined later, but surely
# want to lose . and '
name.lower().replace(' ', '-')
make_identifier(name)
for name in texts['en']['move-names']
]
@ -404,10 +719,164 @@ def extract_data(root, out):
# TODO need to skip slot 0 which is junk
dump_to_yaml(texts[lang], f)
"""
# Encounters
# TODO move mee elsewheeere -- actually all of these should be in their own pieces
places = OrderedDict()
name_index_to_place = {}
name_index_counts = Counter()
zones = {}
zone_to_name_index = {}
with read_garc(root / 'rom/a/0/1/3') as garc:
# Fetch the pointer table from the encounter file first, mostly so we
# can figure out which zones have no encounters at all. For whatever
# reason, a zone with no encounters still has data -- but it uses the
# same pointer as the following zone. I don't know if the pointers
# were intended to be used as ranges or what, but it's a handy signal.
f = garc[-1][0]
# TODO SIGH, translate this to construct, i guess
magic = f.read(2)
assert magic == b'EN'
num_records = int.from_bytes(f.read(2), 'little')
encounter_pointers = []
for n in range(num_records):
encounter_pointers.append(int.from_bytes(f.read(4), 'little'))
empty_zones = set()
for n in range(num_records - 1):
if encounter_pointers[n] == encounter_pointers[n + 1]:
empty_zones.add(n)
# Every file in this GARC is ZO (zonedata) except the last one, which
# is a table of encounters for each zone.
num_zones = len(garc) - 1
for z in range(num_zones):
if z in empty_zones:
# TODO later we may want these, to hang events off of etc
continue
zone = OrderedDict()
zone['game-index'] = z
zones[z] = zone
# TODO probably worth trying to parse this stuff for real later
data = garc[z][0].read()
name_index = int.from_bytes(data[56:58], 'little')
name_bits = name_index >> 9
name_index &= 0x1ff
zone_to_name_index[z] = name_index
name_index_counts[name_index] += 1
# Create places as we go, but DO NOT assign zones to places yet,
# since the logic for figuring out zone identifiers is different
# for places with only one zone
if name_index not in name_index_to_place:
place = OrderedDict()
place['unknown--gen6-name-bits'] = name_bits
place['name'] = OrderedDict()
place['alternate-name'] = OrderedDict()
for language in CANON_LANGUAGES:
name, altname = (
texts[language]['zone-names'][name_index:name_index + 2])
place['name'][language] = name
if altname:
place['alternate-name'][language] = altname
# Drop this dict entirely if there are no alt names
if not place['alternate-name']:
del place['alternate-name']
name_index_to_place[name_index] = place
ident = ORAS_ZONE_NAME_INDEX_TO_VEEKUN_LOCATION.get(name_index)
if not ident:
# Not in veekun yet...
place['veekun--new'] = True
ident = make_identifier(place['name']['en'])
places[ident] = place
# TODO ugh
place['_identifier'] = ident
place['zones'] = OrderedDict()
# Some encounters are used more than once
seen_encounters = {}
for z, ptr in enumerate(encounter_pointers):
if z in empty_zones:
continue
zone = zones[z]
name_index = zone_to_name_index[z]
place = name_index_to_place[name_index]
# Now we have all the zones, so we can figure out identifiers and
# assign the zone to its parent place
identifier = place['_identifier']
if name_index_counts[name_index] > 1:
# TODO are these names /sometimes/ official? e.g. doesn't
# "B1F" appear sometimes?
subidentifier = ORAS_ZONE_INDEX_TO_VEEKUN_AREA.get(z)
if not subidentifier:
subidentifier = "oras-unknown-{}".format(z)
identifier = "{}--{}".format(identifier, subidentifier)
place['zones'][identifier] = zone
# Snag the actual encounters, if any.
zone['encounters'] = OrderedDict()
# TODO dumb hack for soaring through the sky, which is... nothing
if not f.read(1):
continue
f.seek(ptr)
encounter_table = encounter_table_struct.parse_stream(f)
n = 0
for method, chances in ORAS_ENCOUNTER_SLOTS:
rate_attr = method.replace('-', '_') + '_rate'
rate = getattr(encounter_table, rate_attr)
# TODO where does rate fit in here?
if rate == 0:
# TODO wrong for hordes
n += len(chances)
continue
encounters = zone['encounters'][method] = []
for chance in chances:
if isinstance(chance, tuple):
chance, groupsize = chance
else:
groupsize = 1
encounter = []
for _ in range(groupsize):
enc = encounter_table.encounter[n]
# TODO assert always zero when rate is zero, never zero when rate isn't
if enc.pokemon_id != 0:
if enc.min_level == enc.max_level:
levels = str(enc.min_level)
else:
levels = "{} - {}".format(enc.min_level, enc.max_level)
pokemon_ident = identifiers['species'][enc.pokemon_id & 0x1ff]
pokemon_form_bits = enc.pokemon_id >> 9
# TODO maybe turn this into, i have no idea, a
# custom type? something forcibly short??
# TODO what do i do with the form bits?
encounter.append("{} {}".format(pokemon_ident, levels))
n += 1
if groupsize == 1:
encounters.extend(encounter)
else:
encounters.append(encounter)
with (out / 'places.yaml').open('w') as f:
dump_to_yaml(places, f)
return
"""
# -------------------------------------------------------------------------
# Scrape some useful bits from the binary
with (root / 'exe/code.bin').open('rb') as f:
# Tutored moves
# TODO i think these are oras only? do they exist in sumo? xy?
tutor_moves = dict(tutors=ORAS_NORMAL_MOVE_TUTORS)
f.seek(0x004960f8)
for n in range(1, 5):
@ -421,7 +890,8 @@ def extract_data(root, out):
# TMs
machines = []
f.seek(0x004a67ee)
#f.seek(0x004a67ee) # ORAS
f.seek(0x0049795a) # SUMO
machineids = struct.unpack('<107H', f.read(2 * 107))
# Order appears to be based on some gen 4 legacy: TMs 1 through 92, HMs
# 1 through 6, then the other eight TMs and the last HM. But the bits
@ -439,15 +909,18 @@ def extract_data(root, out):
# -------------------------------------------------------------------------
# Pokémon structs
# TODO SUMO 0/1/8 seems to contain the index for the "base" species
pokemon_data = []
with read_garc(root / 'rom/a/1/9/5') as garc:
with read_garc(root / 'rom/a/0/1/7') as garc: # SUMO
#with read_garc(root / 'rom/a/1/9/5') as garc: # ORAS
personals = [subfile[0].read() for subfile in garc]
_pokemon_forms = {} # "real" species id => (base species id, form name id)
_next_name_form_id = 723
_next_name_form_id = 723 # TODO magic number
for i, personal in enumerate(personals[:-1]):
record = pokemon_struct.parse(personal)
# TODO transform to an OD somehow probably
pokemon_data.append(record)
print(i, hex(record.bp_tutors4))
#print("{:3d} {:15s} {} {:5d} {:5d}".format(
# i,
# identifiers['species'][baseid],
@ -483,7 +956,8 @@ def extract_data(root, out):
# -------------------------------------------------------------------------
# Move stats
movesets = OrderedDict()
with read_garc(root / 'rom/a/1/8/9') as garc:
with read_garc(root / 'rom/a/0/1/1') as garc: # SUMO
#with read_garc(root / 'rom/a/1/8/9') as garc: # ORAS
# Only one subfile
data = garc[0][0].read()
container = move_container_struct.parse(data)
@ -497,7 +971,8 @@ def extract_data(root, out):
#))
# Egg moves
with read_garc(root / 'rom/a/1/9/0') as garc:
with read_garc(root / 'rom/a/0/1/2') as garc: # SUMO
#with read_garc(root / 'rom/a/1/9/0') as garc: # ORAS
for i, subfile in enumerate(garc):
ident = identifiers['species'][i]
data = subfile[0].read()
@ -510,7 +985,8 @@ def extract_data(root, out):
eggset.append(identifiers['move'][moveid])
# Level-up moves
with read_garc(root / 'rom/a/1/9/1') as garc:
with read_garc(root / 'rom/a/0/1/3') as garc: # SUMO
#with read_garc(root / 'rom/a/1/9/1') as garc: # ORAS
for i, subfile in enumerate(garc):
ident = identifiers['species'][i]
level_up_moves = subfile[0].read()
@ -534,26 +1010,31 @@ def extract_data(root, out):
order = 1
# Evolution
#with read_garc(root / 'rom/a/1/9/2') as garc:
#with read_garc(root / 'rom/a/1/9/2') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/4') as garc: # SUMO?
# for subfile in garc:
# evolution = subfile[0].read()
# print(repr(evolution))
# Mega evolution
#with read_garc(root / 'rom/a/1/9/3') as garc:
#with read_garc(root / 'rom/a/1/9/3') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/5') as garc: # SUMO?
# for subfile in garc:
# evolution = subfile[0].read()
# print(repr(evolution))
# TODO what is a/1/9/4? 8 files of 404 bytes each
# TODO what is a/1/9/4 (ORAS) or a/0/1/6 (SUMO)? 8 files of 404 bytes each
# Baby Pokémon
#with read_garc(root / 'rom/a/1/9/6') as garc:
#with read_garc(root / 'rom/a/1/9/6') as garc: # ORAS
#with read_garc(root / 'rom/a/0/1/8') as garc: # SUMO?
# for subfile in garc:
# baby_pokemon = subfile[0].read()
# print(repr(baby_pokemon))
# Item stats
#with read_garc(root / 'rom/a/1/9/7') as garc:
# for subfile in garc:
# item_stats = subfile[0].read()
# print(repr(item_stats))
# TODO
#with read_garc(root / 'rom/a/1/9/7') as garc: # ORAS
with read_garc(root / 'rom/a/0/1/9') as garc: # SUMO
for subfile in garc:
item_stats = subfile[0].read()
# Tutor moves (from the personal structs)
for i, datum in enumerate(pokemon_data):
@ -584,7 +1065,8 @@ def get_mega_counts(root):
has.
"""
mega_counts = {} # pokemonid => number of mega evos
with read_garc(root / 'rom/a/1/9/3') as garc:
#with read_garc(root / 'rom/a/1/9/3') as garc: # oras
with read_garc(root / 'rom/a/0/1/5') as garc: # SUMO
for pokemonid, subfile in enumerate(garc):
mega_evos = pokemon_mega_evolutions_struct.parse_stream(subfile[0])
mega_counts[pokemonid] = max(
@ -634,7 +1116,9 @@ class SpriteFileNamer:
"Don't know how to name {} mega evolutions for Pokémon {}"
.format(self.mega_counts[pokemonid], pokemonid))
else:
raise ValueError("Pokemon {} doesn't have forms".format(pokemonid))
# TODO should use warnings for this so it works for new games
#raise ValueError("Pokemon {} doesn't have forms".format(pokemonid))
form = "form-{}".format(formid)
# Construct the directory
parts = []
@ -708,6 +1192,38 @@ class SpriteFileNamer:
shutil.copyfile(str(fn), str(fn2))
def write_clim_to_png(f, width, height, color_depth, palette, pixels):
    """Encode a decoded image as a PNG and write it to ``f``.

    The five arguments after ``f`` are the values that ``decode_clim``
    results are unpacked into elsewhere in this file, in the same order.

    :param f: writable binary file object that receives the PNG.
    :param width: image width, handed straight to ``png.Writer``.
    :param height: image height, handed straight to ``png.Writer``.
    :param color_depth: original bit depth; only used for paletted images,
        where it is recorded three times over in an sBIT chunk.
    :param palette: palette for ``png.Writer``; any falsy value selects the
        non-paletted (alpha) code path instead.
    :param pixels: iterable of rows.  For paletted images the rows are given
        to PyPNG unchanged; otherwise each row is an iterable of per-pixel
        channel sequences, which is flattened before being written.
    """
    if not palette:
        # TODO do i really only need alpha=True if there's no palette?
        direct_writer = png.Writer(width=width, height=height, alpha=True)
        # No sBIT juggling needed here -- the only wrinkle is that every row
        # has to be flattened from per-pixel groups into one run of values.
        flattened_rows = (itertools.chain.from_iterable(row) for row in pixels)
        direct_writer.write(f, flattened_rows)
        return

    # For a paletted image, I want to preserve Zhorken's good idea of
    # indicating the original bit depth with an sBIT chunk.  PyPNG can't emit
    # one itself, so render the PNG into memory first and patch it afterwards.
    indexed_writer = png.Writer(width=width, height=height, palette=palette)
    scratch = io.BytesIO()
    indexed_writer.write(scratch, pixels)

    # Re-read the in-memory PNG chunk by chunk and slot the sBIT chunk in at
    # index 1, i.e. directly after the first chunk
    scratch.seek(0)
    png_chunks = list(png.Reader(scratch).chunks())
    png_chunks.insert(1, ('sBIT', bytes([color_depth] * 3)))

    # Finally, emit the patched chunk list to the real destination
    png.write_chunks(f, png_chunks)
def extract_box_sprites(root, out):
namer = SpriteFileNamer(
out, get_mega_counts(root), ORAS_EXTRA_SPRITE_NAMES)
@ -715,7 +1231,8 @@ def extract_box_sprites(root, out):
with (root / 'exe/code.bin').open('rb') as f:
# Form configuration, used to put sprites in the right order
# NOTE: in x/y the address is 0x0043ea98
f.seek(0x0047d650)
#f.seek(0x0047d650) # ORAS
f.seek(0x004999d0) # SUMO
# TODO magic number
for n in range(722):
sprite = pokemon_sprite_struct.parse_stream(f)
@ -767,7 +1284,9 @@ def extract_box_sprites(root, out):
pokemon_sprites_dir = out
if not pokemon_sprites_dir.exists():
pokemon_sprites_dir.mkdir()
with read_garc(root / 'rom/a/0/9/1') as garc:
# with read_garc(root / 'rom/a/0/9/1') as garc: # ORAS
# TODO what's in 2/5/3?
with read_garc(root / 'rom/a/0/6/2') as garc: # SUMO
from .lib.clim import decode_clim
for i, subfile in enumerate(garc):
if i == 0:
@ -782,33 +1301,14 @@ def extract_box_sprites(root, out):
data = subfile[0].read()
width, height, color_depth, palette, pixels = decode_clim(data)
png_writer = png.Writer(
width=width,
height=height,
palette=palette,
)
# TODO this is bad.
if 'right/' in namer.index_to_filenames[i][0]:
for row in pixels:
row.reverse()
# I want to preserve Zhorken's good idea of indicating the original
# bit depth with an sBIT chunk, but PyPNG can't do that directly,
# so we need to do a bit of nonsense.
buf = io.BytesIO()
png_writer.write(buf, pixels)
# Read the PNG as chunks, and manually add an sBIT chunk
buf.seek(0)
png_reader = png.Reader(buf)
chunks = list(png_reader.chunks())
sbit = bytes([color_depth] * 3)
chunks.insert(1, ('sBIT', sbit))
# Write chunks to an actual file
with namer.open(i) as f:
png.write_chunks(f, chunks)
write_clim_to_png(f, width, height, color_depth, palette, pixels)
def extract_dex_sprites(root, out):
@ -823,7 +1323,10 @@ def extract_dex_sprites(root, out):
namer = SpriteFileNamer(
out, get_mega_counts(root), ORAS_EXTRA_SPRITE_NAMES)
with read_garc(root / 'rom/a/0/0/8') as garc:
# TODO Meowstic is counted as simply female in here, but should probably be
# saved with a form filename as well
#with read_garc(root / 'rom/a/0/0/8') as garc: # ORAS
with read_garc(root / 'rom/a/0/9/4') as garc: # SUMO
f = garc[0][0]
# TODO magic number
for n in range(721):
@ -861,9 +1364,10 @@ def extract_dex_sprites(root, out):
namer.add(model_num, pokemonid, formid)
# And now, do the ripping
pokemon_sprites_dir = out
with read_garc(root / 'rom/a/2/6/3') as garc:
#with read_garc(root / 'rom/a/2/6/3') as garc: # ORAS
with read_garc(root / 'rom/a/2/4/0') as garc: # sun/moon demo
from .lib.clim import decode_clim
from .lib.etc1 import decode_etc1
for i, subfile in enumerate(garc):
shiny_prefix = None
if i > total_model_count:
@ -881,16 +1385,9 @@ def extract_dex_sprites(root, out):
continue
data = subfile[0].read()
width, height, color_depth, palette, pixels = decode_clim(data)
assert not palette
png_writer = png.Writer(
width=width,
height=height,
alpha=True,
)
with namer.open(i, prefix=shiny_prefix) as f:
png_writer.write(f, (itertools.chain(*row) for row in pixels))
write_clim_to_png(f, *decode_etc1(data))
#write_clim_to_png(f, *decode_clim(data))
def _munge_source_arg(strpath):