Use YAML schema for gen 6/7; add gen7 form names; improve image support

Specifically:

- Add support for detecting FLIM format

- Add support for more color formats

- Add a small decoded image type that knows how to write itself out as
  a PNG

- Improve ETC1 decoder to work with images whose dimensions are not
  powers of two, images with no alpha channel, and images with the
  strange FLIM pixel order

- Port the gen 6/7 extractor to Construct 2.8

- Switch to using script tags in language names, to distinguish Japanese
  kana from kanji and Simplified from Traditional Chinese

- Drop the load-time merging of kanji and kana

- Add paths to various text files in SUMO

- Add form names for SUMO Pokémon

- Clean up identifiers a bit, especially the distinction between species
  and Pokémon

- Use the Pokémon schema type to dump what we have so far, and give it a
  couple more fields that didn't exist in gen 1

- Get movesets dumping correctly

- Special-case a bunch of weirdness, where the number of dex sprites
  doesn't match the number of models in SUMO
This commit is contained in:
Eevee (Lexy Munroe) 2017-01-05 04:57:05 -08:00
parent 0f79a5b922
commit 053f2a8d22
6 changed files with 846 additions and 375 deletions

View file

@ -1,3 +1,5 @@
import io
import itertools
import math
import struct
@ -17,7 +19,7 @@ imag_header_struct = c.Struct(
'section_length' / c.Const(c.Int32ul, 0x10),
'width' / c.Int16ul,
'height' / c.Int16ul,
'format' / c.Int32ul,
#'format' / c.Int32ul,
# TODO this seems to have been expanded into several things in SUMO
#c.Enum(
# c.ULInt32('format'),
@ -37,13 +39,35 @@ imag_header_struct = c.Struct(
# A4=13,
# #ETC1=19,
#)
'unknown' / c.Int16ul,
'format' / c.Enum(
c.Int8ul,
L8=0,
A8=1,
LA4=2,
LA8=3,
HILO8=4,
RGB565=5,
RGB8=6,
RGBA5551=7,
RGBA4=8,
RGBA8=9,
ETC1=10,
ETC1A4=11,
L4=12,
A4=13,
#ETC1=19,
),
# RGB565=5,
# ETC1A4=11,
'unknown2' / c.Int8ul,
)
# TODO probably move these to their own module, since they aren't just for
# CLIM. pixel deshuffler, too. (which should probably spit out pypng's native
# format)
COLOR_DECODERS = {}
COLOR_FORMATS = {}
@attr.s
@ -59,22 +83,68 @@ class ColorFormat:
def __iter__(self):
# TODO back compat until i fix the below code
return iter((self.decoder, self.bits_per_pixel, self.bit_depth))
return iter((self, self.bits_per_pixel, self.bit_depth))
def _register_color_decoder(name, *, bpp, depth, alpha):
def register(f):
COLOR_DECODERS[name] = ColorFormat(name, f, bpp, depth, alpha)
COLOR_FORMATS[name] = ColorFormat(name, f, bpp, depth, alpha)
return f
return register
@_register_color_decoder('A4', bpp=0.5, depth=4, alpha=True)
def decode_A4(data):
    """Yield two RGBA pixels per input byte from packed 4-bit alpha values.

    The low nybble of each byte is emitted before the high nybble.  Every
    4-bit alpha is widened to 8 bits by repeating it in both halves of the
    byte, and the three colour channels are always zero.
    """
    for packed in data:
        for nybble in (packed & 0xf, packed >> 4):
            yield 0, 0, 0, (nybble << 4) | nybble
@_register_color_decoder('A8', bpp=1, depth=8, alpha=True)
def decode_a8(data):
    """Yield one RGBA pixel per input byte, using the byte as the alpha.

    The colour channels are always zero.
    """
    yield from ((0, 0, 0, alpha) for alpha in data)
@_register_color_decoder('L4', bpp=0.5, depth=4, alpha=False)
def decode_l4(data):
    """Yield two grey RGB pixels per input byte from packed 4-bit luminance.

    The low nybble of each byte is emitted before the high nybble.  Every
    4-bit level is widened to 8 bits by repeating it in both halves of the
    byte, then used for all three channels.
    """
    for packed in data:
        for nybble in (packed & 0xf, packed >> 4):
            level = (nybble << 4) | nybble
            yield level, level, level
@_register_color_decoder('L8', bpp=1, depth=8, alpha=False)
def decode_l8(data):
    """Yield one grey RGB pixel per input byte (8-bit luminance)."""
    yield from ((level, level, level) for level in data)
@_register_color_decoder('LA4', bpp=1, depth=4, alpha=True)
def decode_la4(data):
    """Yield one RGBA pixel per input byte of packed luminance + alpha.

    The high nybble of each byte is the luminance and the low nybble is the
    alpha.  Both 4-bit values are widened to 8 bits by repeating the nybble
    in both halves of the byte, so 0xF becomes 0xFF.
    """
    for la in data:
        l = la >> 4
        l = (l << 4) | l
        a = la & 0xf
        # Replicate the nybble into the low half as well; shifting it left by
        # four twice would leave the low bits empty and cap alpha at 0xF0.
        a = (a << 4) | a
        yield l, l, l, a
@_register_color_decoder('LA8', bpp=2, depth=8, alpha=True)
def decode_la8(data):
    """Yield one RGBA pixel per two input bytes.

    Within each pair the first byte is the alpha and the second byte is the
    luminance, which is used for all three colour channels.
    """
    for offset in range(0, len(data), 2):
        alpha, luminance = data[offset], data[offset + 1]
        yield luminance, luminance, luminance, alpha
@_register_color_decoder('RGBA4', bpp=2, depth=4, alpha=True)
def decode_rgba4(data):
# The idea is that every uint16 is a packed rrrrggggbbbbaaaa, but when
@ -93,15 +163,16 @@ def decode_rgba4(data):
@_register_color_decoder('RGB8', bpp=3, depth=8, alpha=False)
def decode_rgb8(data):
for i in range(0, len(data), 3):
yield data[i:i + 3]
yield data[i:i + 3][::-1]
@_register_color_decoder('RGBA8', bpp=4, depth=8, alpha=True)
def decode_rgba8(data):
for i in range(0, len(data), 4):
yield data[i:i + 4]
yield data[i:i + 4][::-1]
# FIXME turns out the above just are these, so, ditch these
@_register_color_decoder('BGR8', bpp=3, depth=8, alpha=False)
def decode_bgr8(data):
for i in range(0, len(data), 3):
@ -125,6 +196,7 @@ def decode_rgba5551(data, *, start=0, count=None):
for i in range(start, end, 2):
datum = data[i] + data[i + 1] * 256
# FIXME repeat rather than doing division
r = (((datum >> 11) & 0x1f) * 255 + 15) // 31
g = (((datum >> 6) & 0x1f) * 255 + 15) // 31
b = (((datum >> 1) & 0x1f) * 255 + 15) // 31
@ -132,6 +204,45 @@ def decode_rgba5551(data, *, start=0, count=None):
yield r, g, b, a
@_register_color_decoder('RGB565', bpp=2, depth=5, alpha=False)
def decode_rgb565(data, *, start=0, count=None):
    """Yield RGB pixels from little-endian 16-bit rrrrrggg gggbbbbb values.

    ``start`` is the byte offset of the first pixel.  ``count`` is the number
    of pixels to decode; when it is None, decoding runs to the end of
    ``data``.  Each channel is scaled to 0-255 with rounding.
    """
    # FIXME i bet construct totally /can/ parse this mess for me
    def widen(value, maximum):
        # FIXME repeat rather than doing division
        return (value * 255 + maximum // 2) // maximum

    stop = len(data) if count is None else start + count * 2
    for offset in range(start, stop, 2):
        word = data[offset] + data[offset + 1] * 256
        red = widen((word >> 11) & 0x1f, 31)
        green = widen((word >> 5) & 0x3f, 63)
        blue = widen(word & 0x1f, 31)
        yield red, green, blue
@_register_color_decoder('RGB332', bpp=1, depth=2, alpha=False)
def decode_rgb332(data, *, start=0, count=None):
    """Yield RGB pixels from single-byte rrrgggbb values.

    ``start`` is the byte offset of the first pixel.  ``count`` is the number
    of pixels to decode; when it is None, decoding runs to the end of
    ``data``.

    Red and green are 3-bit channels and blue is a 2-bit channel.  Each is
    widened to 8 bits by repeating its bit pattern, so the maximum value of
    every channel maps to 255.
    """
    end = len(data) if count is None else start + count

    for i in range(start, end):
        datum = data[i]
        r = (datum >> 5) & 0x7
        g = (datum >> 2) & 0x7
        # Blue only owns the two lowest bits; masking three would pull in
        # green's least significant bit.
        b = datum & 0x3
        yield (
            (r << 5) | (r << 2) | (r >> 1),
            (g << 5) | (g << 2) | (g >> 1),
            (b << 6) | (b << 4) | (b << 2) | b,
        )
# ETC1 and ETC1A4 are registered with no decoder function (None), so only
# their bpp/depth/alpha metadata is recorded here; decode_clim special-cases
# both format names and passes the data to decode_etc1 from the etc1 module.
_register_color_decoder('ETC1', bpp=0.5, depth=4, alpha=False)(None)
_register_color_decoder('ETC1A4', bpp=1, depth=4, alpha=True)(None)
# All formats are registered; drop the helper from the module namespace.
del _register_color_decoder
@ -156,12 +267,24 @@ def untile_pixels(raw_pixels, width, height, *, is_flim):
Taken from: https://github.com/Zhorken/pokemon-x-y-icons/
"""
# FIXME this is a wild guess, because i've seen a 4x4 image that this just
# doesn't handle correctly, but the image is all white so i have no idea
# what the right fix is -- there's a 4 x 0x78 in 0/7/9 though...
if width < 8 or height < 8:
pixels = []
it = iter(raw_pixels)
for r in range(height):
pixels.append([])
for c in range(width):
pixels[-1].append(next(it))
return pixels
# Images are stored padded to powers of two
stored_width = 2 ** math.ceil(math.log(width) / math.log(2))
stored_height = 2 ** math.ceil(math.log(height) / math.log(2))
num_pixels = stored_width * stored_height
tile_width = stored_width // 8
tile_height = stored_height // 8
tile_width = (stored_width + 7) // 8
tile_height = (stored_height + 7) // 8
pixels = [
[None for x in range(width)]
@ -175,6 +298,7 @@ def untile_pixels(raw_pixels, width, height, *, is_flim):
# Find the coordinates of the top-left corner of the current tile.
# n.b. The image is eight tiles wide, and each tile is 8×8 pixels.
tile_num = n // 64
# FIXME i found a 4x4 FLIM that this fails for???
if is_flim:
# The FLIM format seems to pseudo-rotate the entire image to the
# right, so tiles start in the bottom left and go up
@ -224,25 +348,38 @@ def decode_clim(data):
raise ValueError("Unknown image format {}".format(file_format))
imag_header = imag_header_struct.parse(data[-20:])
if is_flim:
# TODO SUMO hack; not sure how to get format out of this header
imag_header.format = 'RGBA5551'
#if is_flim:
# # TODO SUMO hack; not sure how to get format out of this header
# imag_header.format = 'RGBA5551'
if imag_header.format not in COLOR_DECODERS:
if imag_header.format not in COLOR_FORMATS:
raise ValueError(
"don't know how to decode {} pixels".format(imag_header.format))
color_decoder, color_bpp, color_depth = COLOR_DECODERS[imag_header.format]
color_format = COLOR_FORMATS[imag_header.format]
mode, = struct.unpack_from('<H', data, 0)
if mode == 2:
# Paletted
palette_length, = struct.unpack_from('<H', data, 2)
palette = list(color_decoder(data, start=4, count=palette_length))
data_start = 4 + palette_length * color_bpp
palette = list(color_format.decoder(data, start=4, count=palette_length))
data_start = 4 + palette_length * color_format.bits_per_pixel
scrambled_pixels = uncuddle_paletted_pixels(palette, data[data_start:])
elif imag_header.format == 'ETC1':
# FIXME merge this decoder in (problem is it needs to know width +
# height -- maybe i can move the pixel unscrambling out of it somehow?)
from .etc1 import decode_etc1
pixels = decode_etc1(b'\x00' * 0x80 + data, imag_header.width, imag_header.height, use_alpha=False, is_flim=True)[4]
return DecodedImageData(
imag_header.width, imag_header.height, color_format, None, pixels)
elif imag_header.format == 'ETC1A4':
# FIXME same
from .etc1 import decode_etc1
pixels = decode_etc1(b'\x00' * 0x80 + data, imag_header.width, imag_header.height, is_flim=True)[4]
return DecodedImageData(
imag_header.width, imag_header.height, color_format, None, pixels)
else:
palette = None
scrambled_pixels = color_decoder(data)
scrambled_pixels = color_format.decoder(data)
pixels = untile_pixels(
scrambled_pixels,
@ -250,4 +387,55 @@ def decode_clim(data):
imag_header.height,
is_flim=is_flim,
)
return imag_header.width, imag_header.height, color_depth, palette, pixels
return DecodedImageData(
imag_header.width, imag_header.height, color_format, palette, pixels)
class DecodedImageData:
    """A decoded image that knows how to write itself out as a PNG.

    Attributes:
        width, height: image dimensions in pixels.
        color_format: object exposing ``bit_depth`` and ``alpha``.
        palette: list of palette entries, or None for direct-colour images.
        pixels: list of rows.  For direct-colour images each row is a list
            of channel tuples; for paletted images the rows are handed to
            PyPNG as-is (presumably palette indices -- confirm with the
            producer of ``pixels``).
    """

    def __init__(self, width, height, color_format, palette, pixels):
        self.width = width
        self.height = height
        self.color_format = color_format
        self.palette = palette
        self.pixels = pixels

    def __iter__(self):
        """Iterate as (width, height, bit depth, palette, pixels).

        This lets callers unpack the object like a five-element tuple.
        """
        return iter((
            self.width,
            self.height,
            self.color_format.bit_depth,
            self.palette,
            self.pixels,
        ))

    def mirror(self):
        """Flip the image horizontally, reversing every row in place."""
        for row in self.pixels:
            row.reverse()

    def _png_writer_kwargs(self):
        """Build the keyword arguments for ``png.Writer``."""
        writer_kwargs = dict(width=self.width, height=self.height)
        if self.palette:
            # PyPNG refuses ``alpha=True`` together with a palette, so alpha
            # is never requested for paletted images.
            writer_kwargs['palette'] = self.palette
        elif self.color_format.alpha:
            writer_kwargs['alpha'] = True
        return writer_kwargs

    def write_to_png(self, f):
        """Write this image to the file object ``f`` as a PNG."""
        import png

        writer = png.Writer(**self._png_writer_kwargs())

        # For a paletted image, I want to preserve Zhorken's good idea of
        # indicating the original bit depth with an sBIT chunk.  But PyPNG
        # can't do that directly, so instead I have to do some nonsense.
        # FIXME should probably just do that for everything?
        if self.palette:
            buf = io.BytesIO()
            writer.write(buf, self.pixels)

            # Read the PNG back as chunks, and manually add an sBIT chunk
            # right after the first one
            buf.seek(0)
            png_reader = png.Reader(buf)
            chunks = list(png_reader.chunks())
            sbit = bytes([self.color_format.bit_depth] * 3)
            chunks.insert(1, ('sBIT', sbit))

            # Now write the chunks to the file
            png.write_chunks(f, chunks)
        else:
            # Otherwise, it's... almost straightforward: PyPNG wants each
            # row as one flat sequence of channel values.
            writer.write(
                f, (itertools.chain.from_iterable(row) for row in self.pixels))

View file

@ -8,6 +8,7 @@ that decodes four 4x4 blocks one 8x8 block at a time, because of course it is.
(I believe the 3DS operates with 8x8 tiles, so this does make some sense.)
"""
import io
import math
# Easier than doing math
THREE_BIT_TWOS_COMPLEMENT = [0, 1, 2, 3, -4, -3, -2, -1]
@ -40,28 +41,34 @@ def clamp_to_byte(n):
return max(0, min(255, n))
def decode_etc1(data):
# TODO sizes are hardcoded here
width = 128
height = 128
# FIXME sizes are hardcoded here
def decode_etc1(data, width=128, height=128, use_alpha=True, is_flim=False):
# TODO this seems a little redundant; could just ask for a stream
f = io.BytesIO(data)
# Skip header
f.read(0x80)
outpixels = [[None] * width for _ in range(height)]
# Images are stored padded to powers of two
stored_width = 2 ** math.ceil(math.log(width) / math.log(2))
stored_height = 2 ** math.ceil(math.log(height) / math.log(2))
outpixels = [[None] * (width) for _ in range(height)]
# ETC1 encodes as 4x4 blocks. Normal ETC1 arranges them in English reading
# order, right and down. This Nintendo variant groups them as 8x8
# superblocks, where the four blocks in each superblock are themselves
# arranged right and down. So we read block offsets 8 at a time, and 'z'
# is our current position within a superblock.
# TODO this may do the wrong thing if width/height is not divisible by 8
for blocky in range(0, height, 8):
for blockx in range(0, width, 8):
for blocky in range(0, stored_height, 8):
for blockx in range(0, stored_width, 8):
for z in range(4):
row = f.read(16)
if not row:
if use_alpha:
row = f.read(16)
else:
# FIXME this could sure be incorporated better
row = b'\xff' * 8 + f.read(8)
if len(row) < 16:
print(row, blocky, blockx, z, f.tell() - 0x80, len(data) - 0x80)
raise EOFError
# Each block is encoded as 16 bytes. The first 8 are a 4-bit
@ -126,16 +133,28 @@ def decode_etc1(data):
base1 = red1, green1, blue1
base2 = red2, green2, blue2
# FLIM images do this truly bizarre thing where they write out the columns, as rows
if is_flim:
block = (blocky // 8) * (stored_width // 8) + (blockx // 8)
x0 = block // (stored_height // 8) * 8 + z // 2 * 4
y0 = block % (stored_height // 8) * 8 + z % 2 * 4
else:
x0 = blockx + z % 2 * 4
y0 = blocky + z // 2 * 4
# Now deal with individual pixels
it = iter_alpha_nybbles(alpha)
for c in range(4):
for r in range(4):
x = blockx + c
y = blocky + r
if z in (1, 3):
x += 4
if z in (2, 3):
y += 4
if is_flim:
x = x0 + r
y = y0 + c
else:
x = x0 + c
y = y0 + r
if not (x < width and y < height):
continue
if (flipbit and r < 2) or (not flipbit and c < 2):
table = table1
@ -149,8 +168,11 @@ def decode_etc1(data):
lobit = (lopixelbits >> pixelbit) & 0x1
mod = table[hibit * 2 + lobit]
color = tuple(clamp_to_byte(b + mod) for b in base)
color += (next(it),)
if use_alpha:
color += (next(it),)
outpixels[y][x] = color
# 4 is the bit depth; None is the palette
return width, height, 4, None, outpixels
from .clim import DecodedImageData, COLOR_FORMATS
# FIXME stupid import, wrong color format
return DecodedImageData(width, height, COLOR_FORMATS['ETC1A4'], None, outpixels)

View file

@ -94,13 +94,16 @@ class GARCEntry(object):
def __getitem__(self, i):
start, length = self.slices[i]
ss = self.stream.slice(start, length)
if ss.peek(1) in [b'\x10', b'\x11']:
if ss.peek(1) in b'\x10\x11':
# XXX this sucks but there's no real way to know for sure whether
# data is compressed or not. maybe just bake this into the caller
# and let them deal with it, same way we do with text decoding?
# TODO it would be nice if this could be done lazily for 'inspect'
# purposes, since the first four bytes are enough to tell you the
# size
# FIXME make this work even for red herrings, maybe by finishing it
# up and doing a trial decompression of the first x bytes
#return CompressedStream(ss)
try:
data = lzss3.decompress_bytes(ss.read())
except Exception:
@ -113,6 +116,47 @@ class GARCEntry(object):
return len(self.slices)
class CompressedStream:
    """File-like wrapper that decompresses an LZSS stream on first use.

    Only the four-byte header is read up front: a type byte (0x10 or 0x11)
    followed by a three-byte little-endian length, which ``len()`` reports.
    The body is decompressed the first time any of ``read``, ``seek``,
    ``tell`` or ``peek`` is called.
    """

    def __init__(self, stream):
        self.stream = stream
        header = stream.read(4)
        # Rewind so the eventual decompression sees the header too
        stream.seek(0)
        assert header[0] in b'\x10\x11'
        # Pad the three length bytes out to a full 32-bit integer
        padded_length = header[1:] + b'\x00'
        self.length = struct.unpack('<L', padded_length)[0]
        self.data = None

    def __len__(self):
        return self.length

    def _decompress(self):
        """Decompress the whole wrapped stream into an in-memory buffer."""
        raw = self.stream.read()
        self.data = BytesIO(lzss3.decompress_bytes(raw))

    def _buffer(self):
        """Return the decompressed buffer, creating it if necessary."""
        if self.data is None:
            self._decompress()
        return self.data

    def read(self, *args):
        return self._buffer().read(*args)

    def seek(self, *args):
        return self._buffer().seek(*args)

    def tell(self, *args):
        return self._buffer().tell(*args)

    def peek(self, n):
        """Return up to ``n`` bytes without moving the read position."""
        buf = self._buffer()
        position = buf.tell()
        try_read = buf.read(n)
        buf.seek(position)
        return try_read
XY_CHAR_MAP = {
0x307f: 0x202f, # nbsp
0xe08d: 0x2026, # ellipsis
@ -360,7 +404,7 @@ def do_inspect(args):
else:
print()
cutoff = max(total_subfiles // 10, 2)
cutoff = max(total_subfiles // 10, 1)
for magic, ct in magic_ctr.most_common():
if ct < cutoff:
break

View file

@ -11,7 +11,7 @@ class PokemonContainerFile(_ContainerFile):
self.stream = stream = Substream(stream)
magic, entry_ct = stream.unpack('<2sH')
assert magic == b'PC'
assert magic in (b'PC', b'PS', b'BL')
# Offsets are "A B C ...", where entry 0 ranges from A to B, entry 1
# from B to C, etc.

File diff suppressed because it is too large Load diff

View file

@ -164,6 +164,8 @@ Evolution = _ForwardDeclaration()
EncounterMap = _ForwardDeclaration()
MoveSet = _ForwardDeclaration()
Pokedex = _ForwardDeclaration()
Item = _ForwardDeclaration()
Ability = _ForwardDeclaration()
class Pokémon(VersionedLocus):
@ -173,6 +175,9 @@ class Pokémon(VersionedLocus):
base_stats = _Map(Stat, int)
growth_rate = _Value(GrowthRate)
base_experience = _Value(int, min=0, max=255)
capture_rate = _Value(int, min=0, max=255)
held_items = _Map(Item, int)
gender_rate = _Value(int)
pokedex_numbers = _Map(Pokedex, int)
@ -202,9 +207,25 @@ class Pokémon(VersionedLocus):
# TODO should this be written in hex, maybe?
game_index = _Value(int)
# FIXME how do i distinguish hidden ability?
abilities = _List(Ability)
Pokemon = Pokémon
MoveEffect = _ForwardDeclaration()
# Schema type for a move, declared with the same _Localized/_Value field
# helpers as the other locus classes in this module.
# NOTE(review): the field meanings noted below are read off the field names
# and declared types only -- confirm against the extractor that fills them.
class Move(VersionedLocus):
# Localized string name
name = _Localized(str)
# Single Type value
type = _Value(Type)
# Plain integers; no min/max bounds are declared for any of these
power = _Value(int)
pp = _Value(int)
accuracy = _Value(int)
# MoveEffect is only forward-declared at this point in the module
effect = _Value(MoveEffect)
# ------------------------------------------------------------------------------
# The repository class, primary interface to the data