veekun_pokedex/pokedex/extract/lib/clim.py
Eevee (Lexy Munroe) 053f2a8d22 Use YAML schema for gen 6/7; add gen7 form names; improved image support
Specifically:

- Add support for detecting FLIM format

- Add support for more color formats

- Add a small decoded image type that knows how to write itself out as
  a PNG

- Improve ETC1 decoder to work with images whose dimensions are not
  powers of two, images with no alpha channel, and images with the
  strange FLIM pixel order

- Port the gen 6/7 extractor to Construct 2.8

- Switch to using script tags in language names, to distinguish Japanese
  kana from kanji and Simplified from Traditional Chinese

- Drop the load-time merging of kanji and kana

- Add paths to various text files in SUMO

- Add form names for SUMO Pokémon

- Clean up identifiers a bit, especially the distinction between species
  and Pokémon

- Use the Pokémon schema type to dump what we have so far, and give it a
  couple more fields that didn't exist in gen 1

- Get movesets dumping correctly

- Special-case a bunch of weirdness, where the number of dex sprites
  doesn't match the number of models in SUMO
2017-01-05 04:57:05 -08:00

441 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import io
import itertools
import math
import struct
import attr
import construct as c
# Construct layout for the file-level image header: a 4-byte magic, a
# 16-bit byte-order mark (0xfeff), a 16-bit header length (0x14), and then
# three 32-bit fields: version, file size and block count.
# NOTE(review): the magic is pinned to b'FLIM', yet the trailing TODO reads as
# though it was written while the constant was still b'CLIM'.  decode_clim()
# in this file checks the magic by hand and accepts both spellings -- confirm
# whether this struct should do the same.
clim_header_struct = c.Struct(
    c.Const(b'FLIM'),  # TODO 'FLIM' in SUMO
    'endianness' / c.Const(c.Int16ul, 0xfeff),
    'header_length' / c.Const(c.Int16ul, 0x14),
    'version' / c.Int32ul,
    'file_size' / c.Int32ul,
    'blocks_ct' / c.Int32ul,
)
# Construct layout for the 'imag' section: a 4-byte magic, a 32-bit section
# length (always 0x10), the 16-bit image width and height, and four trailing
# bytes that carry the pixel format.
imag_header_struct = c.Struct(
    c.Const(b'imag'),
    'section_length' / c.Const(c.Int32ul, 0x10),
    'width' / c.Int16ul,
    'height' / c.Int16ul,
    # An earlier reading treated the last four bytes as a single 32-bit
    # format field.  It is kept here, commented out, for reference.
    #'format' / c.Int32ul,
    # TODO this seems to have been expanded into several things in SUMO
    #c.Enum(
    #    c.ULInt32('format'),
    #    L8=0,
    #    A8=1,
    #    LA4=2,
    #    LA8=3,
    #    HILO8=4,
    #    RGB565=5,
    #    RGB8=6,
    #    RGBA5551=7,
    #    RGBA4=8,
    #    RGBA8=9,
    #    ETC1=10,
    #    ETC1A4=11,
    #    L4=12,
    #    A4=13,
    #    #ETC1=19,
    #)
    # Current reading of those four bytes: a 16-bit unknown value, an 8-bit
    # format code, and one more unknown byte.
    'unknown' / c.Int16ul,
    'format' / c.Enum(
        c.Int8ul,
        L8=0,
        A8=1,
        LA4=2,
        LA8=3,
        HILO8=4,
        RGB565=5,
        RGB8=6,
        RGBA5551=7,
        RGBA4=8,
        RGBA8=9,
        ETC1=10,
        ETC1A4=11,
        L4=12,
        A4=13,
        #ETC1=19,
    ),
    # NOTE(review): the two values below are left over from the original
    # author; presumably formats of interest while debugging -- confirm.
    # RGB565=5,
    # ETC1A4=11,
    'unknown2' / c.Int8ul,
)
# TODO probably move these to their own module, since they aren't just for
# CLIM.  The pixel deshuffler, too.  (It should probably spit out pypng's
# native format.)
# Registry of the known color formats, keyed by format name.  It is filled in
# by the _register_color_decoder decorator as each decoder is defined.
COLOR_FORMATS = {}
@attr.s
class ColorFormat:
    """Description of one pixel format together with its decoder.

    Calling an instance forwards the raw data to ``decoder`` and returns
    whatever the decoder returns.
    """

    # NOTE(review): the first positional argument of ``attr.ib`` is the
    # default value, so each field defaults to a string spelling its own
    # name.  That looks unintentional -- confirm before relying on it.
    name = attr.ib(default='name')
    decoder = attr.ib(default='decoder')
    bits_per_pixel = attr.ib(default='bits_per_pixel')
    bit_depth = attr.ib(default='bit_depth')
    alpha = attr.ib(default='alpha')

    def __call__(self, data):
        """Decode ``data`` with this format's decoder."""
        return self.decoder(data)

    def __iter__(self):
        """Iterate as ``(self, bits_per_pixel, bit_depth)``."""
        # TODO back compat until i fix the below code
        legacy_triple = (self, self.bits_per_pixel, self.bit_depth)
        return iter(legacy_triple)
def _register_color_decoder(name, *, bpp, depth, alpha):
    """Return a decorator that records a decoder in ``COLOR_FORMATS``.

    The decorated object is wrapped in a ``ColorFormat`` built from ``name``,
    ``bpp``, ``depth`` and ``alpha``, stored under ``name``, and then handed
    back unchanged.
    """
    def _decorate(decoder):
        color_format = ColorFormat(name, decoder, bpp, depth, alpha)
        COLOR_FORMATS[name] = color_format
        return decoder

    return _decorate
@_register_color_decoder('A4', bpp=0.5, depth=4, alpha=True)
def decode_A4(data):
    """Decode packed 4-bit alpha values, two pixels per byte.

    For each byte the low nibble is emitted first, then the high nibble.
    Each nibble is widened to eight bits by repeating it, and every pixel is
    yielded as ``(0, 0, 0, alpha)``.
    """
    for packed in data:
        for nibble in (packed & 0xf, packed >> 4):
            yield 0, 0, 0, (nibble << 4) | nibble
@_register_color_decoder('A8', bpp=1, depth=8, alpha=True)
def decode_a8(data):
    """Decode 8-bit alpha values, yielding ``(0, 0, 0, alpha)`` per item."""
    for alpha in data:
        pixel = (0, 0, 0, alpha)
        yield pixel
@_register_color_decoder('L4', bpp=0.5, depth=4, alpha=False)
def decode_l4(data):
    """Decode packed 4-bit luminance values, two pixels per byte.

    For each byte the low nibble is emitted first, then the high nibble.
    Each nibble is widened to eight bits by repeating it, and every pixel is
    yielded as a grey ``(l, l, l)`` triple.
    """
    for packed in data:
        for nibble in (packed & 0xf, packed >> 4):
            grey = (nibble << 4) | nibble
            yield grey, grey, grey
@_register_color_decoder('L8', bpp=1, depth=8, alpha=False)
def decode_l8(data):
    """Decode 8-bit luminance values, yielding a grey ``(l, l, l)`` each."""
    for grey in data:
        pixel = (grey, grey, grey)
        yield pixel
@_register_color_decoder('LA4', bpp=1, depth=4, alpha=True)
def decode_la4(data):
    """Decode one-byte pixels holding 4-bit luminance and 4-bit alpha.

    The high nibble of each byte is luminance and the low nibble is alpha.
    Both are widened to eight bits by repeating the nibble, so ``0xf``
    becomes ``0xff``.  Pixels are yielded as ``(l, l, l, alpha)``.
    """
    for packed in data:
        luminance = packed >> 4
        luminance = (luminance << 4) | luminance
        alpha = packed & 0xf
        # The nibble has to land in both halves of the byte.  OR-ing two
        # copies of the shifted value would leave the low four bits at zero
        # and cap a fully opaque pixel at 0xf0.
        alpha = (alpha << 4) | alpha
        yield luminance, luminance, luminance, alpha
@_register_color_decoder('LA8', bpp=2, depth=8, alpha=True)
def decode_la8(data):
    """Decode two-byte pixels: alpha byte first, luminance byte second.

    Pixels are yielded as ``(l, l, l, alpha)``.
    """
    for offset in range(0, len(data), 2):
        alpha, luminance = data[offset], data[offset + 1]
        yield luminance, luminance, luminance, alpha
@_register_color_decoder('RGBA4', bpp=2, depth=4, alpha=True)
def decode_rgba4(data):
    """Decode two-byte pixels with four bits per channel.

    Pixels are yielded as ``(r, g, b, a)`` with each channel scaled from
    0..15 up to 0..255.
    """
    # The idea is that every uint16 is a packed rrrrggggbbbbaaaa, but when
    # written out little-endian this becomes bbbbaaaarrrrgggg and there's just
    # no pretty way to deal with this
    def _widen(nibble):
        return (nibble * 255 + 7) // 15

    for offset in range(0, len(data), 2):
        blue_alpha = data[offset]
        red_green = data[offset + 1]
        red = _widen((red_green & 0xf0) >> 4)
        green = _widen((red_green & 0x0f) >> 0)
        blue = _widen((blue_alpha & 0xf0) >> 4)
        alpha = _widen((blue_alpha & 0x0f) >> 0)
        yield red, green, blue, alpha
@_register_color_decoder('RGB8', bpp=3, depth=8, alpha=False)
def decode_rgb8(data):
    """Decode three-byte pixels by reversing the byte order of each one.

    Every pixel is yielded as the reversed three-item slice of ``data``.
    """
    for offset in range(0, len(data), 3):
        stored = data[offset:offset + 3]
        yield stored[::-1]
@_register_color_decoder('RGBA8', bpp=4, depth=8, alpha=True)
def decode_rgba8(data):
    """Decode four-byte pixels by reversing the byte order of each one.

    Every pixel is yielded as the reversed four-item slice of ``data``.
    """
    for offset in range(0, len(data), 4):
        stored = data[offset:offset + 4]
        yield stored[::-1]
# FIXME turns out the above just are these, so, ditch these
@_register_color_decoder('BGR8', bpp=3, depth=8, alpha=False)
def decode_bgr8(data):
    """Decode three-byte pixels by reversing the byte order of each one.

    Every pixel is yielded as the reversed three-item slice of ``data``.
    """
    for offset in range(0, len(data), 3):
        stored = data[offset:offset + 3]
        yield stored[::-1]
@_register_color_decoder('ABGR8', bpp=4, depth=8, alpha=True)
def decode_abgr8(data):
    """Decode four-byte pixels by reversing the byte order of each one.

    Every pixel is yielded as the reversed four-item slice of ``data``.
    """
    for offset in range(0, len(data), 4):
        stored = data[offset:offset + 4]
        yield stored[::-1]
@_register_color_decoder('RGBA5551', bpp=2, depth=5, alpha=True)
def decode_rgba5551(data, *, start=0, count=None):
    """Decode little-endian 16-bit pixels with 5/5/5 color bits and 1 alpha.

    :param data: indexable sequence of byte values
    :param start: offset of the first pixel within ``data``
    :param count: number of pixels to decode; ``None`` reads up to
        ``len(data)``
    :return: generator of ``(r, g, b, a)`` tuples scaled to 0..255
    """
    # I am extremely irritated that construct cannot parse this mess for me
    # Each 16-bit word is laid out as rrrrrgggggbbbbba
    stop = len(data) if count is None else start + count * 2
    for offset in range(start, stop, 2):
        word = data[offset] + data[offset + 1] * 256
        # FIXME repeat rather than doing division
        red, green, blue = (
            (((word >> shift) & 0x1f) * 255 + 15) // 31
            for shift in (11, 6, 1)
        )
        alpha = (word & 0x1) * 255
        yield red, green, blue, alpha
@_register_color_decoder('RGB565', bpp=2, depth=5, alpha=False)
def decode_rgb565(data, *, start=0, count=None):
    """Decode little-endian 16-bit pixels with 5/6/5 color bits.

    :param data: indexable sequence of byte values
    :param start: offset of the first pixel within ``data``
    :param count: number of pixels to decode; ``None`` reads up to
        ``len(data)``
    :return: generator of ``(r, g, b)`` tuples scaled to 0..255
    """
    # FIXME i bet construct totally /can/ parse this mess for me
    stop = len(data) if count is None else start + count * 2
    for offset in range(start, stop, 2):
        word = data[offset] + data[offset + 1] * 256
        red_bits = (word >> 11) & 0x1f
        green_bits = (word >> 5) & 0x3f
        blue_bits = (word >> 0) & 0x1f
        # FIXME repeat rather than doing division
        yield (
            (red_bits * 255 + 15) // 31,
            (green_bits * 255 + 31) // 63,
            (blue_bits * 255 + 15) // 31,
        )
@_register_color_decoder('RGB332', bpp=1, depth=2, alpha=False)
def decode_rgb332(data, *, start=0, count=None):
    """Decode one-byte pixels laid out as ``rrrgggbb``.

    Red and green carry three bits each and blue carries two.  Every channel
    is widened to eight bits by repeating its bit pattern, so a channel with
    all bits set becomes 255.

    :param data: indexable sequence of byte values
    :param start: offset of the first pixel within ``data``
    :param count: number of pixels to decode; ``None`` reads up to
        ``len(data)``
    :return: generator of ``(r, g, b)`` tuples scaled to 0..255
    """
    end = len(data) if count is None else start + count
    for i in range(start, end):
        datum = data[i]
        r = (datum >> 5) & 0x7
        r = (r << 5) | (r << 2) | (r >> 1)
        g = (datum >> 2) & 0x7
        g = (g << 5) | (g << 2) | (g >> 1)
        # Blue only owns bits 0-1; bit 2 is the lowest green bit, so a
        # three-bit mask here would bleed green into blue.
        b = datum & 0x3
        b = (b << 6) | (b << 4) | (b << 2) | b
        yield r, g, b
# ETC1 and ETC1A4 are registered with no decoder function (None): decode_clim
# hands those two formats to the separate etc1 module instead of calling a
# decoder from this registry.
_register_color_decoder('ETC1', bpp=0.5, depth=4, alpha=False)(None)
_register_color_decoder('ETC1A4', bpp=1, depth=4, alpha=True)(None)
# The decorator is only needed while the registry is being filled in, so it
# is removed from the module namespace once the last format is registered.
del _register_color_decoder
def uncuddle_paletted_pixels(palette, data):
    """Turn packed palette indices into one index per pixel.

    With more than 16 palette entries every byte is already one index, and
    ``data`` is returned untouched.  Otherwise each byte holds two indices
    and a generator is returned that yields the high nibble and then the low
    nibble of every byte.
    """
    if len(palette) > 16:
        return data

    # Short palettes allow cramming two pixels into each byte
    return (
        index
        for packed in data
        for index in (packed >> 4, packed & 0x0f)
    )
def untile_pixels(raw_pixels, width, height, *, is_flim):
    """Unscramble pixels into plain old rows.

    The pixels are arranged in 8×8 tiles, and each tile is a third-
    iteration Z-order curve.

    Taken from: https://github.com/Zhorken/pokemon-x-y-icons/

    :param raw_pixels: iterable of pixels in stored (tiled) order
    :param width: visible image width in pixels
    :param height: visible image height in pixels
    :param is_flim: True to use the rotated FLIM tile order, False for the
        right-then-down CLIM order
    :return: list of ``height`` rows, each a list of ``width`` pixels.  Cells
        for which ``raw_pixels`` supplied nothing are left as ``None``.
    """
    # FIXME this is a wild guess, because i've seen a 4x4 image that this just
    # doesn't handle correctly, but the image is all white so i have no idea
    # what the right fix is -- there's a 4 x 0x78 in 0/7/9 though...
    if width < 8 or height < 8:
        # Images smaller than one tile are read as plain row-major data.
        it = iter(raw_pixels)
        return [[next(it) for _ in range(width)] for _ in range(height)]

    # Images are stored padded to powers of two.  Integer bit_length gives the
    # exact next power of two; a floating-point log ratio can round the wrong
    # way for large values.
    stored_width = 1 << (width - 1).bit_length()
    stored_height = 1 << (height - 1).bit_length()
    num_pixels = stored_width * stored_height
    tile_width = (stored_width + 7) // 8
    tile_height = (stored_height + 7) // 8

    pixels = [[None] * width for _ in range(height)]

    for n, pixel in enumerate(raw_pixels):
        if n >= num_pixels:
            break

        # Find the coordinates of the top-left corner of the current tile.
        # n.b. Each tile is 8×8 pixels, so 64 consecutive pixels share a tile.
        tile_num, within_tile = divmod(n, 64)

        # FIXME i found a 4x4 FLIM that this fails for???
        if is_flim:
            # The FLIM format seems to pseudo-rotate the entire image to the
            # right, so tiles start in the bottom left and go up
            tile_y = (tile_height - 1 - (tile_num % tile_height)) * 8
            tile_x = tile_num // tile_height * 8
        else:
            # CLIM has the more conventional right-then-down order
            tile_y = tile_num // tile_width * 8
            tile_x = tile_num % tile_width * 8

        # Determine the pixel's coordinates within the tile
        # http://en.wikipedia.org/wiki/Z-order_curve#Coordinate_values
        sub_x = (
            (within_tile & 0b000001) |
            (within_tile & 0b000100) >> 1 |
            (within_tile & 0b010000) >> 2
        )
        sub_y = (
            (within_tile & 0b000010) >> 1 |
            (within_tile & 0b001000) >> 2 |
            (within_tile & 0b100000) >> 3
        )
        if is_flim:
            # Individual tiles are also rotated.  Unrotate them
            sub_x, sub_y = sub_y, 7 - sub_x

        # Add up the pixel's coordinates within the whole image
        x = tile_x + sub_x
        y = tile_y + sub_y
        # Anything that lands in the power-of-two padding is dropped.
        if x < width and y < height:
            pixels[y][x] = pixel

    return pixels
def decode_clim(data):
    """Decode a CLIM or FLIM image from its raw bytes.

    The file magic is read from 40 bytes before the end of ``data`` and the
    'imag' header from the last 20 bytes.  The first two bytes of ``data``
    are read as a little-endian mode word; a value of 2 means the image is
    paletted.

    :param data: the complete file contents
    :return: a ``DecodedImageData``
    :raises ValueError: if the magic is neither ``CLIM`` nor ``FLIM``, or if
        the pixel format has no entry in ``COLOR_FORMATS``
    """
    file_format = data[-40:-36]
    if file_format not in (b'CLIM', b'FLIM'):
        raise ValueError("Unknown image format {}".format(file_format))
    is_flim = file_format == b'FLIM'

    imag_header = imag_header_struct.parse(data[-20:])
    pixel_format = imag_header.format
    color_format = COLOR_FORMATS.get(pixel_format)
    if color_format is None:
        raise ValueError(
            "don't know how to decode {} pixels".format(pixel_format))

    width = imag_header.width
    height = imag_header.height
    mode, = struct.unpack_from('<H', data, 0)
    paletted = mode == 2

    # The paletted check takes priority over the ETC1 formats.
    if not paletted and pixel_format in ('ETC1', 'ETC1A4'):
        # FIXME merge this decoder in (problem is it needs to know width +
        # height -- maybe i can move the pixel unscrambling out of it somehow?)
        from .etc1 import decode_etc1
        padded = b'\x00' * 0x80 + data
        # NOTE(review): is_flim is hard-coded to True in both calls rather
        # than taken from the magic checked above -- confirm that is intended
        # for CLIM input.
        if pixel_format == 'ETC1':
            decoded = decode_etc1(
                padded, width, height, use_alpha=False, is_flim=True)
        else:
            decoded = decode_etc1(padded, width, height, is_flim=True)
        return DecodedImageData(width, height, color_format, None, decoded[4])

    if paletted:
        # Layout: mode word, palette length word, palette entries, indices.
        palette_length, = struct.unpack_from('<H', data, 2)
        palette = list(
            color_format.decoder(data, start=4, count=palette_length))
        data_start = 4 + palette_length * color_format.bits_per_pixel
        scrambled_pixels = uncuddle_paletted_pixels(
            palette, data[data_start:])
    else:
        palette = None
        scrambled_pixels = color_format.decoder(data)

    pixels = untile_pixels(scrambled_pixels, width, height, is_flim=is_flim)
    return DecodedImageData(width, height, color_format, palette, pixels)
class DecodedImageData:
    """A decoded image that knows how to write itself out as a PNG.

    :param width: image width in pixels
    :param height: image height in pixels
    :param color_format: object exposing ``bit_depth`` and ``alpha``
    :param palette: list of palette colors, or ``None`` for direct color
    :param pixels: list of rows; palette indices when ``palette`` is set,
        otherwise color tuples
    """

    def __init__(self, width, height, color_format, palette, pixels):
        self.width = width
        self.height = height
        self.color_format = color_format
        self.palette = palette
        self.pixels = pixels

    def __iter__(self):
        """Iterate as ``(width, height, bit_depth, palette, pixels)``."""
        return iter((
            self.width,
            self.height,
            self.color_format.bit_depth,
            self.palette,
            self.pixels,
        ))

    def mirror(self):
        """Flip the image horizontally by reversing every row in place."""
        for row in self.pixels:
            row.reverse()

    def _png_writer_kwargs(self):
        """Build the keyword arguments for ``png.Writer``."""
        writer_kwargs = dict(width=self.width, height=self.height)
        if self.palette:
            writer_kwargs['palette'] = self.palette
        elif self.color_format.alpha:
            # alpha=True is only passed when there is no palette: PyPNG
            # rejects a Writer given both, and a palette carries any
            # transparency in its own entries.
            writer_kwargs['alpha'] = True
        return writer_kwargs

    def write_to_png(self, f):
        """Write the results of ``decode_clim`` to a file object."""
        import png

        writer = png.Writer(**self._png_writer_kwargs())

        # For a paletted image, I want to preserve Zhorken's good idea of
        # indicating the original bit depth with an sBIT chunk.  But PyPNG
        # can't do that directly, so instead I have to do some nonsense.
        # FIXME should probably just do that for everything?
        if self.palette:
            buf = io.BytesIO()
            writer.write(buf, self.pixels)

            # Read the PNG as chunks, and manually add an sBIT chunk
            buf.seek(0)
            png_reader = png.Reader(buf)
            chunks = list(png_reader.chunks())
            sbit = bytes([self.color_format.bit_depth] * 3)
            # Index 1 puts sBIT directly after the first chunk.
            chunks.insert(1, ('sBIT', sbit))

            # Now write the chunks to the file
            png.write_chunks(f, chunks)
        else:
            # Otherwise, it's... almost straightforward.  Each row of color
            # tuples is flattened into one run of channel values.
            writer.write(f, (itertools.chain(*row) for row in self.pixels))