mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Add an experimental lazy CompressedStream that actually seems to work
Unfortunately it may not make any practical difference, since a compressed PC file still needs to be fully decompressed just to check the magic numbers of its subfiles. May revert this later, I dunno.
This commit is contained in:
parent
ac19f95d5c
commit
2631d36963
2 changed files with 153 additions and 40 deletions
142
pokedex/extract/lib/compressed.py
Normal file
142
pokedex/extract/lib/compressed.py
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
"""Substreams that can handle compressed data."""
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from .base import Substream
|
||||||
|
|
||||||
|
|
||||||
|
class DecompressionError(ValueError):
    """Raised when a compressed stream contains data that cannot be decoded."""
|
||||||
|
|
||||||
|
|
||||||
|
class CompressedStream:
    """Lazy, file-like view over decompressed data.

    Concrete subclasses must provide two hooks:

    - ``_read_header()`` — parse the compression header from ``self.stream``
      and set ``self.length`` (the decompressed size).
    - ``_ensure_bytes(needed)`` — decompress into ``self.data`` until at
      least ``needed`` bytes are available (or the input is exhausted).
    """

    def __init__(self, stream):
        self.stream = stream
        # Decompressed bytes produced so far; grows on demand.
        self.data = bytearray()
        # Current read position within the decompressed data.
        self.pos = 0
        self._read_header()

    def __len__(self):
        return self.length

    def read(self, n=-1):
        """Return up to ``n`` decompressed bytes; ``n < 0`` reads to the end."""
        remaining = self.length - self.pos
        if n < 0 or 0 <= remaining < n:
            n = remaining
        self._ensure_bytes(self.pos + n)
        start = self.pos
        self.pos = start + n
        return self.data[start:self.pos]

    def seek(self, offset, whence=0):
        """Standard ``seek``; the final position is clamped to [0, length]."""
        if whence == 1:
            offset += self.pos
        elif whence == 2:
            offset += self.length
        if offset < 0:
            offset = 0
        # A negative length means "unknown", so only clamp when it's known.
        if self.length >= 0 and offset > self.length:
            offset = self.length
        self.pos = offset

    def tell(self):
        """Return the current position within the decompressed data."""
        return self.pos

    def peek(self, n):
        """Return up to ``n`` bytes without advancing the position."""
        saved = self.tell()
        available = self.length - self.pos
        if available < n:
            n = available
        out = self.read(n)
        self.seek(saved)
        return out

    def unpack(self, fmt):
        """Unpack a struct format from the current position in the stream."""
        return struct.unpack(fmt, self.read(struct.calcsize(fmt)))

    def slice(self, offset, length=-1):
        # TODO limit or warn if length is too long for this slice?
        # Deliberately disabled for now; kept so callers fail loudly.
        raise RuntimeError
        return Substream(self, offset, length)
|
||||||
|
|
||||||
|
|
||||||
|
def _bits(byte):
|
||||||
|
return ((byte >> 7) & 1,
|
||||||
|
(byte >> 6) & 1,
|
||||||
|
(byte >> 5) & 1,
|
||||||
|
(byte >> 4) & 1,
|
||||||
|
(byte >> 3) & 1,
|
||||||
|
(byte >> 2) & 1,
|
||||||
|
(byte >> 1) & 1,
|
||||||
|
(byte) & 1)
|
||||||
|
|
||||||
|
|
||||||
|
class LZSS11CompressedStream(CompressedStream):
    """Lazily decompresses an LZSS stream whose magic byte is 0x11.

    The format is the Nintendo "LZ11" variant: a 4-byte header (magic byte
    0x11 followed by the 24-bit little-endian decompressed length), then
    groups of one flag byte followed by eight tokens, MSB first.  Flag bit 0
    means a literal byte; flag bit 1 means a back-reference whose count
    encoding depends on the high nibble of its first byte.
    """

    def _read_header(self):
        # Consume the 4-byte header and remember where the compressed
        # payload starts, so _ensure_bytes can resume from there.
        header = self.stream.read(4)
        self.compressed_pos = self.stream.tell()
        # NOTE(review): `assert` is stripped under -O; this is a magic-number
        # sanity check, not input validation.
        assert header[0] == 0x11
        # Decompressed length is the 3 bytes after the magic, little-endian;
        # pad with a zero byte to unpack as a 4-byte unsigned long.
        self.length, = struct.unpack('<L', header[1:] + b'\x00')

    def _ensure_bytes(self, needed):
        """Decompress until at least ``needed`` bytes are in ``self.data``.

        Resumes from ``self.compressed_pos`` (always a flag-group boundary)
        and saves the new position when done, so repeated calls pick up
        where the previous one stopped.
        """
        self.stream.seek(self.compressed_pos)

        def writebyte(b):
            self.data.append(b)

        def readbyte():
            # Raises IndexError on EOF (read(1) returns b''); callers rely
            # on the IndexError handler below to turn that into an error.
            return self.stream.read(1)[0]

        def copybyte():
            writebyte(readbyte())

        while len(self.data) < needed:
            # One flag byte governs the next eight tokens.
            byte = self.stream.read(1)
            if not byte:
                # Ran out of compressed input; stop with whatever we have.
                break
            b, = byte
            flags = _bits(b)
            for flag in flags:
                if flag == 0:
                    # Literal: copy one byte straight through.
                    copybyte()
                elif flag == 1:
                    # Back-reference; the high nibble selects the encoding.
                    b = readbyte()
                    indicator = b >> 4

                    if indicator == 0:
                        # 8 bit count, 12 bit disp
                        # indicator is 0, don't need to mask b
                        count = (b << 4)
                        b = readbyte()
                        count += b >> 4
                        count += 0x11
                    elif indicator == 1:
                        # 16 bit count, 12 bit disp
                        count = ((b & 0xf) << 12) + (readbyte() << 4)
                        b = readbyte()
                        count += b >> 4
                        count += 0x111
                    else:
                        # indicator is count (4 bits), 12 bit disp
                        count = indicator
                        count += 1

                    # Displacement: low nibble of the last byte read plus one
                    # more byte; stored value is distance-1.
                    disp = ((b & 0xf) << 8) + readbyte()
                    disp += 1

                    try:
                        # Copy byte-by-byte so overlapping references
                        # (disp < count) repeat recently written output.
                        for _ in range(count):
                            writebyte(self.data[-disp])
                    except IndexError:
                        # Displacement reaches before the start of the output:
                        # corrupt data (or not LZSS11 at all).
                        # FIXME `it` no longer exists, need len of substream
                        raise DecompressionError(count, disp, len(self.data), len(self.stream), self.compressed_pos, self.stream.tell())
                else:
                    # Unreachable: _bits only yields 0 or 1.
                    raise DecompressionError(flag)

            # Only stop at a flag-group boundary so compressed_pos below
            # never points into the middle of a group.
            if needed <= len(self.data):
                break

        self.compressed_pos = self.stream.tell()

        # FIXME check this once we hit eof
        #if len(self.data) != decompressed_size:
        #    raise DecompressionError(
        #        "decompressed size does not match the expected size")
|
@ -94,7 +94,17 @@ class GARCEntry(object):
|
||||||
def __getitem__(self, i):
|
def __getitem__(self, i):
|
||||||
start, length = self.slices[i]
|
start, length = self.slices[i]
|
||||||
ss = self.stream.slice(start, length)
|
ss = self.stream.slice(start, length)
|
||||||
if ss.peek(1) in b'\x10\x11':
|
peek = ss.peek(1)
|
||||||
|
if peek == b'\x11':
|
||||||
|
from .compressed import DecompressionError, LZSS11CompressedStream
|
||||||
|
decompressor = LZSS11CompressedStream(ss)
|
||||||
|
try:
|
||||||
|
decompressor.peek(256)
|
||||||
|
except DecompressionError:
|
||||||
|
return ss
|
||||||
|
else:
|
||||||
|
return decompressor
|
||||||
|
elif ss.peek(1) in b'\x10\x11':
|
||||||
# XXX this sucks but there's no real way to know for sure whether
|
# XXX this sucks but there's no real way to know for sure whether
|
||||||
# data is compressed or not. maybe just bake this into the caller
|
# data is compressed or not. maybe just bake this into the caller
|
||||||
# and let them deal with it, same way we do with text decoding?
|
# and let them deal with it, same way we do with text decoding?
|
||||||
|
@ -116,45 +126,6 @@ class GARCEntry(object):
|
||||||
return len(self.slices)
|
return len(self.slices)
|
||||||
|
|
||||||
|
|
||||||
class CompressedStream:
    """Eager file-like wrapper over LZSS-compressed data.

    Unlike the lazy variant, the first read/seek/tell/peek decompresses the
    ENTIRE payload in one shot and serves further calls from an in-memory
    buffer.
    """

    def __init__(self, stream):
        self.stream = stream
        # Peek at the 4-byte header, then rewind so _decompress sees the
        # whole stream from the start.
        header = stream.read(4)
        stream.seek(0)
        # NOTE(review): `assert` is stripped under -O; magic must be
        # 0x10 or 0x11 (plain LZSS or the LZ11 variant).
        assert header[0] in b'\x10\x11'
        # Decompressed length: 3 bytes after the magic, little-endian,
        # zero-padded to unpack as a 4-byte unsigned long.
        self.length, = struct.unpack('<L', header[1:] + b'\x00')
        # None until first access; becomes a BytesIO of decompressed data.
        self.data = None

    def __len__(self):
        return self.length

    def _decompress(self):
        # Decompresses everything at once.  `lzss3` and `BytesIO` are
        # presumably imported at the top of this file — not visible here;
        # TODO confirm.
        self.data = BytesIO(lzss3.decompress_bytes(self.stream.read()))

    def read(self, *args):
        # Lazily trigger full decompression on first access.
        if self.data is None:
            self._decompress()
        return self.data.read(*args)

    def seek(self, *args):
        if self.data is None:
            self._decompress()
        return self.data.seek(*args)

    def tell(self, *args):
        if self.data is None:
            self._decompress()
        return self.data.tell(*args)

    def peek(self, n):
        """Read up to ``n`` bytes without moving the position."""
        if self.data is None:
            self._decompress()
        here = self.data.tell()
        ret = self.data.read(n)
        self.data.seek(here)
        return ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
XY_CHAR_MAP = {
|
XY_CHAR_MAP = {
|
||||||
|
|
Loading…
Reference in a new issue