mirror of
https://github.com/veekun/pokedex.git
synced 2024-08-20 18:16:34 +00:00
Add an experimental lazy CompressedStream that actually seems to work
Unfortunately it may not make any practical difference, since a compressed PC file still needs to be fully decompressed just to check the magic numbers of its subfiles. May revert this later, I dunno.
This commit is contained in:
parent
ac19f95d5c
commit
2631d36963
2 changed files with 153 additions and 40 deletions
142
pokedex/extract/lib/compressed.py
Normal file
142
pokedex/extract/lib/compressed.py
Normal file
|
@ -0,0 +1,142 @@
|
|||
"""Substreams that can handle compressed data."""
|
||||
import struct
|
||||
|
||||
from .base import Substream
|
||||
|
||||
|
||||
class DecompressionError(ValueError):
    """Raised when compressed data cannot be decoded."""
|
||||
|
||||
|
||||
class CompressedStream:
    """Base class for seekable, read-only streams that decompress lazily.

    Decompressed bytes accumulate in ``self.data``; ``self.pos`` is the
    current read position within the decompressed data.

    Subclasses are expected to provide:

    ``_read_header()``
        Called once from ``__init__``.  Must set ``self.length``, the total
        decompressed size in bytes.
    ``_ensure_bytes(needed)``
        Called before every read.  Should decompress until ``self.data``
        holds at least ``needed`` bytes.
    """

    def __init__(self, stream):
        """Wrap ``stream`` (the compressed source) and read its header."""
        self.stream = stream
        self.data = bytearray()
        self.pos = 0
        self._read_header()

    def __len__(self):
        """Return the decompressed length announced by the header."""
        return self.length

    def read(self, n=-1):
        """Read up to ``n`` decompressed bytes from the current position.

        A negative ``n``, or one that goes past the end, reads everything
        that is left.  Returns a ``bytearray``.
        """
        maxread = self.length - self.pos
        if n < 0 or 0 <= maxread < n:
            n = maxread
        self._ensure_bytes(self.pos + n)
        data = self.data[self.pos:self.pos + n]
        self.pos += n
        return data

    def seek(self, offset, whence=0):
        """Move the read position and return the new absolute position.

        ``whence`` follows the usual file convention: 0 is relative to the
        start, 1 to the current position and 2 to the end.  The resulting
        position is clamped to ``[0, length]``.  Seeking never triggers any
        decompression.
        """
        if whence == 1:
            offset += self.pos
        elif whence == 2:
            offset += self.length
        offset = max(offset, 0)
        if self.length >= 0:
            offset = min(offset, self.length)
        self.pos = offset
        # File objects report the new position from seek(); returning it
        # keeps this class usable wherever a regular stream is expected.
        return self.pos

    def tell(self):
        """Return the current position within the decompressed data."""
        return self.pos

    def peek(self, n):
        """Return up to ``n`` bytes at the current position without moving."""
        pos = self.tell()
        maxread = self.length - self.pos
        data = self.read(min(maxread, n))
        self.seek(pos)
        return data

    def unpack(self, fmt):
        """Unpacks a struct format from the current position in the stream."""
        data = self.read(struct.calcsize(fmt))
        return struct.unpack(fmt, data)

    def slice(self, offset, length=-1):
        """Not supported yet: always raises ``RuntimeError``."""
        # TODO limit or warn if length is too long for this slice?
        raise RuntimeError
        # NOTE: unreachable while the raise above is in place; kept as the
        # intended implementation for when slicing is enabled.
        return Substream(self, offset, length)
|
||||
|
||||
|
||||
def _bits(byte):
|
||||
return ((byte >> 7) & 1,
|
||||
(byte >> 6) & 1,
|
||||
(byte >> 5) & 1,
|
||||
(byte >> 4) & 1,
|
||||
(byte >> 3) & 1,
|
||||
(byte >> 2) & 1,
|
||||
(byte >> 1) & 1,
|
||||
(byte) & 1)
|
||||
|
||||
|
||||
class LZSS11CompressedStream(CompressedStream):
    """Lazily decompresses an LZSS stream whose header byte is 0x11.

    The compressed data is a sequence of flag bytes, each followed by up to
    eight tokens (one per flag bit, most significant bit first).  A 0 flag
    means "copy one literal byte"; a 1 flag means "copy ``count`` bytes
    starting ``disp`` bytes back in the output".

    Decoding can stop in the middle of a flag byte, so the flags that have
    not been acted on yet are remembered in ``self._pending_flags`` and
    picked up again by the next call.
    """

    def _read_header(self):
        """Parse the 4-byte header and initialise the decoder state.

        The first byte must be 0x11 (checked with an ``assert``); the
        remaining three bytes are the little-endian decompressed length.
        """
        header = self.stream.read(4)
        self.compressed_pos = self.stream.tell()
        assert header[0] == 0x11
        # Pad the 24-bit length to 32 bits so struct can unpack it.
        self.length, = struct.unpack('<L', header[1:] + b'\x00')
        # Flag bits of the most recent flag byte that are still unprocessed.
        self._pending_flags = ()

    def _ensure_bytes(self, needed):
        """Decompress until ``self.data`` holds at least ``needed`` bytes.

        Stops early, without raising, if the compressed stream ends on a
        flag-byte boundary.  Because a back-reference is always copied in
        full, ``self.data`` may end up longer than ``needed``.

        Raises:
            DecompressionError: if the stream ends in the middle of a token,
                or a back-reference points before the start of the output.
        """
        stream = self.stream
        data = self.data
        stream.seek(self.compressed_pos)

        def readbyte():
            chunk = stream.read(1)
            if not chunk:
                # Report truncation with the module's own error type rather
                # than the IndexError that indexing an empty read would give.
                raise DecompressionError(
                    "compressed data ended in the middle of a token")
            return chunk[0]

        # Resume with whatever flags the previous call left unprocessed;
        # dropping them would make the next token byte be misread as a
        # flag byte and corrupt everything decoded afterwards.
        flags = self._pending_flags

        while len(data) < needed:
            if not flags:
                chunk = stream.read(1)
                if not chunk:
                    # FIXME check the decompressed size against self.length
                    # once we hit eof
                    break
                flags = _bits(chunk[0])
            flag, flags = flags[0], flags[1:]

            if flag == 0:
                # Literal: copy one byte straight through.
                data.append(readbyte())
                continue

            # Back-reference.  The high nibble of the first byte selects
            # how the count is encoded.
            b = readbyte()
            indicator = b >> 4

            if indicator == 0:
                # 8 bit count, 12 bit disp
                # indicator is 0, don't need to mask b
                count = (b << 4)
                b = readbyte()
                count += b >> 4
                count += 0x11
            elif indicator == 1:
                # 16 bit count, 12 bit disp
                count = ((b & 0xf) << 12) + (readbyte() << 4)
                b = readbyte()
                count += b >> 4
                count += 0x111
            else:
                # indicator is count (4 bits), 12 bit disp
                count = indicator
                count += 1

            disp = ((b & 0xf) << 8) + readbyte()
            disp += 1

            if disp > len(data):
                # The reference points before the start of the output.
                # NOTE: len(self.stream) assumes the source supports len().
                raise DecompressionError(
                    count, disp, len(data), len(self.stream),
                    self.compressed_pos, stream.tell())

            # Copy byte by byte: source and destination overlap whenever
            # disp < count, and that overlap is how runs are encoded.
            for _ in range(count):
                data.append(data[-disp])

        self._pending_flags = flags
        self.compressed_pos = stream.tell()
|
|
@ -94,7 +94,17 @@ class GARCEntry(object):
|
|||
def __getitem__(self, i):
|
||||
start, length = self.slices[i]
|
||||
ss = self.stream.slice(start, length)
|
||||
if ss.peek(1) in b'\x10\x11':
|
||||
peek = ss.peek(1)
|
||||
if peek == b'\x11':
|
||||
from .compressed import DecompressionError, LZSS11CompressedStream
|
||||
decompressor = LZSS11CompressedStream(ss)
|
||||
try:
|
||||
decompressor.peek(256)
|
||||
except DecompressionError:
|
||||
return ss
|
||||
else:
|
||||
return decompressor
|
||||
elif ss.peek(1) in b'\x10\x11':
|
||||
# XXX this sucks but there's no real way to know for sure whether
|
||||
# data is compressed or not. maybe just bake this into the caller
|
||||
# and let them deal with it, same way we do with text decoding?
|
||||
|
@ -116,45 +126,6 @@ class GARCEntry(object):
|
|||
return len(self.slices)
|
||||
|
||||
|
||||
class CompressedStream:
    """File-like wrapper that decompresses its whole source on first use.

    The header is parsed eagerly to learn the decompressed length; the
    payload itself is only decompressed (all at once) the first time the
    stream is read, sought, told or peeked.
    """

    def __init__(self, stream):
        self.stream = stream
        header = stream.read(4)
        # Rewind so the decompressor later sees the header as well.
        stream.seek(0)
        assert header[0] in b'\x10\x11'
        # Bytes 1-3 are a 24-bit little-endian length; pad to 32 bits.
        self.length, = struct.unpack('<L', header[1:] + b'\x00')
        self.data = None

    def __len__(self):
        return self.length

    def _decompress(self):
        """Decompress the entire source into an in-memory buffer."""
        self.data = BytesIO(lzss3.decompress_bytes(self.stream.read()))

    def _buffer(self):
        """Return the decompressed buffer, creating it on first access."""
        if self.data is None:
            self._decompress()
        return self.data

    def read(self, *args):
        return self._buffer().read(*args)

    def seek(self, *args):
        return self._buffer().seek(*args)

    def tell(self, *args):
        return self._buffer().tell(*args)

    def peek(self, n):
        """Return up to ``n`` bytes at the current position without moving."""
        buf = self._buffer()
        here = buf.tell()
        ret = buf.read(n)
        buf.seek(here)
        return ret
|
||||
|
||||
|
||||
|
||||
|
||||
XY_CHAR_MAP = {
|
||||
|
|
Loading…
Reference in a new issue