Add an experimental lazy CompressedStream that actually seems to work

Unfortunately it may not make any practical difference, since a
compressed PC file still needs to be fully decompressed just to check
the magic numbers of its subfiles.  May revert this later, I dunno.
Eevee (Lexy Munroe) 2017-01-26 16:26:50 -08:00
parent ac19f95d5c
commit 2631d36963
2 changed files with 153 additions and 40 deletions


@@ -0,0 +1,142 @@
"""Substreams that can handle compressed data."""
import struct

from .base import Substream


class DecompressionError(ValueError):
    pass


class CompressedStream:
    def __init__(self, stream):
        self.stream = stream
        self.data = bytearray()
        self.pos = 0
        self._read_header()

    def __len__(self):
        return self.length

    def read(self, n=-1):
        maxread = self.length - self.pos
        if n < 0 or 0 <= maxread < n:
            n = maxread
        self._ensure_bytes(self.pos + n)
        data = self.data[self.pos:self.pos + n]
        self.pos += n
        return data

    def seek(self, offset, whence=0):
        if whence == 1:
            offset += self.pos
        elif whence == 2:
            offset += self.length
        offset = max(offset, 0)
        if self.length >= 0:
            offset = min(offset, self.length)
        self.pos = offset

    def tell(self):
        return self.pos

    def peek(self, n):
        pos = self.tell()
        maxread = self.length - self.pos
        data = self.read(min(maxread, n))
        self.seek(pos)
        return data

    def unpack(self, fmt):
        """Unpacks a struct format from the current position in the stream."""
        data = self.read(struct.calcsize(fmt))
        return struct.unpack(fmt, data)

    def slice(self, offset, length=-1):
        # TODO limit or warn if length is too long for this slice?
        raise RuntimeError
        return Substream(self, offset, length)
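

# --- Editorial sketch, not part of this commit --------------------------------
# CompressedStream works as a template: a subclass supplies _read_header(),
# which must set self.length, and _ensure_bytes(needed), which must grow
# self.data to at least `needed` decompressed bytes when it can.  As a purely
# hypothetical illustration of that contract (this class and header layout do
# not exist in the codebase), an uncompressed "stored" variant might look like:
class StoredStream(CompressedStream):
    def _read_header(self):
        # hypothetical header: one tag byte, then a 24-bit little-endian size
        header = self.stream.read(4)
        self.data_start = self.stream.tell()
        self.length, = struct.unpack('<L', header[1:] + b'\x00')

    def _ensure_bytes(self, needed):
        # nothing to decompress; just buffer raw bytes lazily
        missing = needed - len(self.data)
        if missing > 0:
            self.stream.seek(self.data_start + len(self.data))
            self.data += self.stream.read(missing)
# -------------------------------------------------------------------------------
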
def _bits(byte):
    return ((byte >> 7) & 1,
            (byte >> 6) & 1,
            (byte >> 5) & 1,
            (byte >> 4) & 1,
            (byte >> 3) & 1,
            (byte >> 2) & 1,
            (byte >> 1) & 1,
            (byte) & 1)


class LZSS11CompressedStream(CompressedStream):
    def _read_header(self):
        header = self.stream.read(4)
        self.compressed_pos = self.stream.tell()
        assert header[0] == 0x11
        self.length, = struct.unpack('<L', header[1:] + b'\x00')
    def _ensure_bytes(self, needed):
        self.stream.seek(self.compressed_pos)

        def writebyte(b):
            self.data.append(b)

        def readbyte():
            return self.stream.read(1)[0]

        def copybyte():
            writebyte(readbyte())

        while len(self.data) < needed:
            byte = self.stream.read(1)
            if not byte:
                break
            b, = byte
            flags = _bits(b)
            for flag in flags:
                if flag == 0:
                    copybyte()
                elif flag == 1:
                    b = readbyte()
                    indicator = b >> 4

                    if indicator == 0:
                        # 8 bit count, 12 bit disp
                        # indicator is 0, don't need to mask b
                        count = (b << 4)
                        b = readbyte()
                        count += b >> 4
                        count += 0x11
                    elif indicator == 1:
                        # 16 bit count, 12 bit disp
                        count = ((b & 0xf) << 12) + (readbyte() << 4)
                        b = readbyte()
                        count += b >> 4
                        count += 0x111
                    else:
                        # indicator is count (4 bits), 12 bit disp
                        count = indicator
                        count += 1

                    disp = ((b & 0xf) << 8) + readbyte()
                    disp += 1

                    try:
                        for _ in range(count):
                            writebyte(self.data[-disp])
                    except IndexError:
                        # FIXME `it` no longer exists, need len of substream
                        raise DecompressionError(
                            count, disp, len(self.data), len(self.stream),
                            self.compressed_pos, self.stream.tell())
                else:
                    raise DecompressionError(flag)

                # Stop only once the full decompressed length is reached; any
                # remaining flags in this group are padding.  (Breaking on
                # `needed` here would stop mid-group and leave the resume
                # position below pointing at a data byte instead of a flag
                # byte.)
                if len(self.data) >= self.length:
                    break

            # Only remember the resume point at flag-group boundaries, so the
            # next _ensure_bytes call can continue decoding safely from here.
            self.compressed_pos = self.stream.tell()

        # FIXME check this once we hit eof
        #if len(self.data) != decompressed_size:
        #    raise DecompressionError(
        #        "decompressed size does not match the expected size")


@@ -94,7 +94,17 @@ class GARCEntry(object)
    def __getitem__(self, i):
        start, length = self.slices[i]
        ss = self.stream.slice(start, length)
        peek = ss.peek(1)
        if peek == b'\x11':
            from .compressed import DecompressionError, LZSS11CompressedStream
            decompressor = LZSS11CompressedStream(ss)
            try:
                decompressor.peek(256)
            except DecompressionError:
                return ss
            else:
                return decompressor
        elif ss.peek(1) in b'\x10\x11':
            # XXX this sucks but there's no real way to know for sure whether
            # data is compressed or not. maybe just bake this into the caller
            # and let them deal with it, same way we do with text decoding?
@@ -116,45 +126,6 @@ class GARCEntry(object)
        return len(self.slices)


class CompressedStream:
    def __init__(self, stream):
        self.stream = stream
        header = stream.read(4)
        stream.seek(0)
        assert header[0] in b'\x10\x11'
        self.length, = struct.unpack('<L', header[1:] + b'\x00')
        self.data = None

    def __len__(self):
        return self.length

    def _decompress(self):
        self.data = BytesIO(lzss3.decompress_bytes(self.stream.read()))

    def read(self, *args):
        if self.data is None:
            self._decompress()
        return self.data.read(*args)

    def seek(self, *args):
        if self.data is None:
            self._decompress()
        return self.data.seek(*args)

    def tell(self, *args):
        if self.data is None:
            self._decompress()
        return self.data.tell(*args)

    def peek(self, n):
        if self.data is None:
            self._decompress()
        here = self.data.tell()
        ret = self.data.read(n)
        self.data.seek(here)
        return ret
XY_CHAR_MAP = {