webmetro/src/lib.rs

285 lines
8.6 KiB
Rust
Raw Normal View History

2017-01-10 03:40:21 +00:00
extern crate futures;
2017-01-12 05:41:35 +00:00
/// Errors that can be reported while decoding EBML data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The first byte of a varint contained no length-marker bit.
    CorruptVarint,
    /// An element ID decoded to the reserved "unknown" varint value.
    UnknownElementId,
    /// An element that has to be read as a whole declared an unknown payload length.
    UnknownElementLength,
    /// An element payload could not be decoded.
    /// NOTE(review): not produced by any code visible in this file; presumably
    /// intended for `Schema::decode` implementations.
    CorruptPayload,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let message = match *self {
            Error::CorruptVarint => "corrupt variable-length integer",
            Error::UnknownElementId => "element ID uses the reserved unknown value",
            Error::UnknownElementLength => "element has an unknown payload length",
            Error::CorruptPayload => "corrupt element payload",
        };
        f.write_str(message)
    }
}

impl std::error::Error for Error {}
/// The result of decoding an EBML variable-length integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Varint {
    /// a numeric value
    Value(u64),
    /// the reserved "unknown" value
    Unknown,
}
2017-01-12 12:59:09 +00:00
/// Try to parse an EBML varint at the start of the given slice.
2017-01-12 05:41:35 +00:00
/// Returns an Err() if the format is corrupt.
/// Returns Ok(None) if more bytes are needed to get a result.
2017-01-16 06:39:25 +00:00
/// Returns Ok(Some((varint, size))) to return a varint value and
2017-01-12 12:55:49 +00:00
/// the size of the parsed varint.
2017-01-12 12:37:29 +00:00
pub fn decode_varint(bytes: &[u8]) -> Result<Option<(Varint, usize)>, Error> {
2017-01-12 05:41:35 +00:00
let mut value: u64 = 0;
let mut value_length = 1;
let mut mask: u8 = 0x80;
let mut unknown_marker: u64 = !0;
if bytes.len() == 0 {
return Ok(None)
}
// get length marker bit from first byte & parse first byte
while mask > 0 {
if (mask & bytes[0]) != 0 {
value = (bytes[0] & !mask) as u64;
unknown_marker = (mask - 1) as u64;
break
}
value_length += 1;
mask = mask >> 1;
}
if mask == 0 {
return Err(Error::CorruptVarint)
}
// check we have enough data to parse
if value_length > bytes.len() {
return Ok(None)
}
// decode remaining bytes
for i in 1..value_length {
value = (value << 8) + (bytes[i] as u64);
unknown_marker = (unknown_marker << 8) + 0xFF;
}
// determine result
if value == unknown_marker {
2017-01-12 12:37:29 +00:00
Ok(Some((Varint::Unknown, value_length)))
2017-01-12 05:41:35 +00:00
} else {
2017-01-12 12:37:29 +00:00
Ok(Some((Varint::Value(value), value_length)))
2017-01-12 05:41:35 +00:00
}
}
2017-01-12 13:44:48 +00:00
/// Try to parse an EBML element header at the start of the given slice.
/// Returns an Err() if the format is corrupt.
/// Returns Ok(None) if more bytes are needed to get a result.
2017-01-16 06:39:25 +00:00
/// Returns Ok(Some((id, varint, size))) to return the element id,
/// the size of the payload, and the size of the parsed header.
2017-01-12 13:44:48 +00:00
pub fn decode_tag(bytes: &[u8]) -> Result<Option<(u64, Varint, usize)>, Error> {
// parse element ID
match decode_varint(bytes) {
Ok(None) => Ok(None),
Err(err) => Err(err),
Ok(Some((Varint::Unknown, _))) => Err(Error::UnknownElementId),
Ok(Some((Varint::Value(element_id), id_size))) => {
// parse payload size
match decode_varint(&bytes[id_size..]) {
Ok(None) => Ok(None),
Err(err) => Err(err),
Ok(Some((element_length, length_size))) =>
Ok(Some((
element_id,
element_length,
id_size + length_size
)))
}
}
}
}
pub trait Schema<'a> {
2017-01-24 08:20:49 +00:00
type Element: 'a;
fn should_unwrap(&self, element_id: u64) -> bool;
fn decode<'b: 'a>(&self, element_id: u64, bytes: &'b[u8]) -> Result<Self::Element, Error>;
fn decode_element<'b: 'a>(&self, bytes: &'b[u8]) -> Result<Option<(Self::Element, usize)>, Error> {
match decode_tag(bytes) {
Ok(None) => Ok(None),
Err(err) => Err(err),
Ok(Some((element_id, payload_size_tag, tag_size))) => {
let should_unwrap = self.should_unwrap(element_id);
let payload_size = match (should_unwrap, payload_size_tag) {
(true, _) => 0,
(false, Varint::Unknown) => return Err(Error::UnknownElementLength),
(false, Varint::Value(size)) => size as usize
};
let element_size = tag_size + payload_size;
if element_size > bytes.len() {
// need to read more still
return Ok(None);
}
match self.decode(element_id, &bytes[tag_size..element_size]) {
Ok(element) => Ok(Some((element, element_size))),
Err(error) => Err(error)
}
}
}
}
fn iter_for<'b: 'a>(self, bytes: &'b[u8]) -> EbmlIterator<'a, Self>
where Self: Sized
{
EbmlIterator {
schema: self,
slice: bytes,
position: 0
}
}
}
/// Unit type that implements `Schema` with `WebmElement` as its element type.
pub struct Webm;
/// An element produced by the `Webm` schema, borrowing its payload from the
/// decoded buffer.
#[derive(Debug, PartialEq)]
pub enum WebmElement<'a> {
/// An element kept as an opaque blob: the element ID as returned by
/// `decode_tag`, and the raw payload bytes.
Unknown(u64, &'a[u8])
}
impl<'a> Schema<'a> for Webm {
type Element = WebmElement<'a>;
fn should_unwrap(&self, element_id: u64) -> bool {
false
}
fn decode<'b: 'a>(&self, element_id: u64, bytes: &'b[u8]) -> Result<WebmElement<'a>, Error> {
// dummy
Ok(WebmElement::Unknown(element_id, bytes))
}
}
2017-01-25 00:11:59 +00:00
pub struct EbmlIterator<'b, T: Schema<'b>> {
schema: T,
2017-01-24 08:20:49 +00:00
slice: &'b[u8],
position: usize,
}
2017-01-25 00:11:59 +00:00
impl<'b, T: Schema<'b>> Iterator for EbmlIterator<'b, T> {
    type Item = T::Element;

    /// Decode the element at the current position and advance past it.
    ///
    /// Returns None both when the remaining bytes do not hold a complete
    /// element and when decoding fails; the error itself is discarded, since
    /// the item type has no way to carry it.
    fn next(&mut self) -> Option<T::Element> {
        let remaining = &self.slice[self.position..];
        match self.schema.decode_element(remaining) {
            Ok(Some((element, element_size))) => {
                self.position += element_size;
                Some(element)
            }
            Ok(None) | Err(_) => None,
        }
    }
}
2017-01-10 03:40:21 +00:00
#[cfg(test)]
mod tests {
    use futures::future::{ok, Future};
    use super::*;
    use super::Error::{CorruptVarint, UnknownElementId};
    use super::Varint::{Unknown, Value};

    // Smoke test that the `futures` dependency is linked and usable.
    #[test]
    fn hello_futures() {
        let my_future = ok::<String, ()>("Hello".into())
            .map(|hello| hello + ", Futures!");

        let string_result = my_future.wait().unwrap();

        assert_eq!(string_result, "Hello, Futures!");
    }

    #[test]
    fn fail_corrupted_varints() {
        assert_eq!(decode_varint(&[0]), Err(CorruptVarint));
        assert_eq!(decode_varint(&[0, 0, 0]), Err(CorruptVarint));
    }

    #[test]
    fn incomplete_varints() {
        assert_eq!(decode_varint(&[]), Ok(None));
        assert_eq!(decode_varint(&[0x40]), Ok(None));
        assert_eq!(decode_varint(&[0x01, 0, 0]), Ok(None));
    }

    #[test]
    fn parse_varints() {
        assert_eq!(decode_varint(&[0xFF]), Ok(Some((Unknown, 1))));
        assert_eq!(decode_varint(&[0x7F, 0xFF]), Ok(Some((Unknown, 2))));
        assert_eq!(decode_varint(&[0x80]), Ok(Some((Value(0), 1))));
        assert_eq!(decode_varint(&[0x81]), Ok(Some((Value(1), 1))));
        assert_eq!(decode_varint(&[0x40, 52]), Ok(Some((Value(52), 2))));

        // test extra data in buffer
        assert_eq!(decode_varint(&[0x83, 0x11]), Ok(Some((Value(3), 1))));
    }

    #[test]
    fn fail_corrupted_tags() {
        assert_eq!(decode_tag(&[0]), Err(CorruptVarint));
        assert_eq!(decode_tag(&[0x80, 0]), Err(CorruptVarint));
        assert_eq!(decode_tag(&[0xFF, 0x80]), Err(UnknownElementId));
        assert_eq!(decode_tag(&[0x7F, 0xFF, 0x40, 0]), Err(UnknownElementId));
    }

    #[test]
    fn incomplete_tags() {
        assert_eq!(decode_tag(&[]), Ok(None));
        assert_eq!(decode_tag(&[0x80]), Ok(None));
        assert_eq!(decode_tag(&[0x40, 0, 0x40]), Ok(None));
    }

    #[test]
    fn parse_tags() {
        assert_eq!(decode_tag(&[0x80, 0x80]), Ok(Some((0, Value(0), 2))));
        assert_eq!(decode_tag(&[0x81, 0x85]), Ok(Some((1, Value(5), 2))));
        assert_eq!(decode_tag(&[0x80, 0xFF]), Ok(Some((0, Unknown, 2))));
        assert_eq!(decode_tag(&[0x80, 0x7F, 0xFF]), Ok(Some((0, Unknown, 3))));
        assert_eq!(decode_tag(&[0x85, 0x40, 52]), Ok(Some((5, Value(52), 3))));
    }

    const TEST_FILE: &'static [u8] = include_bytes!("data/test1.webm");

    #[test]
    fn decode_sanity_test() {
        let decoded = Webm.decode_element(TEST_FILE);
        if let Ok(Some((WebmElement::Unknown(tag, slice), element_size))) = decoded {
            assert_eq!(tag, 0x0A45DFA3); // EBML tag, sans the length indicator bit
            assert_eq!(slice.len(), 31); // known header payload length
            assert_eq!(element_size, 43); // known header total length
        } else {
            panic!("Did not parse expected EBML header; result: {:?}", decoded);
        }
    }

    /// Assert that `test` is an opaque element with the given tag and payload length.
    fn assert_webm_blob(test: Option<WebmElement>, tag: u64, payload_size: usize) {
        match test {
            Some(WebmElement::Unknown(element_tag, bytes)) => {
                assert_eq!(element_tag, tag);
                assert_eq!(bytes.len(), payload_size);
            },
            None => {
                panic!("Did not parse expected WebM element; result: {:?}", test);
            }
        }
    }

    #[test]
    fn decode_webm_test1() {
        let mut iter = Webm.iter_for(TEST_FILE);

        // EBML Header
        assert_webm_blob(iter.next(), 0x0A45DFA3, 31);

        // Segment
        assert_webm_blob(iter.next(), 0x08538067, 56124);
    }
}