diff --git a/src/bin/dump.rs b/src/bin/dump.rs index 7ff1dba..1251c9a 100644 --- a/src/bin/dump.rs +++ b/src/bin/dump.rs @@ -4,8 +4,9 @@ use std::env::args; use std::fs::File; use std::io::Read; use std::path::Path; -use lab_ebml::{Schema, Webm}; -use lab_ebml::WebmElement::*; +use lab_ebml::Schema; +use lab_ebml::webm::Webm; +use lab_ebml::webm::WebmElement::*; pub fn main() { let mut args = args(); diff --git a/src/ebml.rs b/src/ebml.rs new file mode 100644 index 0000000..30079e5 --- /dev/null +++ b/src/ebml.rs @@ -0,0 +1,135 @@ +pub const EBML_HEAD_ID: u64 = 0x0A45DFA3; +pub const VOID_ID: u64 = 0x6C; + +#[derive(Debug, PartialEq)] +pub enum Error { + CorruptVarint, + UnknownElementId, + UnknownElementLength, + CorruptPayload, +} + +#[derive(Debug, PartialEq)] +pub enum Varint { + /// a numeric value + Value(u64), + /// the reserved "unknown" value + Unknown +} + +/// Try to parse an EBML varint at the start of the given slice. +/// Returns an Err() if the format is corrupt. +/// Returns Ok(None) if more bytes are needed to get a result. +/// Returns Ok(Some((varint, size))) to return a varint value and +/// the size of the parsed varint. +pub fn decode_varint(bytes: &[u8]) -> Result, Error> { + let mut value: u64 = 0; + let mut value_length = 1; + let mut mask: u8 = 0x80; + let mut unknown_marker: u64 = !0; + + if bytes.len() == 0 { + return Ok(None) + } + + // get length marker bit from first byte & parse first byte + while mask > 0 { + if (mask & bytes[0]) != 0 { + value = (bytes[0] & !mask) as u64; + unknown_marker = (mask - 1) as u64; + break + } + value_length += 1; + mask = mask >> 1; + } + + if mask == 0 { + return Err(Error::CorruptVarint) + } + + // check we have enough data to parse + if value_length > bytes.len() { + return Ok(None) + } + + // decode remaining bytes + for i in 1..value_length { + value = (value << 8) + (bytes[i] as u64); + unknown_marker = (unknown_marker << 8) + 0xFF; + } + + // determine result + if value == unknown_marker { + Ok(Some((Varint::Unknown, value_length))) + } else { + Ok(Some((Varint::Value(value), value_length))) + } +} + +/// Try to parse an EBML element header at the start of the given slice. +/// Returns an Err() if the format is corrupt. +/// Returns Ok(None) if more bytes are needed to get a result. +/// Returns Ok(Some((id, varint, size))) to return the element id, +/// the size of the payload, and the size of the parsed header. +pub fn decode_tag(bytes: &[u8]) -> Result, Error> { + // parse element ID + match decode_varint(bytes) { + Ok(None) => Ok(None), + Err(err) => Err(err), + Ok(Some((Varint::Unknown, _))) => Err(Error::UnknownElementId), + Ok(Some((Varint::Value(element_id), id_size))) => { + // parse payload size + match decode_varint(&bytes[id_size..]) { + Ok(None) => Ok(None), + Err(err) => Err(err), + Ok(Some((element_length, length_size))) => + Ok(Some(( + element_id, + element_length, + id_size + length_size + ))) + } + } + } +} + +#[derive(Debug, PartialEq)] +pub struct Ebml(pub S, pub T); + +pub trait Schema<'a> { + type Element: 'a; + + fn should_unwrap(&self, element_id: u64) -> bool; + fn decode<'b: 'a>(&self, element_id: u64, bytes: &'b[u8]) -> Result; + + fn decode_element<'b: 'a>(&self, bytes: &'b[u8]) -> Result, Error> { + match decode_tag(bytes) { + Ok(None) => Ok(None), + Err(err) => Err(err), + Ok(Some((element_id, payload_size_tag, tag_size))) => { + let should_unwrap = self.should_unwrap(element_id); + + let payload_size = match (should_unwrap, payload_size_tag) { + (true, _) => 0, + (false, Varint::Unknown) => return Err(Error::UnknownElementLength), + (false, Varint::Value(size)) => size as usize + }; + + let element_size = tag_size + payload_size; + if element_size > bytes.len() { + // need to read more still + return Ok(None); + } + + match self.decode(element_id, &bytes[tag_size..element_size]) { + Ok(element) => Ok(Some((element, element_size))), + Err(error) => Err(error) + } + } + } + } + + fn parse(self, source: T) -> Ebml where Self: Sized { + Ebml(self, source) + } +} diff --git a/src/iterator.rs b/src/iterator.rs new file mode 100644 index 0000000..4283252 --- /dev/null +++ b/src/iterator.rs @@ -0,0 +1,36 @@ +use ebml::*; + +pub struct EbmlIterator<'b, T: Schema<'b>> { + schema: T, + slice: &'b[u8], + position: usize, +} + +impl<'b, S: Schema<'b>> IntoIterator for Ebml { + type Item = S::Element; + type IntoIter = EbmlIterator<'b, S>; + + fn into_iter(self) -> EbmlIterator<'b, S> + { + EbmlIterator { + schema: self.0, + slice: self.1, + position: 0 + } + } +} + +impl<'b, T: Schema<'b>> Iterator for EbmlIterator<'b, T> { + type Item = T::Element; + + fn next(&mut self) -> Option { + match self.schema.decode_element(&self.slice[self.position..]) { + Err(_) => None, + Ok(None) => None, + Ok(Some((element, element_size))) => { + self.position += element_size; + Some(element) + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 3141784..a6c7517 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,224 +1,11 @@ extern crate futures; -#[derive(Debug, PartialEq)] -pub enum Error { - CorruptVarint, - UnknownElementId, - UnknownElementLength, - CorruptPayload, -} +pub mod ebml; +mod iterator; +pub mod webm; -#[derive(Debug, PartialEq)] -pub enum Varint { - /// a numeric value - Value(u64), - /// the reserved "unknown" value - Unknown -} - -/// Try to parse an EBML varint at the start of the given slice. -/// Returns an Err() if the format is corrupt. -/// Returns Ok(None) if more bytes are needed to get a result. -/// Returns Ok(Some((varint, size))) to return a varint value and -/// the size of the parsed varint. -pub fn decode_varint(bytes: &[u8]) -> Result, Error> { - let mut value: u64 = 0; - let mut value_length = 1; - let mut mask: u8 = 0x80; - let mut unknown_marker: u64 = !0; - - if bytes.len() == 0 { - return Ok(None) - } - - // get length marker bit from first byte & parse first byte - while mask > 0 { - if (mask & bytes[0]) != 0 { - value = (bytes[0] & !mask) as u64; - unknown_marker = (mask - 1) as u64; - break - } - value_length += 1; - mask = mask >> 1; - } - - if mask == 0 { - return Err(Error::CorruptVarint) - } - - // check we have enough data to parse - if value_length > bytes.len() { - return Ok(None) - } - - // decode remaining bytes - for i in 1..value_length { - value = (value << 8) + (bytes[i] as u64); - unknown_marker = (unknown_marker << 8) + 0xFF; - } - - // determine result - if value == unknown_marker { - Ok(Some((Varint::Unknown, value_length))) - } else { - Ok(Some((Varint::Value(value), value_length))) - } -} - -/// Try to parse an EBML element header at the start of the given slice. -/// Returns an Err() if the format is corrupt. -/// Returns Ok(None) if more bytes are needed to get a result. -/// Returns Ok(Some((id, varint, size))) to return the element id, -/// the size of the payload, and the size of the parsed header. -pub fn decode_tag(bytes: &[u8]) -> Result, Error> { - // parse element ID - match decode_varint(bytes) { - Ok(None) => Ok(None), - Err(err) => Err(err), - Ok(Some((Varint::Unknown, _))) => Err(Error::UnknownElementId), - Ok(Some((Varint::Value(element_id), id_size))) => { - // parse payload size - match decode_varint(&bytes[id_size..]) { - Ok(None) => Ok(None), - Err(err) => Err(err), - Ok(Some((element_length, length_size))) => - Ok(Some(( - element_id, - element_length, - id_size + length_size - ))) - } - } - } -} - -pub trait Schema<'a> { - type Element: 'a; - - fn should_unwrap(&self, element_id: u64) -> bool; - fn decode<'b: 'a>(&self, element_id: u64, bytes: &'b[u8]) -> Result; - - fn decode_element<'b: 'a>(&self, bytes: &'b[u8]) -> Result, Error> { - match decode_tag(bytes) { - Ok(None) => Ok(None), - Err(err) => Err(err), - Ok(Some((element_id, payload_size_tag, tag_size))) => { - let should_unwrap = self.should_unwrap(element_id); - - let payload_size = match (should_unwrap, payload_size_tag) { - (true, _) => 0, - (false, Varint::Unknown) => return Err(Error::UnknownElementLength), - (false, Varint::Value(size)) => size as usize - }; - - let element_size = tag_size + payload_size; - if element_size > bytes.len() { - // need to read more still - return Ok(None); - } - - match self.decode(element_id, &bytes[tag_size..element_size]) { - Ok(element) => Ok(Some((element, element_size))), - Err(error) => Err(error) - } - } - } - } - - fn parse(self, source: T) -> Ebml where Self: Sized { - Ebml(self, source) - } - -} - -pub const EBML_HEAD_ID: u64 = 0x0A45DFA3; -pub const VOID_ID: u64 = 0x6C; - -#[derive(Debug, PartialEq)] -pub struct Ebml(S, T); - -impl<'b, S: Schema<'b>> IntoIterator for Ebml { - type Item = S::Element; - type IntoIter = EbmlIterator<'b, S>; - - fn into_iter(self) -> EbmlIterator<'b, S> - { - EbmlIterator { - schema: self.0, - slice: self.1, - position: 0 - } - } -} - -const SEGMENT_ID: u64 = 0x08538067; -const SEEK_HEAD_ID: u64 = 0x014D9B74; -const SEGMENT_INFO_ID: u64 = 0x0549A966; -const CUES_ID: u64 = 0x0C53BB6B; -const TRACKS_ID: u64 = 0x0654AE6B; -const CLUSTER_ID: u64 = 0x0F43B675; -pub struct Webm; - -#[derive(Debug, PartialEq)] -pub enum WebmElement<'b> { - EbmlHead, - Void, - Segment, - SeekHead, - Info, - Cues, - Tracks(&'b[u8]), - Cluster(&'b[u8]), - Unknown(u64) -} - -impl<'a> Schema<'a> for Webm { - type Element = WebmElement<'a>; - - fn should_unwrap(&self, element_id: u64) -> bool { - match element_id { - // Segment - SEGMENT_ID => true, - _ => false - } - } - - fn decode<'b: 'a>(&self, element_id: u64, bytes: &'b[u8]) -> Result, Error> { - match element_id { - EBML_HEAD_ID => Ok(WebmElement::EbmlHead), - VOID_ID => Ok(WebmElement::Void), - SEGMENT_ID => Ok(WebmElement::Segment), - SEEK_HEAD_ID => Ok(WebmElement::SeekHead), - SEGMENT_INFO_ID => Ok(WebmElement::Info), - CUES_ID => Ok(WebmElement::Cues), - TRACKS_ID => Ok(WebmElement::Tracks(bytes)), - CLUSTER_ID => Ok(WebmElement::Cluster(bytes)), - _ => Ok(WebmElement::Unknown(element_id)) - } - } -} - -pub struct EbmlIterator<'b, T: Schema<'b>> { - schema: T, - slice: &'b[u8], - position: usize, -} - -impl<'b, T: Schema<'b>> Iterator for EbmlIterator<'b, T> { - type Item = T::Element; - - fn next(&mut self) -> Option { - match self.schema.decode_element(&self.slice[self.position..]) { - Err(_) => None, - Ok(None) => None, - Ok(Some((element, element_size))) => { - self.position += element_size; - Some(element) - } - } - } -} +pub use ebml::{Error, Schema}; #[cfg(test)] mod tests { diff --git a/src/webm.rs b/src/webm.rs new file mode 100644 index 0000000..8e3fbef --- /dev/null +++ b/src/webm.rs @@ -0,0 +1,48 @@ +use ebml::*; + +const SEGMENT_ID: u64 = 0x08538067; +const SEEK_HEAD_ID: u64 = 0x014D9B74; +const SEGMENT_INFO_ID: u64 = 0x0549A966; +const CUES_ID: u64 = 0x0C53BB6B; +const TRACKS_ID: u64 = 0x0654AE6B; +const CLUSTER_ID: u64 = 0x0F43B675; +pub struct Webm; + +#[derive(Debug, PartialEq)] +pub enum WebmElement<'b> { + EbmlHead, + Void, + Segment, + SeekHead, + Info, + Cues, + Tracks(&'b[u8]), + Cluster(&'b[u8]), + Unknown(u64) +} + +impl<'a> Schema<'a> for Webm { + type Element = WebmElement<'a>; + + fn should_unwrap(&self, element_id: u64) -> bool { + match element_id { + // Segment + SEGMENT_ID => true, + _ => false + } + } + + fn decode<'b: 'a>(&self, element_id: u64, bytes: &'b[u8]) -> Result, Error> { + match element_id { + EBML_HEAD_ID => Ok(WebmElement::EbmlHead), + VOID_ID => Ok(WebmElement::Void), + SEGMENT_ID => Ok(WebmElement::Segment), + SEEK_HEAD_ID => Ok(WebmElement::SeekHead), + SEGMENT_INFO_ID => Ok(WebmElement::Info), + CUES_ID => Ok(WebmElement::Cues), + TRACKS_ID => Ok(WebmElement::Tracks(bytes)), + CLUSTER_ID => Ok(WebmElement::Cluster(bytes)), + _ => Ok(WebmElement::Unknown(element_id)) + } + } +}