From 181dd4972efc1c40e6a965438a65c27474a43107 Mon Sep 17 00:00:00 2001
From: Harald Eilertsen
Date: Sun, 6 Dec 2020 20:42:45 +0100
Subject: Redo the whole thing again.

Parsing the chunks separately was not a good idea. The file layout seems
to be alternating ROOT and ARCH chunks, where the ROOT chunk describes
what kind of data to expect in the following ARCH chunk.

That is:

    +------+------+------+
    | RIFF | SIZE | NUND |
    +------+------+------+------+  \
    | ROOT | SIZE | NAME | TYPE |  |
    +------+------+------+------+   > One unit
    | ARCH | SIZE | DATA....    |  |
    +------+------+-------------+  /
    :                           :
    +------+------+------+------+  \
    | ROOT | SIZE | NAME | TYPE |  |
    +------+------+------+------+   > Last unit
    | ARCH | SIZE | DATA....    |  |
    +------+------+-------------+  /
    EOF

The NAME and TYPE are counted (and possibly zero terminated) strings
describing the name of the following chunk (e.g. "Version"), and data
type (e.g. "PAppVersion") respectively.

The order of the chunk pairs does not seem significant, but the pairs
are always a ROOT chunk followed by an ARCH chunk.
---
 src/main.rs | 319 ++++++++++++++++++------------------------------------------
 1 file changed, 95 insertions(+), 224 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 6a1c37d..58b6a86 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -20,9 +20,8 @@ mod cubase_project;
 use cubase_project::*;
 
 use nom::{
-    branch::alt,
     bytes::complete::*,
-    multi::{count, length_data, length_value, many0},
+    multi::length_data,
     number::complete::*,
     sequence::tuple,
     Finish,
@@ -53,184 +52,42 @@ fn cmstring(data: &[u8]) -> IResult<&[u8], &str> {
  * Leaf nodes also has a data payload prefixed by a 32 bit length (be).
*/ #[derive(Debug)] -enum Node<'a> { - Container(&'a str, u16), - Leaf(&'a str, u16, NodeValue<'a>), +enum NodeType { + Container, + Object, } -#[derive(Debug)] -enum NodeValue<'a> { - Unknown, - //List(Vec>), - Raw(Vec), - Arrangement(PArrangement), - InstrumentTrackEvent(MInstrumentTrackEvent<'a>), - - MidiAfterTouch(u32), - MidiController(u32), - MidiNote(u32), - MidiPart(MMidiPart<'a>), - MidiPartEvent(Vec), - MidiPitchBend(u32), - MidiPolyPressure(u32), - MidiProgramChange(u32), - MidiSysex(Vec), - - SignatureTrackEvent(MSignatureTrackEvent<'a>), - TempoTrackEvent(&'a [u8]), - Version(PAppVersion), -} - -/// A vector of 32bit words preceeded by a 16 bit count. -fn counted_vec(data: &[u8]) -> IResult<&[u8], Vec> { - let (r, len) = be_u16(data)?; - count(be_u32, len as usize)(&r) -} - -/// A byte vector prefixed with a 32 bit count. -fn bytevec<'a>(data: &'a[u8]) -> IResult<&'a [u8], &'a [u8]> { - length_data(be_u32)(&data) -} - -fn p_appversion<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { - let (r, (appname, appversion, appdate, num2, apparch, num3, appencoding, applocale)) = - tuple((cmstring, cmstring, cmstring, be_u32, cmstring, be_u32, cmstring, cmstring))(&data)?; - - Ok((r, PAppVersion { - appname: String::from(appname), - appversion: String::from(appversion), - appdate: String::from(appdate), - num2, - apparch: String::from(apparch), - num3, - appencoding: String::from(appencoding), - applocale: String::from(applocale), - })) -} #[derive(Debug)] -struct PArrangement { - num1: u16, - num2: u16, -} - -fn p_arrangement(data: &[u8]) -> IResult<&[u8], PArrangement> { - let (r, (num1, num2)) = tuple((be_u16, be_u16))(data)?; - Ok((r, PArrangement { num1, num2 })) -} - -#[derive(Debug)] -struct MInstrumentTrackEvent<'a> { - num16_3: Vec, - num32_8: Vec, +struct Node<'a> { + node_type: NodeType, name: &'a str, - num32: u32, + num: u16, + payload: Option<&'a [u8]>, } -fn m_instrument_track_event<'a>(data: &'a [u8]) -> IResult<&'a [u8], 
MInstrumentTrackEvent<'a>> { - let (r, (num16_3, num32_8, name, num32)) = - tuple((count(be_u16, 3), count(be_u32, 8), cmstring, be_u32))(&data)?; - Ok((r, MInstrumentTrackEvent { - num16_3, - num32_8, - name, - num32, - })) -} +fn p_app_version<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { + let mut v = PAppVersion::default(); -fn m_tempo_track_event<'a>(data: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> { - bytevec(&data) -} + let (mut r, (appname, appversion, appdate, num2)) = tuple((cmstring, cmstring, cmstring, be_u32))(&data)?; -#[derive(Debug)] -struct MSignatureTrackEvent<'a> { - bv: &'a [u8], - num32: u32, -} - -fn m_signature_track_event<'a>(data: &'a [u8]) -> IResult<&'a [u8], MSignatureTrackEvent<'a>> { - let (r, (bv, num32)) = tuple((bytevec, be_u32))(&data)?; - Ok((r, MSignatureTrackEvent { bv, num32 })) -} + v.appname = String::from(appname); + v.appversion = String::from(appversion); + v.appdate = String::from(appdate); + v.num2 = num2; -#[derive(Debug)] -struct MMidiPart<'a> { - num32: u32, - name: &'a str, - data: Vec, -} + if r.len() > 0 { + let (r2, (apparch, num3, appencoding, applocale)) = tuple((cmstring, be_u32, cmstring, cmstring))(&r)?; -fn m_midi_part<'a>(data: &'a [u8]) -> IResult<&'a [u8], MMidiPart<'a>> { - let (r, (num32, name, data)) = tuple((be_u32, cmstring, count(be_u8, 17)))(&data)?; - Ok((r, MMidiPart { num32, name, data })) -} + v.apparch = String::from(apparch); + v.num3 = num3; + v.appencoding = String::from(appencoding); + v.applocale = String::from(applocale); -fn node_value<'a>(class: &str, data: &'a [u8]) -> IResult<&'a [u8], NodeValue<'a>> { - match class { - "PAppVersion" => { - let (r, appver) = p_appversion(&data)?; - Ok((r, NodeValue::Version(appver))) - }, - "PArrangement" => { - let (r, arr) = p_arrangement(&data)?; - Ok((r, NodeValue::Arrangement(arr))) - }, - "MRoot" | - "MTrackList" => { - let (r, v) = counted_vec(&data)?; - Ok((r, NodeValue::Raw(v))) - }, - "MInstrumentTrackEvent" => { - let (r, e) = 
m_instrument_track_event(&data)?; - Ok((r, NodeValue::InstrumentTrackEvent(e))) - }, - "MTempoTrackEvent" => { - let (r, e) = m_tempo_track_event(&data)?; - Ok((r, NodeValue::TempoTrackEvent(e))) - }, - "MSignatureTrackEvent" => { - let (r, e) = m_signature_track_event(&data)?; - Ok((r, NodeValue::SignatureTrackEvent(e))) - }, - "MMidiPartEvent" => { - let (r, v) = count(be_u8, 30)(data)?; - Ok((r, NodeValue::MidiPartEvent(v))) - }, - "MMidiPart" => { - let (r, v) = m_midi_part(&data)?; - Ok((r, NodeValue::MidiPart(v))) - }, - "MMidiNote" => { - let (r, v) = be_u32(data)?; - Ok((r, NodeValue::MidiNote(v))) - }, - "MMidiPolyPressure" => { - let (r, v) = be_u32(data)?; - Ok((r, NodeValue::MidiPolyPressure(v))) - }, - "MMidiAfterTouch" => { - let (r, v) = be_u32(data)?; - Ok((r, NodeValue::MidiAfterTouch(v))) - }, - "MMidiProgramChange" => { - let (r, v) = be_u32(data)?; - Ok((r, NodeValue::MidiProgramChange(v))) - }, - "MMidiController" => { - let (r, v) = be_u32(data)?; - Ok((r, NodeValue::MidiController(v))) - }, - "MMidiPitchBend" => { - let (r, v) = be_u32(data)?; - Ok((r, NodeValue::MidiPitchBend(v))) - }, - "MMidiSysex" => { - let sep = vec![0xff, 0xff, 0xff, 0xff]; - let (r, v) = take_until(sep.as_slice())(data)?; - Ok((r, NodeValue::MidiSysex(v.to_vec()))) - }, - &_ => Ok((&[], NodeValue::Unknown)) //"Unknown node class \"{}\".", class) + r = r2; } + + Ok((r, v)) } fn fourcc<'a>(data: &'a [u8]) -> IResult<&'a [u8], &'a str> { @@ -245,7 +102,11 @@ fn fourcc<'a>(data: &'a [u8]) -> IResult<&'a [u8], &'a str> { * The actual data follows in the ARCH chunk following this ROOT chunk. 
*/ fn root_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> { - tuple((cmstring, cmstring))(data) + let (r, chunk) = riff_chunk(data)?; + assert_eq!(chunk.fourcc, "ROOT"); + //assert_eq!(r.len(), 0); + let (_, t) = tuple((cmstring, cmstring))(chunk.payload)?; + Ok((r, t)) } @@ -256,7 +117,7 @@ fn root_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> { */ fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { let (r, (_, name, num)) = tuple((tag(b"\xff\xff\xff\xfe"), cmstring, be_u16))(data)?; - Ok((r, Node::Container(name, num))) + Ok((r, Node { node_type: NodeType::Container, name, num, payload: None} )) } /** @@ -266,17 +127,7 @@ fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { */ fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { let (r, (_, name, num, payload)) = tuple((tag(b"\xff\xff\xff\xff"), cmstring, be_u16, length_data(be_u32)))(data)?; - let (_, nv) = node_value(name, payload)?; - Ok((r, Node::Leaf(name, num, nv))) -} - -/** - * ARCH chunks contains one or more structured data elements that can be - * either container nodes or object/leaf nodes.. 
- */ -fn arch_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], Vec> { - let (rest, nodes) = many0(alt((container_node, object_node)))(data)?; - Ok((rest, nodes)) + Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } )) } struct RiffChunk<'a> { @@ -290,8 +141,38 @@ fn riff_chunk<'a>(i: &'a [u8]) -> IResult<&'a [u8], RiffChunk> { } +fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { + let mut proj = CubaseProject::default(); + + let (mut payload, _) = cpr_file_header(data)?; + + println!("[*] Reading Cubase CPR file, {} bytes of data...", payload.len()); + + while payload.len() > 0 { + // Expect a root chunk first: + let (r, (k, t)) = root_chunk(payload)?; + println!("[*] Found root: ({}, {})", k, t); + + match k { + "Version" => { + let (r2, v) = version_chunk(r)?; + println!("[*] {:?}", v); + //assert_eq!(r2.len(), 0); + proj.app_version = v; + payload = r2; + }, + _ => { + let (r2, _) = riff_chunk(r)?; + payload = r2; + } + } + } + + Ok((payload, proj)) +} + /** - * Split file into individual chunks. + * Parse the Cubase Project File Header. * * A Cubase project file is a RIFF file. * @@ -302,19 +183,15 @@ fn riff_chunk<'a>(i: &'a [u8]) -> IResult<&'a [u8], RiffChunk> { * * To make it even worse, the chunk size of the root chunk is four bytes short, so this makes * the file parser itself a bit more complex than what it needs to be. + * + * We just swallow the extra "NUND" tag here, so it won't bother us later. */ -fn split_chunks<'a>(data: &'a [u8]) -> IResult<&'a [u8], Vec>> { - let (r, (_, (_, chunks))) = - tuple(( - tag(b"RIFF"), - length_value( - payload_len, - tuple((tag(b"NUND"), many0(riff_chunk))) - ) - ))(&data)?; - Ok((r, chunks)) +fn cpr_file_header<'a>(data: &'a [u8]) -> IResult<&'a [u8], ()> { + let (r, (_, _, _)) = tuple((tag(b"RIFF"), payload_len, tag(b"NUND")))(data)?; + Ok((r, ())) } + /** * Get the correct payload length from the root chunk of the file. 
* @@ -327,6 +204,25 @@ fn payload_len<'a>(data: &'a [u8]) -> IResult<&'a [u8], u32> { Ok((r, len + 4)) } + +fn version_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { + let (r, chunk) = riff_chunk(data)?; + assert_eq!(chunk.fourcc, "ARCH"); + //assert_eq!(r.len(), 0); + + let (odata, c) = container_node(chunk.payload)?; + println!("[*] {:?}", c); + + let (r2, o) = object_node(odata)?; + println!("[*] {:?}", o); + assert_eq!(r2.len(), 0); + + let (r2, v) = p_app_version(o.payload.unwrap())?; + assert_eq!(r2.len(), 0); + + Ok((r, v)) +} + pub fn parse_cubase_project

(filename: P) -> Result> where P: AsRef @@ -334,41 +230,11 @@ where println!("Reading {}...", filename.as_ref().to_str().ok_or("Invalid file name")?); let data = std::fs::read(filename)?; - let (rest, chunks) = split_chunks(&data) + let (_, proj) = cpr_file(&data) .finish() .map_err(|e| format!("{:?}", e))?; - println!("Rest: {}", rest.len()); - - let proj = CubaseProject::default(); - - for chunk in chunks { - println!("{}: {}", chunk.fourcc, chunk.payload.len()); - - match chunk.fourcc { - "ROOT" => { - let (r, (k, v)) = root_chunk(chunk.payload).finish().map_err(|e| format!("{:?}", e))?; - assert_eq!(r.len(), 0); - println!(" {}: {}", k, v); - }, - "ARCH" => { - let (r, nodes) = arch_chunk(chunk.payload).finish().map_err(|e| format!("{:?}", e))?; - assert_eq!(r.len(), 0); - let mut indent = 2; - for node in nodes { - println!("{1:0$}{2:?}", indent, " ", node); - if let Node::Container(_, _) = node { - indent += 2; - } - } - }, - _ => { - eprintln!("[-] Warning: ignoring unknown chunk \"{}\" of length {} bytes.", - chunk.fourcc, - chunk.payload.len()); - } - }; - } + println!("[*] Done!"); Ok(proj) } @@ -379,7 +245,12 @@ fn main() -> Result<(), Box> { .next() .expect("You must give me a file to analyze"); - parse_cubase_project(filename)?; + let proj = parse_cubase_project(filename)?; + + println!("File generated by {} {}", proj.app_version.appname, proj.app_version.appversion); + println!("Architecture: {}", proj.app_version.apparch); + println!("Encoding: {}", proj.app_version.appencoding); + println!("Locale: {}", proj.app_version.applocale); Ok(()) } -- cgit v1.2.3