From 40a44631b56de9343946fe29b0172184e4efc62b Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Mon, 7 Dec 2020 16:06:46 +0100 Subject: Simplify more parser functions using sequence parsers. --- src/main.rs | 87 ++++++++++++++++++++++--------------------------------------- 1 file changed, 31 insertions(+), 56 deletions(-) diff --git a/src/main.rs b/src/main.rs index 9425106..c5f1e46 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,7 +24,7 @@ use nom::{ combinator::map, multi::{length_data, length_value, many1}, number::complete::*, - sequence::tuple, + sequence::{preceded, terminated, tuple}, Finish, IResult, }; @@ -40,9 +40,9 @@ use std::{ /// included in the length. In that case, we trim off the ending zero bytes. /// /// We assume strings to be UTF-8 encoded. -fn cmstring(data: &[u8]) -> IResult<&[u8], &str> { +fn cmstring(input: &[u8]) -> IResult<&[u8], &str> { let strdata = length_value(be_u32, take_till(|c| c == b'\0')); - map(strdata, |s| std::str::from_utf8(s).unwrap())(data) + map(strdata, |s| std::str::from_utf8(s).unwrap())(input) } /** @@ -68,10 +68,10 @@ struct Node<'a> { payload: Option<&'a [u8]>, } -fn p_app_version<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { +fn p_app_version<'a>(input: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { let mut v = PAppVersion::default(); - let (mut r, (appname, appversion, appdate, num2)) = tuple((cmstring, cmstring, cmstring, be_u32))(&data)?; + let (mut r, (appname, appversion, appdate, num2)) = tuple((cmstring, cmstring, cmstring, be_u32))(input)?; v.appname = String::from(appname); v.appversion = String::from(appversion); @@ -92,14 +92,10 @@ fn p_app_version<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { Ok((r, v)) } -fn p_arrangement<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> { +fn p_arrangement<'a>(input: &'a [u8]) -> IResult<&'a [u8], PArrangement> { let mut v = PArrangement::default(); - Ok((data, v)) -} - -fn fourcc<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a str> { - map(take(4usize), |tag| std::str::from_utf8(tag).unwrap())(input) + Ok((input, v)) } /** @@ -108,14 +104,17 @@ fn fourcc<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a str> { * These seem to be a mapping between a field name, and the data type the field contains. * The actual data follows in the ARCH chunk following this ROOT chunk. */ -fn root_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> { - let (r, chunk) = riff_chunk(data)?; - assert_eq!(chunk.fourcc, "ROOT"); - //assert_eq!(r.len(), 0); - let (_, t) = tuple((cmstring, cmstring))(chunk.payload)?; - Ok((r, t)) +fn root_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> { + preceded( + tag(b"ROOT"), + length_value(be_u32, tuple((cmstring, cmstring))))(input) } +fn arch_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> { + preceded( + tag(b"ARCH"), + length_data(be_u32))(input) +} /** * A container node does not contain any data of it's own, but contains @@ -137,21 +136,12 @@ fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } )) } -struct RiffChunk<'a> { - fourcc: &'a str, - payload: &'a [u8], -} - -fn riff_chunk<'a>(i: &'a [u8]) -> IResult<&'a [u8], RiffChunk> { - let (r, (fourcc, payload)) = tuple((fourcc, length_data(be_u32)))(i)?; - Ok((r, RiffChunk { fourcc, payload })) -} - -fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { +fn cpr_file<'a>(input: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { let mut proj = CubaseProject::default(); - let (mut payload, _) = cpr_file_header(data)?; + let (mut payload, len) = cpr_file_header(input)?; + assert_eq!(payload.len(), len as usize); println!("[*] Reading Cubase CPR file, {} bytes of data...", payload.len()); @@ -164,7 +154,6 @@ fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { "Version" => { let (r2, v) = version_chunk(r)?; println!("[*] {:?}", v); - //assert_eq!(r2.len(), 0); proj.app_version = v; payload = r2; }, @@ -174,7 +163,7 @@ fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { payload = r2; }, _ => { - let (r2, _) = riff_chunk(r)?; + let (r2, _) = arch_chunk(r)?; payload = r2; } } @@ -198,31 +187,18 @@ fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { * * We just swallow the extra "NUND" tag here, so it won't bother us later. */ -fn cpr_file_header<'a>(data: &'a [u8]) -> IResult<&'a [u8], ()> { - let (r, (_, _, _)) = tuple((tag(b"RIFF"), payload_len, tag(b"NUND")))(data)?; - Ok((r, ())) -} - - -/** - * Get the correct payload length from the root chunk of the file. - * - * Cubase incorrectly specifies the chunk size four bytes less than what it really is, probably due - * to the strange inserted "NUND" tag that don't really belong anywhere. This parser fixes that by - * adding four to the size read from the file. - */ -fn payload_len<'a>(data: &'a [u8]) -> IResult<&'a [u8], u32> { - let (r, len) = be_u32(data)?; - Ok((r, len + 4)) +fn cpr_file_header<'a>(input: &'a [u8]) -> IResult<&'a [u8], u32> { + terminated( + preceded( + tag(b"RIFF"), be_u32), + tag(b"NUND"))(input) } -fn version_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { - let (r, chunk) = riff_chunk(data)?; - assert_eq!(chunk.fourcc, "ARCH"); - //assert_eq!(r.len(), 0); +fn version_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { + let (r, chunk) = arch_chunk(input)?; - let (odata, c) = container_node(chunk.payload)?; + let (odata, c) = container_node(chunk)?; println!("[*] {:?}", c); let (r2, o) = object_node(odata)?; @@ -236,9 +212,8 @@ fn version_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { } -fn arrangement_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> { - let (r, chunk) = riff_chunk(data)?; - assert_eq!(chunk.fourcc, "ARCH"); +fn arrangement_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], PArrangement> { + let (r, chunk) = arch_chunk(input)?; // It varies a bit how many levels deep the actual PArrangement object is. // @@ -249,7 +224,7 @@ fn arrangement_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> { // // While Cubase version 5.x (file version 400?) seems to have the CmObject // directly under the GDocument container. - let (odata, c) = many1(container_node)(chunk.payload)?; + let (odata, c) = many1(container_node)(chunk)?; println!("[*] {:?}", c); // This is the actual PArrangeent object -- cgit v1.2.3