/* cbconvert -- A program to parse and convert Cubase projects * Copyright (C) 2020 Harald Eilertsen * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ mod cubase_project; use cubase_project::*; use nom::{ branch::alt, bytes::complete::*, combinator::{map, rest, value}, error::ParseError, multi::{length_data, length_value, many1}, number::complete::*, sequence::{preceded, terminated, tuple}, Finish, IResult, Parser, }; use std::{ error::Error, path::Path, }; /// Parser for length prefixed strings. /// /// Cubase uses a string format where the length of the string is given as a 32 bit big endian word /// before the actual string data. The string may also be zero terminated, with the zero term char /// included in the length. In that case, we trim off the ending zero bytes. /// /// We assume strings to be UTF-8 encoded. fn cmstring(input: &[u8]) -> IResult<&[u8], &str> { let strdata = length_value(be_u32, take_till(|c| c == b'\0')); map(strdata, |s| std::str::from_utf8(s).unwrap())(input) } fn cmtag<'a>(t: &'a str) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], (NodeType, u16)> { map( tuple(( node_type, length_value( be_u32, tuple((tag(t), tag(b"\0"))) ), be_u16) ), |(nt, (_, _), v)| (nt, v)) } /** * The data chunks in the file is split into what seems like a structure of containers and leaf * nodes. Where the containers don't contain any data of themselves, but leaf nodes may contain * data and also be a container for further sub nodes. * * Each node regardless of type has a name, and a 16 bit number which meaning I'm not sure about. * Leaf nodes also has a data payload prefixed by a 32 bit length (be). */ #[derive(Clone, Debug)] enum NodeType { Container, Object, } fn node_type<'a>(input: &'a [u8]) -> IResult<&'a [u8], NodeType> { alt(( value(NodeType::Container, tag(b"\xff\xff\xff\xfe")), value(NodeType::Object, tag(b"\xff\xff\xff\xff")) ))(input) } #[derive(Debug)] struct Node<'a> { node_type: NodeType, name: &'a str, num: u16, payload: Option<&'a [u8]>, } fn p_app_version<'a>(input: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { let mut v = PAppVersion::default(); let (mut r, (appname, appversion, appdate, num2)) = tuple((cmstring, cmstring, cmstring, be_u32))(input)?; v.appname = String::from(appname); v.appversion = String::from(appversion); v.appdate = String::from(appdate); v.num2 = num2; if r.len() > 0 { let (r2, (apparch, num3, appencoding, applocale)) = tuple((cmstring, be_u32, cmstring, cmstring))(&r)?; v.apparch = String::from(apparch); v.num3 = num3; v.appencoding = String::from(appencoding); v.applocale = String::from(applocale); r = r2; } Ok((r, v)) } fn p_arrangement<'a>(input: &'a [u8]) -> IResult<&'a [u8], PArrangement> { let v = PArrangement::default(); Ok((input, v)) } /** * Root chunks always have the same layout, two length prefixed strings. * * These seem to be a mapping between a field name, and the data type the field contains. * The actual data follows in the ARCH chunk following this ROOT chunk. */ fn root_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> { preceded( tag(b"ROOT"), length_value(be_u32, tuple((cmstring, cmstring))))(input) } fn arch_chunk<'a, O, E, F>(subparser: F) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], O, E> where E: ParseError<&'a [u8]>, F: Parser<&'a [u8], O, E>, { preceded( tag(b"ARCH"), length_value(be_u32, subparser)) } /** * A container node does not contain any data of it's own, but contains * one or more sub elements. These can be either other containers, or * object/leaf nodes. */ fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { let (r, (_, name, num)) = tuple((tag(b"\xff\xff\xff\xfe"), cmstring, be_u16))(data)?; Ok((r, Node { node_type: NodeType::Container, name, num, payload: None} )) } /** * An object node contains serialized structured data. * * It has a size, as well as a payload containing the actual serialized data. */ fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { let (r, (_, name, num, payload)) = tuple((tag(b"\xff\xff\xff\xff"), cmstring, be_u16, length_data(be_u32)))(data)?; Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } )) } fn cpr_file<'a>(input: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { let mut proj = CubaseProject::default(); let (mut payload, len) = cpr_file_header(input)?; assert_eq!(payload.len(), len as usize); println!("[*] Reading Cubase CPR file, {} bytes of data...", payload.len()); while payload.len() > 0 { // Expect a root chunk first: let (r, (k, t)) = root_chunk(payload)?; println!("[*] Found root: ({}, {})", k, t); match k { "Version" => { let (r2, v) = arch_chunk(version_chunk)(r)?; println!("[*] {:?}", v); proj.app_version = v; payload = r2; }, "Arrangement1" => { let (r2, a) = arch_chunk(arrangement_chunk)(r)?; println!("[*] {:?}", a); payload = r2; }, _ => { let (r2, _) = arch_chunk(rest)(r)?; payload = r2; } } } Ok((payload, proj)) } /** * Parse the Cubase Project File Header. * * A Cubase project file is a RIFF file. * * That is almost. There's a strange extra tag "NUND" between the root (RIFF) header and the * rest of the chunks. This would have been ok, if the NUND tag was followed by a length field, * as it would just be another chunk. This is not the case however, the tag is followed * immefiately by another chunk header. * * To make it even worse, the chunk size of the root chunk is four bytes short, so this makes * the file parser itself a bit more complex than what it needs to be. * * We just swallow the extra "NUND" tag here, so it won't bother us later. */ fn cpr_file_header<'a>(input: &'a [u8]) -> IResult<&'a [u8], u32> { terminated( preceded( tag(b"RIFF"), be_u32), tag(b"NUND"))(input) } fn version_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { preceded( tuple((cmtag("CmObject"), cmtag("PAppVersion"))), length_value(be_u32, p_app_version))(input) } fn arrangement_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], PArrangement> { // It varies a bit how many levels deep the actual PArrangement object is. // // File created with Cubase 4.5.x (file version 310?) seems to have this // structure: // // GDocument -> GModel -> CmObject // // While Cubase version 5.x (file version 400?) seems to have the CmObject // directly under the GDocument container. let (odata, c) = many1(container_node)(input)?; println!("[*] {:?}", c); // This is the actual PArrangeent object let (r2, o) = object_node(odata)?; println!("[*] {:?}: {} ({} bytes)", o.node_type, o.name, o.payload.unwrap().len()); assert_eq!(r2.len(), 0); let (r2, _v) = p_arrangement(o.payload.unwrap())?; // assert_eq!(r2.len(), 0); Ok((r2, PArrangement::default())) } pub fn parse_cubase_project

(filename: P) -> Result> where P: AsRef { println!("Reading {}...", filename.as_ref().to_str().ok_or("Invalid file name")?); let data = std::fs::read(filename)?; let (_, proj) = cpr_file(&data) .finish() .map_err(|e| format!("{:?}", e))?; println!("[*] Done!"); Ok(proj) } fn main() -> Result<(), Box> { let filename = std::env::args() .skip(1) .next() .expect("You must give me a file to analyze"); let proj = parse_cubase_project(filename)?; println!("File generated by {} {}", proj.app_version.appname, proj.app_version.appversion); println!("Architecture: {}", proj.app_version.apparch); println!("Encoding: {}", proj.app_version.appencoding); println!("Locale: {}", proj.app_version.applocale); Ok(()) }