/* cbconvert -- A program to parse and convert Cubase projects * Copyright (C) 2020 Harald Eilertsen * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ mod cubase_project; use cubase_project::*; use nom::{ bytes::complete::*, combinator::map, multi::{length_data, length_value, many1}, number::complete::*, sequence::tuple, Finish, IResult, }; use std::{ error::Error, path::Path, }; /// Parser for length prefixed strings. /// /// Cubase uses a string format where the length of the string is given as a 32 bit big endian word /// before the actual string data. The string may also be zero terminated, with the zero term char /// included in the length. In that case, we trim off the ending zero bytes. /// /// We assume strings to be UTF-8 encoded. fn cmstring(data: &[u8]) -> IResult<&[u8], &str> { let strdata = length_value(be_u32, take_till(|c| c == b'\0')); map(strdata, |s| std::str::from_utf8(s).unwrap())(data) } /** * The data chunks in the file is split into what seems like a structure of containers and leaf * nodes. Where the containers don't contain any data of themselves, but leaf nodes may contain * data and also be a container for further sub nodes. * * Each node regardless of type has a name, and a 16 bit number which meaning I'm not sure about. * Leaf nodes also has a data payload prefixed by a 32 bit length (be). */ #[derive(Debug)] enum NodeType { Container, Object, } #[derive(Debug)] struct Node<'a> { node_type: NodeType, name: &'a str, num: u16, payload: Option<&'a [u8]>, } fn p_app_version<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { let mut v = PAppVersion::default(); let (mut r, (appname, appversion, appdate, num2)) = tuple((cmstring, cmstring, cmstring, be_u32))(&data)?; v.appname = String::from(appname); v.appversion = String::from(appversion); v.appdate = String::from(appdate); v.num2 = num2; if r.len() > 0 { let (r2, (apparch, num3, appencoding, applocale)) = tuple((cmstring, be_u32, cmstring, cmstring))(&r)?; v.apparch = String::from(apparch); v.num3 = num3; v.appencoding = String::from(appencoding); v.applocale = String::from(applocale); r = r2; } Ok((r, v)) } fn p_arrangement<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> { let mut v = PArrangement::default(); Ok((data, v)) } fn fourcc<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a str> { map(take(4usize), |tag| std::str::from_utf8(tag).unwrap())(input) } /** * Root chunks always have the same layout, two length prefixed strings. * * These seem to be a mapping between a field name, and the data type the field contains. * The actual data follows in the ARCH chunk following this ROOT chunk. */ fn root_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> { let (r, chunk) = riff_chunk(data)?; assert_eq!(chunk.fourcc, "ROOT"); //assert_eq!(r.len(), 0); let (_, t) = tuple((cmstring, cmstring))(chunk.payload)?; Ok((r, t)) } /** * A container node does not contain any data of it's own, but contains * one or more sub elements. These can be either other containers, or * object/leaf nodes. */ fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { let (r, (_, name, num)) = tuple((tag(b"\xff\xff\xff\xfe"), cmstring, be_u16))(data)?; Ok((r, Node { node_type: NodeType::Container, name, num, payload: None} )) } /** * An object node contains serialized structured data. * * It has a size, as well as a payload containing the actual serialized data. */ fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> { let (r, (_, name, num, payload)) = tuple((tag(b"\xff\xff\xff\xff"), cmstring, be_u16, length_data(be_u32)))(data)?; Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } )) } struct RiffChunk<'a> { fourcc: &'a str, payload: &'a [u8], } fn riff_chunk<'a>(i: &'a [u8]) -> IResult<&'a [u8], RiffChunk> { let (r, (fourcc, payload)) = tuple((fourcc, length_data(be_u32)))(i)?; Ok((r, RiffChunk { fourcc, payload })) } fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> { let mut proj = CubaseProject::default(); let (mut payload, _) = cpr_file_header(data)?; println!("[*] Reading Cubase CPR file, {} bytes of data...", payload.len()); while payload.len() > 0 { // Expect a root chunk first: let (r, (k, t)) = root_chunk(payload)?; println!("[*] Found root: ({}, {})", k, t); match k { "Version" => { let (r2, v) = version_chunk(r)?; println!("[*] {:?}", v); //assert_eq!(r2.len(), 0); proj.app_version = v; payload = r2; }, "Arrangement1" => { let (r2, a) = arrangement_chunk(r)?; println!("[*] {:?}", a); payload = r2; }, _ => { let (r2, _) = riff_chunk(r)?; payload = r2; } } } Ok((payload, proj)) } /** * Parse the Cubase Project File Header. * * A Cubase project file is a RIFF file. * * That is almost. There's a strange extra tag "NUND" between the root (RIFF) header and the * rest of the chunks. This would have been ok, if the NUND tag was followed by a length field, * as it would just be another chunk. This is not the case however, the tag is followed * immefiately by another chunk header. * * To make it even worse, the chunk size of the root chunk is four bytes short, so this makes * the file parser itself a bit more complex than what it needs to be. * * We just swallow the extra "NUND" tag here, so it won't bother us later. */ fn cpr_file_header<'a>(data: &'a [u8]) -> IResult<&'a [u8], ()> { let (r, (_, _, _)) = tuple((tag(b"RIFF"), payload_len, tag(b"NUND")))(data)?; Ok((r, ())) } /** * Get the correct payload length from the root chunk of the file. * * Cubase incorrectly specifies the chunk size four bytes less than what it really is, probably due * to the strange inserted "NUND" tag that don't really belong anywhere. This parser fixes that by * adding four to the size read from the file. */ fn payload_len<'a>(data: &'a [u8]) -> IResult<&'a [u8], u32> { let (r, len) = be_u32(data)?; Ok((r, len + 4)) } fn version_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> { let (r, chunk) = riff_chunk(data)?; assert_eq!(chunk.fourcc, "ARCH"); //assert_eq!(r.len(), 0); let (odata, c) = container_node(chunk.payload)?; println!("[*] {:?}", c); let (r2, o) = object_node(odata)?; println!("[*] {:?}", o); assert_eq!(r2.len(), 0); let (r2, v) = p_app_version(o.payload.unwrap())?; assert_eq!(r2.len(), 0); Ok((r, v)) } fn arrangement_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> { let (r, chunk) = riff_chunk(data)?; assert_eq!(chunk.fourcc, "ARCH"); // It varies a bit how many levels deep the actual PArrangement object is. // // File created with Cubase 4.5.x (file version 310?) seems to have this // structure: // // GDocument -> GModel -> CmObject // // While Cubase version 5.x (file version 400?) seems to have the CmObject // directly under the GDocument container. let (odata, c) = many1(container_node)(chunk.payload)?; println!("[*] {:?}", c); // This is the actual PArrangeent object let (r2, o) = object_node(odata)?; println!("[*] {:?}: {} ({} bytes)", o.node_type, o.name, o.payload.unwrap().len()); assert_eq!(r2.len(), 0); let (r2, v) = p_arrangement(o.payload.unwrap())?; // assert_eq!(r2.len(), 0); Ok((r, PArrangement::default())) } pub fn parse_cubase_project

(filename: P) -> Result> where P: AsRef { println!("Reading {}...", filename.as_ref().to_str().ok_or("Invalid file name")?); let data = std::fs::read(filename)?; let (_, proj) = cpr_file(&data) .finish() .map_err(|e| format!("{:?}", e))?; println!("[*] Done!"); Ok(proj) } fn main() -> Result<(), Box> { let filename = std::env::args() .skip(1) .next() .expect("You must give me a file to analyze"); let proj = parse_cubase_project(filename)?; println!("File generated by {} {}", proj.app_version.appname, proj.app_version.appversion); println!("Architecture: {}", proj.app_version.apparch); println!("Encoding: {}", proj.app_version.appencoding); println!("Locale: {}", proj.app_version.applocale); Ok(()) }