/* cbconvert -- A program to parse and convert Cubase projects
* Copyright (C) 2020 Harald Eilertsen
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
mod cubase_project;
use cubase_project::*;
use nom::{
branch::alt,
bytes::complete::*,
combinator::{map, rest, value},
error::ParseError,
multi::{length_data, length_value, many1},
number::complete::*,
sequence::{preceded, terminated, tuple},
Finish,
IResult,
Parser,
};
use std::{
error::Error,
path::Path,
};
/// Parser for length prefixed strings.
///
/// Cubase uses a string format where the length of the string is given as a 32 bit big endian word
/// before the actual string data. The string may also be zero terminated, with the zero term char
/// included in the length. In that case, we trim off the ending zero bytes.
///
/// We assume strings to be UTF-8 encoded.
fn cmstring(input: &[u8]) -> IResult<&[u8], &str> {
let strdata = length_value(be_u32, take_till(|c| c == b'\0'));
map(strdata, |s| std::str::from_utf8(s).unwrap())(input)
}
fn cmtag<'a>(t: &'a str) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], (NodeType, u16)> {
map(
tuple((
node_type,
length_value(
be_u32, tuple((tag(t), tag(b"\0")))
),
be_u16)
),
|(nt, (_, _), v)| (nt, v))
}
/**
 * The data chunks in the file are split into what seems like a structure of containers and leaf
 * nodes. The containers don't contain any data themselves, but leaf nodes may contain data and
 * also be a container for further sub nodes.
 *
 * Each node regardless of type has a name, and a 16 bit number whose meaning I'm not sure about.
 * Leaf nodes also have a data payload prefixed by a 32 bit length (be).
 */
#[derive(Clone, Debug)]
enum NodeType {
/// Node without a payload of its own, marked in the file by the bytes `ff ff ff fe`.
Container,
/// Leaf node carrying a payload, marked in the file by the bytes `ff ff ff ff`.
Object,
}
/// Parses the four byte marker at the start of a node and maps it to its `NodeType`.
fn node_type<'a>(input: &'a [u8]) -> IResult<&'a [u8], NodeType> {
    let container = value(NodeType::Container, tag(b"\xff\xff\xff\xfe"));
    let object = value(NodeType::Object, tag(b"\xff\xff\xff\xff"));
    alt((container, object))(input)
}
/// A single node as read by `container_node` or `object_node`.
///
/// The string and payload fields borrow from the input buffer the node was parsed from.
#[derive(Debug)]
struct Node<'a> {
/// Whether this node was read as a container or as an object.
node_type: NodeType,
/// Name of the node, read as a length prefixed string.
name: &'a str,
/// The 16 bit number that follows the name; its meaning is not known.
num: u16,
/// Raw payload bytes; `None` for containers, `Some` for objects.
payload: Option<&'a [u8]>,
}
/// Parses the serialized fields of a `PAppVersion` object.
///
/// The application name, version, date and a 32 bit number are always read. If any input
/// remains after those, the architecture, another 32 bit number, the encoding and the locale
/// are read as well; otherwise these fields keep their default values.
fn p_app_version<'a>(input: &'a [u8]) -> IResult<&'a [u8], PAppVersion> {
    let mut info = PAppVersion::default();

    // Mandatory leading fields.
    let (remaining, (name, version, date, num2)) =
        tuple((cmstring, cmstring, cmstring, be_u32))(input)?;
    info.appname = name.to_owned();
    info.appversion = version.to_owned();
    info.appdate = date.to_owned();
    info.num2 = num2;

    // Nothing left means the optional trailing fields are absent.
    if remaining.is_empty() {
        return Ok((remaining, info));
    }

    let (remaining, (arch, num3, encoding, locale)) =
        tuple((cmstring, be_u32, cmstring, cmstring))(remaining)?;
    info.apparch = arch.to_owned();
    info.num3 = num3;
    info.appencoding = encoding.to_owned();
    info.applocale = locale.to_owned();

    Ok((remaining, info))
}
/// Placeholder parser for a `PArrangement` object.
///
/// No fields are decoded yet: the input is handed back untouched together with a default
/// `PArrangement`.
fn p_arrangement<'a>(input: &'a [u8]) -> IResult<&'a [u8], PArrangement> {
    Ok((input, PArrangement::default()))
}
/**
 * Parse a ROOT chunk.
 *
 * Root chunks always have the same layout: the "ROOT" tag, a 32 bit big endian chunk length,
 * and within that length two length prefixed strings.
 *
 * The strings seem to be a mapping between a field name and the data type the field contains.
 * The actual data follows in the ARCH chunk following this ROOT chunk.
 */
fn root_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> {
    let (after_tag, _) = tag(b"ROOT")(input)?;
    length_value(be_u32, tuple((cmstring, cmstring)))(after_tag)
}
/**
 * Build a parser for an ARCH chunk.
 *
 * The chunk consists of the "ARCH" tag and a 32 bit big endian length. The given `subparser`
 * is applied to exactly that many bytes following the length, and its output becomes the
 * output of the chunk parser.
 */
fn arch_chunk<'a, O, E, F>(subparser: F) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>
where
    E: ParseError<&'a [u8]>,
    F: Parser<&'a [u8], O, E>,
{
    let body = length_value(be_u32, subparser);
    preceded(tag(b"ARCH"), body)
}
/**
* A container node does not contain any data of it's own, but contains
* one or more sub elements. These can be either other containers, or
* object/leaf nodes.
*/
fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> {
let (r, (_, name, num)) = tuple((tag(b"\xff\xff\xff\xfe"), cmstring, be_u16))(data)?;
Ok((r, Node { node_type: NodeType::Container, name, num, payload: None} ))
}
/**
* An object node contains serialized structured data.
*
* It has a size, as well as a payload containing the actual serialized data.
*/
fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> {
let (r, (_, name, num, payload)) = tuple((tag(b"\xff\xff\xff\xff"), cmstring, be_u16, length_data(be_u32)))(data)?;
Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } ))
}
fn cpr_file<'a>(input: &'a [u8]) -> IResult<&'a [u8], CubaseProject> {
let mut proj = CubaseProject::default();
let (mut payload, len) = cpr_file_header(input)?;
assert_eq!(payload.len(), len as usize);
println!("[*] Reading Cubase CPR file, {} bytes of data...", payload.len());
while payload.len() > 0 {
// Expect a root chunk first:
let (r, (k, t)) = root_chunk(payload)?;
println!("[*] Found root: ({}, {})", k, t);
match k {
"Version" => {
let (r2, v) = arch_chunk(version_chunk)(r)?;
println!("[*] {:?}", v);
proj.app_version = v;
payload = r2;
},
"Arrangement1" => {
let (r2, a) = arch_chunk(arrangement_chunk)(r)?;
println!("[*] {:?}", a);
payload = r2;
},
_ => {
let (r2, _) = arch_chunk(rest)(r)?;
payload = r2;
}
}
}
Ok((payload, proj))
}
/**
* Parse the Cubase Project File Header.
*
* A Cubase project file is a RIFF file.
*
* That is almost. There's a strange extra tag "NUND" between the root (RIFF) header and the
* rest of the chunks. This would have been ok, if the NUND tag was followed by a length field,
* as it would just be another chunk. This is not the case however, the tag is followed
* immefiately by another chunk header.
*
* To make it even worse, the chunk size of the root chunk is four bytes short, so this makes
* the file parser itself a bit more complex than what it needs to be.
*
* We just swallow the extra "NUND" tag here, so it won't bother us later.
*/
fn cpr_file_header<'a>(input: &'a [u8]) -> IResult<&'a [u8], u32> {
terminated(
preceded(
tag(b"RIFF"), be_u32),
tag(b"NUND"))(input)
}
/// Parses the contents of the ARCH chunk that holds the application version.
///
/// Two node headers named "CmObject" and "PAppVersion" are expected first. They are followed
/// by a 32 bit big endian length and that many bytes of `PAppVersion` data.
fn version_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], PAppVersion> {
    let (input, _) = cmtag("CmObject")(input)?;
    let (input, _) = cmtag("PAppVersion")(input)?;
    length_value(be_u32, p_app_version)(input)
}
fn arrangement_chunk<'a>(input: &'a [u8]) -> IResult<&'a [u8], PArrangement> {
// It varies a bit how many levels deep the actual PArrangement object is.
//
// File created with Cubase 4.5.x (file version 310?) seems to have this
// structure:
//
// GDocument -> GModel -> CmObject
//
// While Cubase version 5.x (file version 400?) seems to have the CmObject
// directly under the GDocument container.
let (odata, c) = many1(container_node)(input)?;
println!("[*] {:?}", c);
// This is the actual PArrangeent object
let (r2, o) = object_node(odata)?;
println!("[*] {:?}: {} ({} bytes)", o.node_type, o.name, o.payload.unwrap().len());
assert_eq!(r2.len(), 0);
let (r2, v) = p_arrangement(o.payload.unwrap())?;
// assert_eq!(r2.len(), 0);
Ok((r2, PArrangement::default()))
}
pub fn parse_cubase_project<P>(filename: P) -> Result<CubaseProject, Box<dyn Error>>
where
P: AsRef<Path>
{
println!("Reading {}...", filename.as_ref().to_str().ok_or("Invalid file name")?);
let data = std::fs::read(filename)?;
let (_, proj) = cpr_file(&data)
.finish()
.map_err(|e| format!("{:?}", e))?;
println!("[*] Done!");
Ok(proj)
}
/// Entry point: parses the project file named by the first command line argument and prints
/// the application information found in it.
fn main() -> Result<(), Box<dyn Error>> {
    // Argument 0 is the program itself; the file to analyze comes right after it.
    let filename = std::env::args()
        .nth(1)
        .expect("You must give me a file to analyze");

    let proj = parse_cubase_project(filename)?;
    let app = &proj.app_version;

    println!("File generated by {} {}", app.appname, app.appversion);
    println!("Architecture: {}", app.apparch);
    println!("Encoding: {}", app.appencoding);
    println!("Locale: {}", app.applocale);

    Ok(())
}