/* cbconvert -- A program to parse and convert Cubase projects
* Copyright (C) 2020 Harald Eilertsen
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
mod cubase_project;
use cubase_project::*;
use nom::{
bytes::complete::*,
combinator::map,
multi::{length_data, length_value, many1},
number::complete::*,
sequence::tuple,
Finish,
IResult,
};
use std::{
error::Error,
path::Path,
};
/// Parser for length prefixed strings.
///
/// Cubase uses a string format where the length of the string is given as a 32 bit big endian word
/// before the actual string data. The string may also be zero terminated, with the zero term char
/// included in the length. In that case, we trim off the ending zero bytes.
///
/// We assume strings to be UTF-8 encoded.
fn cmstring(data: &[u8]) -> IResult<&[u8], &str> {
let strdata = length_value(be_u32, take_till(|c| c == b'\0'));
map(strdata, |s| std::str::from_utf8(s).unwrap())(data)
}
/**
* The data chunks in the file are split into what seems like a structure of containers and leaf
* nodes. The containers don't contain any data themselves, but leaf nodes may contain
* data and also be a container for further sub nodes.
*
* Each node regardless of type has a name, and a 16 bit number whose meaning I'm not sure about.
* Leaf nodes also have a data payload prefixed by a 32 bit length (be).
*/
#[derive(Debug)]
enum NodeType {
/// Set by `container_node`; nodes of this type are built without a payload.
Container,
/// Set by `object_node`; nodes of this type carry a length prefixed payload.
Object,
}
/// A single node as read by `container_node` or `object_node`.
///
/// All borrowed fields point into the buffer that was parsed; nothing is copied.
#[derive(Debug)]
struct Node<'a> {
/// Whether this node was read as a container or as an object.
node_type: NodeType,
/// The length prefixed name that follows the four byte node marker.
name: &'a str,
/// The 16 bit big endian number that follows the name. Its meaning is not known yet.
num: u16,
/// The raw, length prefixed data of an object node; `None` for container nodes.
payload: Option<&'a [u8]>,
}
/// Decodes the serialized `PAppVersion` object.
///
/// The payload always starts with three strings (application name, version and date)
/// followed by a 32 bit big endian number. If any data is left after that, a second group
/// is read as well: architecture string, another 32 bit number, encoding and locale strings.
/// Fields of the second group keep their default values when the group is absent.
///
/// Returns whatever input is left after the last field that was read.
fn p_app_version<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> {
    let mut info = PAppVersion::default();

    // Mandatory part, present in every file seen so far.
    let (rest, (name, version, date, num2)) =
        tuple((cmstring, cmstring, cmstring, be_u32))(data)?;
    info.appname = name.to_owned();
    info.appversion = version.to_owned();
    info.appdate = date.to_owned();
    info.num2 = num2;

    if rest.is_empty() {
        return Ok((rest, info));
    }

    // Optional trailing part.
    let (rest, (arch, num3, encoding, locale)) =
        tuple((cmstring, be_u32, cmstring, cmstring))(rest)?;
    info.apparch = arch.to_owned();
    info.num3 = num3;
    info.appencoding = encoding.to_owned();
    info.applocale = locale.to_owned();

    Ok((rest, info))
}
/// Placeholder parser for the serialized `PArrangement` object.
///
/// Nothing is decoded yet: the input is handed back untouched together with a
/// default-constructed `PArrangement`.
fn p_arrangement<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> {
    Ok((data, PArrangement::default()))
}
fn fourcc<'a>(data: &'a [u8]) -> IResult<&'a [u8], &'a str> {
let (rest, tag) = take(4usize)(data)?;
Ok((rest, std::str::from_utf8(tag).unwrap()))
}
/**
* Root chunks always have the same layout, two length prefixed strings.
*
* These seem to be a mapping between a field name, and the data type the field contains.
* The actual data follows in the ARCH chunk following this ROOT chunk.
*/
fn root_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> {
let (r, chunk) = riff_chunk(data)?;
assert_eq!(chunk.fourcc, "ROOT");
//assert_eq!(r.len(), 0);
let (_, t) = tuple((cmstring, cmstring))(chunk.payload)?;
Ok((r, t))
}
/**
* A container node does not contain any data of it's own, but contains
* one or more sub elements. These can be either other containers, or
* object/leaf nodes.
*/
fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> {
let (r, (_, name, num)) = tuple((tag(b"\xff\xff\xff\xfe"), cmstring, be_u16))(data)?;
Ok((r, Node { node_type: NodeType::Container, name, num, payload: None} ))
}
/**
* An object node contains serialized structured data.
*
* It has a size, as well as a payload containing the actual serialized data.
*/
fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> {
let (r, (_, name, num, payload)) = tuple((tag(b"\xff\xff\xff\xff"), cmstring, be_u16, length_data(be_u32)))(data)?;
Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } ))
}
/// A chunk as read by `riff_chunk`: a four byte identifier plus its raw data.
struct RiffChunk<'a> {
/// The four byte chunk identifier, as text.
fourcc: &'a str,
/// The chunk data that follows the 32 bit big endian length field.
payload: &'a [u8],
}
fn riff_chunk<'a>(i: &'a [u8]) -> IResult<&'a [u8], RiffChunk> {
let (r, (fourcc, payload)) = tuple((fourcc, length_data(be_u32)))(i)?;
Ok((r, RiffChunk { fourcc, payload }))
}
/// Parses a complete Cubase project file.
///
/// After the file header, the file is read as a sequence of pairs: a ROOT chunk naming
/// the field, followed by the chunk holding the field's data. The "Version" field is
/// stored in the returned project, the "Arrangement1" field is parsed and printed only,
/// and the data chunk of any other field is skipped unread.
fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> {
    let mut project = CubaseProject::default();

    let (mut remaining, ()) = cpr_file_header(data)?;
    println!("[*] Reading Cubase CPR file, {} bytes of data...", remaining.len());

    while !remaining.is_empty() {
        // Every field starts with a root chunk telling us what comes next.
        let (after_root, (key, type_name)) = root_chunk(remaining)?;
        println!("[*] Found root: ({}, {})", key, type_name);

        remaining = match key {
            "Version" => {
                let (next, version) = version_chunk(after_root)?;
                println!("[*] {:?}", version);
                project.app_version = version;
                next
            }
            "Arrangement1" => {
                let (next, arrangement) = arrangement_chunk(after_root)?;
                println!("[*] {:?}", arrangement);
                next
            }
            // Unknown field: step over its data chunk.
            _ => riff_chunk(after_root)?.0,
        };
    }

    Ok((remaining, project))
}
/**
* Parse the Cubase Project File Header.
*
* A Cubase project file is a RIFF file.
*
* That is almost. There's a strange extra tag "NUND" between the root (RIFF) header and the
* rest of the chunks. This would have been ok, if the NUND tag was followed by a length field,
* as it would just be another chunk. This is not the case however, the tag is followed
* immefiately by another chunk header.
*
* To make it even worse, the chunk size of the root chunk is four bytes short, so this makes
* the file parser itself a bit more complex than what it needs to be.
*
* We just swallow the extra "NUND" tag here, so it won't bother us later.
*/
fn cpr_file_header<'a>(data: &'a [u8]) -> IResult<&'a [u8], ()> {
let (r, (_, _, _)) = tuple((tag(b"RIFF"), payload_len, tag(b"NUND")))(data)?;
Ok((r, ()))
}
/**
* Get the correct payload length from the root chunk of the file.
*
* Cubase incorrectly specifies the chunk size four bytes less than what it really is, probably due
* to the strange inserted "NUND" tag that don't really belong anywhere. This parser fixes that by
* adding four to the size read from the file.
*/
fn payload_len<'a>(data: &'a [u8]) -> IResult<&'a [u8], u32> {
let (r, len) = be_u32(data)?;
Ok((r, len + 4))
}
fn version_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> {
let (r, chunk) = riff_chunk(data)?;
assert_eq!(chunk.fourcc, "ARCH");
//assert_eq!(r.len(), 0);
let (odata, c) = container_node(chunk.payload)?;
println!("[*] {:?}", c);
let (r2, o) = object_node(odata)?;
println!("[*] {:?}", o);
assert_eq!(r2.len(), 0);
let (r2, v) = p_app_version(o.payload.unwrap())?;
assert_eq!(r2.len(), 0);
Ok((r, v))
}
fn arrangement_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> {
let (r, chunk) = riff_chunk(data)?;
assert_eq!(chunk.fourcc, "ARCH");
// It varies a bit how many levels deep the actual PArrangement object is.
//
// File created with Cubase 4.5.x (file version 310?) seems to have this
// structure:
//
// GDocument -> GModel -> CmObject
//
// While Cubase version 5.x (file version 400?) seems to have the CmObject
// directly under the GDocument container.
let (odata, c) = many1(container_node)(chunk.payload)?;
println!("[*] {:?}", c);
// This is the actual PArrangeent object
let (r2, o) = object_node(odata)?;
println!("[*] {:?}: {} ({} bytes)", o.node_type, o.name, o.payload.unwrap().len());
assert_eq!(r2.len(), 0);
let (r2, v) = p_arrangement(o.payload.unwrap())?;
// assert_eq!(r2.len(), 0);
Ok((r, PArrangement::default()))
}
pub fn parse_cubase_project
(filename: P) -> Result>
where
P: AsRef
{
println!("Reading {}...", filename.as_ref().to_str().ok_or("Invalid file name")?);
let data = std::fs::read(filename)?;
let (_, proj) = cpr_file(&data)
.finish()
.map_err(|e| format!("{:?}", e))?;
println!("[*] Done!");
Ok(proj)
}
fn main() -> Result<(), Box> {
let filename = std::env::args()
.skip(1)
.next()
.expect("You must give me a file to analyze");
let proj = parse_cubase_project(filename)?;
println!("File generated by {} {}", proj.app_version.appname, proj.app_version.appversion);
println!("Architecture: {}", proj.app_version.apparch);
println!("Encoding: {}", proj.app_version.appencoding);
println!("Locale: {}", proj.app_version.applocale);
Ok(())
}