aboutsummaryrefslogblamecommitdiffstats
path: root/src/main.rs
blob: 94251068371d5af1da791777f2513838581c4f82 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
















                                                                        



                      

                       

                                              





                        





                                       


                                                                                                   

                                          
                                                  

                                                                  

 








                                                                                                 


               

 

                

                        
                  

                              

 

                                                                        
 
                                                                                                              
 



                                            
 

                                                                                                               
 



                                                  
 
               
     

              

 





                                                                         

                                                                     

 
   
                                                                        




                                                                                         




                                                             

 
 




                                                                       
                                                                  
                                                                                         
                                                                              






                                                                             

                                                                                                                       
                                                                                     

 










                                                                          



















                                                                                




                                                    









                                             
   
                                        






                                                                                               
  

                                                                                             

                                                                          
   


                                                                                 

 
 










                                                                                                   
 


















                                                                        



























                                                                                       






                                                                                      
                                   


                                          
                          

            

 
                                         




                                                      





                                                                                               


          
/* cbconvert -- A program to parse and convert Cubase projects
 * Copyright (C) 2020  Harald Eilertsen
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

mod cubase_project;

use cubase_project::*;

use nom::{
    bytes::complete::*,
    combinator::map,
    multi::{length_data, length_value, many1},
    number::complete::*,
    sequence::tuple,
    Finish,
    IResult,
};
use std::{
    error::Error,
    path::Path,
};

/// Parser for length prefixed strings.
///
/// Cubase uses a string format where the length of the string is given as a 32 bit big endian word
/// before the actual string data. The string may also be zero terminated, with the zero term char
/// included in the length. In that case, we trim off the ending zero bytes.
///
/// We assume strings to be UTF-8 encoded.
fn cmstring(data: &[u8]) -> IResult<&[u8], &str> {
    let strdata = length_value(be_u32, take_till(|c| c == b'\0'));
    map(strdata, |s| std::str::from_utf8(s).unwrap())(data)
}

/**
 * The data chunks in the file are split into what seems like a structure of containers and leaf
 * nodes, where the containers don't contain any data of their own, but leaf nodes may contain
 * data and also be a container for further sub nodes.
 *
 * Each node, regardless of type, has a name and a 16 bit number whose meaning is not known yet.
 * Leaf nodes also have a data payload prefixed by a 32 bit length (be).
 */
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeType {
    /// A node without a payload of its own.
    Container,
    /// A leaf node carrying a length prefixed payload.
    Object,
}


/// A single node as produced by `container_node` or `object_node`.
///
/// The borrowed fields share the lifetime of the buffer the node was parsed from.
#[derive(Debug)]
struct Node<'a> {
    /// Whether this is a container or an object node.
    node_type: NodeType,
    /// Name of the node, read as a length prefixed string.
    name: &'a str,
    /// 16 bit big endian number that follows the name; its meaning is not known yet.
    num: u16,
    /// Raw payload bytes: `None` for container nodes, `Some` for object nodes.
    payload: Option<&'a [u8]>,
}

/// Parse the serialized `PAppVersion` object.
///
/// The data starts with three length prefixed strings (application name, version and date)
/// followed by a 32 bit big endian number. If any bytes remain after that, they are read as the
/// architecture string, another 32 bit number, and the encoding and locale strings. Otherwise
/// those fields keep their default values.
///
/// Returns the unparsed remainder of `data` together with the populated `PAppVersion`.
fn p_app_version<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> {
    let mut info = PAppVersion::default();

    // Leading fields.
    let (rest, (name, version, date, num2)) =
        tuple((cmstring, cmstring, cmstring, be_u32))(data)?;
    info.appname = name.to_owned();
    info.appversion = version.to_owned();
    info.appdate = date.to_owned();
    info.num2 = num2;

    // No trailing block present: we are done.
    if rest.is_empty() {
        return Ok((rest, info));
    }

    // Trailing fields, only read when there is data left.
    let (rest, (arch, num3, encoding, locale)) =
        tuple((cmstring, be_u32, cmstring, cmstring))(rest)?;
    info.apparch = arch.to_owned();
    info.num3 = num3;
    info.appencoding = encoding.to_owned();
    info.applocale = locale.to_owned();

    Ok((rest, info))
}

/// Parse the serialized `PArrangement` object.
///
/// Placeholder: no bytes are consumed yet. The input is handed back untouched together with a
/// default constructed `PArrangement`.
fn p_arrangement<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> {
    Ok((data, PArrangement::default()))
}

fn fourcc<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a str> {
    map(take(4usize), |tag| std::str::from_utf8(tag).unwrap())(input)
}

/**
 * Root chunks always have the same layout, two length prefixed strings.
 *
 * These seem to be a mapping between a field name, and the data type the field contains.
 * The actual data follows in the ARCH chunk following this ROOT chunk.
 */
fn root_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], (&str, &str)> {
    let (r, chunk) = riff_chunk(data)?;
    assert_eq!(chunk.fourcc, "ROOT");
    //assert_eq!(r.len(), 0);
    let (_, t) = tuple((cmstring, cmstring))(chunk.payload)?;
    Ok((r, t))
}


/**
 * A container node does not contain any data of it's own, but contains
 * one or more sub elements. These can be either other containers, or
 * object/leaf nodes.
 */
fn container_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> {
    let (r, (_, name, num)) = tuple((tag(b"\xff\xff\xff\xfe"), cmstring, be_u16))(data)?;
    Ok((r, Node { node_type: NodeType::Container, name, num, payload: None} ))
}

/**
 * An object node contains serialized structured data.
 *
 * It has a size, as well as a payload containing the actual serialized data.
 */
fn object_node<'a>(data: &'a [u8]) -> IResult<&'a [u8], Node> {
    let (r, (_, name, num, payload)) = tuple((tag(b"\xff\xff\xff\xff"), cmstring, be_u16, length_data(be_u32)))(data)?;
    Ok((r, Node { node_type: NodeType::Object, name, num, payload: Some(payload) } ))
}

/// A raw chunk: a four character identifier and the payload that follows it.
///
/// Both fields borrow from the buffer the chunk was parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RiffChunk<'a> {
    /// Four character chunk identifier, e.g. "ROOT" or "ARCH".
    fourcc: &'a str,
    /// Chunk data, excluding the identifier and the length prefix.
    payload: &'a [u8],
}

fn riff_chunk<'a>(i: &'a [u8]) -> IResult<&'a [u8], RiffChunk> {
    let (r, (fourcc, payload)) = tuple((fourcc, length_data(be_u32)))(i)?;
    Ok((r, RiffChunk { fourcc, payload }))
}


/// Parse a complete Cubase project file.
///
/// After the file header, the data is read as a sequence of ROOT chunks, each followed by the
/// chunk holding the data it announces:
///
/// * "Version" is parsed and stored as the project's application version,
/// * "Arrangement1" is parsed and printed, but not stored in the project,
/// * the chunk following any other root is skipped.
///
/// Progress is printed to stdout while parsing.
fn cpr_file<'a>(data: &'a [u8]) -> IResult<&'a [u8], CubaseProject> {
    let mut project = CubaseProject::default();

    let (mut remaining, _) = cpr_file_header(data)?;

    println!("[*] Reading Cubase CPR file, {} bytes of data...", remaining.len());

    while !remaining.is_empty() {
        // Every entry starts with a root chunk naming the field and its type.
        let (after_root, (key, type_name)) = root_chunk(remaining)?;
        println!("[*] Found root: ({}, {})", key, type_name);

        // Each arm evaluates to whatever input is left after the entry's data chunk.
        remaining = match key {
            "Version" => {
                let (next, version) = version_chunk(after_root)?;
                println!("[*] {:?}", version);
                project.app_version = version;
                next
            }
            "Arrangement1" => {
                let (next, arrangement) = arrangement_chunk(after_root)?;
                println!("[*] {:?}", arrangement);
                next
            }
            _ => {
                // Unknown root: skip over the chunk that follows it.
                let (next, _) = riff_chunk(after_root)?;
                next
            }
        };
    }

    Ok((remaining, project))
}

/**
 * Parse the Cubase Project File Header.
 *
 * A Cubase project file is a RIFF file.
 *
 * That is almost. There's a strange extra tag "NUND" between the root (RIFF) header and the
 * rest of the chunks. This would have been ok, if the NUND tag was followed by a length field,
 * as it would just be another chunk. This is not the case however, the tag is followed
 * immefiately by another chunk header.
 *
 * To make it even worse, the chunk size of the root chunk is four bytes short, so this makes
 * the file parser itself a bit more complex than what it needs to be.
 *
 * We just swallow the extra "NUND" tag here, so it won't bother us later.
 */
fn cpr_file_header<'a>(data: &'a [u8]) -> IResult<&'a [u8], ()> {
    let (r, (_, _, _)) = tuple((tag(b"RIFF"), payload_len, tag(b"NUND")))(data)?;
    Ok((r, ()))
}


/**
 * Get the correct payload length from the root chunk of the file.
 *
 * Cubase incorrectly specifies the chunk size four bytes less than what it really is, probably due
 * to the strange inserted "NUND" tag that don't really belong anywhere.  This parser fixes that by
 * adding four to the size read from the file.
 */
fn payload_len<'a>(data: &'a [u8]) -> IResult<&'a [u8], u32> {
    let (r, len) = be_u32(data)?;
    Ok((r, len + 4))
}


fn version_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PAppVersion> {
    let (r, chunk) = riff_chunk(data)?;
    assert_eq!(chunk.fourcc, "ARCH");
    //assert_eq!(r.len(), 0);

    let (odata, c) = container_node(chunk.payload)?;
    println!("[*] {:?}", c);

    let (r2, o) = object_node(odata)?;
    println!("[*] {:?}", o);
    assert_eq!(r2.len(), 0);

    let (r2, v) = p_app_version(o.payload.unwrap())?;
    assert_eq!(r2.len(), 0);

    Ok((r, v))
}


fn arrangement_chunk<'a>(data: &'a [u8]) -> IResult<&'a [u8], PArrangement> {
    let (r, chunk) = riff_chunk(data)?;
    assert_eq!(chunk.fourcc, "ARCH");

    // It varies a bit how many levels deep the actual PArrangement object is.
    //
    // File created with Cubase 4.5.x (file version 310?) seems to have this
    // structure:
    //
    //     GDocument -> GModel -> CmObject
    //
    // While Cubase version 5.x (file version 400?) seems to have the CmObject
    // directly under the GDocument container.
    let (odata, c) = many1(container_node)(chunk.payload)?;
    println!("[*] {:?}", c);

    // This is the actual PArrangeent object
    let (r2, o) = object_node(odata)?;
    println!("[*] {:?}: {} ({} bytes)", o.node_type, o.name, o.payload.unwrap().len());
    assert_eq!(r2.len(), 0);

    let (r2, v) = p_arrangement(o.payload.unwrap())?;
    // assert_eq!(r2.len(), 0);

    Ok((r, PArrangement::default()))
}

/// Read and parse the Cubase project file at `filename`.
///
/// Progress messages are printed to stdout.
///
/// # Errors
///
/// Returns an error if the file can't be read, or if its contents can't be parsed as a Cubase
/// project. Parse errors are returned as their debug representation.
pub fn parse_cubase_project<P>(filename: P) -> Result<CubaseProject, Box<dyn Error>>
where
    P: AsRef<Path>
{
    let path = filename.as_ref();

    // `display()` also copes with paths that are not valid UTF-8, so such files are no longer
    // rejected merely because their name can't be printed verbatim.
    println!("Reading {}...", path.display());

    let data = std::fs::read(path)?;
    // The parse error borrows from `data`, so turn it into an owned string before returning.
    let (_, proj) = cpr_file(&data)
        .finish()
        .map_err(|e| format!("{:?}", e))?;

    println!("[*] Done!");

    Ok(proj)
}

/// Program entry point: parse the project file named by the first command line argument and
/// print a short summary of the application that created it.
///
/// A missing argument is returned as an error instead of a panic. The argument is taken as an
/// OS string, so file names that are not valid Unicode are accepted too.
fn main() -> Result<(), Box<dyn Error>> {
    let filename = std::env::args_os()
        .nth(1)
        .ok_or("You must give me a file to analyze")?;

    let proj = parse_cubase_project(filename)?;

    println!("File generated by {} {}", proj.app_version.appname, proj.app_version.appversion);
    println!("Architecture: {}", proj.app_version.apparch);
    println!("Encoding: {}", proj.app_version.appencoding);
    println!("Locale: {}", proj.app_version.applocale);

    Ok(())
}