Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/bbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use nom::{
number, AsChar, IResult, Needed,
};

pub mod cr3_moov;
mod idat;
mod iinf;
mod iloc;
Expand All @@ -15,6 +16,8 @@ mod keys;
mod meta;
mod mvhd;
mod tkhd;
mod uuid;
pub use cr3_moov::Cr3MoovBox;
pub use ilst::IlstBox;
pub use keys::KeysBox;
pub use meta::MetaBox;
Expand Down
145 changes: 145 additions & 0 deletions src/bbox/cr3_moov.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
use std::ops::Range;

use nom::{combinator::fail, IResult};

use super::{
uuid::{CanonUuidBox, CANON_UUID, UUID_SIZE},
BoxHolder,
};

/// Minimum number of input bytes `Cr3MoovBox::parse` requires before it attempts to parse
/// the first box; shorter input is rejected up front.
// NOTE(review): 8 presumably corresponds to a basic box header (4-byte size + 4-byte type) — confirm.
const MIN_CR3_INPUT_SIZE: usize = 8;

/// Minimum length, in bytes, of the `ftyp` box body that `Cr3MoovBox::parse` accepts.
// NOTE(review): 4 presumably corresponds to the major-brand field of `ftyp` — confirm.
const MIN_FTYP_BODY_SIZE: usize = 4;

/// Parsed view of the `moov` box of a Canon CR3 file.
///
/// CR3 builds on the ISO Base Media File Format (the container family used by MP4/MOV)
/// and keeps its Canon-specific metadata in a UUID box nested inside `moov`. This type
/// holds that Canon UUID box, when one was found, and through it gives access to the
/// location of the EXIF metadata.
///
/// # CR3 File Structure
///
/// ```text
/// CR3 File
/// +-- ftyp (file type box)
/// +-- moov (movie box)
/// |   +-- uuid (Canon UUID box)
/// |       +-- CMT1 (main EXIF data)
/// |       +-- CMT2 (ExifIFD data)
/// |       +-- CMT3 (MakerNotes data)
/// +-- mdat (media data)
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Cr3MoovBox {
    /// Canon's UUID box with the CMT metadata boxes; `None` when `moov` does not contain one.
    uuid_canon_box: Option<CanonUuidBox>,
}

impl Cr3MoovBox {
    /// Parses the leading boxes of a CR3 file and extracts the Canon metadata held in `moov`.
    ///
    /// `input` must begin with an `ftyp` box. Once that box has been validated, the parser
    /// looks for a `moov` box with `find_box` and scans its children for Canon's UUID box.
    ///
    /// # Returns
    /// - `Ok((remain, Some(moov)))` when a `moov` box was found. The returned value may
    ///   still hold no Canon UUID box; check [`Self::uuid_canon_box`].
    /// - `Ok((remain, None))` when no `moov` box was found; `remain` is then the input
    ///   following the `ftyp` box.
    ///
    /// # Errors
    /// Fails when `input` is shorter than `MIN_CR3_INPUT_SIZE`, when the first box cannot
    /// be parsed or is not `ftyp`, when the `ftyp` body is shorter than
    /// `MIN_FTYP_BODY_SIZE`, when `find_box` fails, or when the Canon UUID box is found
    /// but `CanonUuidBox::parse` rejects it.
    pub fn parse(input: &[u8]) -> IResult<&[u8], Option<Cr3MoovBox>> {
        // Validate minimum input size
        if input.len() < MIN_CR3_INPUT_SIZE {
            tracing::warn!(
                "Input too small for CR3 parsing: {} bytes, expected at least {}",
                input.len(),
                MIN_CR3_INPUT_SIZE
            );
            return fail(input);
        }

        let (remain, bbox) = BoxHolder::parse(input)?;

        // Verify this is a valid file format by checking for ftyp box
        if bbox.box_type() != "ftyp" {
            tracing::warn!("Expected ftyp box, found: {}", bbox.box_type());
            return fail(input);
        }

        // Validate ftyp box has minimum required size
        if bbox.body_data().len() < MIN_FTYP_BODY_SIZE {
            tracing::warn!(
                "ftyp box too small: {} bytes, expected at least {}",
                bbox.body_data().len(),
                MIN_FTYP_BODY_SIZE
            );
            return fail(input);
        }

        // Find the moov box containing the metadata
        let (remain, Some(moov_bbox)) = super::find_box(remain, "moov")? else {
            tracing::debug!("moov box not found in CR3 file");
            return Ok((remain, None));
        };

        tracing::debug!(
            box_type = moov_bbox.box_type(),
            size = moov_bbox.header.box_size,
            "Found moov box in CR3 file"
        );

        // Parse the moov box contents to find Canon UUID box. The whole `input` is passed
        // along because CMT offsets are computed relative to it.
        let (_, moov_box) = Self::parse_moov_content(moov_bbox.body_data(), input)?;
        tracing::debug!(?moov_box, "Successfully parsed CR3 moov box");

        Ok((remain, Some(moov_box)))
    }

    /// Scans the child boxes of a `moov` body for Canon's UUID box.
    ///
    /// * `moov_data` - body of the `moov` box.
    /// * `full_input` - forwarded to `CanonUuidBox::parse`, which uses it as the base for
    ///   the CMT offset ranges it computes.
    ///
    /// The scan stops at the first Canon UUID box. A child box that cannot be parsed ends
    /// the scan without an error, so the result reflects whatever was found up to that
    /// point. `uuid` boxes that are too short to hold a UUID, or that carry a different
    /// UUID, are skipped.
    ///
    /// The returned remainder is the part of `moov_data` that follows the last box that
    /// was successfully consumed, including the Canon UUID box when one was found.
    ///
    /// # Errors
    /// Propagates the error of `CanonUuidBox::parse` for a Canon UUID box it rejects.
    fn parse_moov_content<'a>(
        moov_data: &'a [u8],
        full_input: &'a [u8],
    ) -> IResult<&'a [u8], Cr3MoovBox> {
        let mut remain = moov_data;
        let mut uuid_canon_box = None;

        // Iterate through all boxes within the moov box to find Canon's UUID box
        while !remain.is_empty() {
            let (new_remain, bbox) = match BoxHolder::parse(remain) {
                Ok(result) => result,
                Err(e) => {
                    tracing::warn!(
                        "Failed to parse box in moov content, continuing with partial data: {:?}",
                        e
                    );
                    break; // Stop parsing but return what we found so far
                }
            };

            // The box is consumed whatever it turns out to be, so advance right away.
            // Doing it here keeps the returned remainder consistent on every exit path,
            // including the early `break` once the Canon box has been found.
            remain = new_remain;

            if bbox.box_type() != "uuid" {
                continue;
            }

            let body_data = bbox.body_data();

            // Validate UUID box has minimum required size
            if body_data.len() < UUID_SIZE {
                tracing::debug!("UUID box too small: {} bytes", body_data.len());
                continue;
            }

            let uuid_bytes = &body_data[0..UUID_SIZE];

            if uuid_bytes == CANON_UUID {
                tracing::debug!(
                    "Found Canon UUID box with {} bytes of data",
                    body_data.len()
                );
                let (_, canon_box) = CanonUuidBox::parse(body_data, full_input)?;
                uuid_canon_box = Some(canon_box);
                break;
            } else {
                tracing::debug!("Found non-Canon UUID box");
            }
        }

        Ok((remain, Cr3MoovBox { uuid_canon_box }))
    }

    /// Returns the Canon UUID box found inside `moov`, if any.
    #[allow(dead_code)] // API method for tests
    pub fn uuid_canon_box(&self) -> Option<&CanonUuidBox> {
        self.uuid_canon_box.as_ref()
    }

    /// Returns the byte range of the primary EXIF data, or `None` when no Canon UUID box
    /// was found or it holds no such range.
    ///
    /// The range is the one reported by `CanonUuidBox::exif_data_offset`.
    pub fn exif_data_offset(&self) -> Option<Range<usize>> {
        // For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
        self.uuid_canon_box.as_ref()?.exif_data_offset().cloned()
    }
}
1 change: 1 addition & 0 deletions src/bbox/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ impl ParseBody<MetaBox> for MetaBox {
}

impl MetaBox {
#[allow(dead_code)]
#[tracing::instrument(skip_all)]
pub fn exif_data<'a>(&self, input: &'a [u8]) -> IResult<&'a [u8], Option<&'a [u8]>> {
self.iinf
Expand Down
212 changes: 212 additions & 0 deletions src/bbox/uuid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
use std::ops::Range;

use nom::IResult;

use super::BoxHolder;
use crate::exif::TiffHeader;

/// Number of bytes in a UUID.
pub const UUID_SIZE: usize = 16;

/// Box types of the Canon metadata (CMT) boxes looked for inside Canon's UUID box.
const CMT_BOX_TYPES: &[&str] = &["CMT1", "CMT2", "CMT3"];

/// UUID that identifies Canon's metadata box in CR3 files:
/// `85c0b687-820f-11e0-8111-f4ce462b6a48`.
pub const CANON_UUID: [u8; UUID_SIZE] = [
    0x85, 0xc0, 0xb6, 0x87, 0x82, 0x0f, 0x11, 0xe0, // 85c0b687-820f-11e0
    0x81, 0x11, 0xf4, 0xce, 0x46, 0x2b, 0x6a, 0x48, // 8111-f4ce462b6a48
];

/// Canon's UUID box, reduced to the locations of the CMT (Canon Metadata) boxes inside it.
///
/// CR3 files keep their EXIF metadata in a proprietary UUID box that holds three CMT
/// sub-boxes:
///
/// | Box  | Content                                              |
/// |------|------------------------------------------------------|
/// | CMT1 | Main EXIF IFD0 data (camera settings, basic metadata) |
/// | CMT2 | ExifIFD data (detailed EXIF information)              |
/// | CMT3 | MakerNotes data (Canon-specific metadata)             |
///
/// Each field is `None` until the matching box has been found and accepted by the parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CanonUuidBox {
    /// Byte range of the CMT1 body: the main EXIF IFD0 data (primary metadata).
    cmt1_offset: Option<Range<usize>>,
    /// Byte range of the CMT2 body: the ExifIFD data (detailed EXIF information).
    cmt2_offset: Option<Range<usize>>,
    /// Byte range of the CMT3 body: the MakerNotes data (Canon-specific metadata).
    cmt3_offset: Option<Range<usize>>,
}

impl CanonUuidBox {
    /// Smallest CMT box body, in bytes, that passes validation.
    // NOTE(review): 8 matches the length of a TIFF header, which CMT1 is expected to
    // start with — confirm that this is the intended origin of the limit.
    const MIN_CMT_DATA_SIZE: usize = 8;

    /// Returns the offset range for the primary EXIF data (CMT1).
    pub fn exif_data_offset(&self) -> Option<&Range<usize>> {
        // For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
        self.cmt1_offset.as_ref()
    }

    /// Returns the offset range for the ExifIFD data (CMT2).
    #[allow(dead_code)] // API method for future use
    pub fn cmt2_data_offset(&self) -> Option<&Range<usize>> {
        self.cmt2_offset.as_ref()
    }

    /// Returns the offset range for the MakerNotes data (CMT3).
    #[allow(dead_code)] // API method for future use
    pub fn cmt3_data_offset(&self) -> Option<&Range<usize>> {
        self.cmt3_offset.as_ref()
    }

    /// Parses Canon's UUID box to extract CMT (Canon Metadata) box offsets.
    ///
    /// * `uuid_data` - body of the `uuid` box. Its first `UUID_SIZE` bytes are skipped
    ///   without being compared against `CANON_UUID`; the caller is expected to have
    ///   checked them.
    /// * `full_input` - buffer the resulting ranges are relative to. Offsets are derived
    ///   from pointer positions, so `uuid_data` has to be a sub-slice of `full_input`;
    ///   CMT boxes whose data lies outside of it are skipped with a warning.
    ///
    /// Boxes of other types are ignored. A CMT box is also skipped when its body range
    /// does not fit into `full_input` or when it fails `validate_cmt_data`. If a box
    /// cannot be parsed, scanning stops and the ranges collected so far are returned.
    ///
    /// The returned remainder is the part of `uuid_data` after the last box that was
    /// consumed.
    ///
    /// # Errors
    /// Fails when `uuid_data` is shorter than `UUID_SIZE` or `full_input` is empty.
    pub fn parse<'a>(uuid_data: &'a [u8], full_input: &'a [u8]) -> IResult<&'a [u8], CanonUuidBox> {
        // Validate input sizes
        if uuid_data.len() < UUID_SIZE {
            tracing::error!(
                "Canon UUID box data too small: {} bytes, expected at least {}",
                uuid_data.len(),
                UUID_SIZE
            );
            return nom::combinator::fail(uuid_data);
        }

        if full_input.is_empty() {
            tracing::error!("Full input is empty for Canon UUID box parsing");
            return nom::combinator::fail(uuid_data);
        }

        // Skip the UUID header
        let mut remain = &uuid_data[UUID_SIZE..];
        let mut cmt1_offset = None;
        let mut cmt2_offset = None;
        let mut cmt3_offset = None;

        tracing::debug!(
            "Parsing Canon UUID box with {} bytes of CMT data",
            remain.len()
        );

        // Parse CMT boxes within the Canon UUID box
        while !remain.is_empty() {
            let (new_remain, bbox) = match BoxHolder::parse(remain) {
                Ok(result) => result,
                Err(e) => {
                    tracing::warn!(
                        "Failed to parse CMT box, continuing with partial data: {:?}",
                        e
                    );
                    break; // Stop parsing but return what we found so far
                }
            };

            // The box is consumed no matter how it is handled below, so advance once
            // here instead of before every `continue`.
            remain = new_remain;

            let box_type = bbox.box_type();
            if !CMT_BOX_TYPES.contains(&box_type) {
                // Skip unknown boxes within Canon UUID
                tracing::debug!("Skipping unknown box type: {}", box_type);
                continue;
            }

            // Derive the box position inside `full_input` from the slice addresses
            let data_start = bbox.data.as_ptr() as usize;
            let input_start = full_input.as_ptr() as usize;

            // Ensure the data pointer is within the input bounds
            if data_start < input_start || data_start >= input_start + full_input.len() {
                tracing::warn!("CMT box data pointer outside input bounds");
                continue;
            }

            let start_offset = data_start - input_start;
            let body_start = start_offset + bbox.header_size();
            let body_end = start_offset + bbox.data.len();
            let offset_range = body_start..body_end;

            // Checked slicing: rejects a range that ends past the input as well as an
            // inverted one (header larger than the box data) instead of panicking.
            let Some(cmt_data) = full_input.get(offset_range.clone()) else {
                tracing::warn!(
                    "CMT box body extends beyond input bounds: {}..{} > {}",
                    body_start,
                    body_end,
                    full_input.len()
                );
                continue;
            };

            // Validate CMT box data has minimum size and reasonable content
            if !Self::validate_cmt_data(box_type, cmt_data) {
                tracing::warn!("CMT box {} failed validation, skipping", box_type);
                continue;
            }

            match box_type {
                "CMT1" => {
                    cmt1_offset = Some(offset_range);
                    tracing::debug!("Found CMT1 (IFD0) at offset {}..{}", body_start, body_end);
                }
                "CMT2" => {
                    cmt2_offset = Some(offset_range);
                    tracing::debug!(
                        "Found CMT2 (ExifIFD) at offset {}..{}",
                        body_start,
                        body_end
                    );
                }
                "CMT3" => {
                    cmt3_offset = Some(offset_range);
                    tracing::debug!(
                        "Found CMT3 (MakerNotes) at offset {}..{}",
                        body_start,
                        body_end
                    );
                }
                _ => unreachable!("box_type should be one of CMT1, CMT2, or CMT3"),
            }
        }

        Ok((
            remain,
            CanonUuidBox {
                cmt1_offset,
                cmt2_offset,
                cmt3_offset,
            },
        ))
    }

    /// Validates CMT box data for basic integrity.
    ///
    /// Every box body must be at least `MIN_CMT_DATA_SIZE` bytes long. On top of that,
    /// `CMT1` must start with a header that `TiffHeader::parse` accepts. `CMT2` and
    /// `CMT3` are only size-checked. Any other `box_type` is rejected.
    ///
    /// Returns `true` when the data is acceptable for the given box type.
    fn validate_cmt_data(box_type: &str, data: &[u8]) -> bool {
        // Minimum size check, shared by all CMT box types
        if data.len() < Self::MIN_CMT_DATA_SIZE {
            tracing::warn!("CMT box {} too small: {} bytes", box_type, data.len());
            return false;
        }

        match box_type {
            "CMT1" => {
                // CMT1 should start with TIFF header - validate using TiffHeader::parse
                let has_tiff_header = TiffHeader::parse(data).is_ok();
                if has_tiff_header {
                    tracing::debug!("CMT1 has valid TIFF header");
                } else {
                    tracing::warn!("CMT1 does not have valid TIFF header");
                }
                has_tiff_header
            }
            "CMT2" | "CMT3" => {
                // CMT2 and CMT3 should also be TIFF format, but we're more lenient since
                // they might have different internal structures. The size check above is
                // the only requirement, so reaching this arm means the data is accepted.
                tracing::debug!("CMT box {} has sufficient size", box_type);
                true
            }
            _ => {
                tracing::warn!("Unknown CMT box type: {}", box_type);
                false
            }
        }
    }
}
Loading