// // Copyright (c) 2020 KAMADA Ken'ichi. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF // SUCH DAMAGE. // use std::convert::{TryFrom as _, TryInto as _}; use std::io::{BufRead, ErrorKind, Seek, SeekFrom}; use crate::endian::{Endian, BigEndian}; use crate::error::Error; use crate::util::{read64, BufReadExt as _, ReadExt as _}; // Checking "mif1" in the compatible brands should be enough, because // the "heic", "heix", "heim", and "heis" files shall include "mif1" // among the compatible brands [ISO23008-12 B.4.1] [ISO23008-12 B.4.3]. // Same for "msf1" [ISO23008-12 B.4.2] [ISO23008-12 B.4.4]. static HEIF_BRANDS: &[[u8; 4]] = &[*b"mif1", *b"msf1"]; const MAX_EXIF_SIZE: usize = 65535; // Most errors in this file are Error::InvalidFormat. impl From<&'static str> for Error { fn from(err: &'static str) -> Error { Error::InvalidFormat(err) } } pub fn get_exif_attr(reader: &mut R) -> Result, Error> where R: BufRead + Seek { let mut parser = Parser::new(reader); match parser.parse() { Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof => Err("Broken HEIF file".into()), Err(e) => Err(e), Ok(mut buf) => { if buf.len() < 4 { return Err("ExifDataBlock too small".into()); } let offset = BigEndian::loadu32(&buf, 0) as usize; if buf.len() - 4 < offset { return Err("Invalid Exif header offset".into()); } buf.drain(.. 4 + offset); Ok(buf) }, } } #[derive(Debug)] struct Parser { reader: R, // Whether the file type box has been checked. ftyp_checked: bool, // The item where Exif data is stored. item_id: Option, // The location of the item_id. item_location: Option, } #[derive(Debug)] struct Location { construction_method: u8, // index, offset, length extents: Vec<(u64, u64, u64)>, base_offset: u64, } impl Parser where R: BufRead + Seek { fn new(reader: R) -> Self { Self { reader: reader, ftyp_checked: false, item_id: None, item_location: None, } } fn parse(&mut self) -> Result, Error> { while let Some((size, boxtype)) = self.read_box_header()? { match &boxtype { b"ftyp" => { let buf = self.read_file_level_box(size)?; self.parse_ftyp(BoxSplitter::new(&buf))?; self.ftyp_checked = true; }, b"meta" => { if !self.ftyp_checked { return Err("MetaBox found before FileTypeBox".into()); } let buf = self.read_file_level_box(size)?; let exif = self.parse_meta(BoxSplitter::new(&buf))?; return Ok(exif); }, _ => self.skip_file_level_box(size)?, } } Err(Error::NotFound("HEIF")) } // Reads size, type, and largesize, // and returns body size and type. // If no byte can be read due to EOF, None is returned. fn read_box_header(&mut self) -> Result, Error> { if self.reader.is_eof()? { return Ok(None); } let mut buf = [0; 8]; self.reader.read_exact(&mut buf)?; let size = match BigEndian::loadu32(&buf, 0) { 0 => Some(std::u64::MAX), 1 => read64(&mut self.reader)?.checked_sub(16), x => u64::from(x).checked_sub(8), }.ok_or("Invalid box size")?; let boxtype = buf[4..8].try_into().expect("never fails"); Ok(Some((size, boxtype))) } fn read_file_level_box(&mut self, size: u64) -> Result, Error> { let mut buf; match size { std::u64::MAX => { buf = Vec::new(); self.reader.read_to_end(&mut buf)?; }, _ => { let size = size.try_into() .or(Err("Box is larger than the address space"))?; buf = Vec::new(); self.reader.read_exact_len(&mut buf, size)?; }, } Ok(buf) } fn skip_file_level_box(&mut self, size: u64) -> Result<(), Error> { match size { std::u64::MAX => self.reader.seek(SeekFrom::End(0))?, _ => self.reader.seek(SeekFrom::Current( size.try_into().or(Err("Large seek not supported"))?))?, }; Ok(()) } fn parse_ftyp(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> { let head = boxp.slice(8)?; let _major_brand = &head[0..4]; let _minor_version = BigEndian::loadu32(&head, 4); while let Ok(compat_brand) = boxp.array4() { if HEIF_BRANDS.contains(&compat_brand) { return Ok(()); } } Err("No compatible brand recognized in ISO base media file".into()) } fn parse_meta(&mut self, mut boxp: BoxSplitter) -> Result, Error> { let (version, _flags) = boxp.fullbox_header()?; if version != 0 { return Err("Unsupported MetaBox".into()); } let mut idat = None; let mut iloc = None; while !boxp.is_empty() { let (boxtype, mut body) = boxp.child_box()?; match boxtype { b"idat" => idat = Some(body.slice(body.len())?), b"iinf" => self.parse_iinf(body)?, b"iloc" => iloc = Some(body), _ => {}, } } self.item_id.ok_or(Error::NotFound("HEIF"))?; self.parse_iloc(iloc.ok_or("No ItemLocationBox")?)?; let location = self.item_location.as_ref() .ok_or("No matching item in ItemLocationBox")?; let mut buf = Vec::new(); match location.construction_method { 0 => { for &(_, off, len) in &location.extents { let off = location.base_offset.checked_add(off) .ok_or("Invalid offset")?; // Seeking beyond the EOF is allowed and // implementation-defined, but the subsequent read // should fail. self.reader.seek(SeekFrom::Start(off))?; match len { 0 => { self.reader.read_to_end(&mut buf)?; }, _ => { let len = len.try_into() .or(Err("Extent too large"))?; self.reader.read_exact_len(&mut buf, len)?; }, } if buf.len() > MAX_EXIF_SIZE { return Err("Exif data too large".into()); } } }, 1 => { let idat = idat.ok_or("No ItemDataBox")?; for &(_, off, len) in &location.extents { let off = location.base_offset.checked_add(off) .ok_or("Invalid offset")?; let end = off.checked_add(len).ok_or("Invalid length")?; let off = off.try_into().or(Err("Offset too large"))?; let end = end.try_into().or(Err("Length too large"))?; buf.extend_from_slice(match len { 0 => idat.get(off..), _ => idat.get(off..end), }.ok_or("Out of ItemDataBox")?); if buf.len() > MAX_EXIF_SIZE { return Err("Exif data too large".into()); } } }, 2 => return Err(Error::NotSupported( "Construction by item offset is not supported")), _ => return Err("Invalid construction_method".into()), } Ok(buf) } fn parse_iloc(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> { let (version, _flags) = boxp.fullbox_header()?; let tmp = boxp.uint16().map(usize::from)?; let (offset_size, length_size, base_offset_size) = (tmp >> 12, tmp >> 8 & 0xf, tmp >> 4 & 0xf); let index_size = match version { 1 | 2 => tmp & 0xf, _ => 0 }; let item_count = match version { 0 | 1 => boxp.uint16()?.into(), 2 => boxp.uint32()?, _ => return Err("Unsupported ItemLocationBox".into()), }; for _ in 0..item_count { let item_id = match version { 0 | 1 => boxp.uint16()?.into(), 2 => boxp.uint32()?, _ => unreachable!(), }; let construction_method = match version { 0 => 0, 1 | 2 => boxp.slice(2).map(|x| x[1] & 0xf)?, _ => unreachable!(), }; let data_ref_index = boxp.uint16()?; if construction_method == 0 && data_ref_index != 0 { return Err(Error::NotSupported( "External data reference is not supported")); } let base_offset = boxp.size048(base_offset_size)? .ok_or("Invalid base_offset_size")?; let extent_count = boxp.uint16()?.into(); if self.item_id == Some(item_id) { let mut extents = Vec::with_capacity(extent_count); for _ in 0..extent_count { let index = boxp.size048(index_size)? .ok_or("Invalid index_size")?; let offset = boxp.size048(offset_size)? .ok_or("Invalid offset_size")?; let length = boxp.size048(length_size)? .ok_or("Invalid length_size")?; extents.push((index, offset, length)); } self.item_location = Some(Location { construction_method, extents, base_offset }); } else { // (15 + 15 + 15) * u16::MAX never overflows. boxp.slice((index_size + offset_size + length_size) * extent_count)?; } } Ok(()) } fn parse_iinf(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> { let (version, _flags) = boxp.fullbox_header()?; let entry_count = match version { 0 => boxp.uint16()?.into(), _ => boxp.uint32()?, }; for _ in 0..entry_count { let (boxtype, body) = boxp.child_box()?; match boxtype { b"infe" => self.parse_infe(body)?, _ => {}, } } Ok(()) } fn parse_infe(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> { let (version, _flags) = boxp.fullbox_header()?; let item_id = match version { 2 => boxp.uint16()?.into(), 3 => boxp.uint32()?, _ => return Err("Unsupported ItemInfoEntry".into()), }; let _item_protection_index = boxp.slice(2)?; let item_type = boxp.slice(4)?; if item_type == b"Exif" { self.item_id = Some(item_id); } Ok(()) } } pub fn is_heif(buf: &[u8]) -> bool { let mut boxp = BoxSplitter::new(buf); while let Ok((boxtype, mut body)) = boxp.child_box() { if boxtype == b"ftyp" { let _major_brand_minor_version = if body.slice(8).is_err() { return false; }; while let Ok(compat_brand) = body.array4() { if HEIF_BRANDS.contains(&compat_brand) { return true; } } return false; } } false } struct BoxSplitter<'a> { inner: &'a [u8], } impl<'a> BoxSplitter<'a> { fn new(slice: &'a [u8]) -> BoxSplitter<'a> { Self { inner: slice } } fn is_empty(&self) -> bool { self.inner.is_empty() } fn len(&self) -> usize { self.inner.len() } // Returns type and body. fn child_box(&mut self) -> Result<(&'a [u8], BoxSplitter<'a>), Error> { let size = self.uint32()? as usize; let boxtype = self.slice(4)?; let body_len = match size { 0 => Some(self.len()), 1 => usize::try_from(self.uint64()?) .or(Err("Box is larger than the address space"))? .checked_sub(16), _ => size.checked_sub(8), }.ok_or("Invalid box size")?; let body = self.slice(body_len)?; Ok((boxtype, BoxSplitter::new(body))) } // Returns 0-, 4-, or 8-byte unsigned integer. fn size048(&mut self, size: usize) -> Result, Error> { match size { 0 => Ok(Some(0)), 4 => self.uint32().map(u64::from).map(Some), 8 => self.uint64().map(Some), _ => Ok(None), } } // Returns version and flags. fn fullbox_header(&mut self) -> Result<(u32, u32), Error> { let tmp = self.uint32()?; Ok((tmp >> 24, tmp & 0xffffff)) } fn uint16(&mut self) -> Result { self.slice(2).map(|num| BigEndian::loadu16(num, 0)) } fn uint32(&mut self) -> Result { self.slice(4).map(|num| BigEndian::loadu32(num, 0)) } fn uint64(&mut self) -> Result { self.slice(8).map(|num| BigEndian::loadu64(num, 0)) } fn array4(&mut self) -> Result<[u8; 4], Error> { self.slice(4).map(|x| x.try_into().expect("never fails")) } fn slice(&mut self, at: usize) -> Result<&'a [u8], Error> { let slice = self.inner.get(..at).ok_or("Box too small")?; self.inner = &self.inner[at..]; Ok(slice) } } #[cfg(test)] mod tests { use std::io::Cursor; use super::*; #[test] fn extract() { let file = std::fs::File::open("tests/exif.heic").unwrap(); let buf = get_exif_attr( &mut std::io::BufReader::new(&file)).unwrap(); assert_eq!(buf.len(), 79); assert!(buf.starts_with(b"MM\x00\x2a")); assert!(buf.ends_with(b"xif\0")); } #[test] fn unknown_before_ftyp() { let data = b"\0\0\0\x09XXXXx\ \0\0\0\x14ftypmif1\0\0\0\0mif1\ \0\0\0\x57meta\0\0\0\0\ \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\ \0\0\0\x22iinf\0\0\0\0\0\x01\ \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\ \0\0\0\x11idat\0\0\0\x01xabcd"; assert!(is_heif(data)); let exif = get_exif_attr(&mut Cursor::new(&data[..])).unwrap(); assert_eq!(exif, b"abcd"); } #[test] fn bad_exif_data_block() { let data = b"\0\0\0\x14ftypmif1\0\0\0\0mif1\ \0\0\0\x52meta\0\0\0\0\ \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\ \0\0\0\x22iinf\0\0\0\0\0\x01\ \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\ \0\0\0\x0cidat\0\0\0\x01"; assert_err_pat!(get_exif_attr(&mut Cursor::new(&data[..])), Error::InvalidFormat("Invalid Exif header offset")); let data = b"\0\0\0\x14ftypmif1\0\0\0\0mif1\ \0\0\0\x51meta\0\0\0\0\ \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\ \0\0\0\x22iinf\0\0\0\0\0\x01\ \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\ \0\0\0\x0bidat\0\0\0"; assert_err_pat!(get_exif_attr(&mut Cursor::new(&data[..])), Error::InvalidFormat("ExifDataBlock too small")); } #[test] fn parser_box_header() { // size let mut p = Parser::new(Cursor::new(b"\0\0\0\x08abcd")); assert_eq!(p.read_box_header().unwrap(), Some((0, *b"abcd"))); let mut p = Parser::new(Cursor::new(b"\0\0\0\x08abc")); assert_err_pat!(p.read_box_header(), Error::Io(_)); let mut p = Parser::new(Cursor::new(b"\0\0\0\x07abcd")); assert_err_pat!(p.read_box_header(), Error::InvalidFormat(_)); // max size let mut p = Parser::new(Cursor::new(b"\xff\xff\xff\xffabcd")); assert_eq!(p.read_box_header().unwrap(), Some((0xffffffff - 8, *b"abcd"))); // to the end of the file let mut p = Parser::new(Cursor::new(b"\0\0\0\0abcd")); assert_eq!(p.read_box_header().unwrap(), Some((std::u64::MAX, *b"abcd"))); // largesize let mut p = Parser::new(Cursor::new( b"\0\0\0\x01abcd\0\0\0\0\0\0\0\x10")); assert_eq!(p.read_box_header().unwrap(), Some((0, *b"abcd"))); let mut p = Parser::new(Cursor::new( b"\0\0\0\x01abcd\0\0\0\0\0\0\0")); assert_err_pat!(p.read_box_header(), Error::Io(_)); let mut p = Parser::new(Cursor::new( b"\0\0\0\x01abcd\0\0\0\0\0\0\0\x0f")); assert_err_pat!(p.read_box_header(), Error::InvalidFormat(_)); // max largesize let mut p = Parser::new(Cursor::new( b"\0\0\0\x01abcd\xff\xff\xff\xff\xff\xff\xff\xff")); assert_eq!(p.read_box_header().unwrap(), Some((std::u64::MAX.wrapping_sub(16), *b"abcd"))); } #[test] fn is_heif_test() { // HEIF (with any coding format) assert!(is_heif(b"\0\0\0\x14ftypmif1\0\0\0\0mif1")); // HEIC assert!(is_heif(b"\0\0\0\x18ftypheic\0\0\0\0heicmif1")); // HEIC image sequence assert!(is_heif(b"\0\0\0\x18ftyphevc\0\0\0\0msf1hevc")); // unknown major brand but compatible with HEIF assert!(is_heif(b"\0\0\0\x18ftypXXXX\0\0\0\0XXXXmif1")); // incomplete brand (OK to ignore?) assert!(is_heif(b"\0\0\0\x15ftypmif1\0\0\0\0mif1h")); assert!(is_heif(b"\0\0\0\x16ftypmif1\0\0\0\0mif1he")); assert!(is_heif(b"\0\0\0\x17ftypmif1\0\0\0\0mif1hei")); // ISO base media file but not a HEIF assert!(!is_heif(b"\0\0\0\x14ftypmp41\0\0\0\0mp41")); // missing compatible brands (what should we do?) assert!(!is_heif(b"\0\0\0\x10ftypmif1\0\0\0\0")); // truncated box let mut data: &[u8] = b"\0\0\0\x14ftypmif1\0\0\0\0mif1"; while let Some((_, rest)) = data.split_last() { data = rest; assert!(!is_heif(data)); } // short box size assert!(!is_heif(b"\0\0\0\x13ftypmif1\0\0\0\0mif1")); } #[test] fn box_splitter() { let buf = b"0123456789abcdef"; let mut boxp = BoxSplitter::new(buf); assert_err_pat!(boxp.slice(17), Error::InvalidFormat(_)); assert_eq!(boxp.slice(16).unwrap(), buf); assert_err_pat!(boxp.slice(std::usize::MAX), Error::InvalidFormat(_)); let mut boxp = BoxSplitter::new(buf); assert_eq!(boxp.slice(1).unwrap(), b"0"); assert_eq!(boxp.uint16().unwrap(), 0x3132); assert_eq!(boxp.uint32().unwrap(), 0x33343536); assert_eq!(boxp.uint64().unwrap(), 0x3738396162636465); } }