From 0e9512472c41056c6877929aee4e177980e530a3 Mon Sep 17 00:00:00 2001 From: KAMADA Ken'ichi Date: Wed, 26 Oct 2016 22:46:20 +0900 Subject: [PATCH] Parse TIFF IFD structure. --- src/endian.rs | 102 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 8 ++++ src/tag.rs | 40 ++++++++++++++++++ src/tiff.rs | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/value.rs | 76 +++++++++++++++++++++++++++++++++ 5 files changed, 340 insertions(+) create mode 100644 src/endian.rs create mode 100644 src/tag.rs create mode 100644 src/tiff.rs create mode 100644 src/value.rs diff --git a/src/endian.rs b/src/endian.rs new file mode 100644 index 0000000..0bfb5ad --- /dev/null +++ b/src/endian.rs @@ -0,0 +1,102 @@ +// +// Copyright (c) 2016 KAMADA Ken'ichi. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//

// This is a module to select endianness by using generics
// in order to avoid run-time dispatching penalty at the cost of
// increased object size.

/// Byte-order-specific loads of unsigned integers from a byte slice.
///
/// The byte order is chosen at compile time through the implementing
/// type (`BigEndian` or `LittleEndian`).
pub trait Endian {
    /// Decodes the `u16` stored in `buf[from .. from + 2]`.
    ///
    /// Panics if that range is not inside `buf`.
    fn loadu16(buf: &[u8], from: usize) -> u16;
    /// Decodes the `u32` stored in `buf[from .. from + 4]`.
    ///
    /// Panics if that range is not inside `buf`.
    fn loadu32(buf: &[u8], from: usize) -> u32;
}

/// Selects big-endian decoding.
pub struct BigEndian;
/// Selects little-endian decoding.
pub struct LittleEndian;

// Generates one `Endian` load method.  `$from_func` is the integer
// method (`from_be` or `from_le`) that converts a natively loaded
// number into the value it represents.
macro_rules! generate_load {
    ($name:ident, $int_type:ident, $from_func:ident) => (
        fn $name(buf: &[u8], offset: usize) -> $int_type {
            // Slicing in two steps bounds-checks the access without
            // computing `offset + size`, so a huge offset cannot wrap.
            let bytes = &buf[offset..][..::std::mem::size_of::<$int_type>()];
            // Fold the bytes most-significant first.  No raw pointer is
            // dereferenced, so the offset does not have to be aligned.
            let be_value: $int_type =
                bytes.iter().fold(0, |acc, &b| (acc << 8) | b as $int_type);
            // `to_be()` yields the number whose in-memory representation
            // is exactly `bytes`, i.e. what a native load would produce.
            $int_type::$from_func(be_value.to_be())
        }
    )
}

impl Endian for BigEndian {
    generate_load!(loadu16, u16, from_be);
    generate_load!(loadu32, u32, from_be);
}

impl Endian for LittleEndian {
    generate_load!(loadu16, u16, from_le);
    generate_load!(loadu32, u32, from_le);
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn loadu16() {
        assert_eq!(BigEndian::loadu16(&[0x01, 0x02], 0), 0x0102);
        assert_eq!(BigEndian::loadu16(&[0x01, 0x02, 0x03], 1), 0x0203);
        assert_eq!(LittleEndian::loadu16(&[0x01, 0x02], 0), 0x0201);
        assert_eq!(LittleEndian::loadu16(&[0x01, 0x02, 0x03], 1), 0x0302);
    }

    #[test]
    fn loadu32() {
        assert_eq!(BigEndian::loadu32(&[0x01, 0x02, 0x03, 0x04], 0),
                   0x01020304);
        assert_eq!(BigEndian::loadu32(&[0x01, 0x02, 0x03, 0x04, 0x05], 1),
                   0x02030405);
        assert_eq!(LittleEndian::loadu32(&[0x01, 0x02, 0x03, 0x04], 0),
                   0x04030201);
        assert_eq!(LittleEndian::loadu32(&[0x01, 0x02, 0x03, 0x04, 0x05], 1),
                   0x05040302);
    }

    #[test]
    fn dispatch() {
        fn dispatch_sub<E>(data: &[u8]) -> u16 where E: Endian {
            E::loadu16(data, 0)
        }
        assert_eq!(dispatch_sub::<BigEndian>(&[0x01, 0x02]), 0x0102);
        assert_eq!(dispatch_sub::<LittleEndian>(&[0x01, 0x02]), 0x0201);
    }

    #[test]
    fn static_dispatch() {
        fn dispatch_sub<E>(data: &[u8]) -> u16 where E: Endian {
            E::loadu16(data, 0)
        }
        assert_eq!(dispatch_sub::<BigEndian> as *const (),
                   dispatch_sub::<BigEndian> as *const ());
        assert!(dispatch_sub::<BigEndian> as *const () !=
                dispatch_sub::<LittleEndian> as *const ());
    }
}

/* Remainder of the patch text that shares these source lines (the
   src/lib.rs hunk and the start of src/tag.rs), kept verbatim:
diff --git a/src/lib.rs b/src/lib.rs index c665c6c..e0e2b33 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,11 +28,19 @@ pub use error::Error; pub use jpeg::get_exif_attr as get_exif_attr_from_jpeg; +pub use tag::Tag; +pub use tiff::Field; +pub use tiff::parse_exif; +pub use value::Value; #[cfg(test)] #[macro_use] mod tmacro; +mod endian; mod error; mod jpeg; +mod tag; +mod tiff; mod util; +mod value; diff --git a/src/tag.rs b/src/tag.rs new file mode 100644 index 0000000..432d5d2 --- /dev/null +++ b/src/tag.rs @@ -0,0 +1,40 @@ +// +// Copyright (c) 2016 KAMADA Ken'ichi. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED.
*/
// IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//

// This is not an enum, in order to keep safety and API stability while
// supporting unknown tag values.  This comment is based on the
// behavior of Rust 1.12.
// Storing unknown values in a repr(u16) enum is unsafe.  The compiler
// assumes that there is no undefined discriminant even with a C-like
// enum, so the exhaustiveness check of a match expression will break.
// Storing unknown values in a special variant such as Unknown(u16)
// tends to break backward compatibility.  When Tag::VariantFoo is
// defined in a new version of the library, old code using
// Tag::Unknown(Foo's value) will break.

/// A tag of a TIFF field.
///
/// This is a thin wrapper around the 16-bit tag number.  It is `Copy`
/// and can be compared and hashed, so tags can be matched against known
/// numbers or used as map keys without reaching into the `.0` field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Tag(pub u16);

/* Remainder of the patch text that shares this source line (the start
   of src/tiff.rs), kept verbatim:
diff --git a/src/tiff.rs b/src/tiff.rs new file mode 100644 index 0000000..377c5c2 --- /dev/null +++ b/src/tiff.rs @@ -0,0 +1,114 @@ +// +// Copyright (c) 2016 KAMADA Ken'ichi. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution.
*/
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. +// + +use endian::{Endian, BigEndian, LittleEndian}; +use error::Error; +use tag::Tag; +use value::Value; +use value::get_type_info; + +// Tiff header magic numbers [EXIF23 4.5.2]. +const TIFF_BE: u16 = 0x4d4d; +const TIFF_LE: u16 = 0x4949; +const TIFF_FORTY_TWO: u16 = 0x002a; + +/// A TIFF field. +#[derive(Debug)] +pub struct Field<'a> { + pub tag: Tag, + pub value: Value<'a>, +} + +/// Parse the Exif attributes in the TIFF format. +pub fn parse_exif(data: &[u8]) -> Result, Error> { + // Check the byte order and call the real parser. + if data.len() < 8 { + return Err(Error::InvalidFormat("Truncated TIFF header")); + } + match BigEndian::loadu16(data, 0) { + TIFF_BE => parse_exif_sub::(data), + TIFF_LE => parse_exif_sub::(data), + _ => Err(Error::InvalidFormat("Invalid TIFF byte order")), + } +} + +fn parse_exif_sub(data: &[u8]) + -> Result, Error> where E: Endian { + // Parse the rest of the header (42 and the IFD offset). + if E::loadu16(data, 2) != TIFF_FORTY_TWO { + return Err(Error::InvalidFormat("Invalid forty two")); + } + let ifd_offset = E::loadu32(data, 4) as usize; + parse_ifd::(data, ifd_offset) +} + +// Parse IFD [EXIF23 4.6.2]. 
+fn parse_ifd(data: &[u8], offset: usize) + -> Result, Error> where E: Endian { + // Count (the number of the entries). + if data.len() < offset || data.len() - offset < 2 { + return Err(Error::InvalidFormat("Truncated IFD")); + } + let count = E::loadu16(data, offset) as usize; + + // Array of entries. (count * 12) never overflow. + if data.len() - offset - 2 < count * 12 { + return Err(Error::InvalidFormat("Truncated IFD")); + } + let mut fields = Vec::with_capacity(count); + for i in 0..count as usize { + let tag = E::loadu16(data, offset + 2 + i * 12); + let typ = E::loadu16(data, offset + 2 + i * 12 + 2); + let cnt = E::loadu32(data, offset + 2 + i * 12 + 4); + let ofs = E::loadu32(data, offset + 2 + i * 12 + 8) as usize; + let (unitlen, parser) = get_type_info::(typ); + let vallen = try!(unitlen.checked_mul(cnt).ok_or( + Error::InvalidFormat("Invalid entry count"))) as usize; + let val; + if unitlen == 0 { + val = Value::Unknown(typ, cnt, ofs as u32); + } else if vallen <= 4 { + val = parser(data, offset + 2 + i * 12 + 8, cnt as usize); + } else { + if data.len() < ofs || data.len() - ofs < vallen { + return Err(Error::InvalidFormat("Truncated IFD")); + } + val = parser(data, ofs, cnt as usize); + } + fields.push(Field { tag: Tag(tag), value: val }); + } + + // Offset to the next IFD. + if data.len() - offset - 2 - count * 12 < 4 { + return Err(Error::InvalidFormat("Truncated IFD")); + } + let next_ifd_offset = E::loadu32(data, offset + 2 + count * 12); + if next_ifd_offset != 0 { + unimplemented!(); + } + + Ok(fields) +} diff --git a/src/value.rs b/src/value.rs new file mode 100644 index 0000000..88d2543 --- /dev/null +++ b/src/value.rs @@ -0,0 +1,76 @@ +// +// Copyright (c) 2016 KAMADA Ken'ichi. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. +// + +use endian::Endian; + +/// Types and values of TIFF fields (for Exif attributes). +#[derive(Debug)] +pub enum Value<'a> { + /// Slice of 8-bit bytes containing 7-bit ASCII characters. + /// The trailing null character is not included. Note that + /// the absence of the 8th bits is not guaranteed. + Ascii(&'a [u8]), + /// Vector of 16-bit unsigned integers. + Short(Vec), + /// The type is unknown to this implementation. + /// The associated values are the type and the count, and the + /// offset of the "Value Offset" element. + Unknown(u16, u32, u32), +} + +type Parser<'a> = fn(&'a [u8], usize, usize) -> Value<'a>; + +// Return the length of a single value and the parser of the type. 
+pub fn get_type_info<'a, E>(typecode: u16) + -> (u32, Parser<'a>) where E: Endian { + match typecode { + 2 => (1, parse_ascii), + 3 => (2, parse_short::), + _ => (0, parse_unknown), + } +} + +fn parse_ascii<'a>(data: &'a [u8], offset: usize, count: usize) + -> Value<'a> { + let null = if count > 0 && data[offset + count - 1] == 0 { 1 } else { 0 }; + Value::Ascii(&data[offset .. offset + count - null]) +} + +fn parse_short<'a, E>(data: &'a [u8], offset: usize, count: usize) + -> Value<'a> where E: Endian { + let mut val = Vec::new(); + for i in 0..count { + val.push(E::loadu16(data, offset + i * 2)); + } + Value::Short(val) +} + +// This is a dummy function and will never be called. +#[allow(unused_variables)] +fn parse_unknown<'a>(data: &'a [u8], offset: usize, count: usize) + -> Value<'a> { + unreachable!() +}