Parse TIFF IFD structure.

This commit is contained in:
KAMADA Ken'ichi 2016-10-26 22:46:20 +09:00
parent 1eb79a1614
commit 0e9512472c
5 changed files with 340 additions and 0 deletions

102
src/endian.rs Normal file
View File

@ -0,0 +1,102 @@
//
// Copyright (c) 2016 KAMADA Ken'ichi.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
// This is a module to select endianness by using generics
// in order to avoid the run-time dispatch penalty at the cost of
// increased object size.
/// Byte-order-specific loading of unsigned integers from a byte slice.
///
/// The methods take no `self`; an implementor is chosen as a type
/// parameter and the loaders are called as `E::loadu16(buf, offset)`.
pub trait Endian {
    /// Loads the `u16` stored in `buf` starting at byte index `offset`.
    fn loadu16(buf: &[u8], offset: usize) -> u16;
    /// Loads the `u32` stored in `buf` starting at byte index `offset`.
    fn loadu32(buf: &[u8], offset: usize) -> u32;
}
/// Marker type whose `Endian` implementation interprets the loaded
/// bytes as big-endian.  It carries no data and is only used as a
/// type parameter.
pub struct BigEndian;
/// Marker type whose `Endian` implementation interprets the loaded
/// bytes as little-endian.  It carries no data and is only used as a
/// type parameter.
pub struct LittleEndian;
// Generates a loader `fn $name(buf: &[u8], offset: usize) -> $int_type`.
//
// The generated function copies `size_of::<$int_type>()` bytes starting
// at `buf[offset]` into an integer and converts that integer to the
// host byte order with `$int_type::$from_func` (`from_be` or `from_le`).
//
// `offset` may have any alignment.  The function panics if the
// requested bytes do not lie entirely inside `buf`.
macro_rules! generate_load {
    ($name:ident, $int_type:ident, $from_func:ident) => (
        fn $name(buf: &[u8], offset: usize) -> $int_type {
            let size = ::std::mem::size_of::<$int_type>();
            // Slicing in two steps bounds-checks the access without
            // computing `offset + size`, which could overflow.
            let bytes = &buf[offset..][..size];
            let mut num: $int_type = 0;
            // SAFETY: `bytes` is exactly `size` bytes long, `num` is a
            // local integer of exactly `size` bytes, and the two cannot
            // overlap.  A byte-wise copy has no alignment requirement,
            // unlike dereferencing a `*const $int_type` into `buf`.
            unsafe {
                ::std::ptr::copy_nonoverlapping(
                    bytes.as_ptr(),
                    &mut num as *mut $int_type as *mut u8,
                    size);
            }
            $int_type::$from_func(num)
        }
    )
}
// Big-endian loaders.  Both functions are produced by generate_load!,
// which converts the integer read from the buffer with `from_be`.
impl Endian for BigEndian {
generate_load!(loadu16, u16, from_be);
generate_load!(loadu32, u32, from_be);
}
// Little-endian loaders.  Both functions are produced by generate_load!,
// which converts the integer read from the buffer with `from_le`.
impl Endian for LittleEndian {
generate_load!(loadu16, u16, from_le);
generate_load!(loadu32, u32, from_le);
}
#[cfg(test)]
mod tests {
    use super::*;

    // Sample input shared by the load tests; the tests slice it so that
    // each call sees the same bytes as a dedicated literal would give.
    const BYTES: [u8; 5] = [0x01, 0x02, 0x03, 0x04, 0x05];

    // Loads the leading u16 through a byte order chosen at compile time.
    fn load_first_u16<E>(data: &[u8]) -> u16 where E: Endian {
        E::loadu16(data, 0)
    }

    #[test]
    fn loadu16() {
        let two = &BYTES[..2];
        let three = &BYTES[..3];
        // Offset 0 and an odd offset, for each byte order.
        assert_eq!(BigEndian::loadu16(two, 0), 0x0102);
        assert_eq!(BigEndian::loadu16(three, 1), 0x0203);
        assert_eq!(LittleEndian::loadu16(two, 0), 0x0201);
        assert_eq!(LittleEndian::loadu16(three, 1), 0x0302);
    }

    #[test]
    fn loadu32() {
        let four = &BYTES[..4];
        let five = &BYTES[..];
        // Offset 0 and an odd offset, for each byte order.
        assert_eq!(BigEndian::loadu32(four, 0), 0x01020304);
        assert_eq!(BigEndian::loadu32(five, 1), 0x02030405);
        assert_eq!(LittleEndian::loadu32(four, 0), 0x04030201);
        assert_eq!(LittleEndian::loadu32(five, 1), 0x05040302);
    }

    #[test]
    fn dispatch() {
        // The byte order is selected purely by the type argument.
        let input = &BYTES[..2];
        assert_eq!(load_first_u16::<BigEndian>(input), 0x0102);
        assert_eq!(load_first_u16::<LittleEndian>(input), 0x0201);
    }

    #[test]
    fn static_dispatch() {
        // Each byte order gets its own instantiation of the generic
        // function, so the two instantiations have distinct addresses.
        let big = load_first_u16::<BigEndian> as *const ();
        let little = load_first_u16::<LittleEndian> as *const ();
        assert_eq!(big, load_first_u16::<BigEndian> as *const ());
        assert!(big != little);
    }
}

View File

@ -28,11 +28,19 @@
pub use error::Error;
pub use jpeg::get_exif_attr as get_exif_attr_from_jpeg;
pub use tag::Tag;
pub use tiff::Field;
pub use tiff::parse_exif;
pub use value::Value;
#[cfg(test)]
#[macro_use]
mod tmacro;
mod endian;
mod error;
mod jpeg;
mod tag;
mod tiff;
mod util;
mod value;

40
src/tag.rs Normal file
View File

@ -0,0 +1,40 @@
//
// Copyright (c) 2016 KAMADA Ken'ichi.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
// This is not an enum to keep safety and API stability, while
// supporting unknown tag values. This comment is based on the
// behavior of Rust 1.12.
// Storing unknown values in a repr(u16) enum is unsafe. The compiler
// assumes that there is no undefined discriminant even with a C-like
// enum, so the exhaustiveness check of a match expression will break.
// Storing unknown values in a special variant such as Unknown(u16)
// tends to break backward compatibility. When Tag::VariantFoo is
// defined in a new version of the library, existing code that uses
// Tag::Unknown(Foo's value) will break.
/// A tag of a TIFF field.
///
/// This is a thin wrapper around the raw 16-bit tag number, so any
/// value, known or unknown, can be represented.  Tags are cheap to
/// copy and can be compared with `==` or used as keys in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Tag(pub u16);

114
src/tiff.rs Normal file
View File

@ -0,0 +1,114 @@
//
// Copyright (c) 2016 KAMADA Ken'ichi.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
use endian::{Endian, BigEndian, LittleEndian};
use error::Error;
use tag::Tag;
use value::Value;
use value::get_type_info;
// Tiff header magic numbers [EXIF23 4.5.2].
// Byte-order mark selecting the big-endian parser: the two bytes
// 0x4d 0x4d, i.e. ASCII "MM".
const TIFF_BE: u16 = 0x4d4d;
// Byte-order mark selecting the little-endian parser: the two bytes
// 0x49 0x49, i.e. ASCII "II".
const TIFF_LE: u16 = 0x4949;
// The value 42, expected at byte offset 2 of the header when read in
// the byte order announced by the mark.
const TIFF_FORTY_TWO: u16 = 0x002a;
/// A TIFF field.
///
/// One `Field` is produced for every entry of the parsed IFD.  The
/// lifetime `'a` is that of the buffer the field was parsed from,
/// because the value may borrow from it.
#[derive(Debug)]
pub struct Field<'a> {
/// The tag number read from the entry.
pub tag: Tag,
/// The value of the entry, decoded according to its type.
pub value: Value<'a>,
}
/// Parse the Exif attributes in the TIFF format.
pub fn parse_exif(data: &[u8]) -> Result<Vec<Field>, Error> {
// Check the byte order and call the real parser.
if data.len() < 8 {
return Err(Error::InvalidFormat("Truncated TIFF header"));
}
match BigEndian::loadu16(data, 0) {
TIFF_BE => parse_exif_sub::<BigEndian>(data),
TIFF_LE => parse_exif_sub::<LittleEndian>(data),
_ => Err(Error::InvalidFormat("Invalid TIFF byte order")),
}
}
fn parse_exif_sub<E>(data: &[u8])
-> Result<Vec<Field>, Error> where E: Endian {
// Parse the rest of the header (42 and the IFD offset).
if E::loadu16(data, 2) != TIFF_FORTY_TWO {
return Err(Error::InvalidFormat("Invalid forty two"));
}
let ifd_offset = E::loadu32(data, 4) as usize;
parse_ifd::<E>(data, ifd_offset)
}
// Parse IFD [EXIF23 4.6.2].
fn parse_ifd<E>(data: &[u8], offset: usize)
-> Result<Vec<Field>, Error> where E: Endian {
// Count (the number of the entries).
if data.len() < offset || data.len() - offset < 2 {
return Err(Error::InvalidFormat("Truncated IFD"));
}
let count = E::loadu16(data, offset) as usize;
// Array of entries. (count * 12) never overflow.
if data.len() - offset - 2 < count * 12 {
return Err(Error::InvalidFormat("Truncated IFD"));
}
let mut fields = Vec::with_capacity(count);
for i in 0..count as usize {
let tag = E::loadu16(data, offset + 2 + i * 12);
let typ = E::loadu16(data, offset + 2 + i * 12 + 2);
let cnt = E::loadu32(data, offset + 2 + i * 12 + 4);
let ofs = E::loadu32(data, offset + 2 + i * 12 + 8) as usize;
let (unitlen, parser) = get_type_info::<E>(typ);
let vallen = try!(unitlen.checked_mul(cnt).ok_or(
Error::InvalidFormat("Invalid entry count"))) as usize;
let val;
if unitlen == 0 {
val = Value::Unknown(typ, cnt, ofs as u32);
} else if vallen <= 4 {
val = parser(data, offset + 2 + i * 12 + 8, cnt as usize);
} else {
if data.len() < ofs || data.len() - ofs < vallen {
return Err(Error::InvalidFormat("Truncated IFD"));
}
val = parser(data, ofs, cnt as usize);
}
fields.push(Field { tag: Tag(tag), value: val });
}
// Offset to the next IFD.
if data.len() - offset - 2 - count * 12 < 4 {
return Err(Error::InvalidFormat("Truncated IFD"));
}
let next_ifd_offset = E::loadu32(data, offset + 2 + count * 12);
if next_ifd_offset != 0 {
unimplemented!();
}
Ok(fields)
}

76
src/value.rs Normal file
View File

@ -0,0 +1,76 @@
//
// Copyright (c) 2016 KAMADA Ken'ichi.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
use endian::Endian;
/// Types and values of TIFF fields (for Exif attributes).
///
/// The lifetime `'a` is that of the buffer the value was parsed from;
/// `Ascii` borrows its bytes from that buffer.
#[derive(Debug)]
pub enum Value<'a> {
/// Slice of 8-bit bytes containing 7-bit ASCII characters.
/// One trailing null character, if present, is not included.
/// Note that the absence of the 8th bits is not guaranteed.
Ascii(&'a [u8]),
/// Vector of 16-bit unsigned integers, already converted from the
/// byte order of the file.
Short(Vec<u16>),
/// The type is unknown to this implementation.
/// The associated values are the type and the count, and the
/// offset of the "Value Offset" element.
Unknown(u16, u32, u32),
}
// Signature shared by the value parsers.  The arguments are the
// buffer, the byte offset at which the value starts, and the number
// of values to parse; the result is the decoded Value.
type Parser<'a> = fn(&'a [u8], usize, usize) -> Value<'a>;
// Look up how values of a TIFF field type are laid out and decoded.
//
// Returns the length in bytes of a single value of the type and the
// parser for that type.  A type that is not known to this
// implementation yields a length of zero and a placeholder parser.
pub fn get_type_info<'a, E>(typecode: u16) -> (u32, Parser<'a>)
    where E: Endian
{
    // Type codes handled so far.
    const TYPE_ASCII: u16 = 2;
    const TYPE_SHORT: u16 = 3;

    match typecode {
        TYPE_ASCII => (1, parse_ascii),
        TYPE_SHORT => (2, parse_short::<E>),
        _ => (0, parse_unknown),
    }
}
// Parse `count` bytes starting at `offset` as an ASCII value.
//
// If the last of those bytes is a null character it is left out of
// the result; at most one null character is removed.  Panics if the
// bytes inspected or returned lie outside `data`.
fn parse_ascii<'a>(data: &'a [u8], offset: usize, count: usize) -> Value<'a> {
    let end = offset + count;
    // An empty value has no last byte to inspect.
    let text_end = if count > 0 && data[end - 1] == 0 {
        end - 1
    } else {
        end
    };
    Value::Ascii(&data[offset..text_end])
}
// Parse `count` consecutive 16-bit unsigned integers starting at
// `offset`, decoding each of them with the byte order `E`.
fn parse_short<'a, E>(data: &'a [u8], offset: usize, count: usize) -> Value<'a>
    where E: Endian
{
    // The i-th integer occupies the two bytes at offset + i * 2.
    let shorts = (0..count)
        .map(|i| E::loadu16(data, offset + i * 2))
        .collect::<Vec<u16>>();
    Value::Short(shorts)
}
// Placeholder parser returned for types unknown to this
// implementation.  It only exists to fill the parser slot and will
// never be called, so the arguments are ignored.
fn parse_unknown<'a>(_data: &'a [u8], _offset: usize, _count: usize) -> Value<'a> {
    unreachable!()
}