Simplify length-specified reads.

This commit is contained in:
KAMADA Ken'ichi 2021-02-23 22:20:01 +09:00
parent 180ac576b7
commit a2596027c8
5 changed files with 83 additions and 65 deletions

View File

@ -24,12 +24,11 @@
// SUCH DAMAGE. // SUCH DAMAGE.
// //
use std::io; use std::io::{BufRead, ErrorKind, Seek, SeekFrom};
use std::io::{Read, SeekFrom};
use crate::endian::{Endian, BigEndian}; use crate::endian::{Endian, BigEndian};
use crate::error::Error; use crate::error::Error;
use crate::util::read64; use crate::util::{read64, BufReadExt as _, ReadExt as _};
// Checking "mif1" in the compatible brands should be enough, because // Checking "mif1" in the compatible brands should be enough, because
// the "heic", "heix", "heim", and "heis" files shall include "mif1" // the "heic", "heix", "heim", and "heis" files shall include "mif1"
@ -56,10 +55,10 @@ trait AnnotatableTryInto {
impl<T> AnnotatableTryInto for T where T: From<u8> {} impl<T> AnnotatableTryInto for T where T: From<u8> {}
pub fn get_exif_attr<R>(reader: &mut R) -> Result<Vec<u8>, Error> pub fn get_exif_attr<R>(reader: &mut R) -> Result<Vec<u8>, Error>
where R: io::BufRead + io::Seek { where R: BufRead + Seek {
let mut parser = Parser::new(reader); let mut parser = Parser::new(reader);
match parser.parse() { match parser.parse() {
Err(Error::Io(ref e)) if e.kind() == io::ErrorKind::UnexpectedEof => Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof =>
Err("Broken HEIF file".into()), Err("Broken HEIF file".into()),
Err(e) => Err(e), Err(e) => Err(e),
Ok(mut buf) => { Ok(mut buf) => {
@ -95,7 +94,7 @@ struct Location {
base_offset: u64, base_offset: u64,
} }
impl<R> Parser<R> where R: io::BufRead + io::Seek { impl<R> Parser<R> where R: BufRead + Seek {
fn new(reader: R) -> Self { fn new(reader: R) -> Self {
Self { Self {
reader: reader, reader: reader,
@ -131,13 +130,11 @@ impl<R> Parser<R> where R: io::BufRead + io::Seek {
// and returns body size and type. // and returns body size and type.
// If no byte can be read due to EOF, None is returned. // If no byte can be read due to EOF, None is returned.
fn read_box_header(&mut self) -> Result<Option<(u64, [u8; 4])>, Error> { fn read_box_header(&mut self) -> Result<Option<(u64, [u8; 4])>, Error> {
let mut buf = Vec::new(); if self.reader.is_eof()? {
match self.reader.by_ref().take(8).read_to_end(&mut buf)? { return Ok(None);
0 => return Ok(None),
1..=7 => return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
"truncated box").into()),
_ => {},
} }
let mut buf = [0; 8];
self.reader.read_exact(&mut buf)?;
let size = match BigEndian::loadu32(&buf, 0) { let size = match BigEndian::loadu32(&buf, 0) {
0 => Some(std::u64::MAX), 0 => Some(std::u64::MAX),
1 => read64(&mut self.reader)?.checked_sub(16), 1 => read64(&mut self.reader)?.checked_sub(16),
@ -149,15 +146,17 @@ impl<R> Parser<R> where R: io::BufRead + io::Seek {
} }
fn read_file_level_box(&mut self, size: u64) -> Result<Vec<u8>, Error> { fn read_file_level_box(&mut self, size: u64) -> Result<Vec<u8>, Error> {
let mut buf = Vec::new(); let mut buf;
match size { match size {
std::u64::MAX => { self.reader.read_to_end(&mut buf)?; }, std::u64::MAX => {
buf = Vec::new();
self.reader.read_to_end(&mut buf)?;
},
_ => { _ => {
self.reader.by_ref().take(size).read_to_end(&mut buf)?; let size = size.try_into()
if buf.len() as u64 != size { .or(Err("Box is larger than the address space"))?;
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, buf = Vec::new();
"truncated box").into()); self.reader.read_exact_len(&mut buf, size)?;
}
}, },
} }
Ok(buf) Ok(buf)
@ -215,14 +214,13 @@ impl<R> Parser<R> where R: io::BufRead + io::Seek {
// implementation-defined, but the subsequent read // implementation-defined, but the subsequent read
// should fail. // should fail.
self.reader.seek(SeekFrom::Start(off))?; self.reader.seek(SeekFrom::Start(off))?;
let read = match len { match len {
0 => self.reader.read_to_end(&mut buf), 0 => { self.reader.read_to_end(&mut buf)?; },
_ => self.reader.by_ref() _ => {
.take(len).read_to_end(&mut buf), let len = len.try_into()
}?; .or(Err("Extent too large"))?;
if len != 0 && read as u64 != len { self.reader.read_exact_len(&mut buf, len)?;
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, },
"truncated extent").into());
} }
if buf.len() > MAX_EXIF_SIZE { if buf.len() > MAX_EXIF_SIZE {
return Err("Exif data too large".into()); return Err("Exif data too large".into());

View File

@ -24,8 +24,7 @@
// SUCH DAMAGE. // SUCH DAMAGE.
// //
use std::io; use std::io::{BufRead, ErrorKind};
use std::io::Read;
use crate::error::Error; use crate::error::Error;
use crate::util::{read8, read16}; use crate::util::{read8, read16};
@ -52,16 +51,16 @@ const EXIF_ID: [u8; 6] = [0x45, 0x78, 0x69, 0x66, 0x00, 0x00];
/// Get the Exif attribute information segment from a JPEG file. /// Get the Exif attribute information segment from a JPEG file.
pub fn get_exif_attr<R>(reader: &mut R) pub fn get_exif_attr<R>(reader: &mut R)
-> Result<Vec<u8>, Error> where R: io::BufRead { -> Result<Vec<u8>, Error> where R: BufRead {
match get_exif_attr_sub(reader) { match get_exif_attr_sub(reader) {
Err(Error::Io(ref e)) if e.kind() == io::ErrorKind::UnexpectedEof => Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof =>
Err(Error::InvalidFormat("Broken JPEG file")), Err(Error::InvalidFormat("Broken JPEG file")),
r => r, r => r,
} }
} }
fn get_exif_attr_sub<R>(reader: &mut R) fn get_exif_attr_sub<R>(reader: &mut R)
-> Result<Vec<u8>, Error> where R: io::BufRead { -> Result<Vec<u8>, Error> where R: BufRead {
let mut soi = [0u8; 2]; let mut soi = [0u8; 2];
reader.read_exact(&mut soi)?; reader.read_exact(&mut soi)?;
if soi != [marker::P, marker::SOI] { if soi != [marker::P, marker::SOI] {
@ -87,8 +86,8 @@ fn get_exif_attr_sub<R>(reader: &mut R)
// Read marker segments. // Read marker segments.
let len = read16(reader)?.checked_sub(2) let len = read16(reader)?.checked_sub(2)
.ok_or(Error::InvalidFormat("Invalid segment length"))?; .ok_or(Error::InvalidFormat("Invalid segment length"))?;
let mut seg = Vec::new(); let mut seg = vec![0; len.into()];
reader.by_ref().take(len.into()).read_to_end(&mut seg)?; reader.read_exact(&mut seg)?;
if code == marker::APP1 && seg.starts_with(&EXIF_ID) { if code == marker::APP1 && seg.starts_with(&EXIF_ID) {
seg.drain(..EXIF_ID.len()); seg.drain(..EXIF_ID.len());
return Ok(seg); return Ok(seg);

View File

@ -24,12 +24,11 @@
// SUCH DAMAGE. // SUCH DAMAGE.
// //
use std::io; use std::io::{BufRead, ErrorKind};
use std::io::Read;
use crate::endian::{Endian, BigEndian}; use crate::endian::{Endian, BigEndian};
use crate::error::Error; use crate::error::Error;
use crate::util::BufReadExt; use crate::util::{BufReadExt as _, ReadExt as _};
// PNG file signature [PNG12 12.12]. // PNG file signature [PNG12 12.12].
const PNG_SIG: [u8; 8] = *b"\x89PNG\x0d\x0a\x1a\x0a"; const PNG_SIG: [u8; 8] = *b"\x89PNG\x0d\x0a\x1a\x0a";
@ -38,9 +37,9 @@ const EXIF_CHUNK_TYPE: [u8; 4] = *b"eXIf";
// Get the contents of the eXIf chunk from a PNG file. // Get the contents of the eXIf chunk from a PNG file.
pub fn get_exif_attr<R>(reader: &mut R) pub fn get_exif_attr<R>(reader: &mut R)
-> Result<Vec<u8>, Error> where R: io::BufRead { -> Result<Vec<u8>, Error> where R: BufRead {
match get_exif_attr_sub(reader) { match get_exif_attr_sub(reader) {
Err(Error::Io(ref e)) if e.kind() == io::ErrorKind::UnexpectedEof => Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof =>
Err(Error::InvalidFormat("Broken PNG file")), Err(Error::InvalidFormat("Broken PNG file")),
r => r, r => r,
} }
@ -49,7 +48,7 @@ pub fn get_exif_attr<R>(reader: &mut R)
// The location of the eXIf chunk is restricted [PNGEXT150 3.7], but this // The location of the eXIf chunk is restricted [PNGEXT150 3.7], but this
// reader is liberal about it. // reader is liberal about it.
fn get_exif_attr_sub<R>(reader: &mut R) fn get_exif_attr_sub<R>(reader: &mut R)
-> Result<Vec<u8>, Error> where R: io::BufRead { -> Result<Vec<u8>, Error> where R: BufRead {
let mut sig = [0u8; 8]; let mut sig = [0u8; 8];
reader.read_exact(&mut sig)?; reader.read_exact(&mut sig)?;
if sig != PNG_SIG { if sig != PNG_SIG {
@ -57,23 +56,17 @@ fn get_exif_attr_sub<R>(reader: &mut R)
} }
// Scan the series of chunks. // Scan the series of chunks.
loop { loop {
let mut lenbuf = Vec::new(); if reader.is_eof()? {
match reader.by_ref().take(4).read_to_end(&mut lenbuf)? { return Err(Error::NotFound("PNG"));
0 => return Err(Error::NotFound("PNG")),
1..=3 => return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
"truncated chunk").into()),
_ => {},
} }
let mut lenbuf = [0; 4];
reader.read_exact(&mut lenbuf)?;
let len = BigEndian::loadu32(&lenbuf, 0) as usize; let len = BigEndian::loadu32(&lenbuf, 0) as usize;
let mut ctype = [0u8; 4]; let mut ctype = [0u8; 4];
reader.read_exact(&mut ctype)?; reader.read_exact(&mut ctype)?;
if ctype == EXIF_CHUNK_TYPE { if ctype == EXIF_CHUNK_TYPE {
let mut data = Vec::new(); let mut data = Vec::new();
reader.by_ref().take(len as u64).read_to_end(&mut data)?; reader.read_exact_len(&mut data, len)?;
if data.len() != len {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
"truncated chunk").into());
}
return Ok(data); return Ok(data);
} }
// Chunk data and CRC. // Chunk data and CRC.

View File

@ -25,6 +25,7 @@
// //
use std::io; use std::io;
use std::io::Read as _;
use crate::error::Error; use crate::error::Error;
@ -50,6 +51,7 @@ pub fn read64<R>(reader: &mut R) -> Result<u64, io::Error> where R: io::Read {
pub trait BufReadExt { pub trait BufReadExt {
fn discard_exact(&mut self, len: usize) -> io::Result<()>; fn discard_exact(&mut self, len: usize) -> io::Result<()>;
fn is_eof(&mut self) -> io::Result<bool>;
} }
impl<T> BufReadExt for T where T: io::BufRead { impl<T> BufReadExt for T where T: io::BufRead {
@ -68,6 +70,37 @@ impl<T> BufReadExt for T where T: io::BufRead {
} }
Ok(()) Ok(())
} }
// Reports whether the reader is at end of input.
// Asks the underlying BufRead for its buffered data via fill_buf() and
// returns Ok(true) when that buffer comes back empty, Ok(false) otherwise.
// This method never calls consume(), so it does not advance the reader
// itself.
// A fill_buf() failure of kind Interrupted is retried; any other I/O
// error is returned to the caller unchanged.
fn is_eof(&mut self) -> io::Result<bool> {
loop {
match self.fill_buf() {
Ok(buf) => return Ok(buf.is_empty()),
// Interrupted is transient: try fill_buf() again.
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
}
}
}
}
// Extension trait that adds a length-specified read into a Vec.
// A blanket implementation for every io::Read type follows this
// declaration.
pub trait ReadExt {
// Reads `len` bytes from the reader into `buf`.
// The blanket implementation appends to `buf` and returns an
// io::Error of kind UnexpectedEof when fewer than `len` bytes could
// be read.
fn read_exact_len(&mut self, buf: &mut Vec<u8>, len: usize)
-> io::Result<()>;
}
impl<T> ReadExt for T where T: io::Read {
// Reads exactly `len` bytes from the reader and appends them to `buf`.
// The read is limited with take(len) and performed with read_to_end(),
// so `buf` grows only as data actually arrives.
// Returns Ok(()) when exactly `len` bytes were read.  If the reader ends
// early, an io::Error of kind UnexpectedEof ("unexpected EOF") is
// returned.  Errors from read_to_end() are propagated as they are.
fn read_exact_len(&mut self, buf: &mut Vec<u8>, len: usize)
-> io::Result<()> {
// Using `vec![0; len]` and `read_exact` is more efficient but
// less robust against broken files; a small file can easily
// trigger OOM by a huge length value without actual data.
// When the fallible allocation feature is stabilized,
// we could revisit this.
// The check uses the byte count returned by read_to_end(), not
// buf.len(), so bytes already in `buf` do not affect the result.
if self.take(len as u64).read_to_end(buf)? != len {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof, "unexpected EOF"));
}
Ok(())
}
}
} }
// This function must not be called with more than 4 bytes. // This function must not be called with more than 4 bytes.

View File

@ -24,12 +24,11 @@
// SUCH DAMAGE. // SUCH DAMAGE.
// //
use std::io; use std::io::{BufRead, ErrorKind};
use std::io::Read;
use crate::endian::{Endian, LittleEndian}; use crate::endian::{Endian, LittleEndian};
use crate::error::Error; use crate::error::Error;
use crate::util::BufReadExt; use crate::util::{BufReadExt as _, ReadExt as _};
// Chunk identifiers for RIFF. // Chunk identifiers for RIFF.
const FCC_RIFF: [u8; 4] = *b"RIFF"; const FCC_RIFF: [u8; 4] = *b"RIFF";
@ -38,22 +37,22 @@ const FCC_EXIF: [u8; 4] = *b"EXIF";
// Get the contents of the Exif chunk from a WebP file. // Get the contents of the Exif chunk from a WebP file.
pub fn get_exif_attr<R>(reader: &mut R) pub fn get_exif_attr<R>(reader: &mut R)
-> Result<Vec<u8>, Error> where R: io::BufRead { -> Result<Vec<u8>, Error> where R: BufRead {
match get_exif_attr_sub(reader) { match get_exif_attr_sub(reader) {
Err(Error::Io(ref e)) if e.kind() == io::ErrorKind::UnexpectedEof => Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof =>
Err(Error::InvalidFormat("Broken WebP file")), Err(Error::InvalidFormat("Broken WebP file")),
r => r, r => r,
} }
} }
fn get_exif_attr_sub<R>(reader: &mut R) fn get_exif_attr_sub<R>(reader: &mut R)
-> Result<Vec<u8>, Error> where R: io::BufRead { -> Result<Vec<u8>, Error> where R: BufRead {
let mut sig = [0; 12]; let mut sig = [0; 12];
reader.read_exact(&mut sig)?; reader.read_exact(&mut sig)?;
if sig[0..4] != FCC_RIFF || sig[8..12] != FCC_WEBP { if sig[0..4] != FCC_RIFF || sig[8..12] != FCC_WEBP {
return Err(Error::InvalidFormat("Not a WebP file")); return Err(Error::InvalidFormat("Not a WebP file"));
} }
let mut file_size = LittleEndian::loadu32(&sig, 4); let mut file_size = LittleEndian::loadu32(&sig, 4) as usize;
file_size = file_size.checked_sub(4) file_size = file_size.checked_sub(4)
.ok_or(Error::InvalidFormat("Invalid header file size"))?; .ok_or(Error::InvalidFormat("Invalid header file size"))?;
@ -63,23 +62,19 @@ fn get_exif_attr_sub<R>(reader: &mut R)
.ok_or(Error::InvalidFormat("Chunk overflowing parent"))?; .ok_or(Error::InvalidFormat("Chunk overflowing parent"))?;
let mut cheader = [0; 8]; let mut cheader = [0; 8];
reader.read_exact(&mut cheader)?; reader.read_exact(&mut cheader)?;
let mut size = LittleEndian::loadu32(&cheader, 4); let mut size = LittleEndian::loadu32(&cheader, 4) as usize;
file_size = file_size.checked_sub(size) file_size = file_size.checked_sub(size)
.ok_or(Error::InvalidFormat("Chunk overflowing parent"))?; .ok_or(Error::InvalidFormat("Chunk overflowing parent"))?;
if cheader[0..4] == FCC_EXIF { if cheader[0..4] == FCC_EXIF {
let mut payload = Vec::new(); let mut payload = Vec::new();
reader.by_ref().take(size.into()).read_to_end(&mut payload)?; reader.read_exact_len(&mut payload, size)?;
if payload.len() != size as usize {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
"truncated chunk").into());
}
return Ok(payload); return Ok(payload);
} }
if size % 2 != 0 && file_size > 0 { if size % 2 != 0 && file_size > 0 {
file_size -= 1; file_size -= 1;
size = size.checked_add(1).expect("ex-file_size - size > 0"); size = size.checked_add(1).expect("ex-file_size - size > 0");
} }
reader.discard_exact(size as usize)?; reader.discard_exact(size)?;
} }
Err(Error::NotFound("WebP")) Err(Error::NotFound("WebP"))
} }