This commit is contained in:
Michael Pfaff 2022-10-23 14:33:33 -04:00
parent c6b4790bdf
commit 964fc7073c
Signed by: michael
GPG Key ID: CF402C4A012AA9D4
2 changed files with 92 additions and 59 deletions

View File

@ -14,7 +14,10 @@ const HEX_BYTES_LONG: &[u8; ASCII_BYTES_LONG.len() * 2] = b"446F6E616C64204A2E20
// Builds a benchmark label of the form "<group> - <f>" for `bench_function`.
macro_rules! name {
// `$group` is an identifier (a runtime value): the label is produced with `format!`,
// turned into a boxed `str`, and leaked with `Box::leak` so the reference never expires.
($group:ident, $f:literal) => {
std::boxed::Box::leak(format!(concat!("{} - ", $f), $group).into_boxed_str())
std::boxed::Box::leak(format!(name!("{}", $f), $group).into_boxed_str())
};
// Both parts are literals: the label is assembled at compile time with `concat!`.
($group:literal, $f:literal) => {
concat!($group, " - ", $f)
};
}
@ -66,6 +69,13 @@ fn test(hex_bytes: &[u8], bytes: &[u8]) {
.map(Box::as_ref),
stringify!(hex_bytes_dyn_unsafe_iter)
);
assert_eq!(
Some(bytes),
hex_bytes_dyn_unsafe_iter_niched(hex_bytes)
.as_ref()
.map(Box::as_ref),
stringify!(hex_bytes_dyn_unsafe_iter_niched)
);
assert_eq!(
Some(bytes),
hex_bytes_dyn_unsafe(hex_bytes).as_ref().map(Box::as_ref),
@ -96,8 +106,9 @@ fn benchmark_sized<const N: usize, const HEAP_ONLY: bool>(
// Boolean switches tested by `benchmark`: a variant's `bench_function` call only runs
// when its flag is `true`.
// NOTE(review): the `if` guards for BENCH_UNSAFE_ITER and BENCH_NICHED are not visible in
// this view — confirm they follow the same pattern.
const BENCH_UNSAFE: bool = true;
const BENCH_UNSAFE_ITER: bool = true;
const BENCH_NON_NICHED: bool = false;
const BENCH_NICHED: bool = false;
const BENCH_UNSAFE_ITER_NICHED: bool = true;
const BENCH_NON_NICHED: bool = true;
const BENCH_NICHED: bool = true;
fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
if BENCH_UNSAFE {
@ -111,6 +122,11 @@ fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
b.iter(|| hex_bytes_dyn_unsafe_iter(black_box(bytes)))
});
}
if BENCH_UNSAFE_ITER_NICHED {
c.bench_function(name!(name, "dyn unsafe iter niched"), |b| {
b.iter(|| hex_bytes_dyn_unsafe_iter_niched(black_box(bytes)))
});
}
if BENCH_NON_NICHED {
c.bench_function(name!(name, "dyn non-niched"), |b| {
b.iter(|| hex_bytes_dyn(black_box(bytes)))
@ -243,5 +259,16 @@ pub fn bench_1_6m(c: &mut Criterion) {
benchmark_sized::<LEN, true>("[1.6m]", &hex_bytes, c);
}
criterion_group!(benches, bench_16, bench_256, bench_1_6m);
criterion_main!(benches);
/// Micro-benchmarks for decoding a single pair of ASCII hex digits.
///
/// Registers two benchmarks on `c`, one calling `hex_byte` and one calling
/// `hex_byte_niched`. Both receive the digits `5` and `b`, each passed through
/// `black_box` so the inputs are opaque to the optimizer.
pub fn bench_hex_digit(c: &mut Criterion) {
    let [msb, lsb] = [b'5', b'b'];

    c.bench_function(name!("micro", "hex_byte"), |bencher| {
        bencher.iter(|| hex_byte(black_box(msb), black_box(lsb)))
    });

    c.bench_function(name!("micro", "hex_byte_niched"), |bencher| {
        bencher.iter(|| hex_byte_niched(black_box(msb), black_box(lsb)))
    });
}
criterion_group!(decode_benches, bench_16, bench_256, bench_1_6m);
criterion_group!(micro_benches, bench_hex_digit);
criterion_main!(decode_benches, micro_benches);

View File

@ -73,7 +73,7 @@ pub const fn hex_digit(ascii: u8) -> u8 {
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
let msb = hex_digit(msb);
let lsb = hex_digit(lsb);
if msb == 255 || lsb == 255 {
if (msb | lsb) == 255 {
return None;
}
Some(msb << 4 | lsb)
@ -87,6 +87,25 @@ pub const fn hex_byte_niched(msb: u8, lsb: u8) -> u16 {
(msb << 4) | (lsb & 0xf) | ((lsb & 0xf0) << 8)
}
/// Decodes two ASCII hex digits, most-significant digit first, into one byte.
///
/// Each digit is converted with [`hex_digit`]. The result is `None` when the bitwise OR
/// of the two converted digits equals 255; otherwise it is `Some(high << 4 | low)`.
// NOTE(review): this relies on `hex_digit` returning 255 for a non-hex character and a
// value below 16 otherwise — confirm against `hex_digit`.
#[inline]
pub const fn hex_byte_packed([msb, lsb]: &[u8; 2]) -> Option<u8> {
    let high = hex_digit(*msb);
    let low = hex_digit(*lsb);
    if (high | low) != 255 {
        Some(high << 4 | low)
    } else {
        None
    }
}
/// Decodes two ASCII hex digits, most-significant digit first, into a `u16`.
///
/// Any result greater than [`u8::MAX`] is invalid; a valid result carries the decoded
/// byte in its low eight bits.
///
/// The value is built from the two [`hex_digit`] results widened to `u16`: the high digit
/// shifted left by four, the low nibble of the low digit, and the upper nibble of the low
/// digit shifted left by eight.
#[inline]
pub const fn hex_byte_packed_niched([msb, lsb]: &[u8; 2]) -> u16 {
    let high = hex_digit(*msb) as u16;
    let low = hex_digit(*lsb) as u16;
    // Low eight bits for a valid pair; a high digit above 0xf spills past bit 7.
    let value = (high << 4) | (low & 0x0f);
    // Any bits in the upper nibble of the low digit are moved above bit 7.
    let low_spill = (low & 0xf0) << 8;
    value | low_spill
}
#[inline(always)]
const fn align_down_to<const N: usize>(n: usize) -> usize {
let shift = match N.checked_ilog2() {
@ -107,19 +126,22 @@ const fn align_up_to<const N: usize>(n: usize) -> usize {
#[inline(always)]
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
debug_assert_eq!(ascii.len() >> 1 << 1, ascii.len(), "len of ascii is not a multiple of 2");
debug_assert_eq!(ascii.len() >> 1, bytes.len(), "len of ascii is not twice that of bytes");
let mut i = 0;
// use the maximum batch size that would be supported by AVX-512
const BATCH_SIZE: usize = 512 / 16;
const VECTORED: bool = false;
const VECTORED: bool = true;
if VECTORED {
while i < align_down_to::<BATCH_SIZE>(bytes.len()) {
let mut buf = MaybeUninit::<u16>::uninit_array::<BATCH_SIZE>();
let mut j = 0;
while j < buf.len() {
*unsafe { buf.get_unchecked_mut(j) } = MaybeUninit::new(hex_byte_niched(
unsafe { *ascii.get_unchecked((i + j) << 1) },
unsafe { *ascii.get_unchecked(((i + j) << 1) + 1) },
));
unsafe {
*buf.get_unchecked_mut(j) = MaybeUninit::new(hex_byte_packed_niched(
&*(ascii.as_ptr().add((i + j) << 1) as *const [u8; 2]),
))
};
j += 1;
}
let buf = unsafe { MaybeUninit::array_assume_init(buf) };
@ -191,9 +213,9 @@ pub fn hex_bytes_sized_heap<const N: usize>(ascii: &[u8; N * 2]) -> Option<Box<[
Some(Box::new([0u8; N]))
} else {
let mut bytes: Box<[MaybeUninit<u8>; N]> =
unsafe { std::mem::transmute(Box::<[u8; N]>::new_uninit()) };
unsafe { Box::<[MaybeUninit<u8>; N]>::new_uninit().assume_init() };
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
Some(unsafe { std::mem::transmute(bytes) })
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8; N]) })
} else {
None
}
@ -206,17 +228,11 @@ pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
return None;
}
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
let mut i = 0;
while i < bytes.len() {
match hex_byte(unsafe { *ascii.get_unchecked(i << 1) }, unsafe {
*ascii.get_unchecked((i << 1) + 1)
}) {
Some(b) => bytes[i] = MaybeUninit::new(b),
None => return None,
}
i += 1;
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
} else {
None
}
Some(unsafe { std::mem::transmute(bytes) })
}
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
@ -236,21 +252,30 @@ pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
return None;
}
}
Some(unsafe { std::mem::transmute(bytes) })
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
}
pub fn hex_bytes_dyn_unsafe_iter_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
let len = ascii.len() >> 1;
if len << 1 != ascii.len() {
return None;
}
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
for (i, b) in ascii
.array_chunks::<2>()
.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
.enumerate()
{
if b & 0xff_00 == 0 {
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b as u8) };
} else {
return None;
}
}
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
}
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
// let mut bytes: Box<[u8]> = unsafe { std::mem::transmute(Box::<[u8]>::new_zeroed_slice(len)) };
// for (i, o) in ascii.array_chunks::<2>()
// .map(|[msb, lsb]| hex_byte(msb, lsb))
// .enumerate() {
// if let Some(b) = o {
// bytes[i] = b;
// } else {
// return None;
// }
// }
// Some(bytes)
let iter = ascii.array_chunks::<2>();
if iter.remainder().len() != 0 {
return None;
@ -260,36 +285,13 @@ pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
.map(|v| v.into_boxed_slice())
}
/// Wrapper around a mutable reference to `T` that implements [`Extend`] by forwarding to
/// the referenced value.
struct ExtendRef<'a, T>(&'a mut T);
// Available whenever the wrapped `T` is itself `Extend<A>`; every method below delegates
// unchanged to the wrapped value.
impl<'a, T, A> Extend<A> for ExtendRef<'a, T>
where
T: Extend<A>,
{
// Forwards a whole iterator of items to the wrapped value.
#[inline(always)]
fn extend<I: IntoIterator<Item = A>>(&mut self, iter: I) {
self.0.extend(iter)
}
// Forwards a single item to the wrapped value.
#[inline(always)]
fn extend_one(&mut self, item: A) {
self.0.extend_one(item)
}
// Forwards the capacity hint to the wrapped value.
#[inline(always)]
fn extend_reserve(&mut self, additional: usize) {
self.0.extend_reserve(additional)
}
}
/// Decodes ASCII hex into a boxed byte slice, converting each pair of input bytes with
/// `hex_byte_niched` and collecting through `Option<Vec<u8>>`.
///
/// Returns `None` when `ascii` has a trailing unpaired byte, or when any element of the
/// mapped sequence is `None`.
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
let iter = ascii.array_chunks::<2>();
// A non-empty remainder means the input length is not a multiple of two.
if iter.remainder().len() != 0 {
return None;
}
iter.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
.map(std::convert::TryFrom::try_from)
.map(Result::ok)
// A value with any bit set above the low eight is rejected.
.map(|b| if b & 0xff_00 == 0 { Some(b as u8) } else { None })
// Collecting into `Option<Vec<_>>` yields `None` as soon as one element is `None`.
.collect::<Option<Vec<u8>>>()
.map(|v| v.into_boxed_slice())
}
@ -313,6 +315,10 @@ mod test {
for (i, digit) in HEX_DIGITS_LOWER.into_iter().enumerate() {
assert_eq!(hex_digit(*digit as u8), i as u8);
}
for (i, digit) in HEX_DIGITS_UPPER.into_iter().enumerate() {
assert_eq!(hex_digit(*digit as u8), i as u8);
}
}
#[test]