// fast-hex/fast-hex-benches/benches/dec.rs
//
// 408 lines
// 13 KiB
// Rust

#![feature(generic_const_exprs)]
#![feature(new_uninit)]
#![feature(portable_simd)]
use std::mem::MaybeUninit;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use fast_hex::dec::*;
use fast_hex::defs::{DIGIT_BATCH_SIZE, WIDE_BATCH_SIZE};
use fast_hex::test::name;
// TODO: change this example text to something less silly
/// 16-byte ASCII sample; the expected output when decoding [`HEX_BYTES`].
const ASCII_BYTES: &[u8; 16] = b"Donald J. Trump!";
/// Uppercase hex spelling of [`ASCII_BYTES`]: two hex digits per input byte.
const HEX_BYTES: &[u8; ASCII_BYTES.len() * 2] = b"446F6E616C64204A2E205472756D7021";
// TODO: change this example text to something less silly
/// 256-byte ASCII sample (the 16-byte phrase repeated); the expected output
/// when decoding [`HEX_BYTES_LONG`].
const ASCII_BYTES_LONG: &[u8; 256] = b"Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!";
/// Uppercase hex spelling of [`ASCII_BYTES_LONG`]: two hex digits per input byte.
const HEX_BYTES_LONG: &[u8; ASCII_BYTES_LONG.len() * 2] = b"446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021";
/// Builds a benchmark identifier of the form `[<group>] - <function>`.
///
/// * Two literals are joined at compile time with `concat!` and yield a
///   `&'static str`.
/// * Any other combination is formatted at run time; the resulting string is
///   leaked with `Box::leak` so that it also lives for `'static`.
///
/// Arms are tried top to bottom, so the arms that require a literal must come
/// before the fully general `(expr, expr)` arm; otherwise the general arm
/// swallows every invocation and the more specific one is unreachable.
///
/// A literal argument that is paired with a non-literal one becomes part of
/// the `format!` string, so it must not contain `{` or `}`.
macro_rules! name {
    // Both parts known at compile time: no allocation.
    ($group:literal, $f:literal) => {
        concat!("[", $group, "] - ", $f)
    };
    // Literal function name, computed group.
    ($group:expr, $f:literal) => {
        std::boxed::Box::leak(format!(name!("{}", $f), $group).into_boxed_str())
    };
    // Literal group, computed function name.
    ($group:literal, $f:expr) => {
        std::boxed::Box::leak(format!(name!($group, "{}"), $f).into_boxed_str())
    };
    // Both parts computed at run time.
    ($group:expr, $f:expr) => {
        std::boxed::Box::leak(format!(name!("{}", "{}"), $group, $f).into_boxed_str())
    };
}
#[track_caller]
fn test_sized<const N: usize, const HEAP_ONLY: bool>(hex_bytes: &[u8; N * 2], bytes: &[u8; N])
where
[(); N * 2]:,
{
return;
test(hex_bytes, bytes);
if !HEAP_ONLY {
assert_eq!(
Some(bytes),
hex_bytes_sized_const::<N>(hex_bytes).as_ref(),
stringify!(hex_bytes_sized_const)
);
assert_eq!(
Some(bytes),
hex_bytes_sized::<N>(hex_bytes).as_ref(),
stringify!(hex_bytes_sized)
);
}
assert_eq!(
Some(bytes),
hex_bytes_sized_heap::<N>(hex_bytes)
.as_ref()
.map(Box::as_ref),
stringify!(hex_bytes_sized_heap)
);
}
/// Asserts that each dynamically sized decoder maps `hex_bytes` to `bytes`.
///
/// The decoders exercised, in order, are `hex_bytes_dyn`,
/// `hex_bytes_dyn_unsafe_iter` and `hex_bytes_dyn_unsafe`.
///
/// # Panics
/// Panics if `hex_bytes` is not exactly twice as long as `bytes`, or if a
/// decoder returns anything other than `Some(bytes)`.
#[track_caller]
#[inline]
fn test(hex_bytes: &[u8], bytes: &[u8]) {
    let expected = Some(bytes);
    assert_eq!(hex_bytes.len(), bytes.len() * 2);

    let via_dyn = hex_bytes_dyn(hex_bytes);
    assert_eq!(
        expected,
        via_dyn.as_ref().map(Box::as_ref),
        stringify!(hex_bytes_dyn)
    );

    let via_unsafe_iter = hex_bytes_dyn_unsafe_iter(hex_bytes);
    assert_eq!(
        expected,
        via_unsafe_iter.as_ref().map(Box::as_ref),
        stringify!(hex_bytes_dyn_unsafe_iter)
    );

    let via_unsafe = hex_bytes_dyn_unsafe(hex_bytes);
    assert_eq!(
        expected,
        via_unsafe.as_ref().map(Box::as_ref),
        stringify!(hex_bytes_dyn_unsafe)
    );
}
/// Registers the fixed-size decoder benchmarks for one input and then hands
/// the same input to [`benchmark`] for the dynamically sized decoders.
///
/// * `name` - group label placed in the `[...]` part of each benchmark id.
/// * `bytes` - hex input of `N * 2` bytes.
/// * `c` - Criterion instance the benchmarks are registered on.
///
/// With `HEAP_ONLY == true` the `dec/sized` and `dec/sized-const` benchmarks
/// are skipped and only `dec/sized-heap` is registered.
fn benchmark_sized<const N: usize, const HEAP_ONLY: bool>(
    name: &str,
    bytes: &[u8; N * 2],
    c: &mut Criterion,
) where
    [(); N * 2]:,
{
    if !HEAP_ONLY {
        c.bench_function(name!(name, "dec/sized"), |bencher| {
            bencher.iter(|| hex_bytes_sized::<N>(black_box(bytes)))
        });

        c.bench_function(name!(name, "dec/sized-const"), |bencher| {
            bencher.iter(|| hex_bytes_sized_const::<N>(black_box(bytes)))
        });
    }

    c.bench_function(name!(name, "dec/sized-heap"), |bencher| {
        bencher.iter(|| hex_bytes_sized_heap::<N>(black_box(bytes)))
    });

    benchmark(name, bytes, c);
}
/// Enables the `dec/dyn-unsafe` benchmark (`hex_bytes_dyn_unsafe`) in `benchmark`.
const BENCH_UNSAFE: bool = true;
/// Enables the `dec/dyn-unsafe-iter` benchmark (`hex_bytes_dyn_unsafe_iter`) in `benchmark`.
const BENCH_UNSAFE_ITER: bool = true;
/// Enables the `dec/dyn-non-niched` benchmark (`hex_bytes_dyn`) in `benchmark`.
const BENCH_NON_NICHED: bool = true;
/// Registers the dynamically sized decoder benchmarks for `bytes`.
///
/// Each benchmark is gated by its own compile-time switch (`BENCH_UNSAFE`,
/// `BENCH_UNSAFE_ITER`, `BENCH_NON_NICHED`). `name` is the group label used in
/// the benchmark ids and `c` is the Criterion instance to register on.
fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
    if BENCH_UNSAFE {
        c.bench_function(name!(name, "dec/dyn-unsafe"), |bencher| {
            bencher.iter(|| hex_bytes_dyn_unsafe(black_box(bytes)))
        });
    }

    // Currently disabled:
    //c.bench_function(format!("{name} - dyn unsafe for"), |b| b.iter(|| hex_bytes_dyn_unsafe_for(black_box(bytes))));

    if BENCH_UNSAFE_ITER {
        c.bench_function(name!(name, "dec/dyn-unsafe-iter"), |bencher| {
            bencher.iter(|| hex_bytes_dyn_unsafe_iter(black_box(bytes)))
        });
    }

    if BENCH_NON_NICHED {
        c.bench_function(name!(name, "dec/dyn-non-niched"), |bencher| {
            bencher.iter(|| hex_bytes_dyn(black_box(bytes)))
        });
    }
}
/// Runs `test_sized` and then `benchmark_sized` (group label `"16"`) on the
/// 16-byte sample `ASCII_BYTES` / `HEX_BYTES`.
pub fn bench_16(c: &mut Criterion) {
    const N: usize = ASCII_BYTES.len();

    test_sized::<N, false>(HEX_BYTES, ASCII_BYTES);
    benchmark_sized::<N, false>("16", HEX_BYTES, c);
}
/// Runs `test_sized` and then `benchmark_sized` (group label `"256"`) on the
/// 256-byte sample `ASCII_BYTES_LONG` / `HEX_BYTES_LONG`.
pub fn bench_256(c: &mut Criterion) {
    const N: usize = ASCII_BYTES_LONG.len();

    test_sized::<N, false>(HEX_BYTES_LONG, ASCII_BYTES_LONG);
    benchmark_sized::<N, false>("256", HEX_BYTES_LONG, c);
}
/// Random element selection for slice-like containers.
trait SliceRandom {
    /// Element type handed out by [`SliceRandom::choose`].
    type Item;

    /// Returns a reference to one element selected with `rng`.
    ///
    /// The implementations in this file return `None` for an empty container.
    fn choose<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Option<&Self::Item>;
}
/// Draws an index in `0..ubound` from `rng`.
///
/// `UBOUND` is a compile-time bound used only to pick the sampling width:
/// when it fits in a `u32` the index is sampled as a `u32` (with `ubound`
/// cast to `u32`), otherwise it is sampled as a `usize`.
#[inline]
fn gen_index<R: rand::Rng + ?Sized, const UBOUND: usize>(rng: &mut R, ubound: usize) -> usize {
    let needs_wide_sampling = UBOUND > (u32::MAX as usize);
    if needs_wide_sampling {
        return rng.gen_range(0..ubound);
    }
    rng.gen_range(0..ubound as u32) as usize
}
/// Random selection for slices of any length.
impl<T> SliceRandom for [T] {
    type Item = T;

    /// Returns `None` for an empty slice, otherwise a reference to the element
    /// at an index drawn by `gen_index` (sampled as `usize`, since the length
    /// is only known at run time).
    #[inline]
    fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item>
    where
        R: rand::Rng + ?Sized,
    {
        match self.len() {
            0 => None,
            len => Some(&self[gen_index::<_, { usize::MAX }>(rng, len)]),
        }
    }
}
/// Random selection for fixed-size arrays; the length `N` is passed to
/// `gen_index` as its compile-time bound.
impl<const N: usize, T> SliceRandom for [T; N] {
    type Item = T;

    /// Returns `None` for a zero-length array, otherwise a reference to the
    /// element at an index drawn by `gen_index`.
    #[inline]
    fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item>
    where
        R: rand::Rng + ?Sized,
    {
        if N == 0 {
            return None;
        }
        let index = gen_index::<_, N>(rng, N);
        Some(&self[index])
    }
}
/// Display adapter that renders a byte slice as a bracketed, space-separated
/// list of hex digits, used in the panic messages of the large benchmarks.
struct DisplayAsHexDigits<'a>(&'a [u8]);

impl std::fmt::Display for DisplayAsHexDigits<'_> {
    /// Writes `[]` for an empty slice, otherwise `[ d d ... ]`.
    ///
    /// Every byte is passed through `hex_digit`: a result in `0..=15` is
    /// printed as the matching lowercase hex character, anything else is
    /// printed as the original byte in `0x..` form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use std::fmt::Write;

        let DisplayAsHexDigits(raw) = *self;
        if raw.is_empty() {
            return f.write_str("[]");
        }

        f.write_str("[ ")?;
        for byte in raw {
            match hex_digit(*byte) {
                decimal @ 0..=9 => f.write_char(char::from(b'0' + decimal))?,
                alpha @ 10..=15 => f.write_char(char::from(b'a' + alpha - 10))?,
                _ => write!(f, "0x{:02x}", byte)?,
            }
            f.write_char(' ')?;
        }
        f.write_char(']')
    }
}
/// Benchmarks decoding of 2 KiB of output (4 KiB of random hex digits).
///
/// The input lives in a stack array and is filled with digits drawn from
/// `fast_hex::enc::HEX_CHARS_LOWER`. It is decoded once with `hex_bytes_dyn`
/// to obtain the reference output handed to `test_sized`, and then all
/// decoders are benchmarked under the group label `"2k"`.
///
/// # Panics
/// Panics if `hex_bytes_dyn` rejects the generated input.
pub fn bench_2k(c: &mut Criterion) {
    const LEN: usize = 1024 * 2;
    const LEN2: usize = LEN * 2;

    let mut rng = rand::thread_rng();
    // `array::from_fn` fills the array in index order without any
    // `MaybeUninit` / `transmute` juggling.
    let hex_bytes: [u8; LEN2] =
        std::array::from_fn(|_| *fast_hex::enc::HEX_CHARS_LOWER.choose(&mut rng).unwrap());

    let bytes = hex_bytes_dyn(&hex_bytes[..]).unwrap_or_else(|| {
        panic!(
            "Generated hex bytes were invalid: {}",
            DisplayAsHexDigits(&hex_bytes[..])
        )
    });

    test_sized::<LEN, false>(&hex_bytes, bytes.as_ref().try_into().unwrap());
    benchmark_sized::<LEN, false>("2k", &hex_bytes, c);
}
/// Benchmarks decoding of 512 KiB of output (1 MiB of random hex digits).
///
/// The input is heap-allocated and filled with digits drawn from
/// `fast_hex::enc::HEX_CHARS_LOWER`. It is decoded once with `hex_bytes_dyn`
/// to obtain the reference output handed to `test_sized`, and then the
/// heap-only set of decoders is benchmarked under the group label `"512k"`.
///
/// # Panics
/// Panics if `hex_bytes_dyn` rejects the generated input.
pub fn bench_512k(c: &mut Criterion) {
    const LEN: usize = 1024 * 512;
    const LEN2: usize = LEN * 2;

    let mut rng = rand::thread_rng();
    // Collect straight onto the heap and convert `Box<[u8]>` into
    // `Box<[u8; LEN2]>`; this needs neither `new_uninit` nor `transmute`.
    let hex_bytes: Box<[u8; LEN2]> = (0..LEN2)
        .map(|_| *fast_hex::enc::HEX_CHARS_LOWER.choose(&mut rng).unwrap())
        .collect::<Vec<u8>>()
        .into_boxed_slice()
        .try_into()
        .expect("exactly LEN2 bytes were collected");

    let bytes = hex_bytes_dyn(&hex_bytes[..]).unwrap_or_else(|| {
        panic!(
            "Generated hex bytes were invalid: {}",
            DisplayAsHexDigits(&hex_bytes[..])
        )
    });

    test_sized::<LEN, true>(&hex_bytes, bytes.as_ref().try_into().unwrap());
    benchmark_sized::<LEN, true>("512k", &hex_bytes, c);
}
/// Benchmarks decoding of 1,600,000 bytes of output (3,200,000 random hex digits).
///
/// The input is heap-allocated and filled with digits drawn from
/// `fast_hex::enc::HEX_CHARS_LOWER`. It is decoded once with `hex_bytes_dyn`
/// to obtain the reference output handed to `test_sized`, and then the
/// heap-only set of decoders is benchmarked under the group label `"1.6m"`.
///
/// # Panics
/// Panics if `hex_bytes_dyn` rejects the generated input.
pub fn bench_1_6m(c: &mut Criterion) {
    const LEN: usize = 1_600_000;
    const LEN2: usize = LEN * 2;

    let mut rng = rand::thread_rng();
    // Collect straight onto the heap and convert `Box<[u8]>` into
    // `Box<[u8; LEN2]>`; this needs neither `new_uninit` nor `transmute`.
    let hex_bytes: Box<[u8; LEN2]> = (0..LEN2)
        .map(|_| *fast_hex::enc::HEX_CHARS_LOWER.choose(&mut rng).unwrap())
        .collect::<Vec<u8>>()
        .into_boxed_slice()
        .try_into()
        .expect("exactly LEN2 bytes were collected");

    let bytes = hex_bytes_dyn(&hex_bytes[..]).unwrap_or_else(|| {
        panic!(
            "Generated hex bytes were invalid: {}",
            DisplayAsHexDigits(&hex_bytes[..])
        )
    });

    test_sized::<LEN, true>(&hex_bytes, bytes.as_ref().try_into().unwrap());
    benchmark_sized::<LEN, true>("1.6m", &hex_bytes, c);
}
/// Micro-benchmarks single-digit decoding over one batch of valid hex digits:
/// the scalar `hex_digit` applied to each byte, and `hex_digit_simd` applied
/// to the whole batch at once.
pub fn bench_micro_hex_digit(c: &mut Criterion) {
    use std::simd::Simd;

    // ASCII hex digits (the same sequence as the byte pairs used in
    // `bench_micro_hex_byte`). The table previously held raw nibble values
    // (0x0..=0xf), which are not hex *characters*, so the "valid" benchmark
    // was in fact measuring the decoders on invalid input.
    const HEX_DIGITS_VALID: [u8; DIGIT_BATCH_SIZE] = *b"ff0011effe0ff034ff0011effe0ff034";

    let hex_digits = Simd::from_array(black_box(HEX_DIGITS_VALID));

    c.bench_function(name!("micro", "dec/hex_digit"), |bencher| {
        bencher.iter(|| {
            for digit in black_box(HEX_DIGITS_VALID) {
                black_box(hex_digit(digit));
            }
        })
    });

    c.bench_function(name!("micro", "dec/hex_digit_simd"), |bencher| {
        // Hide the input on every iteration, like the scalar loop above and
        // the SIMD benchmark in `bench_micro_hex_byte`, so the call cannot be
        // hoisted out of the measurement loop.
        bencher.iter(|| hex_digit_simd::<DIGIT_BATCH_SIZE>(black_box(hex_digits)))
    });
}
/// Micro-benchmarks byte decoding over one batch of valid hex digit pairs:
/// first the free function `hex_byte`, then the packed, unpacked and SIMD
/// entry points of `HexByteDecoderA`.
pub fn bench_micro_hex_byte(c: &mut Criterion) {
    const HEX_BYTES_VALID: [[u8; 2]; WIDE_BATCH_SIZE] = [
        *b"ff", *b"00", *b"11", *b"ef", *b"fe", *b"0f", *b"f0", *b"34",
        *b"ff", *b"00", *b"11", *b"ef", *b"fe", *b"0f", *b"f0", *b"34",
    ];

    /// Registers the packed / unpacked / SIMD benchmarks for decoder `T`,
    /// using `name` as the decoder label inside the benchmark id.
    fn bench_decoder<T: HexByteDecoder + HexByteSimdDecoder>(c: &mut Criterion, name: &str) {
        let flattened = conv::u8x2_to_u8(HEX_BYTES_VALID);

        c.bench_function(name!("micro", format!("dec/{name}/packed")), |bencher| {
            bencher.iter(|| {
                for pair in black_box(HEX_BYTES_VALID) {
                    black_box(T::decode_packed(&pair));
                }
            })
        });

        c.bench_function(name!("micro", format!("dec/{name}/unpacked")), |bencher| {
            bencher.iter(|| {
                for [hi, lo] in black_box(HEX_BYTES_VALID) {
                    black_box(T::decode_unpacked(hi, lo));
                }
            })
        });

        c.bench_function(name!("micro", format!("dec/{name}/simd")), |bencher| {
            bencher.iter(|| T::decode_simd(black_box(flattened)))
        });
    }

    c.bench_function(name!("micro", "dec/hex_byte"), |bencher| {
        bencher.iter(|| {
            for pair in black_box(HEX_BYTES_VALID) {
                black_box(hex_byte(pair[0], pair[1]));
            }
        })
    });

    bench_decoder::<HexByteDecoderA>(c, stringify!(HexByteDecoderA));
}
/// Nano-benchmarks a single `hex_digit` call on the digit `'5'`, once with the
/// input hidden from the optimizer only up front and once (`+bb`) with it
/// hidden again on every iteration.
pub fn bench_nano_hex_digit(c: &mut Criterion) {
    let digit = black_box(b'5');

    c.bench_function(name!("nano", "dec/hex_digit"), |bencher| {
        bencher.iter(|| hex_digit(digit))
    });

    c.bench_function(name!("nano", "dec/hex_digit+bb"), |bencher| {
        bencher.iter(|| hex_digit(black_box(digit)))
    });
}
/// Nano-benchmarks decoding of the single digit pair `"5b"`: first with the
/// free function `hex_byte`, then with the packed and unpacked entry points of
/// `HexByteDecoderA`.
pub fn bench_nano_hex_byte(c: &mut Criterion) {
    const DIGITS: [u8; 2] = *b"5b";

    /// Registers the packed / unpacked benchmarks for decoder `T`, using
    /// `name` as the decoder label inside the benchmark id.
    fn bench_decoder<T: HexByteDecoder + HexByteSimdDecoder>(c: &mut Criterion, name: &str) {
        let pair = black_box(DIGITS);

        c.bench_function(name!("nano", format!("dec/{name}/packed")), |bencher| {
            bencher.iter(|| {
                black_box(T::decode_packed(&pair));
            })
        });

        c.bench_function(name!("nano", format!("dec/{name}/unpacked")), |bencher| {
            bencher.iter(|| {
                let hi = black_box(DIGITS[0]);
                let lo = black_box(DIGITS[1]);
                black_box(T::decode_unpacked(hi, lo));
            })
        });
    }

    let pair = black_box(DIGITS);
    c.bench_function(name!("nano", "dec/hex_byte"), |bencher| {
        bencher.iter(|| hex_byte(pair[0], pair[1]))
    });

    bench_decoder::<HexByteDecoderA>(c, stringify!(HexByteDecoderA));
}
/// Sanity guard listed as the first entry of `criterion_main!` below.
///
/// NOTE(review): `brisk::simd::if_trace_simd!` presumably expands its body
/// only when SIMD tracing is compiled in, so a traced build would panic here
/// instead of producing skewed timings — confirm against the `brisk` crate.
fn verification() {
brisk::simd::if_trace_simd! {
panic!("Illegal benchmark state: SIMD tracing enabled");
}
}
// Whole-buffer decoding benchmarks, from 16 bytes up to 1.6 MB of output.
criterion_group!(
decode_benches,
bench_16,
bench_256,
bench_2k,
bench_512k,
bench_1_6m
);
// Benchmarks over one batch of digits / digit pairs.
criterion_group!(micro_benches, bench_micro_hex_digit, bench_micro_hex_byte);
// Benchmarks over a single digit / digit pair.
criterion_group!(nano_benches, bench_nano_hex_digit, bench_nano_hex_byte);
// `verification` is not a Criterion group; it is a plain function listed ahead of the groups.
criterion_main!(verification, decode_benches, micro_benches, nano_benches);