Work
This commit is contained in:
parent
c6b4790bdf
commit
964fc7073c
|
@ -14,7 +14,10 @@ const HEX_BYTES_LONG: &[u8; ASCII_BYTES_LONG.len() * 2] = b"446F6E616C64204A2E20
|
||||||
|
|
||||||
// Builds a benchmark label of the form "<group> - <f>".
macro_rules! name {
|
macro_rules! name {
|
||||||
// Arm for a group given as an identifier (a runtime value): the label must be formatted at run time.
($group:ident, $f:literal) => {
|
($group:ident, $f:literal) => {
|
||||||
std::boxed::Box::leak(format!(concat!("{} - ", $f), $group).into_boxed_str())
|
// Reuses the literal arm to produce the "{} - <f>" format string, then leaks the
// boxed result so the label is never freed.
std::boxed::Box::leak(format!(name!("{}", $f), $group).into_boxed_str())
|
||||||
|
};
|
||||||
|
// Arm for a group given as a literal: the whole label is assembled by `concat!`.
($group:literal, $f:literal) => {
|
||||||
|
concat!($group, " - ", $f)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,6 +69,13 @@ fn test(hex_bytes: &[u8], bytes: &[u8]) {
|
||||||
.map(Box::as_ref),
|
.map(Box::as_ref),
|
||||||
stringify!(hex_bytes_dyn_unsafe_iter)
|
stringify!(hex_bytes_dyn_unsafe_iter)
|
||||||
);
|
);
|
||||||
|
assert_eq!(
|
||||||
|
Some(bytes),
|
||||||
|
hex_bytes_dyn_unsafe_iter_niched(hex_bytes)
|
||||||
|
.as_ref()
|
||||||
|
.map(Box::as_ref),
|
||||||
|
stringify!(hex_bytes_dyn_unsafe_iter_niched)
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Some(bytes),
|
Some(bytes),
|
||||||
hex_bytes_dyn_unsafe(hex_bytes).as_ref().map(Box::as_ref),
|
hex_bytes_dyn_unsafe(hex_bytes).as_ref().map(Box::as_ref),
|
||||||
|
@ -96,8 +106,9 @@ fn benchmark_sized<const N: usize, const HEAP_ONLY: bool>(
|
||||||
|
|
||||||
const BENCH_UNSAFE: bool = true;
|
const BENCH_UNSAFE: bool = true;
|
||||||
const BENCH_UNSAFE_ITER: bool = true;
|
const BENCH_UNSAFE_ITER: bool = true;
|
||||||
const BENCH_NON_NICHED: bool = false;
|
const BENCH_UNSAFE_ITER_NICHED: bool = true;
|
||||||
const BENCH_NICHED: bool = false;
|
const BENCH_NON_NICHED: bool = true;
|
||||||
|
const BENCH_NICHED: bool = true;
|
||||||
|
|
||||||
fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
|
fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
|
||||||
if BENCH_UNSAFE {
|
if BENCH_UNSAFE {
|
||||||
|
@ -111,6 +122,11 @@ fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
|
||||||
b.iter(|| hex_bytes_dyn_unsafe_iter(black_box(bytes)))
|
b.iter(|| hex_bytes_dyn_unsafe_iter(black_box(bytes)))
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if BENCH_UNSAFE_ITER_NICHED {
|
||||||
|
c.bench_function(name!(name, "dyn unsafe iter niched"), |b| {
|
||||||
|
b.iter(|| hex_bytes_dyn_unsafe_iter_niched(black_box(bytes)))
|
||||||
|
});
|
||||||
|
}
|
||||||
if BENCH_NON_NICHED {
|
if BENCH_NON_NICHED {
|
||||||
c.bench_function(name!(name, "dyn non-niched"), |b| {
|
c.bench_function(name!(name, "dyn non-niched"), |b| {
|
||||||
b.iter(|| hex_bytes_dyn(black_box(bytes)))
|
b.iter(|| hex_bytes_dyn(black_box(bytes)))
|
||||||
|
@ -243,5 +259,16 @@ pub fn bench_1_6m(c: &mut Criterion) {
|
||||||
benchmark_sized::<LEN, true>("[1.6m]", &hex_bytes, c);
|
benchmark_sized::<LEN, true>("[1.6m]", &hex_bytes, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, bench_16, bench_256, bench_1_6m);
|
/// Micro-benchmarks `hex_byte` and `hex_byte_niched` on one fixed pair of ASCII digits.
///
/// Registers two criterion benchmarks, labelled via `name!("micro", ...)`.
pub fn bench_hex_digit(c: &mut Criterion) {
|
||||||
criterion_main!(benches);
|
// The pair '5', 'b' as ASCII bytes; both calls below decode this same input.
let digit = ['5' as u8, 'b' as u8];
|
||||||
|
c.bench_function(name!("micro", "hex_byte"), |b| {
|
||||||
|
// Each digit is passed through `black_box` individually inside the timed closure.
b.iter(|| hex_byte(black_box(digit[0]), black_box(digit[1])))
|
||||||
|
});
|
||||||
|
c.bench_function(name!("micro", "hex_byte_niched"), |b| {
|
||||||
|
b.iter(|| hex_byte_niched(black_box(digit[0]), black_box(digit[1])))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group holding the three size-based benchmark functions.
criterion_group!(decode_benches, bench_16, bench_256, bench_1_6m);
|
||||||
|
// Separate group holding only `bench_hex_digit`.
criterion_group!(micro_benches, bench_hex_digit);
|
||||||
|
// Both groups are passed to criterion's generated entry point.
criterion_main!(decode_benches, micro_benches);
|
||||||
|
|
114
src/lib.rs
114
src/lib.rs
|
@ -73,7 +73,7 @@ pub const fn hex_digit(ascii: u8) -> u8 {
|
||||||
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
|
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
|
||||||
let msb = hex_digit(msb);
|
let msb = hex_digit(msb);
|
||||||
let lsb = hex_digit(lsb);
|
let lsb = hex_digit(lsb);
|
||||||
if msb == 255 || lsb == 255 {
|
if (msb | lsb) == 255 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
Some(msb << 4 | lsb)
|
Some(msb << 4 | lsb)
|
||||||
|
@ -87,6 +87,25 @@ pub const fn hex_byte_niched(msb: u8, lsb: u8) -> u16 {
|
||||||
(msb << 4) | (lsb & 0xf) | ((lsb & 0xf0) << 8)
|
(msb << 4) | (lsb & 0xf) | ((lsb & 0xf0) << 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses an ascii hex byte.
|
||||||
|
#[inline]
|
||||||
|
pub const fn hex_byte_packed([msb, lsb]: &[u8; 2]) -> Option<u8> {
|
||||||
|
let msb = hex_digit(*msb);
|
||||||
|
let lsb = hex_digit(*lsb);
|
||||||
|
if (msb | lsb) == 255 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(msb << 4 | lsb)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an ascii hex byte. Any value > [`u8::MAX`] is invalid.
|
||||||
|
#[inline]
|
||||||
|
pub const fn hex_byte_packed_niched([msb, lsb]: &[u8; 2]) -> u16 {
|
||||||
|
let msb = hex_digit(*msb) as u16;
|
||||||
|
let lsb = hex_digit(*lsb) as u16;
|
||||||
|
(msb << 4) | (lsb & 0xf) | ((lsb & 0xf0) << 8)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
const fn align_down_to<const N: usize>(n: usize) -> usize {
|
const fn align_down_to<const N: usize>(n: usize) -> usize {
|
||||||
let shift = match N.checked_ilog2() {
|
let shift = match N.checked_ilog2() {
|
||||||
|
@ -107,19 +126,22 @@ const fn align_up_to<const N: usize>(n: usize) -> usize {
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
|
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
|
||||||
|
debug_assert_eq!(ascii.len() >> 1 << 1, ascii.len(), "len of ascii is not a multiple of 2");
|
||||||
|
debug_assert_eq!(ascii.len() >> 1, bytes.len(), "len of ascii is not twice that of bytes");
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
// use the maximum batch size that would be supported by AVX-512
|
// use the maximum batch size that would be supported by AVX-512
|
||||||
const BATCH_SIZE: usize = 512 / 16;
|
const BATCH_SIZE: usize = 512 / 16;
|
||||||
const VECTORED: bool = false;
|
const VECTORED: bool = true;
|
||||||
if VECTORED {
|
if VECTORED {
|
||||||
while i < align_down_to::<BATCH_SIZE>(bytes.len()) {
|
while i < align_down_to::<BATCH_SIZE>(bytes.len()) {
|
||||||
let mut buf = MaybeUninit::<u16>::uninit_array::<BATCH_SIZE>();
|
let mut buf = MaybeUninit::<u16>::uninit_array::<BATCH_SIZE>();
|
||||||
let mut j = 0;
|
let mut j = 0;
|
||||||
while j < buf.len() {
|
while j < buf.len() {
|
||||||
*unsafe { buf.get_unchecked_mut(j) } = MaybeUninit::new(hex_byte_niched(
|
unsafe {
|
||||||
unsafe { *ascii.get_unchecked((i + j) << 1) },
|
*buf.get_unchecked_mut(j) = MaybeUninit::new(hex_byte_packed_niched(
|
||||||
unsafe { *ascii.get_unchecked(((i + j) << 1) + 1) },
|
&*(ascii.as_ptr().add((i + j) << 1) as *const [u8; 2]),
|
||||||
));
|
))
|
||||||
|
};
|
||||||
j += 1;
|
j += 1;
|
||||||
}
|
}
|
||||||
let buf = unsafe { MaybeUninit::array_assume_init(buf) };
|
let buf = unsafe { MaybeUninit::array_assume_init(buf) };
|
||||||
|
@ -191,9 +213,9 @@ pub fn hex_bytes_sized_heap<const N: usize>(ascii: &[u8; N * 2]) -> Option<Box<[
|
||||||
Some(Box::new([0u8; N]))
|
Some(Box::new([0u8; N]))
|
||||||
} else {
|
} else {
|
||||||
let mut bytes: Box<[MaybeUninit<u8>; N]> =
|
let mut bytes: Box<[MaybeUninit<u8>; N]> =
|
||||||
unsafe { std::mem::transmute(Box::<[u8; N]>::new_uninit()) };
|
unsafe { Box::<[MaybeUninit<u8>; N]>::new_uninit().assume_init() };
|
||||||
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
|
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
|
||||||
Some(unsafe { std::mem::transmute(bytes) })
|
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8; N]) })
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
@ -206,17 +228,11 @@ pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
|
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
|
||||||
let mut i = 0;
|
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
|
||||||
while i < bytes.len() {
|
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
|
||||||
match hex_byte(unsafe { *ascii.get_unchecked(i << 1) }, unsafe {
|
} else {
|
||||||
*ascii.get_unchecked((i << 1) + 1)
|
None
|
||||||
}) {
|
|
||||||
Some(b) => bytes[i] = MaybeUninit::new(b),
|
|
||||||
None => return None,
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
}
|
||||||
Some(unsafe { std::mem::transmute(bytes) })
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
|
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
|
@ -236,21 +252,30 @@ pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(unsafe { std::mem::transmute(bytes) })
|
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decodes ASCII hex pairs into a freshly allocated byte slice using `hex_byte_niched`.
///
/// Returns `None` when `ascii` has an odd length, or when any pair decodes to a value
/// with bits set above the low byte.
pub fn hex_bytes_dyn_unsafe_iter_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
|
// Number of output bytes: one per two input characters.
let len = ascii.len() >> 1;
|
||||||
|
// Doubling `len` only differs from the input length when that length is odd.
if len << 1 != ascii.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// The output buffer starts uninitialized; the loop below writes slot `i` for each pair.
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
|
||||||
|
for (i, b) in ascii
|
||||||
|
.array_chunks::<2>()
|
||||||
|
.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
// Any bit in the high byte marks the pair as invalid.
if b & 0xff_00 == 0 {
|
||||||
|
// SAFETY: `array_chunks::<2>` yields `ascii.len() / 2 == len` chunks, so `i < bytes.len()`.
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b as u8) };
|
||||||
|
} else {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// SAFETY: the loop finished without an early return, so all `len` slots were written;
// the pointer cast only removes the `MaybeUninit` wrapper from the element type.
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
|
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
// let mut bytes: Box<[u8]> = unsafe { std::mem::transmute(Box::<[u8]>::new_zeroed_slice(len)) };
|
|
||||||
// for (i, o) in ascii.array_chunks::<2>()
|
|
||||||
// .map(|[msb, lsb]| hex_byte(msb, lsb))
|
|
||||||
// .enumerate() {
|
|
||||||
// if let Some(b) = o {
|
|
||||||
// bytes[i] = b;
|
|
||||||
// } else {
|
|
||||||
// return None;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// Some(bytes)
|
|
||||||
let iter = ascii.array_chunks::<2>();
|
let iter = ascii.array_chunks::<2>();
|
||||||
if iter.remainder().len() != 0 {
|
if iter.remainder().len() != 0 {
|
||||||
return None;
|
return None;
|
||||||
|
@ -260,36 +285,13 @@ pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
.map(|v| v.into_boxed_slice())
|
.map(|v| v.into_boxed_slice())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Wrapper around a mutable reference that implements `Extend` by forwarding to the referenced value.
struct ExtendRef<'a, T>(&'a mut T);
|
|
||||||
|
|
||||||
// Pure delegation: every method calls the same-named method on the wrapped `T`.
// NOTE(review): `extend_one` and `extend_reserve` are unstable `Extend` methods —
// confirm the crate enables the corresponding feature.
impl<'a, T, A> Extend<A> for ExtendRef<'a, T>
|
|
||||||
where
|
|
||||||
T: Extend<A>,
|
|
||||||
{
|
|
||||||
#[inline(always)]
|
|
||||||
fn extend<I: IntoIterator<Item = A>>(&mut self, iter: I) {
|
|
||||||
self.0.extend(iter)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn extend_one(&mut self, item: A) {
|
|
||||||
self.0.extend_one(item)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn extend_reserve(&mut self, additional: usize) {
|
|
||||||
self.0.extend_reserve(additional)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Decodes ASCII hex pairs into a boxed byte slice via `hex_byte_niched`, collecting through a `Vec`.
///
/// Returns `None` when `ascii` has a trailing unpaired byte, or when any pair decodes
/// to a value that does not fit in a `u8`.
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
|
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||||
let iter = ascii.array_chunks::<2>();
|
let iter = ascii.array_chunks::<2>();
|
||||||
// A non-empty remainder means the input length is odd.
if iter.remainder().len() != 0 {
|
if iter.remainder().len() != 0 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
iter.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
|
iter.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
|
||||||
.map(std::convert::TryFrom::try_from)
|
// Keep the low byte only when none of the upper eight bits are set.
.map(|b| if b & 0xff_00 == 0 { Some(b as u8) } else { None })
|
||||||
.map(Result::ok)
|
|
||||||
// Collecting into `Option<Vec<_>>` yields `None` as soon as one element is `None`.
.collect::<Option<Vec<u8>>>()
|
.collect::<Option<Vec<u8>>>()
|
||||||
.map(|v| v.into_boxed_slice())
|
.map(|v| v.into_boxed_slice())
|
||||||
}
|
}
|
||||||
|
@ -313,6 +315,10 @@ mod test {
|
||||||
for (i, digit) in HEX_DIGITS_LOWER.into_iter().enumerate() {
|
for (i, digit) in HEX_DIGITS_LOWER.into_iter().enumerate() {
|
||||||
assert_eq!(hex_digit(*digit as u8), i as u8);
|
assert_eq!(hex_digit(*digit as u8), i as u8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i, digit) in HEX_DIGITS_UPPER.into_iter().enumerate() {
|
||||||
|
assert_eq!(hex_digit(*digit as u8), i as u8);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in New Issue