Work
This commit is contained in:
parent
c6b4790bdf
commit
964fc7073c
|
@ -14,7 +14,10 @@ const HEX_BYTES_LONG: &[u8; ASCII_BYTES_LONG.len() * 2] = b"446F6E616C64204A2E20
|
|||
|
||||
// Builds a benchmark label of the form "<group> - <f>".
// NOTE(review): this span is rendered diff output; the two `Box::leak` lines in the first arm
// look like the before/after versions of a single line rather than two statements — confirm
// against the repository before treating either one as the live code.
macro_rules! name {
|
||||
// Group given as an identifier (a runtime value): format the label, then leak the boxed
// string so the returned reference stays valid after the macro expression ends.
($group:ident, $f:literal) => {
|
||||
std::boxed::Box::leak(format!(concat!("{} - ", $f), $group).into_boxed_str())
|
||||
std::boxed::Box::leak(format!(name!("{}", $f), $group).into_boxed_str())
|
||||
};
|
||||
// Group given as a literal: the whole label is assembled at compile time by `concat!`.
($group:literal, $f:literal) => {
|
||||
concat!($group, " - ", $f)
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -66,6 +69,13 @@ fn test(hex_bytes: &[u8], bytes: &[u8]) {
|
|||
.map(Box::as_ref),
|
||||
stringify!(hex_bytes_dyn_unsafe_iter)
|
||||
);
|
||||
assert_eq!(
|
||||
Some(bytes),
|
||||
hex_bytes_dyn_unsafe_iter_niched(hex_bytes)
|
||||
.as_ref()
|
||||
.map(Box::as_ref),
|
||||
stringify!(hex_bytes_dyn_unsafe_iter_niched)
|
||||
);
|
||||
assert_eq!(
|
||||
Some(bytes),
|
||||
hex_bytes_dyn_unsafe(hex_bytes).as_ref().map(Box::as_ref),
|
||||
|
@ -96,8 +106,9 @@ fn benchmark_sized<const N: usize, const HEAP_ONLY: bool>(
|
|||
|
||||
// Compile-time switches; `benchmark` tests them with plain `if` to decide which decoder
// variants get a `bench_function` registration.
// NOTE(review): `BENCH_NON_NICHED` and `BENCH_NICHED` appear twice below, presumably because
// this is diff output showing the old (`false`) and new (`true`) values — confirm.
const BENCH_UNSAFE: bool = true;
|
||||
const BENCH_UNSAFE_ITER: bool = true;
|
||||
const BENCH_NON_NICHED: bool = false;
|
||||
const BENCH_NICHED: bool = false;
|
||||
const BENCH_UNSAFE_ITER_NICHED: bool = true;
|
||||
const BENCH_NON_NICHED: bool = true;
|
||||
const BENCH_NICHED: bool = true;
|
||||
|
||||
fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
|
||||
if BENCH_UNSAFE {
|
||||
|
@ -111,6 +122,11 @@ fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
|
|||
b.iter(|| hex_bytes_dyn_unsafe_iter(black_box(bytes)))
|
||||
});
|
||||
}
|
||||
if BENCH_UNSAFE_ITER_NICHED {
|
||||
c.bench_function(name!(name, "dyn unsafe iter niched"), |b| {
|
||||
b.iter(|| hex_bytes_dyn_unsafe_iter_niched(black_box(bytes)))
|
||||
});
|
||||
}
|
||||
if BENCH_NON_NICHED {
|
||||
c.bench_function(name!(name, "dyn non-niched"), |b| {
|
||||
b.iter(|| hex_bytes_dyn(black_box(bytes)))
|
||||
|
@ -243,5 +259,16 @@ pub fn bench_1_6m(c: &mut Criterion) {
|
|||
benchmark_sized::<LEN, true>("[1.6m]", &hex_bytes, c);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_16, bench_256, bench_1_6m);
|
||||
criterion_main!(benches);
|
||||
pub fn bench_hex_digit(c: &mut Criterion) {
|
||||
let digit = ['5' as u8, 'b' as u8];
|
||||
c.bench_function(name!("micro", "hex_byte"), |b| {
|
||||
b.iter(|| hex_byte(black_box(digit[0]), black_box(digit[1])))
|
||||
});
|
||||
c.bench_function(name!("micro", "hex_byte_niched"), |b| {
|
||||
b.iter(|| hex_byte_niched(black_box(digit[0]), black_box(digit[1])))
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(decode_benches, bench_16, bench_256, bench_1_6m);
|
||||
criterion_group!(micro_benches, bench_hex_digit);
|
||||
criterion_main!(decode_benches, micro_benches);
|
||||
|
|
114
src/lib.rs
114
src/lib.rs
|
@ -73,7 +73,7 @@ pub const fn hex_digit(ascii: u8) -> u8 {
|
|||
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
|
||||
let msb = hex_digit(msb);
|
||||
let lsb = hex_digit(lsb);
|
||||
if msb == 255 || lsb == 255 {
|
||||
if (msb | lsb) == 255 {
|
||||
return None;
|
||||
}
|
||||
Some(msb << 4 | lsb)
|
||||
|
@ -87,6 +87,25 @@ pub const fn hex_byte_niched(msb: u8, lsb: u8) -> u16 {
|
|||
(msb << 4) | (lsb & 0xf) | ((lsb & 0xf0) << 8)
|
||||
}
|
||||
|
||||
/// Parses an ascii hex byte.
|
||||
#[inline]
|
||||
pub const fn hex_byte_packed([msb, lsb]: &[u8; 2]) -> Option<u8> {
|
||||
let msb = hex_digit(*msb);
|
||||
let lsb = hex_digit(*lsb);
|
||||
if (msb | lsb) == 255 {
|
||||
return None;
|
||||
}
|
||||
Some(msb << 4 | lsb)
|
||||
}
|
||||
|
||||
/// Parses an ascii hex byte. Any value > [`u8::MAX`] is invalid.
|
||||
#[inline]
|
||||
pub const fn hex_byte_packed_niched([msb, lsb]: &[u8; 2]) -> u16 {
|
||||
let msb = hex_digit(*msb) as u16;
|
||||
let lsb = hex_digit(*lsb) as u16;
|
||||
(msb << 4) | (lsb & 0xf) | ((lsb & 0xf0) << 8)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
const fn align_down_to<const N: usize>(n: usize) -> usize {
|
||||
let shift = match N.checked_ilog2() {
|
||||
|
@ -107,19 +126,22 @@ const fn align_up_to<const N: usize>(n: usize) -> usize {
|
|||
|
||||
#[inline(always)]
|
||||
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
|
||||
debug_assert_eq!(ascii.len() >> 1 << 1, ascii.len(), "len of ascii is not a multiple of 2");
|
||||
debug_assert_eq!(ascii.len() >> 1, bytes.len(), "len of ascii is not twice that of bytes");
|
||||
let mut i = 0;
|
||||
// use the maximum batch size that would be supported by AVX-512
|
||||
const BATCH_SIZE: usize = 512 / 16;
|
||||
const VECTORED: bool = false;
|
||||
const VECTORED: bool = true;
|
||||
if VECTORED {
|
||||
while i < align_down_to::<BATCH_SIZE>(bytes.len()) {
|
||||
let mut buf = MaybeUninit::<u16>::uninit_array::<BATCH_SIZE>();
|
||||
let mut j = 0;
|
||||
while j < buf.len() {
|
||||
*unsafe { buf.get_unchecked_mut(j) } = MaybeUninit::new(hex_byte_niched(
|
||||
unsafe { *ascii.get_unchecked((i + j) << 1) },
|
||||
unsafe { *ascii.get_unchecked(((i + j) << 1) + 1) },
|
||||
));
|
||||
unsafe {
|
||||
*buf.get_unchecked_mut(j) = MaybeUninit::new(hex_byte_packed_niched(
|
||||
&*(ascii.as_ptr().add((i + j) << 1) as *const [u8; 2]),
|
||||
))
|
||||
};
|
||||
j += 1;
|
||||
}
|
||||
let buf = unsafe { MaybeUninit::array_assume_init(buf) };
|
||||
|
@ -191,9 +213,9 @@ pub fn hex_bytes_sized_heap<const N: usize>(ascii: &[u8; N * 2]) -> Option<Box<[
|
|||
Some(Box::new([0u8; N]))
|
||||
} else {
|
||||
let mut bytes: Box<[MaybeUninit<u8>; N]> =
|
||||
unsafe { std::mem::transmute(Box::<[u8; N]>::new_uninit()) };
|
||||
unsafe { Box::<[MaybeUninit<u8>; N]>::new_uninit().assume_init() };
|
||||
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
|
||||
Some(unsafe { std::mem::transmute(bytes) })
|
||||
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8; N]) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
@ -206,17 +228,11 @@ pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
|
|||
return None;
|
||||
}
|
||||
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
match hex_byte(unsafe { *ascii.get_unchecked(i << 1) }, unsafe {
|
||||
*ascii.get_unchecked((i << 1) + 1)
|
||||
}) {
|
||||
Some(b) => bytes[i] = MaybeUninit::new(b),
|
||||
None => return None,
|
||||
}
|
||||
i += 1;
|
||||
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
|
||||
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
Some(unsafe { std::mem::transmute(bytes) })
|
||||
}
|
||||
|
||||
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||
|
@ -236,21 +252,30 @@ pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
|
|||
return None;
|
||||
}
|
||||
}
|
||||
Some(unsafe { std::mem::transmute(bytes) })
|
||||
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
|
||||
}
|
||||
|
||||
pub fn hex_bytes_dyn_unsafe_iter_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||
let len = ascii.len() >> 1;
|
||||
if len << 1 != ascii.len() {
|
||||
return None;
|
||||
}
|
||||
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
|
||||
for (i, b) in ascii
|
||||
.array_chunks::<2>()
|
||||
.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
|
||||
.enumerate()
|
||||
{
|
||||
if b & 0xff_00 == 0 {
|
||||
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b as u8) };
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8]) })
|
||||
}
|
||||
|
||||
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||
// let mut bytes: Box<[u8]> = unsafe { std::mem::transmute(Box::<[u8]>::new_zeroed_slice(len)) };
|
||||
// for (i, o) in ascii.array_chunks::<2>()
|
||||
// .map(|[msb, lsb]| hex_byte(msb, lsb))
|
||||
// .enumerate() {
|
||||
// if let Some(b) = o {
|
||||
// bytes[i] = b;
|
||||
// } else {
|
||||
// return None;
|
||||
// }
|
||||
// }
|
||||
// Some(bytes)
|
||||
let iter = ascii.array_chunks::<2>();
|
||||
if iter.remainder().len() != 0 {
|
||||
return None;
|
||||
|
@ -260,36 +285,13 @@ pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
|
|||
.map(|v| v.into_boxed_slice())
|
||||
}
|
||||
|
||||
/// Wrapper around a mutable borrow of `T` that implements [`Extend`] whenever `T` does,
/// forwarding every call to the borrowed value.
// NOTE(review): the surrounding hunk header suggests this helper is deleted by the commit —
// confirm before relying on it.
struct ExtendRef<'a, T>(&'a mut T);
|
||||
|
||||
impl<'a, T, A> Extend<A> for ExtendRef<'a, T>
|
||||
where
|
||||
T: Extend<A>,
|
||||
{
|
||||
// Forwards the whole iterator to the wrapped value.
#[inline(always)]
|
||||
fn extend<I: IntoIterator<Item = A>>(&mut self, iter: I) {
|
||||
self.0.extend(iter)
|
||||
}
|
||||
|
||||
// Forwards a single item to the wrapped value.
#[inline(always)]
|
||||
fn extend_one(&mut self, item: A) {
|
||||
self.0.extend_one(item)
|
||||
}
|
||||
|
||||
// Forwards the capacity hint to the wrapped value.
#[inline(always)]
|
||||
fn extend_reserve(&mut self, additional: usize) {
|
||||
self.0.extend_reserve(additional)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Decodes an ASCII hex string into a boxed byte slice using [`hex_byte_niched`].
///
/// Returns `None` if `ascii` has an odd length or if collecting the per-chunk results
/// into `Option<Vec<u8>>` hits a `None`.
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
|
||||
let iter = ascii.array_chunks::<2>();
|
||||
// A leftover element means the input length is odd.
if iter.remainder().len() != 0 {
|
||||
return None;
|
||||
}
|
||||
iter.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
|
||||
// NOTE(review): the next three `.map` lines look like diff output — the `try_from` /
// `Result::ok` pair and the `b & 0xff_00` check appear to be the old and new ways of
// narrowing the `u16` to `Option<u8>`, not steps that are chained together. Confirm.
.map(std::convert::TryFrom::try_from)
|
||||
.map(Result::ok)
|
||||
.map(|b| if b & 0xff_00 == 0 { Some(b as u8) } else { None })
|
||||
// Collecting into `Option<Vec<u8>>` stops at the first `None`.
.collect::<Option<Vec<u8>>>()
|
||||
.map(|v| v.into_boxed_slice())
|
||||
}
|
||||
|
@ -313,6 +315,10 @@ mod test {
|
|||
for (i, digit) in HEX_DIGITS_LOWER.into_iter().enumerate() {
|
||||
assert_eq!(hex_digit(*digit as u8), i as u8);
|
||||
}
|
||||
|
||||
for (i, digit) in HEX_DIGITS_UPPER.into_iter().enumerate() {
|
||||
assert_eq!(hex_digit(*digit as u8), i as u8);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in New Issue