Initial commit

This commit is contained in:
Michael Pfaff 2022-10-23 11:28:06 -04:00
commit df17979b94
Signed by: michael
GPG Key ID: CF402C4A012AA9D4
4 changed files with 271 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target/
Cargo.lock

13
Cargo.toml Normal file
View File

@ -0,0 +1,13 @@
[package]
name = "fast-hex"
version = "0.1.0"
edition = "2021"
[dependencies]
[dev-dependencies]
criterion = "0.3"
[[bench]]
name = "bench"
harness = false

35
benches/bench.rs Normal file
View File

@ -0,0 +1,35 @@
#![feature(generic_const_exprs)]
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use fast_hex::*;
// 16-byte ASCII sample; the benchmarks below only use its length to pick `N`.
const ASCII_BYTES: &[u8; 16] = b"Donald J. Trump!";
// Upper-case hex encoding of `ASCII_BYTES` (two hex characters per byte).
const HEX_BYTES: &[u8; ASCII_BYTES.len() * 2] = b"446F6E616C64204A2E205472756D7021";
// The 16-byte sample repeated to fill 256 bytes; again only its length is used.
const ASCII_BYTES_LONG: &[u8; 256] = b"Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!Donald J. Trump!";
// Upper-case hex encoding of `ASCII_BYTES_LONG`; this is the input of the 256-byte benchmarks.
const HEX_BYTES_LONG: &[u8; ASCII_BYTES_LONG.len() * 2] = b"446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021446F6E616C64204A2E205472756D7021";
/// Builds a closure that registers one Criterion benchmark per decoder for `bytes`.
///
/// `bytes` is the ASCII-hex input: `N * 2` characters, i.e. two per decoded byte.
/// The returned closure takes the `Criterion` instance and registers the six
/// decoder variants exported by `fast_hex`, each fed the same input through
/// `black_box`.
///
/// The benchmark ids do not contain `N`, so every instantiation registers the same
/// six ids.
fn benchmark<const N: usize>(bytes: &[u8; N * 2]) -> impl FnOnce(&mut Criterion) -> () + '_
where
    [(); N * 2]:
{
    // `move` copies the `bytes` reference into the closure. Without it the closure
    // only borrows the local `bytes` binding, which does not live past this call,
    // so the closure could not be returned.
    move |c| {
        c.bench_function("sized", |b| b.iter(|| hex_bytes::<N>(black_box(bytes))));
        c.bench_function("dyn unsafe", |b| b.iter(|| hex_bytes_dyn_unsafe(black_box(bytes))));
        c.bench_function("dyn unsafe for", |b| b.iter(|| hex_bytes_dyn_unsafe_for(black_box(bytes))));
        c.bench_function("dyn unsafe iter", |b| b.iter(|| hex_bytes_dyn_unsafe_iter(black_box(bytes))));
        c.bench_function("dyn non-niched", |b| b.iter(|| hex_bytes_dyn(black_box(bytes))));
        c.bench_function("dyn niched", |b| b.iter(|| hex_bytes_dyn_niched(black_box(bytes))));
    }
}
/// Registers the decoder benchmarks for the short sample (`HEX_BYTES`).
pub fn bench_16(c: &mut Criterion) {
    let register = benchmark::<{ ASCII_BYTES.len() }>(HEX_BYTES);
    register(c);
}
/// Registers the decoder benchmarks for the long sample (`HEX_BYTES_LONG`).
pub fn bench_256(c: &mut Criterion) {
    let register = benchmark::<{ ASCII_BYTES_LONG.len() }>(HEX_BYTES_LONG);
    register(c);
}
// Collect both entry points into the `benches` group and hand that group to
// Criterion's entry-point macro (this bench target sets `harness = false`).
criterion_group!(benches, bench_16, bench_256);
criterion_main!(benches);

221
src/lib.rs Normal file
View File

@ -0,0 +1,221 @@
#![feature(array_chunks)]
#![feature(extend_one)]
#![feature(generic_const_exprs)]
#![feature(maybe_uninit_slice)]
#![feature(new_uninit)]
use std::fmt;
use std::mem::MaybeUninit;
/// Builds the lookup table that maps every byte to its hexadecimal digit value.
///
/// Entries for `'0'..='9'`, `'a'..='f'` and `'A'..='F'` hold the digit value
/// (`0..=15`); every other entry holds `255`, the "invalid digit" marker.
#[inline]
const fn __make_ascii_digit_table() -> [u8; 256] {
    // Start from "everything is invalid" so that no entry can be left at a
    // value that looks like a valid digit.
    let mut digits = [255u8; 256];
    // A `usize` counter lets the loop reach index 255; a `u8` counter compared
    // against `u8::MAX` would stop one entry short.
    let mut i = 0usize;
    while i < 256 {
        let ascii = i as u8;
        digits[i] = match ascii {
            b'0'..=b'9' => ascii - b'0',
            // Only a-f / A-F are hexadecimal digits; g-z must stay invalid.
            b'a'..=b'f' => 10 + (ascii - b'a'),
            b'A'..=b'F' => 10 + (ascii - b'A'),
            _ => 255,
        };
        i += 1;
    }
    digits
}

/// Digit value for every possible byte, computed at compile time.
const ASCII_DIGITS: [u8; 256] = __make_ascii_digit_table();
/// Looks up the digit value of one ASCII character in [`ASCII_DIGITS`].
///
/// Returns `255` for characters the table marks as invalid.
#[inline]
pub const fn hex_digit(ascii: u8) -> u8 {
    // A `u8` index can never be out of range for the 256-entry table.
    let index = ascii as usize;
    ASCII_DIGITS[index]
}
/// Parses one byte from a pair of ASCII hex characters.
///
/// `msb` is the character of the high nibble, `lsb` the character of the low nibble.
/// Returns `None` unless both characters decode to a value in `0..=15`.
#[inline]
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
    let high = hex_digit(msb);
    let low = hex_digit(lsb);
    // Reject anything that is not a single nibble. This covers the 255 "invalid"
    // marker as well as any digit value of 16 or more, whose extra bits would
    // otherwise be shifted out of `high` or leak from `low` into the high nibble.
    if high > 0xF || low > 0xF {
        return None;
    }
    Some((high << 4) | low)
}
/// Parses one byte from a pair of ASCII hex characters, reporting failure in-band.
///
/// A result of at most [`u8::MAX`] is the decoded byte; any larger result means the
/// pair was invalid.
#[inline]
pub const fn hex_byte_niched(msb: u8, lsb: u8) -> u16 {
    // Any high digit above 15 pushes the shifted value past `u8::MAX`.
    let high = (hex_digit(msb) as u16) << 4;
    let low = hex_digit(lsb) as u16;
    let low_nibble = low & 0x0f;
    // Bits of the low digit beyond its nibble are moved into the upper byte, so an
    // oversized low digit also yields a result above `u8::MAX`.
    let low_excess = (low & 0xf0) << 8;
    high | low_nibble | low_excess
}
/// Decodes a fixed-size ASCII hex string into `N` bytes.
///
/// `ascii` holds `N * 2` characters; characters `2 * i` and `2 * i + 1` form
/// output byte `i` (high nibble first). Returns `None` as soon as a pair is
/// rejected by [`hex_byte`].
#[inline]
pub const fn hex_bytes<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
    let mut bytes = [0u8; N];
    let mut i = 0;
    while i < N {
        // Each output byte consumes two input characters, so the input index
        // advances twice as fast as the output index.
        bytes[i] = match hex_byte(ascii[i * 2], ascii[i * 2 + 1]) {
            Some(b) => b,
            None => return None,
        };
        i += 1;
    }
    Some(bytes)
}
/// Decodes ASCII hex into a boxed byte slice using a `while` loop and unchecked reads.
///
/// Characters `2 * i` and `2 * i + 1` of `ascii` form output byte `i` (high nibble
/// first). Returns `None` if `ascii` has odd length or any pair is rejected by
/// [`hex_byte`].
pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }
    let len = ascii.len() / 2;
    let mut bytes = Box::<[u8]>::new_uninit_slice(len);
    let mut i = 0;
    while i < len {
        // SAFETY: `i < len` and `ascii.len() == 2 * len`, so both `2 * i` and
        // `2 * i + 1` are in bounds.
        let (msb, lsb) = unsafe { (*ascii.get_unchecked(2 * i), *ascii.get_unchecked(2 * i + 1)) };
        bytes[i] = MaybeUninit::new(hex_byte(msb, lsb)?);
        i += 1;
    }
    // SAFETY: the loop ran to completion, so every element has been written.
    Some(unsafe { bytes.assume_init() })
}
/// Decodes ASCII hex into a boxed byte slice using a `for` loop and unchecked reads.
///
/// Characters `2 * i` and `2 * i + 1` of `ascii` form output byte `i` (high nibble
/// first). Returns `None` if `ascii` has odd length or any pair is rejected by
/// [`hex_byte`].
pub fn hex_bytes_dyn_unsafe_for(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }
    let len = ascii.len() / 2;
    let mut bytes = Box::<[u8]>::new_uninit_slice(len);
    for i in 0..len {
        // SAFETY: `i < len` and `ascii.len() == 2 * len`, so both `2 * i` and
        // `2 * i + 1` are in bounds.
        let (msb, lsb) = unsafe { (*ascii.get_unchecked(2 * i), *ascii.get_unchecked(2 * i + 1)) };
        bytes[i] = MaybeUninit::new(hex_byte(msb, lsb)?);
    }
    // SAFETY: the loop ran to completion, so every element has been written.
    Some(unsafe { bytes.assume_init() })
}
/// Decodes ASCII hex into a boxed byte slice, walking the input in two-byte chunks
/// and writing the output with unchecked stores.
///
/// Returns `None` if `ascii` has odd length or any pair is rejected by [`hex_byte`].
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
    let out_len = ascii.len() / 2;
    if out_len * 2 != ascii.len() {
        return None;
    }
    let mut out = Box::<[u8]>::new_uninit_slice(out_len);
    for (index, [msb, lsb]) in ascii.array_chunks::<2>().enumerate() {
        let byte = hex_byte(*msb, *lsb)?;
        // SAFETY: the input yields exactly `out_len` chunks, so `index < out.len()`.
        unsafe { *out.get_unchecked_mut(index) = MaybeUninit::new(byte) };
    }
    // SAFETY: the loop ran to completion, so every element has been written.
    Some(unsafe { std::mem::transmute(out) })
}
/// Decodes ASCII hex into a boxed byte slice using only safe iterator code.
///
/// Returns `None` if `ascii` has odd length or any pair is rejected by [`hex_byte`].
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
    let pairs = ascii.array_chunks::<2>();
    // A leftover byte means the input length was odd.
    if !pairs.remainder().is_empty() {
        return None;
    }
    // Collecting into `Option<Vec<_>>` stops at the first invalid pair.
    let decoded: Option<Vec<u8>> = pairs.map(|&[msb, lsb]| hex_byte(msb, lsb)).collect();
    decoded.map(Vec::into_boxed_slice)
}
/// Adapter that implements [`Extend`] by forwarding to a mutably borrowed collection.
struct ExtendRef<'a, T>(&'a mut T);

impl<T, A> Extend<A> for ExtendRef<'_, T>
where
    T: Extend<A>,
{
    /// Forwards to the wrapped collection's `extend`.
    #[inline(always)]
    fn extend<I: IntoIterator<Item = A>>(&mut self, iter: I) {
        <T as Extend<A>>::extend(&mut *self.0, iter)
    }

    /// Forwards to the wrapped collection's `extend_one`.
    #[inline(always)]
    fn extend_one(&mut self, item: A) {
        <T as Extend<A>>::extend_one(&mut *self.0, item)
    }

    /// Forwards to the wrapped collection's `extend_reserve`.
    #[inline(always)]
    fn extend_reserve(&mut self, additional: usize) {
        <T as Extend<A>>::extend_reserve(&mut *self.0, additional)
    }
}
/// Decodes ASCII hex into a boxed byte slice via [`hex_byte_niched`].
///
/// Returns `None` if `ascii` has odd length or any pair decodes to a value that
/// does not fit in a `u8`.
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
    let pairs = ascii.array_chunks::<2>();
    // A leftover byte means the input length was odd.
    if !pairs.remainder().is_empty() {
        return None;
    }
    let decoded: Option<Vec<u8>> = pairs
        .map(|&[msb, lsb]| {
            let word = hex_byte_niched(msb, lsb);
            // The narrowing conversion fails exactly when `word` exceeds `u8::MAX`,
            // and that failure becomes `None`.
            <u8 as std::convert::TryFrom<u16>>::try_from(word).ok()
        })
        .collect();
    decoded.map(Vec::into_boxed_slice)
}
#[cfg(test)]
mod test {
    use super::*;

    // Expected decoded bytes and their upper-case hex encoding.
    const ASCII_BYTES: &[u8] = b"Donald J. Trump!";
    const HEX_BYTES: &[u8] = b"446F6E616C64204A2E205472756D7021";

    /// `hex_bytes_dyn` decodes the sample back to its ASCII bytes.
    #[test]
    fn test_non_niched() {
        let decoded = hex_bytes_dyn(HEX_BYTES);
        assert_eq!(Some(ASCII_BYTES), decoded.as_deref());
    }

    /// `hex_bytes_dyn_niched` decodes the sample back to its ASCII bytes.
    #[test]
    fn test_niched() {
        let decoded = hex_bytes_dyn_niched(HEX_BYTES);
        assert_eq!(Some(ASCII_BYTES), decoded.as_deref());
    }
}