// fast-hex/src/lib.rs
//
// 222 lines
// 6.2 KiB
// Rust

#![feature(array_chunks)]
#![feature(extend_one)]
#![feature(generic_const_exprs)]
#![feature(maybe_uninit_slice)]
#![feature(new_uninit)]
use std::fmt;
use std::mem::MaybeUninit;
/// Builds the byte-indexed lookup table used to decode ASCII hex digits.
///
/// Entry `b` holds the value (`0..=15`) of the hex digit whose ASCII code is
/// `b` (`'0'..='9'`, `'a'..='f'` or `'A'..='F'`) and the sentinel `255` for
/// every other byte, including `0xFF` itself.
const fn __make_ascii_digit_table() -> [u8; 256] {
    /// Sentinel stored for bytes that are not hex digits.
    const INVALID: u8 = 255;

    // Start with every slot invalid so that no index is left at zero. A `u8`
    // counter bounded by `u8::MAX` never reaches index 255, which previously
    // made byte 0xFF decode as the digit 0.
    let mut digits = [INVALID; 256];

    let mut offset = 0u8;
    while offset < 10 {
        digits[(b'0' + offset) as usize] = offset;
        offset += 1;
    }

    // Only the six letters a-f / A-F are hex digits. Letters beyond `f` must
    // stay invalid, otherwise they yield values that do not fit in a nibble.
    let mut offset = 0u8;
    while offset < 6 {
        digits[(b'a' + offset) as usize] = 10 + offset;
        digits[(b'A' + offset) as usize] = 10 + offset;
        offset += 1;
    }

    digits
}
/// Lookup table indexed by byte value, evaluated at compile time by
/// `__make_ascii_digit_table`.
const ASCII_DIGITS: [u8; 256] = __make_ascii_digit_table();
/// Decodes a single ASCII digit by looking it up in `ASCII_DIGITS`.
///
/// A result of `255` means `ascii` is not a digit.
#[inline]
pub const fn hex_digit(ascii: u8) -> u8 {
    // A `u8` can only index slots 0..=255 of the 256-entry table, so the
    // lookup cannot go out of bounds.
    let slot = ascii as usize;
    ASCII_DIGITS[slot]
}
/// Parses an ASCII hex byte from its two digit characters.
///
/// `msb` is the character of the high nibble and `lsb` the character of the
/// low nibble. Returns `None` unless both decode (via `hex_digit`) to a value
/// in `0..=15`.
#[inline]
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
    let hi = hex_digit(msb);
    let lo = hex_digit(lsb);
    // Reject anything that does not fit in a nibble, not only the 255
    // sentinel: a larger high digit would be silently truncated by the shift
    // and a larger low digit would spill into the high nibble.
    if hi > 0xF || lo > 0xF {
        return None;
    }
    Some((hi << 4) | lo)
}
/// Parses an ascii hex byte into a `u16`. Any value > [`u8::MAX`] is invalid.
///
/// `msb` and `lsb` are the characters of the high and low nibble. Instead of
/// branching, failures are folded into the bits above the low byte of the
/// result.
#[inline]
pub const fn hex_byte_niched(msb: u8, lsb: u8) -> u16 {
    let hi = hex_digit(msb) as u16;
    let lo = hex_digit(lsb) as u16;

    // A high digit that is not a nibble shifts past bit 7 on its own.
    let high_part = hi << 4;
    let low_nibble = lo & 0x000f;
    // Bits of the low digit above the nibble are relocated past bit 7 so they
    // mark the result as invalid rather than corrupting the high nibble.
    let low_excess = (lo & 0x00f0) << 8;

    high_part | low_nibble | low_excess
}
/// Parses `2 * N` ASCII hex characters into `N` bytes.
///
/// Characters are consumed in pairs, high nibble first. Returns `None` as soon
/// as `hex_byte` rejects a pair.
#[inline]
pub const fn hex_bytes<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
    let mut bytes = [0u8; N];
    let mut i = 0;
    while i < N {
        // Output byte `i` is encoded by the two characters starting at `2 * i`;
        // stepping the input by one would re-read half of the previous pair.
        let at = 2 * i;
        bytes[i] = match hex_byte(ascii[at], ascii[at + 1]) {
            Some(b) => b,
            None => return None,
        };
        i += 1;
    }
    Some(bytes)
}
/// Decodes an ASCII hex string into bytes using an index-driven `while` loop
/// with unchecked reads into an uninitialised buffer.
///
/// Returns `None` when `ascii` has odd length or when `hex_byte` rejects any
/// pair of characters.
pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }
    let len = ascii.len() / 2;
    let mut bytes = Box::<[u8]>::new_uninit_slice(len);
    let mut i = 0;
    while i < len {
        // Output byte `i` comes from the character pair starting at `2 * i`.
        let at = 2 * i;
        // SAFETY: `i < len` and `ascii.len() == 2 * len`, hence
        // `at + 1 <= 2 * len - 1` is a valid index into `ascii`.
        let (msb, lsb) = unsafe { (*ascii.get_unchecked(at), *ascii.get_unchecked(at + 1)) };
        match hex_byte(msb, lsb) {
            Some(b) => bytes[i] = MaybeUninit::new(b),
            None => return None,
        }
        i += 1;
    }
    // SAFETY: the loop ran to completion, so all `len` slots were written.
    Some(unsafe { bytes.assume_init() })
}
/// Decodes an ASCII hex string into bytes using a `for` loop over output
/// indices with unchecked reads into an uninitialised buffer.
///
/// Returns `None` when `ascii` has odd length or when `hex_byte` rejects any
/// pair of characters.
pub fn hex_bytes_dyn_unsafe_for(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }
    let len = ascii.len() / 2;
    let mut bytes = Box::<[u8]>::new_uninit_slice(len);
    for i in 0..len {
        // Output byte `i` comes from the character pair starting at `2 * i`.
        let at = 2 * i;
        // SAFETY: `i < len` and `ascii.len() == 2 * len`, hence
        // `at + 1 <= 2 * len - 1` is a valid index into `ascii`.
        let (msb, lsb) = unsafe { (*ascii.get_unchecked(at), *ascii.get_unchecked(at + 1)) };
        match hex_byte(msb, lsb) {
            Some(b) => bytes[i] = MaybeUninit::new(b),
            None => return None,
        }
    }
    // SAFETY: the loop ran to completion, so all `len` slots were written.
    Some(unsafe { bytes.assume_init() })
}
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
let len = ascii.len() >> 1;
if len << 1 != ascii.len() {
return None;
}
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
for (i, o) in ascii.array_chunks::<2>()
.map(|[msb, lsb]| hex_byte(*msb, *lsb))
.enumerate() {
if let Some(b) = o {
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b) };
} else {
return None;
}
}
Some(unsafe { std::mem::transmute(bytes) })
}
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
// let mut bytes: Box<[u8]> = unsafe { std::mem::transmute(Box::<[u8]>::new_zeroed_slice(len)) };
// for (i, o) in ascii.array_chunks::<2>()
// .map(|[msb, lsb]| hex_byte(msb, lsb))
// .enumerate() {
// if let Some(b) = o {
// bytes[i] = b;
// } else {
// return None;
// }
// }
// Some(bytes)
let iter = ascii.array_chunks::<2>();
if iter.remainder().len() != 0 {
return None;
}
iter
.map(|[msb, lsb]| hex_byte(*msb, *lsb))
.collect::<Option<Vec<u8>>>()
.map(|v| v.into_boxed_slice())
}
/// Newtype around a mutable reference whose `Extend` implementation forwards
/// every operation to the referenced value.
struct ExtendRef<'a, T>(&'a mut T);

impl<'a, T, Item> Extend<Item> for ExtendRef<'a, T>
where
    T: Extend<Item>,
{
    /// Forwards the whole iterator to the wrapped value.
    #[inline(always)]
    fn extend<Iter: IntoIterator<Item = Item>>(&mut self, iter: Iter) {
        Extend::extend(&mut *self.0, iter)
    }

    /// Forwards a single item to the wrapped value.
    #[inline(always)]
    fn extend_one(&mut self, item: Item) {
        Extend::extend_one(&mut *self.0, item)
    }

    /// Forwards the capacity hint to the wrapped value.
    #[inline(always)]
    fn extend_reserve(&mut self, additional: usize) {
        Extend::<Item>::extend_reserve(&mut *self.0, additional)
    }
}
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
let iter = ascii.array_chunks::<2>();
if iter.remainder().len() != 0 {
return None;
}
iter
.map(|[msb, lsb]| hex_byte_niched(*msb, *lsb))
.map(std::convert::TryFrom::try_from)
.map(Result::ok)
.collect::<Option<Vec<u8>>>()
.map(|v| v.into_boxed_slice())
}
#[cfg(test)]
mod test {
    use super::*;

    /// Plain bytes that the hex fixture encodes.
    const ASCII_BYTES: &[u8] = b"Donald J. Trump!";
    /// Upper-case hex spelling of `ASCII_BYTES`.
    const HEX_BYTES: &[u8] = b"446F6E616C64204A2E205472756D7021";

    /// `hex_bytes_dyn` decodes the fixture back to the plain bytes.
    #[test]
    fn test_non_niched() {
        let decoded = hex_bytes_dyn(HEX_BYTES);
        assert_eq!(Some(ASCII_BYTES), decoded.as_deref());
    }

    /// `hex_bytes_dyn_niched` decodes the fixture back to the plain bytes.
    #[test]
    fn test_niched() {
        let decoded = hex_bytes_dyn_niched(HEX_BYTES);
        assert_eq!(Some(ASCII_BYTES), decoded.as_deref());
    }
}