// fast-hex/src/lib.rs

#![feature(array_chunks)]
#![feature(const_slice_index)]
#![feature(const_trait_impl)]
#![feature(extend_one)]
#![feature(generic_const_exprs)]
#![feature(int_log)]
#![feature(maybe_uninit_slice)]
#![feature(maybe_uninit_uninit_array)]
#![feature(maybe_uninit_array_assume_init)]
#![feature(const_maybe_uninit_array_assume_init)]
#![feature(const_maybe_uninit_uninit_array)]
#![feature(new_uninit)]
#![feature(portable_simd)]

pub(crate) mod util;

pub(crate) mod simd;

use std::mem::MaybeUninit;
use std::simd::*;

/// Vector width, in bits, from which the batch sizes below are derived.
///
/// 512 would be the maximum batch size supported by AVX-512 (kept below, commented out);
/// 256 is the value currently selected.
//pub const SIMD_WIDTH: usize = 512;
pub const SIMD_WIDTH: usize = 256;

/// The batch size used for the "wide" decoded hex bytes (any bit in the upper half indicates an error).
///
/// This is the number of 16-bit lanes that fit in [`SIMD_WIDTH`] bits.
pub const WIDE_BATCH_SIZE: usize = SIMD_WIDTH / 16;

/// The batch size used for the hex digits: two ASCII digits for every decoded byte of a batch.
pub const DIGIT_BATCH_SIZE: usize = WIDE_BATCH_SIZE * 2;

/// Builds a boolean mask whose entries alternate between `true` and `false`.
///
/// With `first_bias` set, the even indices (0, 2, 4, ...) are `true`; otherwise the odd
/// indices (1, 3, 5, ...) are `true`. Every index of the array follows this rule, including
/// the final index of an odd-length mask.
#[inline]
const fn alternating_mask<const N: usize>(first_bias: bool) -> [bool; N] {
    let mut mask = [false; N];
    let mut i = 0;
    // Walk every index (not just `N / 2` pairs) so that odd lengths are fully covered.
    while i < N {
        // An even index is set exactly when the mask is biased towards the first entry.
        mask[i] = (i % 2 == 0) == first_bias;
        i += 1;
    }
    mask
}

/// Builds the index permutation that gathers all even positions first, then all odd ones.
///
/// The first half of the result is `0, 2, 4, ...` and the second half is `1, 3, 5, ...`.
///
/// # Panics
///
/// Panics if `N` is odd.
#[inline]
const fn msb_lsb_indices<const N: usize>() -> [usize; N] {
    if N % 2 != 0 {
        panic!("Illegal N");
    }

    let half = N / 2;
    let mut order = [0; N];
    let mut pair = 0;
    while pair < half {
        let even = pair * 2;
        // The even member of the pair lands in the first half, the odd member in the second.
        order[pair] = even;
        order[half + pair] = even + 1;
        pair += 1;
    }

    order
}

/// Builds `N` indices that select every other element of a sequence twice as long.
///
/// With `first_bias` set the result is `0, 2, 4, ...`; otherwise it is `1, 3, 5, ...`.
#[inline]
const fn alternating_indices<const N: usize>(first_bias: bool) -> [usize; N] {
    // Offset of the selected element within each pair.
    let offset = if first_bias { 0 } else { 1 };
    let mut indices = [0; N];
    let mut slot = 0;
    while slot < N {
        indices[slot] = slot * 2 + offset;
        slot += 1;
    }
    indices
}

// Swizzle indices that pick the even positions (0, 2, 4, ...) of a digit batch, i.e. the
// first (most significant) digit of every `hi, lo` pair.
const MSB_INDICES: [usize; DIGIT_BATCH_SIZE / 2] = alternating_indices(true);
// Swizzle indices that pick the odd positions (1, 3, 5, ...) of a digit batch, i.e. the
// second (least significant) digit of every `hi, lo` pair.
const LSB_INDICES: [usize; DIGIT_BATCH_SIZE / 2] = alternating_indices(false);

/// Marker stored in a decoded digit when the ASCII byte is not a hex digit.
///
/// Valid digits are in `0..=15`, so this bit is never set for them.
pub const INVALID_BIT: u8 = 0b1000_0000;

/// Bits that signal invalid input in a 16-bit decoded byte.
///
/// This is `INVALID_BIT << 4` (bit 11) combined with `INVALID_BIT << 8` (bit 15).
pub const WIDE_INVALID_BIT: u16 = 0b1000_1000_0000_0000;

/// Lookup table from every possible byte to its hex digit value.
///
/// `0-9`, `a-f` and `A-F` map to `0..=15`; every other byte, including `0xFF`, maps to
/// [`INVALID_BIT`].
const ASCII_DIGITS: [u8; 256] = {
    // Start from "invalid" so no entry can be left looking like the valid digit 0.
    let mut digits = [INVALID_BIT; 256];
    // Iterate with a `usize` so the last entry (255) is reached; a `u8` counter compared
    // against `u8::MAX` would stop one entry short.
    let mut i = 0usize;
    while i < 256 {
        let ascii = i as u8;
        digits[i] = match ascii {
            b'0'..=b'9' => ascii - b'0',
            b'a'..=b'f' => 10 + (ascii - b'a'),
            b'A'..=b'F' => 10 + (ascii - b'A'),
            _ => INVALID_BIT,
        };
        i += 1;
    }
    digits
};

/// Looks up the hex digit value of an ASCII byte in the `ASCII_DIGITS` table.
///
/// Returns the digit value for `0-9`, `a-f` and `A-F`, and [`INVALID_BIT`] for the bytes the
/// table marks as invalid.
///
/// Earlier formulations (a range `match`, and an arithmetic version based on
/// `char.to_digit()` in the stdlib) were replaced by this table lookup.
#[inline]
pub const fn hex_digit(ascii: u8) -> u8 {
    // The table has one entry per possible `u8` value, so the index is always in bounds.
    let index = ascii as usize;
    ASCII_DIGITS[index]
}

/// Vector form of [`hex_digit`]: looks up every lane of `ascii` in the `ASCII_DIGITS` table.
///
/// Each output lane holds the table entry for the corresponding input lane.
#[inline(always)]
pub fn hex_digit_simd<const LANES: usize>(ascii: Simd<u8, LANES>) -> Simd<u8, LANES>
where
    LaneCount<LANES>: SupportedLaneCount,
{
    let indices: Simd<usize, LANES> = ascii.cast();
    // Every lane is gathered, so the fallback value is never used.
    let every_lane = Mask::splat(true);
    let fallback = simd::splat_0::<LANES>();
    // SAFETY: each index was widened from a `u8`, so it is at most 255, and `ASCII_DIGITS`
    // has 256 entries; every gathered index is therefore in bounds.
    unsafe { Simd::gather_select_unchecked(&ASCII_DIGITS, every_lane, indices, fallback) }
}

/// Parses one byte from its two ASCII hex digits.
///
/// `msb` is the high digit and `lsb` the low digit. Returns `None` if [`hex_digit`] flags
/// either of them with [`INVALID_BIT`].
#[inline(always)]
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
    let hi = hex_digit(msb);
    let lo = hex_digit(lsb);
    // Each digit is masked on its own before the OR; this form was measured as faster than
    // masking `hi | lo` once (perhaps it pipelines better).
    let invalid = (hi & INVALID_BIT) | (lo & INVALID_BIT);
    if invalid == 0 {
        Some((hi << 4) | lo)
    } else {
        None
    }
}

/// A decoder for a single hex byte that reports errors in-band through a 16-bit result.
#[const_trait]
pub trait HexByteDecoder {
    /// Parses an ascii hex byte from its high (`hi`) and low (`lo`) digit.
    ///
    /// Any return value exceeding [`u8::MAX`] indicates invalid input.
    fn decode_unpacked(hi: u8, lo: u8) -> u16;

    /// Parses an ascii hex byte given as a `[hi, lo]` digit pair.
    ///
    /// Any return value exceeding [`u8::MAX`] indicates invalid input. The default
    /// implementation forwards to [`HexByteDecoder::decode_unpacked`].
    #[inline(always)]
    fn decode_packed(hi_lo: &[u8; 2]) -> u16 {
        let [hi, lo] = *hi_lo;
        Self::decode_unpacked(hi, lo)
    }
}

/// A decoder for a sized batch of hex bytes.
pub trait HexByteSimdDecoder {
    /// Parses [`DIGIT_BATCH_SIZE`] ascii hex digits, laid out as consecutive `hi, lo` pairs,
    /// into [`WIDE_BATCH_SIZE`] bytes.
    ///
    /// The result lanes are plain `u8`s, so errors cannot be reported in-band; the
    /// implementations in this file return `None` when any digit of the batch is invalid.
    fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>>;
}

/// Decoder variant that masks the low digit into a value part and an error part.
pub struct HexByteDecoderA;

impl const HexByteDecoder for HexByteDecoderA {
    /// Combines both digits into one `u16`.
    ///
    /// An invalid high digit sets bit 11 (`INVALID_BIT << 4`); an invalid low digit sets
    /// bit 15 (`INVALID_BIT << 8`). Both bits are covered by [`WIDE_INVALID_BIT`].
    #[inline(always)]
    fn decode_unpacked(hi: u8, lo: u8) -> u16 {
        let high = hex_digit(hi) as u16;
        let low = hex_digit(lo) as u16;
        // Might these masks allow the ORs to be pipelined more efficiently?
        let value = (high << 4) | (low & 0xf);
        let low_error = (low & 0xf0) << 8;
        value | low_error
    }

    /// Same as [`HexByteDecoder::decode_unpacked`], taking the digits as a `[hi, lo]` pair.
    #[inline(always)]
    fn decode_packed(hi_lo: &[u8; 2]) -> u16 {
        Self::decode_unpacked(hi_lo[0], hi_lo[1])
    }
}

impl HexByteSimdDecoder for HexByteDecoderA {
    /// Decodes a whole batch with a vector table lookup, then packs each `hi, lo` digit
    /// pair into one byte.
    ///
    /// Returns `None` if any digit of the batch carries [`INVALID_BIT`].
    #[inline(always)]
    fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>> {
        let digits = hex_digit_simd::<DIGIT_BATCH_SIZE>(Simd::from_array(hi_los));

        // Isolate the invalid marker of every lane and reject the batch if any is set.
        let flagged = digits & simd::splat_n::<DIGIT_BATCH_SIZE>(INVALID_BIT);
        let any_invalid = flagged
            .simd_ne(simd::splat_0::<DIGIT_BATCH_SIZE>())
            .any();
        if any_invalid {
            return None;
        }

        // De-interleave the pairs: even lanes are high digits, odd lanes are low digits.
        let high = simd_swizzle!(digits, MSB_INDICES);
        let low = simd_swizzle!(digits, LSB_INDICES);
        Some((high << simd::splat_n::<WIDE_BATCH_SIZE>(4)) | low)
    }
}

/// Decoder variant that ORs the raw low digit into the result and only masks its
/// [`INVALID_BIT`] for the error part.
pub struct HexByteDecoderB;

impl const HexByteDecoder for HexByteDecoderB {
    // NOTE(review): `util::defer_impl!` is defined outside this file. Every forwarded
    // signature below is commented out, so it presumably expands to nothing here - confirm.
    util::defer_impl! {
        => HexByteDecoderA;

        //fn decode_unpacked(hi: u8, lo: u8) -> u16;

        //fn decode_packed(hi_lo: &[u8; 2]) -> u16;
    }

    /// Combines both digits into one `u16`.
    ///
    /// An invalid high digit sets bit 11 (`INVALID_BIT << 4`). An invalid low digit sets
    /// bit 7 and, through the extra shifted term, bit 15 (`INVALID_BIT << 8`).
    #[inline(always)]
    fn decode_unpacked(hi: u8, lo: u8) -> u16 {
        // The low digit is deliberately looked up before the high digit; see the note below.
        let lo = hex_digit(lo) as u16;
        let hi = hex_digit(hi) as u16;
        // kind of bizarre: changing the order of these decreases perf by 6-12%
        (hi << 4) | lo | ((lo & INVALID_BIT as u16) << 8)
    }

    /// Same computation as `decode_unpacked`, taking the digits as a `[hi, lo]` pair.
    #[inline(always)]
    fn decode_packed([hi, lo]: &[u8; 2]) -> u16 {
        // Same lookup order as in `decode_unpacked`.
        let lo = hex_digit(*lo) as u16;
        let hi = hex_digit(*hi) as u16;
        (hi << 4) | lo | ((lo & INVALID_BIT as u16) << 8)
    }
}

impl HexByteSimdDecoder for HexByteDecoderB {
    util::defer_impl! {
        => HexByteDecoderA;

        //fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>>;
    }

    #[inline(always)]
    fn decode_simd(mut hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>> {
        for b in hi_los.iter_mut() {
            *b = hex_digit(*b);
        }
        let hex_digits = Simd::from_array(hi_los);
        if (hex_digits & simd::splat_n::<DIGIT_BATCH_SIZE>(INVALID_BIT))
            .simd_ne(simd::splat_0::<DIGIT_BATCH_SIZE>())
            .any()
        {
            //if hex_digits.simd_eq(simd::splat_n::<DIGIT_BATCH_SIZE>(INVALID_BIT)).any() {
            return None;
        }
        let msb = simd_swizzle!(hex_digits, MSB_INDICES);
        let lsb = simd_swizzle!(hex_digits, LSB_INDICES);
        let mut v = Simd::from_array([0u8; WIDE_BATCH_SIZE]);
        for (i, v) in v.as_mut_array().iter_mut().enumerate() {
            let hi = unsafe { *msb.as_array().get_unchecked(i) };
            let lo = unsafe { *lsb.as_array().get_unchecked(i) };
            *v = (hi << 4) | lo;
        }
        Some(v)
        //msb << simd::splat_n::<WIDE_BATCH_SIZE>(4) | lsb | ((lsb & simd::splat_n::<WIDE_BATCH_SIZE>(0xf0)) << simd::splat_n::<WIDE_BATCH_SIZE>(8))
    }
}

/// The hex byte decoder used by the decoding functions in this file.
pub type HBD = HexByteDecoderB;

/// Bit-level reinterpretation helpers between byte arrays/vectors and wider layouts.
///
/// The conversions go through unions because the sizes involved depend on const generic
/// expressions. All unions are `#[repr(C)]` so that every field is guaranteed to start at
/// offset 0; with the default representation the field offsets are unspecified.
pub mod conv {
    use std::simd::{LaneCount, Simd, SupportedLaneCount};

    /// Overlays `N_u16 * 2` bytes with `N_u16` 16-bit integers.
    // The names mirror the primitive types being punned, hence the lint exceptions.
    #[allow(non_camel_case_types, non_snake_case)]
    #[repr(C)]
    union u8_u16<const N_u16: usize>
    where
        [u8; N_u16 * 2]:,
    {
        u8: [u8; N_u16 * 2],
        u16: [u16; N_u16],
    }

    /// Overlays `N_u8x2` byte pairs with a flat array of `N_u8x2 * 2` bytes.
    // The names mirror the primitive types being punned, hence the lint exceptions.
    #[allow(non_camel_case_types, non_snake_case)]
    #[repr(C)]
    union u8x2_u8<const N_u8x2: usize>
    where
        [u8; N_u8x2 * 2]:,
    {
        u8x2: [[u8; 2]; N_u8x2],
        u8: [u8; N_u8x2 * 2],
    }

    /// Overlays a vector of `N_U16 * 2` bytes with a vector of `N_U16` 16-bit integers.
    // The names mirror the vector types being punned, hence the lint exceptions.
    #[allow(non_camel_case_types, non_snake_case)]
    #[repr(C)]
    union SimdU8_SimdU16<const N_U16: usize>
    where
        LaneCount<{ N_U16 * 2 }>: SupportedLaneCount,
        LaneCount<N_U16>: SupportedLaneCount,
    {
        SimdU8: Simd<u8, { N_U16 * 2 }>,
        SimdU16: Simd<u16, N_U16>,
    }

    /// Reinterprets `N_OUT * 2` bytes as `N_OUT` 16-bit integers.
    ///
    /// The bytes are not reordered, so each `u16` is read in the target's native byte order.
    #[inline(always)]
    pub const fn u8_to_u16<const N_OUT: usize>(a: [u8; N_OUT * 2]) -> [u16; N_OUT] {
        // SAFETY: both fields have the same size, start at offset 0 (`repr(C)`), and every
        // bit pattern is a valid `u16`.
        unsafe { u8_u16 { u8: a }.u16 }
    }

    /// Flattens `N_IN` byte pairs into `N_IN * 2` bytes, preserving their order.
    #[inline(always)]
    pub const fn u8x2_to_u8<const N_IN: usize>(a: [[u8; 2]; N_IN]) -> [u8; N_IN * 2] {
        // SAFETY: both fields have the same size, start at offset 0 (`repr(C)`), and every
        // bit pattern is a valid `u8`.
        unsafe { u8x2_u8 { u8x2: a }.u8 }
    }

    /// Reinterprets a vector of `N_OUT * 2` bytes as a vector of `N_OUT` 16-bit integers.
    ///
    /// The bytes are not reordered, so each lane is read in the target's native byte order.
    #[inline(always)]
    pub const fn simdu8_to_simdu16<const N_OUT: usize>(
        a: Simd<u8, { N_OUT * 2 }>,
    ) -> Simd<u16, N_OUT>
    where
        LaneCount<{ N_OUT * 2 }>: SupportedLaneCount,
        LaneCount<N_OUT>: SupportedLaneCount,
    {
        // SAFETY: both fields start at offset 0 (`repr(C)`) and every bit pattern is a valid
        // `u16` lane.
        unsafe { SimdU8_SimdU16 { SimdU8: a }.SimdU16 }
    }
}

/// Rounds `n` down to a multiple of the largest power of two that does not exceed `N`.
///
/// For a power-of-two `N` this is the usual "align down to `N`". `N == 0` leaves `n`
/// unchanged.
#[inline(always)]
const fn align_down_to<const N: usize>(n: usize) -> usize {
    // `checked_ilog2` is `None` only for `N == 0`, which is treated as "no alignment".
    let shift = if let Some(log) = N.checked_ilog2() {
        log
    } else {
        0
    };
    // Clearing the low `shift` bits is the same as shifting them out and back in.
    let low_bits = (1usize << shift) - 1;
    n & !low_bits
}

/// Rounds `n` up to a multiple of the largest power of two that does not exceed `N`.
///
/// For a power-of-two `N` this is the usual "align up to `N`". It uses the same alignment
/// as `align_down_to`, so an already aligned `n` is returned unchanged for every `N`, and
/// `N == 0` leaves `n` unchanged instead of underflowing.
///
/// The addition can overflow when `n` is within one alignment step of `usize::MAX`.
#[inline(always)]
const fn align_up_to<const N: usize>(n: usize) -> usize {
    // `checked_ilog2` is `None` only for `N == 0`, which is treated as "no alignment".
    let shift = match N.checked_ilog2() {
        Some(x) => x,
        None => 0,
    };
    // Round with the effective power-of-two alignment rather than with `N - 1`, which is
    // wrong for a non-power-of-two `N` and underflows for `N == 0`.
    let mask = (1usize << shift) - 1;
    (n + mask) & !mask
}

/// Scalar decode loop shared by the branches of `decode_hex_bytes_unchecked`.
///
/// Starting at digit index `$i`, decodes `$ascii` two digits at a time and stores each byte
/// at `$bytes[$o + ($i >> 1)]`, so `$o` is an offset in output bytes. `$i` is advanced in
/// place. An invalid digit pair makes the *enclosing function* `return false`.
///
/// All accesses are unchecked: the caller must ensure that `$ascii.len() - $i` is even and
/// that `$bytes` has room for every decoded byte at the given offset.
macro_rules! decode_hex_bytes_non_vectored {
    ($i:ident, $ascii:ident, $bytes:ident, $o:expr) => {{
        while $i < $ascii.len() {
            // SAFETY: the caller guarantees a complete digit pair at `$i` and `$i + 1`.
            let (hi, lo) =
                unsafe { (*$ascii.get_unchecked($i), *$ascii.get_unchecked($i + 1)) };
            if let Some(b) = hex_byte(hi, lo) {
                // SAFETY: the caller guarantees `$bytes` covers `$o + ($i >> 1)`.
                unsafe { *$bytes.get_unchecked_mut($o + ($i >> 1)) = MaybeUninit::new(b) };
            } else {
                return false;
            }
            $i += 2;
        }
    }};
}

#[inline(always)]
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
    debug_assert_eq!(
        ascii.len() >> 1 << 1,
        ascii.len(),
        "len of ascii is not a multiple of 2"
    );
    debug_assert_eq!(
        ascii.len() >> 1,
        bytes.len(),
        "len of ascii is not twice that of bytes"
    );
    const VECTORED_A: bool = false;
    const VECTORED_B: bool = false;
    const VECTORED_C: bool = false;
    if VECTORED_A {
        let mut i = 0;
        while i < align_down_to::<DIGIT_BATCH_SIZE>(ascii.len()) {
            let buf = HBD::decode_simd(unsafe {
                *(ascii.as_ptr().add(i) as *const [u8; DIGIT_BATCH_SIZE])
            });
            let buf = match buf {
                Some(buf) => buf,
                None => return false,
            };
            let mut j = 0;
            while j < DIGIT_BATCH_SIZE {
                unsafe {
                    *bytes.get_unchecked_mut((i >> 1) + j) =
                        MaybeUninit::new(*buf.as_array().get_unchecked(j))
                };
                j += 1;
            }
            i += DIGIT_BATCH_SIZE;
        }

        decode_hex_bytes_non_vectored!(i, ascii, bytes, 0);
    } else if VECTORED_B {
        let (ascii_pre, ascii_simd, ascii_post) =
            unsafe { ascii.align_to::<Simd<u8, DIGIT_BATCH_SIZE>>() };

        debug_assert_eq!(ascii_pre.len() % 2, 0);
        debug_assert_eq!(ascii_post.len() % 2, 0);

        let mut i = 0;
        decode_hex_bytes_non_vectored!(i, ascii_pre, bytes, 0);

        let mut i = 0;
        while i < ascii_simd.len() {
            // this to_array and any subsequent from_array should be eliminated anyway
            let buf = HBD::decode_simd(unsafe { ascii_simd.get_unchecked(i) }.to_array());
            let buf = match buf {
                Some(buf) => buf,
                None => return false,
            };
            let mut j = 0;
            let k = ascii_pre.len() + i * DIGIT_BATCH_SIZE;
            while j < DIGIT_BATCH_SIZE {
                unsafe {
                    *bytes.get_unchecked_mut(k + j) =
                        MaybeUninit::new(*buf.as_array().get_unchecked(j))
                };
                j += 1;
            }
            i += 1;
        }

        let mut i = 0;
        let k = ascii.len() - ascii_post.len();
        decode_hex_bytes_non_vectored!(i, ascii_post, bytes, k);
    } else if VECTORED_C {
        let mut i = 0;
        while i < align_down_to::<DIGIT_BATCH_SIZE>(ascii.len()) {
            let buf = HBD::decode_simd(unsafe {
                *(ascii.as_ptr().add(i) as *const [u8; DIGIT_BATCH_SIZE])
            });
            let buf = match buf {
                Some(buf) => buf,
                None => return false,
            };
            let mut j = 0;
            while j < DIGIT_BATCH_SIZE {
                unsafe {
                    *bytes.get_unchecked_mut((i >> 1) + j) =
                        MaybeUninit::new(*buf.as_array().get_unchecked(j))
                };
                j += 1;
            }
            i += DIGIT_BATCH_SIZE;
        }

        decode_hex_bytes_non_vectored!(i, ascii, bytes, 0);
    } else {
        let mut i = 0;
        decode_hex_bytes_non_vectored!(i, ascii, bytes, 0);
    }
    true
}

/// Decodes `N * 2` hex digits into `N` bytes in a `const` context.
///
/// Returns `None` if any digit pair is invalid. Use of this function should be restricted
/// to `const` contexts: it always decodes one digit pair at a time, whereas the non-`const`
/// alternative goes through the shared slice decoder.
#[inline]
pub const fn hex_bytes_sized_const<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
    if N == 0 {
        return Some([0u8; N]);
    }

    let mut decoded = MaybeUninit::<u8>::uninit_array::<N>();
    let mut out = 0;
    while out < N {
        let pos = out << 1;
        // SAFETY: `out < N`, so `pos + 1 < N * 2`, which is the length of `ascii`.
        let (hi, lo) = unsafe { (*ascii.get_unchecked(pos), *ascii.get_unchecked(pos + 1)) };
        match hex_byte(hi, lo) {
            Some(b) => decoded[out] = MaybeUninit::new(b),
            None => return None,
        }
        out += 1;
    }
    // SAFETY: the loop wrote every one of the `N` elements before getting here.
    Some(unsafe { MaybeUninit::array_assume_init(decoded) })
}

/// Decodes `N * 2` hex digits into an `N`-byte array on the stack.
///
/// Returns `None` if any digit pair is invalid.
#[inline]
pub fn hex_bytes_sized<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
    if N == 0 {
        return Some([0u8; N]);
    }

    let mut buf = MaybeUninit::<u8>::uninit_array::<N>();
    if !decode_hex_bytes_unchecked(ascii, &mut buf) {
        return None;
    }
    // SAFETY: the decoder reported success, so it initialised all `N` elements.
    Some(unsafe { MaybeUninit::array_assume_init(buf) })
}

/// Decodes `N * 2` hex digits into a heap-allocated `N`-byte array.
///
/// Returns `None` if any digit pair is invalid.
#[inline]
pub fn hex_bytes_sized_heap<const N: usize>(ascii: &[u8; N * 2]) -> Option<Box<[u8; N]>> {
    if N == 0 {
        return Some(Box::new([0u8; N]));
    }

    // SAFETY: an array of `MaybeUninit<u8>` needs no initialisation to be valid.
    let mut buf = unsafe { Box::<[MaybeUninit<u8>; N]>::new_uninit().assume_init() };
    if !decode_hex_bytes_unchecked(ascii, buf.as_mut()) {
        return None;
    }
    let raw = Box::into_raw(buf) as *mut [u8; N];
    // SAFETY: the decoder reported success, so all `N` bytes are initialised, and
    // `[MaybeUninit<u8>; N]` has the same layout as `[u8; N]`.
    Some(unsafe { Box::from_raw(raw) })
}

/// Decodes a hex string of arbitrary length into a boxed slice via the shared unchecked
/// decoder.
///
/// Returns `None` if `ascii` has an odd length or contains an invalid digit pair.
#[inline]
pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }

    let mut decoded = Box::new_uninit_slice(ascii.len() / 2);
    if !decode_hex_bytes_unchecked(ascii, decoded.as_mut()) {
        return None;
    }
    // SAFETY: the decoder reported success, so every element has been initialised.
    Some(unsafe { Box::<[_]>::assume_init(decoded) })
}

/// Decodes a hex string of arbitrary length by iterating over digit pairs and writing into
/// an uninitialised boxed slice with unchecked indexing.
///
/// Returns `None` if `ascii` has an odd length or contains an invalid digit pair.
#[inline]
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }

    let mut decoded = Box::<[u8]>::new_uninit_slice(ascii.len() / 2);
    for (index, [msb, lsb]) in ascii.array_chunks::<2>().enumerate() {
        match hex_byte(*msb, *lsb) {
            // SAFETY: there are exactly `decoded.len()` digit pairs, so `index` is in bounds.
            Some(b) => unsafe { *decoded.get_unchecked_mut(index) = MaybeUninit::new(b) },
            None => return None,
        }
    }
    // SAFETY: every pair was valid, so every element has been written.
    Some(unsafe { Box::<[_]>::assume_init(decoded) })
}

/// Like [`hex_bytes_dyn_unsafe_iter`], but decodes each pair with [`HBD`] and detects errors
/// through the [`WIDE_INVALID_BIT`] bits of the 16-bit result instead of an `Option`.
///
/// Returns `None` if `ascii` has an odd length or contains an invalid digit pair.
#[inline]
pub fn hex_bytes_dyn_unsafe_iter_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }

    let mut decoded = Box::<[u8]>::new_uninit_slice(ascii.len() / 2);
    let wide_bytes = ascii.array_chunks::<2>().map(HBD::decode_packed);
    for (index, wide) in wide_bytes.enumerate() {
        let invalid = (wide & WIDE_INVALID_BIT) != 0;
        if invalid {
            return None;
        }
        // SAFETY: there are exactly `decoded.len()` digit pairs, so `index` is in bounds.
        unsafe { *decoded.get_unchecked_mut(index) = MaybeUninit::new(wide as u8) };
    }
    // SAFETY: every pair was valid, so every element has been written.
    Some(unsafe { Box::<[_]>::assume_init(decoded) })
}

/// Decodes a hex string of arbitrary length using only safe code, collecting the decoded
/// bytes through `Option`.
///
/// Returns `None` if `ascii` has an odd length or contains an invalid digit pair.
#[inline]
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
    let pairs = ascii.array_chunks::<2>();
    // A leftover digit means the input length is odd.
    if !pairs.remainder().is_empty() {
        return None;
    }

    let decoded: Option<Vec<u8>> = pairs.map(|&[msb, lsb]| hex_byte(msb, lsb)).collect();
    decoded.map(Vec::into_boxed_slice)
}

/// Like [`hex_bytes_dyn`], but decodes each pair with [`HBD`] and detects errors through
/// the [`WIDE_INVALID_BIT`] bits of the 16-bit result.
///
/// Returns `None` if `ascii` has an odd length or contains an invalid digit pair.
#[inline]
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
    let pairs = ascii.array_chunks::<2>();
    // A leftover digit means the input length is odd.
    if !pairs.remainder().is_empty() {
        return None;
    }

    let decoded: Option<Vec<u8>> = pairs
        .map(HBD::decode_packed)
        .map(|wide| ((wide & WIDE_INVALID_BIT) == 0).then(|| wide as u8))
        .collect();
    decoded.map(Vec::into_boxed_slice)
}

#[cfg(test)]
mod test {
    use super::*;

    const BYTES: &[u8] = b"Donald J. Trump!";
    const HEX_BYTES: &[u8] = b"446F6E616C64204A2E205472756D7021";

    const LONG_BYTES: &[u8] = b"Dolorum distinctio ut earum quidem distinctio necessitatibus quam. Sit praesentium facere perspiciatis iure aut sunt et et. Adipisci enim rerum illum et officia nisi recusandae. Vitae doloribus ut quia ea unde consequuntur quae illum. Id eius harum est. Inventore ipsum ut sit ut vero consectetur.";
    const LONG_HEX_BYTES: &[u8] = b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E";

    /// A byte string paired with its hex encoding.
    struct Sample {
        /// The bytes expected after decoding `hex_bytes`.
        bytes: &'static [u8],
        /// The ASCII hex digits handed to the decoder under test.
        hex_bytes: &'static [u8],
    }

    /// Valid inputs that every decoder passed to `test_f` must decode exactly.
    const SAMPLES: &[Sample] = &[
        Sample {
            bytes: BYTES,
            hex_bytes: HEX_BYTES,
        },
        Sample {
            bytes: LONG_BYTES,
            hex_bytes: LONG_HEX_BYTES,
        },
    ];

    const INVALID_SAMPLES: &[&[u8]] = &[
        b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722G",
        b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E7365637465747572GE",
        b"446F6C6F72756D2064697374696E6374696G20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
        b"446F6C6F72756D2064697374696E637469GF20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
    ];

    /// Every lower- and upper-case hex digit decodes to its numeric value.
    #[test]
    fn test_hex_digit() {
        const LOWER: &[u8; 16] = b"0123456789abcdef";
        const UPPER: &[u8; 16] = b"0123456789ABCDEF";

        for (value, (lower, upper)) in LOWER.iter().zip(UPPER.iter()).enumerate() {
            assert_eq!(hex_digit(*lower), value as u8);
            assert_eq!(hex_digit(*upper), value as u8);
        }
    }

    /// A full batch of lower- and upper-case digits decodes lane by lane to `0..=15`.
    #[test]
    fn test_hex_digit_simd() {
        // 16 lower-case digits followed by 16 upper-case digits fill one digit batch.
        const HEX_DIGITS: &[u8; DIGIT_BATCH_SIZE] = b"0123456789abcdef0123456789ABCDEF";

        let mut expected = [0u8; DIGIT_BATCH_SIZE];
        for (lane, value) in expected.iter_mut().enumerate() {
            *value = (lane % 16) as u8;
        }

        let decoded = hex_digit_simd::<DIGIT_BATCH_SIZE>(Simd::from_array(*HEX_DIGITS));
        assert_eq!(decoded, Simd::from_array(expected));
    }

    /// Checks the scalar decoders on valid pairs and on pairs with an invalid low digit, an
    /// invalid high digit, or both.
    #[test]
    fn test_hex_byte() {
        const HEX_BYTES_VALID: &[([u8; 2], u8)] = &[
            (*b"ff", 0xff),
            (*b"00", 0x00),
            (*b"11", 0x11),
            (*b"ef", 0xef),
            (*b"fe", 0xfe),
            (*b"0f", 0x0f),
            (*b"f0", 0xf0),
            // Upper and mixed case.
            (*b"FF", 0xff),
            (*b"aB", 0xab),
            (*b"9C", 0x9c),
        ];

        for (hb, b) in HEX_BYTES_VALID {
            assert_eq!(hex_byte(hb[0], hb[1]), Some(*b));

            assert_eq!(HexByteDecoderA::decode_unpacked(hb[0], hb[1]), *b as u16);
            assert_eq!(HexByteDecoderB::decode_unpacked(hb[0], hb[1]), *b as u16);

            assert_eq!(HexByteDecoderA::decode_packed(hb), *b as u16);
            assert_eq!(HexByteDecoderB::decode_packed(hb), *b as u16);
        }

        const HEX_BYTES_INVALID: &[[u8; 2]] = &[
            // Invalid low digit.
            *b"fg",
            *b"0g",
            *b"1g",
            *b"eg",
            // Invalid high digit.
            *b"gf",
            *b"g0",
            *b"G1",
            // Both digits invalid, using the characters adjacent to the valid ranges.
            *b"gg",
            *b"/:",
            *b"@`",
            *b"  ",
        ];

        for hb in HEX_BYTES_INVALID {
            assert_eq!(hex_byte(hb[0], hb[1]), None);
            assert_ne!(
                HexByteDecoderA::decode_unpacked(hb[0], hb[1]) & WIDE_INVALID_BIT,
                0
            );
            assert_ne!(
                HexByteDecoderB::decode_unpacked(hb[0], hb[1]) & WIDE_INVALID_BIT,
                0
            );

            assert_ne!(HexByteDecoderA::decode_packed(hb) & WIDE_INVALID_BIT, 0);
            assert_ne!(HexByteDecoderB::decode_packed(hb) & WIDE_INVALID_BIT, 0);
        }
    }

    /// Checks both batch decoders on a valid batch and on batches with one corrupted digit.
    #[test]
    fn test_hex_byte_simd() {
        const HEX_BYTES_VALID: [[u8; 2]; WIDE_BATCH_SIZE] = [
            *b"ff", *b"00", *b"11", *b"ef", *b"fe", *b"0f", *b"f0", *b"34", *b"ff", *b"00", *b"11",
            *b"ef", *b"fe", *b"0f", *b"f0", *b"34",
        ];
        const BYTES_VALID: [u8; WIDE_BATCH_SIZE] = [
            0xff, 0x00, 0x11, 0xef, 0xfe, 0x0f, 0xf0, 0x34, 0xff, 0x00, 0x11, 0xef, 0xfe, 0x0f,
            0xf0, 0x34,
        ];

        let hex_bytes = conv::u8x2_to_u8(HEX_BYTES_VALID);
        let bytes = Simd::from_array(BYTES_VALID);
        println!("hex_bytes: {HEX_BYTES_VALID:02x?}");
        println!("hex_bytes: {hex_bytes:02x?}");
        println!("bytes: {BYTES_VALID:02x?}");
        println!("bytes: {bytes:04x?}");
        assert_eq!(HexByteDecoderA::decode_simd(hex_bytes), Some(bytes));
        assert_eq!(HexByteDecoderB::decode_simd(hex_bytes), Some(bytes));

        // Corrupting any single digit - high or low, first or last pair - must make both
        // decoders reject the whole batch.
        for pos in [0, 1, DIGIT_BATCH_SIZE - 2, DIGIT_BATCH_SIZE - 1] {
            let mut invalid = hex_bytes;
            invalid[pos] = b'g';
            assert_eq!(HexByteDecoderA::decode_simd(invalid), None);
            assert_eq!(HexByteDecoderB::decode_simd(invalid), None);
        }
    }

    /// Runs the shared acceptance checks against one dynamically sized decoder `f`.
    ///
    /// `f` must decode every entry of `SAMPLES` exactly, reject every entry of
    /// `INVALID_SAMPLES`, reject odd-length input and decode the empty input to an empty
    /// result.
    fn test_f(f: fn(&[u8]) -> Option<Box<[u8]>>) {
        for (i, Sample { bytes, hex_bytes }) in SAMPLES.into_iter().enumerate() {
            let result = f(hex_bytes);
            assert_eq!(
                Some(*bytes),
                result.as_ref().map(Box::as_ref),
                "Sample {i} did not decode correctly"
            );
        }

        for (i, hex_bytes) in INVALID_SAMPLES.into_iter().enumerate() {
            let result = f(hex_bytes);
            assert_eq!(
                None,
                result.as_ref().map(Box::as_ref),
                "Invalid sample {i} was not rejected"
            );
        }

        // A dangling digit cannot form a byte.
        assert!(f(b"4").is_none(), "odd-length input was not rejected");
        assert!(f(b"446").is_none(), "odd-length input was not rejected");

        // No digits decode to no bytes.
        let empty: &[u8] = &[];
        assert_eq!(
            Some(empty),
            f(b"").as_deref(),
            "empty input did not decode to an empty result"
        );
    }

    /// Safe decoder that collects through `Option`.
    #[test]
    fn test_dyn_iter_option() {
        test_f(hex_bytes_dyn);
    }

    /// Safe decoder that detects errors through the 16-bit result.
    #[test]
    fn test_dyn_iter_u16() {
        test_f(hex_bytes_dyn_niched);
    }

    /// Decoder built on the shared unchecked slice decoder.
    #[test]
    fn test_dyn_unsafe() {
        test_f(hex_bytes_dyn_unsafe);
    }

    /// Pair iterator writing into an uninitialised buffer, errors through `Option`.
    #[test]
    fn test_dyn_unsafe_iter() {
        test_f(hex_bytes_dyn_unsafe_iter);
    }

    /// Pair iterator writing into an uninitialised buffer, errors through the 16-bit result.
    #[test]
    fn test_dyn_unsafe_iter_niched() {
        test_f(hex_bytes_dyn_unsafe_iter_niched);
    }
}