fast-hex/src/decode.rs

#![cfg_attr(not(feature = "std"), no_std)]
#![feature(array_chunks)]
#![feature(const_slice_index)]
#![feature(const_trait_impl)]
#![feature(extend_one)]
#![feature(generic_const_exprs)]
#![feature(int_log)]
#![feature(maybe_uninit_slice)]
#![feature(maybe_uninit_uninit_array)]
#![feature(maybe_uninit_array_assume_init)]
#![feature(const_maybe_uninit_array_assume_init)]
#![feature(const_maybe_uninit_uninit_array)]
#![cfg_attr(feature = "alloc", feature(new_uninit))]
#![feature(portable_simd)]

pub(crate) mod util;

pub(crate) mod simd;

#[cfg(feature = "alloc")]
extern crate alloc;

use core::mem::MaybeUninit;
use core::simd::*;

#[cfg(feature = "alloc")]
use alloc::{boxed::Box, vec::Vec};

use simd::SimdTestAnd as _;
use simd::SimdBitwise as _;

use util::array_op;

// The 512-bit setting is kept for reference but is disabled; batches are
// currently sized for 256-bit vectors.
//pub const SIMD_WIDTH: usize = 512;
/// Width, in bits, of the SIMD vector that one decoding batch is sized for.
pub const SIMD_WIDTH: usize = 256;

/// The number of decoded bytes produced per batch: one per 16-bit lane of a
/// [`SIMD_WIDTH`]-bit vector.
///
/// "Wide" refers to the 16-bit-per-byte representation, in which any bit in
/// the upper half indicates an error.
pub const WIDE_BATCH_SIZE: usize = SIMD_WIDTH / 16;

/// The number of ASCII hex digits consumed per batch (two per decoded byte).
pub const DIGIT_BATCH_SIZE: usize = WIDE_BATCH_SIZE * 2;

/// The number of digits looked up by a single table gather; a full digit
/// batch is processed as four such gathers.
const GATHER_BATCH_SIZE: usize = DIGIT_BATCH_SIZE / 4;

/// Debug-tracing switch for the SIMD code paths.
///
/// As written, the macro discards its argument tokens, so every
/// `if_trace_simd! { ... }` block in this file compiles to nothing. To enable
/// tracing, uncomment the line in the body that re-emits the tokens.
macro_rules! if_trace_simd {
    ($( $tt:tt )*) => {
        // disabled
        //{ $( $tt )* }
    };
}

/// When `true`, the vectored decoding loop accumulates every batch of decoded
/// digits so that invalid input can be detected once the loop has finished.
/// When `false`, only the scalar tail loop reports invalid digits.
const VALIDATE: bool = true;

/// Builds an array of every other index.
///
/// With `first_bias == true` element `i` is `2 * i` (the even indices
/// `0, 2, 4, ...`); otherwise it is `2 * i + 1` (the odd indices
/// `1, 3, 5, ...`).
#[inline]
const fn alternating_indices<const N: usize>(first_bias: bool) -> [usize; N] {
    match first_bias {
        true => array_op!(gen[N] |i| i * 2),
        false => array_op!(gen[N] |i| i * 2 + 1),
    }
}

/// Widens every element of `arr` from `u8` to `u32`, preserving order.
///
/// NOTE(review): relies on `array_op!(map ...)` applying the closure to each
/// `(index, value)` pair of `arr` — confirm against `util::array_op`.
#[inline]
const fn cast_u8_u32<const N: usize>(arr: [u8; N]) -> [u32; N] {
    array_op!(map[N, arr] |_, v| v as u32)
}

/// Even indices `0, 2, 4, ...`: the positions of the high (most significant)
/// digit of each byte within a batch of hex digits.
const MSB_INDICES: [usize; DIGIT_BATCH_SIZE / 2] = alternating_indices(true);
/// Odd indices `1, 3, 5, ...`: the positions of the low (least significant)
/// digit of each byte within a batch of hex digits.
const LSB_INDICES: [usize; DIGIT_BATCH_SIZE / 2] = alternating_indices(false);

/// Marker stored in [`ASCII_DIGITS`] for bytes that are not hex digits.
///
/// Valid digit values are `0..=15`, so this bit is never set for them.
pub const INVALID_BIT: u8 = 0b1000_0000;

/// Mask of the bits that flag invalid input in the 16-bit results of
/// [`HexByteDecoder`] as implemented by [`HexByteDecoderA`].
///
/// Bit 11 is where [`INVALID_BIT`] of the high digit lands after being shifted
/// left by 4; bit 15 is where [`INVALID_BIT`] of the low digit is copied after
/// being shifted left by 8.
pub const WIDE_INVALID_BIT: u16 = 0b1000_1000_0000_0000;

/// Lookup table from an ASCII byte to the value of the hex digit it encodes.
///
/// `'0'..='9'` map to `0..=9`; `'a'..='f'` and `'A'..='F'` map to `10..=15`.
/// Every other byte maps to [`INVALID_BIT`].
const ASCII_DIGITS: [u8; 256] = {
    array_op!(gen[256] |i| {
        let ascii = i as u8;
        match ascii {
            b'0'..=b'9' => ascii - b'0',
            b'a'..=b'f' => 10 + ascii - b'a',
            b'A'..=b'F' => 10 + ascii - b'A',
            _ => INVALID_BIT,
        }
    })
};

/// [`ASCII_DIGITS`] with every entry widened to 32 bits, so that the table can
/// be indexed by a gather of 32-bit elements.
const __ASCII_DIGITS_SIMD: [u32; 256] = cast_u8_u32(ASCII_DIGITS);

/// Base pointer of [`__ASCII_DIGITS_SIMD`], typed as `*const i32` because that
/// is the pointer type passed to the gather intrinsic in
/// `hex_digits_simd_inline!`.
const ASCII_DIGITS_SIMD: *const i32 = &__ASCII_DIGITS_SIMD as *const u32 as *const i32;

/// Decodes a single ASCII hex digit via a table lookup.
///
/// Returns the digit's value (`0..=15`) for `'0'..='9'`, `'a'..='f'` and
/// `'A'..='F'`, and [`INVALID_BIT`] for every other byte.
#[inline]
pub const fn hex_digit(ascii: u8) -> u8 {
    // A `u8` index can never exceed the 256-entry table.
    let index = ascii as usize;
    ASCII_DIGITS[index]
}

/// Decodes `LANES` ASCII hex digits at once by gathering from
/// [`ASCII_DIGITS`].
///
/// Each output lane holds the digit value (`0..=15`) of the corresponding
/// input lane, or [`INVALID_BIT`] if that lane is not a hex digit.
#[inline(always)]
pub fn hex_digit_simd<const LANES: usize>(ascii: Simd<u8, LANES>) -> Simd<u8, LANES>
where
    LaneCount<LANES>: SupportedLaneCount,
{
    let indices = ascii.cast();
    let every_lane = Mask::splat(true);
    // Only used for disabled lanes; no lane is disabled here.
    let fallback = simd::splat_0u8::<LANES>();

    // SAFETY: every index originates from a `u8`, so it is at most 255 and
    // therefore in bounds for the 256-entry table.
    unsafe { Simd::gather_select_unchecked(&ASCII_DIGITS, every_lane, indices, fallback) }
}

/// Parses one byte from its two ASCII hex digits.
///
/// `msb` is the high digit and `lsb` the low digit. Returns `None` if either
/// is not a hex digit.
#[inline(always)]
pub const fn hex_byte(msb: u8, lsb: u8) -> Option<u8> {
    let hi = hex_digit(msb);
    let lo = hex_digit(lsb);
    // Masking each digit separately was measured to be faster than masking the
    // combined value `(hi | lo) & INVALID_BIT` (perhaps it pipelines better).
    if (hi & INVALID_BIT) | (lo & INVALID_BIT) == 0 {
        Some(hi << 4 | lo)
    } else {
        None
    }
}

/// A decoder that turns a pair of ASCII hex digits into a single byte.
#[const_trait]
pub trait HexByteDecoder {
    /// Decodes the byte whose high digit is `hi` and low digit is `lo`.
    ///
    /// Any return value exceeding [`u8::MAX`] indicates invalid input.
    fn decode_unpacked(hi: u8, lo: u8) -> u16;

    /// Decodes a byte from a `[hi, lo]` pair of ASCII hex digits.
    ///
    /// Any return value exceeding [`u8::MAX`] indicates invalid input.
    #[inline(always)]
    fn decode_packed(pair: &[u8; 2]) -> u16 {
        let [hi, lo] = *pair;
        Self::decode_unpacked(hi, lo)
    }
}

/// A decoder for a sized batch of hex bytes.
pub trait HexByteSimdDecoder {
    /// Decodes a batch of [`DIGIT_BATCH_SIZE`] ASCII hex digits, laid out as
    /// consecutive `hi, lo` pairs, into [`WIDE_BATCH_SIZE`] bytes.
    ///
    /// Invalid input is reported through the `Option`; see the implementation
    /// for the exact condition.
    fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>>;
}

/// Table-driven hex decoder: implements both [`HexByteDecoder`] (scalar) and
/// [`HexByteSimdDecoder`] (batched) on top of the [`ASCII_DIGITS`] tables.
pub struct HexByteDecoderA;

impl const HexByteDecoder for HexByteDecoderA {
//    util::defer_impl! {
//        => HexByteDecoderA;
//
//        fn decode_unpacked(hi: u8, lo: u8) -> u16;
//
//        fn decode_packed(hi_lo: &[u8; 2]) -> u16;
//    }

    /// Decodes `hi`/`lo` through [`hex_digit`] and packs them into a `u16`.
    ///
    /// For valid input the result is the decoded byte (`<= 0xff`). An invalid
    /// `hi` sets bit 11 (its [`INVALID_BIT`] shifted left by 4) and an invalid
    /// `lo` sets bit 15 (its [`INVALID_BIT`] copied 8 bits up), so masking
    /// with [`WIDE_INVALID_BIT`] detects either.
    #[inline(always)]
    fn decode_unpacked(hi: u8, lo: u8) -> u16 {
        let lo = hex_digit(lo) as u16;
        let hi = hex_digit(hi) as u16;
        // kind of bizarre: changing the order of these decreases perf by 6-12%
        (hi << 4) | lo | ((lo & INVALID_BIT as u16) << 8)
    }

    /// Same as [`Self::decode_unpacked`], taking the digits as a `[hi, lo]`
    /// pair. The body is spelled out here rather than delegating.
    #[inline(always)]
    fn decode_packed([hi, lo]: &[u8; 2]) -> u16 {
        let lo = hex_digit(*lo) as u16;
        let hi = hex_digit(*hi) as u16;
        (hi << 4) | lo | ((lo & INVALID_BIT as u16) << 8)
    }
}

/// Decodes one batch of ASCII hex digits into digit values using four table
/// gathers, and evaluates to the combined vector.
///
/// `$ptr` names a raw pointer to `[u8; GATHER_BATCH_SIZE]` chunks. The macro
/// dereferences `$ptr`, `$ptr.add(1)`, `$ptr.add(2)` and `$ptr.add(3)`, so all
/// four chunks must be readable, and it must be expanded inside an `unsafe`
/// context (it performs raw-pointer reads and calls an arch intrinsic without
/// an `unsafe` block of its own).
///
/// Each resulting byte is the corresponding [`ASCII_DIGITS`] entry, i.e. a
/// digit value or [`INVALID_BIT`].
///
/// NOTE(review): the byte order of the result depends on the `simd::*` merge
/// helpers, which are not visible here — presumably input order is preserved;
/// confirm against the `simd` module.
macro_rules! hex_digits_simd_inline {
    ($ptr:ident) => {{
        if_trace_simd! {
            println!("hi_los: {:x?}", *$ptr);
        }

        // Read the four consecutive chunks of ASCII digits.
        let a = *$ptr;
        let b = *$ptr.add(1);
        let c = *$ptr.add(2);
        let d = *$ptr.add(3);

        if_trace_simd! {
            let f = |x| __ASCII_DIGITS_SIMD[x as usize];

            println!(
                "{:x?}, {:x?}, {:x?}, {:x?}",
                a.map(f),
                b.map(f),
                c.map(f),
                d.map(f)
            );
        }

        let a = Simd::from_array(a);
        let b = Simd::from_array(b);
        let c = Simd::from_array(c);
        let d = Simd::from_array(d);

        // Widen each ASCII byte to a 32-bit lane so it can be used as a gather index.
        let a = a.cast::<u32>();
        let b = b.cast::<u32>();
        let c = c.cast::<u32>();
        let d = d.cast::<u32>();

        if_trace_simd! {
            println!("{a:x?}, {b:x?}, {c:x?}, {d:x?}");
        }

        // Convert to the vector type expected by the gather intrinsic.
        let a = a.into();
        let b = b.into();
        let c = c.into();
        let d = d.into();

        // Look up every digit in the 32-bit table; the scale of 4 matches the
        // size in bytes of one table entry.
        let a = simd::arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, a, 4);
        let b = simd::arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, b, 4);
        let c = simd::arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, c, 4);
        let d = simd::arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, d, 4);

        // Narrow the gathered 32-bit entries back down to one byte per digit.
        let a = Simd::<u32, GATHER_BATCH_SIZE>::from(a).cast::<u8>();
        let b = Simd::<u32, GATHER_BATCH_SIZE>::from(b).cast::<u8>();
        let c = Simd::<u32, GATHER_BATCH_SIZE>::from(c).cast::<u8>();
        let d = Simd::<u32, GATHER_BATCH_SIZE>::from(d).cast::<u8>();

        if_trace_simd! {
            println!("{a:x?}, {b:x?}, {c:x?}, {d:x?}");
        }

        // load the 64-bit integers into registers
        // (each narrowed vector is GATHER_BATCH_SIZE bytes wide)
        let a = simd::load_u64_m128(util::cast(a));
        let b = simd::load_u64_m128(util::cast(b));
        let c = simd::load_u64_m128(util::cast(c));
        let d = simd::load_u64_m128(util::cast(d));

        if_trace_simd! {
            let a = Simd::<u8, 16>::from(a);
            let b = Simd::<u8, 16>::from(b);
            let c = Simd::<u8, 16>::from(c);
            let d = Simd::<u8, 16>::from(d);
            println!("a,b,c,d:   {a:x?}, {b:x?}, {c:x?}, {d:x?}");
        }

        // Pair up the 64-bit values: `a` with `b`, and `c` with `d`
        // (presumably low half / high half of one 128-bit vector each, per the
        // helper's name).
        let ab = simd::merge_lo_hi_m128(a, b);
        let cd = simd::merge_lo_hi_m128(c, d);

        if_trace_simd! {
            let ab = Simd::<u8, 16>::from(ab);
            let cd = Simd::<u8, 16>::from(cd);
            println!("ab,cd:   {ab:x?}, {cd:x?}");
        }

        // Combine the two 128-bit values into the final 256-bit batch.
        let abcd = simd::merge_m128_m256(ab, cd);

        if_trace_simd! {
            let abcd: Simd<u8, DIGIT_BATCH_SIZE> = abcd.into();
            println!("abcd: {abcd:x?}");
        }

        abcd
    }};
}

/// Combines a batch of decoded hex digit values into bytes and evaluates to
/// the resulting 128-bit vector (`hi << 4 | lo` for every digit pair).
///
/// `$hex_digits` names the vector produced by `hex_digits_simd_inline!`.
///
/// The macro does not validate anything: if a digit carries [`INVALID_BIT`],
/// the corresponding output byte is meaningless, so callers must check for
/// invalid digits separately.
///
/// NOTE(review): `simd::extract_lo_bytes` / `simd::extract_hi_bytes` are
/// assumed to yield the even-positioned (high) and odd-positioned (low) digits
/// respectively — confirm against the `simd` module.
macro_rules! merge_hex_digits_into_bytes_inline {
    ($hex_digits:ident) => {{
        let msb = simd::extract_lo_bytes($hex_digits);
        let lsb = simd::extract_hi_bytes($hex_digits);

        let msb1: simd::arch::__m128i;
        // `core::arch` rather than `std::arch`: this crate is `no_std` unless
        // the `std` feature is enabled, and `std` is not nameable in that case.
        //
        // SAFETY: `vpsllw` only reads `src` and writes `dst`; it touches
        // neither memory, the stack, nor the flags, which is what the options
        // declare. NOTE(review): the instruction needs AVX at runtime —
        // presumably guaranteed by the crate's target features; confirm.
        unsafe {
            core::arch::asm!(
                "vpsllw {dst}, {src}, 4",
                src = in(xmm_reg) msb,
                dst = lateout(xmm_reg) msb1,
                options(pure, nomem, nostack, preserves_flags)
            )
        };
        if_trace_simd! {
            let msb1: Simd<u8, WIDE_BATCH_SIZE> = msb1.into();
            println!("msb1: {msb1:x?}");
        }
        // The shift operates on 16-bit lanes, so the upper nibble of each even
        // byte spills into the lower nibble of the following byte; masking with
        // 0xf0 per byte removes that spill and leaves `digit << 4` in each byte.
        let msb2 = msb1.and(Simd::from_array([0xf0f0u16; WIDE_BATCH_SIZE / 2]).into());
        let b = msb2.or(lsb);

        if_trace_simd! {
            let msb: Simd<u8, WIDE_BATCH_SIZE> = msb.into();
            let msb1: Simd<u8, WIDE_BATCH_SIZE> = msb1.into();
            let msb2: Simd<u8, WIDE_BATCH_SIZE> = msb2.into();
            let lsb: Simd<u8, WIDE_BATCH_SIZE> = lsb.into();
            let b: Simd<u8, WIDE_BATCH_SIZE> = b.into();

            println!("| Packed | Msb | <<4 | &   | Lsb | Bytes |   |");
            Simd::<u8, DIGIT_BATCH_SIZE>::from($hex_digits)
                .to_array()
                .chunks(2)
                .zip(msb.to_array())
                .zip(msb1.to_array())
                .zip(msb2.to_array())
                .zip(lsb.to_array())
                .zip(b.to_array())
                .for_each(|(((((chunk, msb), msb1), msb2), lsb), b)| {
                    println!(
                        "| {chunk:02x?}     | {msb:x?}   | {msb1:x?}   | {msb2:x?}   | {lsb:x?}   | {b:02x?}    | {ok} |",
                        chunk = (chunk[0] as u16) << 4 | (chunk[1] as u16),
                        ok = if chunk[0] == msb && chunk[1] == lsb {
                            '✓'
                        } else {
                            '✗'
                        }
                    );
                });
        }

        b
    }};
}

impl HexByteSimdDecoder for HexByteDecoderA {
    /// Decodes a full batch of `hi, lo` digit pairs.
    ///
    /// Returns `None` as soon as any decoded digit carries [`INVALID_BIT`];
    /// otherwise returns the [`WIDE_BATCH_SIZE`] merged bytes.
    #[inline(always)]
    fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>> {
        let chunks = hi_los.as_ptr() as *const [u8; GATHER_BATCH_SIZE];

        // SAFETY: `hi_los` holds `DIGIT_BATCH_SIZE` bytes, which is exactly the
        // four `GATHER_BATCH_SIZE`-byte chunks the macro reads.
        let digits = unsafe { hex_digits_simd_inline!(chunks) };

        if digits.test_and_non_zero(simd::splat_n::<DIGIT_BATCH_SIZE>(INVALID_BIT).into()) {
            None
        } else {
            Some(merge_hex_digits_into_bytes_inline!(digits).into())
        }
    }
}

/// Short alias for the hex byte decoder implementation used by this crate.
pub type HBD = HexByteDecoderA;

/// Size-preserving reinterpretations between byte arrays / vectors and their
/// two-bytes-per-element counterparts.
///
/// NOTE(review): every function here delegates to `util::cast`, which is
/// presumably a transmute-style bit cast; if so, the `u8` <-> `u16` results
/// depend on the target's endianness — confirm against `util`.
pub mod conv {
    use core::simd::{LaneCount, Simd, SupportedLaneCount};

    use crate::util;

    /// Reinterprets `N_OUT * 2` bytes as `N_OUT` 16-bit values.
    #[inline(always)]
    pub const fn u8_to_u16<const N_OUT: usize>(a: [u8; N_OUT * 2]) -> [u16; N_OUT] {
        // SAFETY (assumed contract of `util::cast`): source and destination
        // have the same size, and every bit pattern is a valid `u16`.
        unsafe { util::cast(a) }
    }

    /// Flattens `N_IN` byte pairs into a single array of `N_IN * 2` bytes.
    #[inline(always)]
    pub const fn u8x2_to_u8<const N_IN: usize>(a: [[u8; 2]; N_IN]) -> [u8; N_IN * 2] {
        // SAFETY (assumed contract of `util::cast`): source and destination
        // have the same size, and every bit pattern is a valid `u8`.
        unsafe { util::cast(a) }
    }

    /// Reinterprets a vector of `N_OUT * 2` bytes as a vector of `N_OUT`
    /// 16-bit lanes.
    #[inline(always)]
    pub const fn simdu8_to_simdu16<const N_OUT: usize>(
        a: Simd<u8, { N_OUT * 2 }>,
    ) -> Simd<u16, N_OUT>
    where
        LaneCount<{ N_OUT * 2 }>: SupportedLaneCount,
        LaneCount<N_OUT>: SupportedLaneCount,
    {
        // SAFETY (assumed contract of `util::cast`): both vector types cover
        // the same number of bytes, and every bit pattern is a valid `u16`.
        unsafe { util::cast(a) }
    }
}

/// Scalar decoding loop: decodes the digit pairs of `$ascii` starting at
/// offset `$i` and stores each byte at `$bytes[$i >> 1]`.
///
/// `$i` must name a mutable index variable; it is advanced by 2 per pair and
/// equals at least `$ascii.len()` afterwards.
///
/// If any decoded digit carries [`INVALID_BIT`], the macro executes
/// `return false` — i.e. it returns from the *enclosing function* — after the
/// whole range has been processed. The bytes written for invalid pairs are
/// meaningless.
///
/// The reads and writes are unchecked: the remaining length of `$ascii` from
/// `$i` must be even, and `$bytes` must hold at least `$ascii.len() / 2`
/// elements.
macro_rules! decode_hex_bytes_non_vectored {
    ($i:ident, $ascii:ident, $bytes:ident) => {{
        // Alternative using the 16-bit `HBD` decoder:
        //let mut bad = 0u16;
        // Accumulates all decoded digits so validity is checked once, after the loop.
        let mut bad = 0u8;
        while $i < $ascii.len() {
            // Alternative using the 16-bit `HBD` decoder:
            /*let b = HBD::decode_packed(unsafe { &*($ascii.as_ptr().add($i) as *const [u8; 2]) });
            bad |= b;
            unsafe { *$bytes.get_unchecked_mut($i >> 1) = MaybeUninit::new(b as u8) };*/

            // Unchecked read of the `[hi, lo]` pair at `$i`; in bounds as long
            // as the length precondition above holds.
            let [hi, lo] = unsafe { *($ascii.as_ptr().add($i) as *const [u8; 2]) };
            let lo = hex_digit(lo);
            let hi = hex_digit(hi);
            bad |= lo;
            bad |= hi;
            // Debugging aid for locating the offending pair:
            /*if (hi & INVALID_BIT) | (lo & INVALID_BIT) != 0 {
                println!("bad hex byte at {} ({}{})", $i, $ascii[$i] as char, $ascii[$i + 1] as char);
            }*/
            let b = (hi << 4) | lo;
            // Unchecked write of the decoded byte; relies on the `$bytes`
            // length precondition above.
            unsafe { *$bytes.get_unchecked_mut($i >> 1) = MaybeUninit::new(b) };

            $i += 2;
        }
        // Alternative matching the 16-bit accumulator:
        //if (bad & WIDE_INVALID_BIT) != 0 {
        if (bad & INVALID_BIT) != 0 {
            return false;
        }
    }};
}

/*simd::swizzle_indices!(MSB_INDICES = [
    0,  2,  4,  6,
    8,  10, 12, 14,
    16, 18, 20, 22,
    24, 26, 28, 30
], [_ . . . _ . . . _ . . . _ . . .]);
simd::swizzle_indices!(LSB_INDICES = [
    1,  3,  5,  7,
    9,  11, 13, 15,
    17, 19, 21, 23,
    25, 27, 29, 31
], [_ . . . _ . . . _ . . . _ . . .]);*/

/// Decodes the ASCII hex digits in `ascii` into `bytes`.
///
/// Returns `true` if every digit was valid, in which case all of `bytes` has
/// been initialized. Returns `false` if any digit was invalid, or if the
/// lengths are inconsistent (`ascii.len()` is odd or is not exactly
/// `2 * bytes.len()`); in debug builds inconsistent lengths panic instead.
/// On `false`, `bytes` may have been partially written with meaningless data.
///
/// Full batches of [`DIGIT_BATCH_SIZE`] digits are decoded with SIMD; the
/// remainder is handled by the scalar loop.
#[inline(always)]
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
    // these checks should always be eliminated because they are performed more efficiently
    // (sometimes statically) in the callers, but they provide a major safeguard against nasty
    // memory safety issues.
    debug_assert_eq!(
        ascii.len() >> 1 << 1,
        ascii.len(),
        "len of ascii is not a multiple of 2"
    );
    if ascii.len() >> 1 << 1 != ascii.len() {
        return false;
    }
    debug_assert_eq!(
        ascii.len() >> 1,
        bytes.len(),
        "len of ascii is not twice that of bytes"
    );
    if ascii.len() >> 1 != bytes.len() {
        return false;
    }

    const VECTORED: bool = true;
    if VECTORED {
        use simd::arch;

        // OR of every decoded digit batch; tested for `INVALID_BIT` at the end.
        let mut bad: arch::__m256i = simd::splat_0u8().into();
        // Number of leading digits that form complete SIMD batches.
        let vectored_len = util::align_down_to::<DIGIT_BATCH_SIZE>(ascii.len());
        let mut i = 0;
        while i < vectored_len {
            // SAFETY: `i + DIGIT_BATCH_SIZE <= vectored_len <= ascii.len()`, so
            // the four chunks read by the macro are in bounds.
            let hex_digits = unsafe {
                let hi_los = ascii.as_ptr().add(i) as *const [u8; GATHER_BATCH_SIZE];

                hex_digits_simd_inline!(hi_los)
            };
            if VALIDATE {
                // SAFETY: register-only instruction; no memory, stack or flags
                // are touched, as declared by the options.
                unsafe {
                    core::arch::asm!(
                        "vpor {bad}, {digits}, {bad}",
                        bad = inout(ymm_reg) bad,
                        digits = in(ymm_reg) hex_digits,
                        options(pure, nomem, preserves_flags, nostack)
                    );
                }
            }

            let buf = merge_hex_digits_into_bytes_inline!(hex_digits);

            // An *unaligned* store (`vmovdqu`) is required here: `bytes` is a
            // byte slice with no alignment guarantee (it may be a stack array
            // or an arbitrary sub-slice), and the aligned form `vmovdqa`
            // faults on addresses that are not 16-byte aligned.
            //
            // SAFETY: the store writes `DIGIT_BATCH_SIZE / 2` bytes starting at
            // `i >> 1`, and `(i + DIGIT_BATCH_SIZE) >> 1 <= bytes.len()`
            // because `bytes.len() == ascii.len() >> 1` was checked above.
            unsafe {
                core::arch::asm!(
                    "vmovdqu [{}], {}",
                    in(reg) bytes.as_mut_ptr().add(i >> 1) as *mut i8,
                    in(xmm_reg) buf,
                    options(preserves_flags, nostack)
                );
            };
            i += DIGIT_BATCH_SIZE;
        }

        // Scalar tail; returns `false` from this function on invalid digits.
        decode_hex_bytes_non_vectored!(i, ascii, bytes);
        !bad.test_and_non_zero(simd::splat_n::<DIGIT_BATCH_SIZE>(INVALID_BIT).into())
    } else {
        let mut i = 0;
        decode_hex_bytes_non_vectored!(i, ascii, bytes);
        true
    }
}

/// Decodes `N` bytes from `N * 2` ASCII hex digits, one pair at a time.
///
/// Returns `None` at the first pair that contains a non-hex digit.
///
/// Use of this function should be restricted to `const` contexts because it is not vectorized like
/// the non-`const` alternative.
#[inline]
pub const fn hex_bytes_sized_const<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
    if N == 0 {
        return Some([0u8; N]);
    }

    let mut decoded = MaybeUninit::uninit_array();
    let mut pos = 0;
    while pos < N * 2 {
        let slot = pos >> 1;
        // Optimizer hint: `pos < N * 2` implies `slot < N`, so the indexed
        // store below never needs a bounds check.
        if slot >= decoded.len() {
            unsafe { core::hint::unreachable_unchecked() };
        }
        // SAFETY: `pos` is even and less than `N * 2`, so both `pos` and
        // `pos + 1` are in bounds.
        let msb = unsafe { *ascii.get_unchecked(pos) };
        let lsb = unsafe { *ascii.get_unchecked(pos + 1) };
        decoded[slot] = match hex_byte(msb, lsb) {
            Some(byte) => MaybeUninit::new(byte),
            None => return None,
        };
        pos += 2;
    }
    // SAFETY: the loop ran to completion, so every slot was written.
    Some(unsafe { MaybeUninit::array_assume_init(decoded) })
}

/// Decodes `N` bytes from `N * 2` ASCII hex digits into a stack array.
///
/// Returns `None` if any digit is not a hex digit.
#[inline]
pub fn hex_bytes_sized<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
    if N == 0 {
        return Some([0u8; N]);
    }

    let mut buf = MaybeUninit::uninit_array();
    if !decode_hex_bytes_unchecked(ascii, &mut buf) {
        return None;
    }
    // SAFETY: the decoder returned `true`, so every element of `buf` has been
    // initialized.
    Some(unsafe { MaybeUninit::array_assume_init(buf) })
}

/// Decodes `N` bytes from `N * 2` ASCII hex digits directly into a heap
/// allocation.
///
/// Returns `None` if any digit is not a hex digit.
#[cfg(feature = "alloc")]
#[inline]
pub fn hex_bytes_sized_heap<const N: usize>(ascii: &[u8; N * 2]) -> Option<Box<[u8; N]>> {
    if N == 0 {
        return Some(Box::new([0u8; N]));
    }

    // SAFETY: the element type is inferred from the decoder call below as
    // `MaybeUninit<u8>`, and an array of `MaybeUninit` needs no initialization.
    let mut buf = unsafe { Box::<[_; N]>::new_uninit().assume_init() };
    if !decode_hex_bytes_unchecked(ascii, buf.as_mut()) {
        return None;
    }
    // SAFETY: the decoder returned `true`, so all `N` elements are initialized,
    // and `MaybeUninit<u8>` has the same layout as `u8`.
    let raw = Box::into_raw(buf) as *mut [u8; N];
    Some(unsafe { Box::from_raw(raw) })
}

/// Decodes a slice of ASCII hex digits into a boxed byte slice using the
/// batched decoder.
///
/// Returns `None` if `ascii` has odd length or contains a non-hex digit.
#[cfg(feature = "alloc")]
#[inline]
pub fn hex_bytes_dyn_unsafe(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }

    let mut out = Box::new_uninit_slice(ascii.len() / 2);
    if !decode_hex_bytes_unchecked(ascii, out.as_mut()) {
        return None;
    }
    // SAFETY: the decoder returned `true`, so every element of `out` has been
    // initialized.
    Some(unsafe { Box::<[_]>::assume_init(out) })
}

/// Decodes a slice of ASCII hex digits into a boxed byte slice, one digit
/// pair at a time, writing straight into uninitialized storage.
///
/// Returns `None` if `ascii` has odd length or at the first pair that
/// contains a non-hex digit.
#[cfg(feature = "alloc")]
#[inline]
pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
    if ascii.len() % 2 != 0 {
        return None;
    }

    let mut out = Box::<[u8]>::new_uninit_slice(ascii.len() / 2);
    // `ascii` has exactly two digits per output slot, so the zip pairs every
    // digit pair with its slot.
    for ([hi, lo], slot) in ascii.array_chunks::<2>().zip(out.iter_mut()) {
        let lo = hex_digit(*lo);
        let hi = hex_digit(*hi);
        if (lo & INVALID_BIT) | (hi & INVALID_BIT) != 0 {
            return None;
        }
        slot.write((hi << 4) | lo);
    }
    // SAFETY: the loop completed without returning, so every slot was written.
    Some(unsafe { Box::<[_]>::assume_init(out) })
}

/// Decodes a slice of ASCII hex digits into a boxed byte slice using only
/// safe code.
///
/// Returns `None` if `ascii` has odd length or contains a non-hex digit.
#[cfg(feature = "alloc")]
#[inline]
pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
    let pairs = ascii.array_chunks::<2>();
    if !pairs.remainder().is_empty() {
        return None;
    }

    // Collecting into `Option<Vec<_>>` stops at the first invalid pair.
    let decoded: Option<Vec<u8>> = pairs.map(|&[msb, lsb]| hex_byte(msb, lsb)).collect();
    decoded.map(Vec::into_boxed_slice)
}

#[cfg(test)]
mod test {
    use super::*;

    // Short sample: plain text and its upper-case hex encoding.
    const BYTES: &str = "Donald J. Trump!";
    const HEX_BYTES: &str = "446F6E616C64204A2E205472756D7021";

    // Long sample: plain text and its upper-case hex encoding.
    const LONG_BYTES: &str = "Dolorum distinctio ut earum quidem distinctio necessitatibus quam. Sit praesentium facere perspiciatis iure aut sunt et et. Adipisci enim rerum illum et officia nisi recusandae. Vitae doloribus ut quia ea unde consequuntur quae illum. Id eius harum est. Inventore ipsum ut sit ut vero consectetur.";
    const LONG_HEX_BYTES: &str = "446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E";

    /// A valid test vector: `hex_bytes` is expected to decode to `bytes`.
    struct Sample {
        bytes: &'static str,
        hex_bytes: &'static str,
    }

    /// Valid inputs shared by the slice-decoding tests.
    const SAMPLES: &[Sample] = &[
        Sample {
            bytes: BYTES,
            hex_bytes: HEX_BYTES,
        },
        Sample {
            bytes: LONG_BYTES,
            hex_bytes: LONG_HEX_BYTES,
        },
    ];

    /// Inputs that must be rejected. Each is `LONG_HEX_BYTES` with a single
    /// digit replaced by the non-hex character `G`:
    ///
    /// 0. the low digit of the final byte (`2E` -> `2G`),
    /// 1. the high digit of the final byte (`2E` -> `GE`),
    /// 2. the low digit of an early byte (`6F` -> `6G`),
    /// 3. the high digit of that same early byte (`6F` -> `GF`).
    const INVALID_SAMPLES: &[&str] = &[
        "446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722G",
        "446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E7365637465747572GE",
        "446F6C6F72756D2064697374696E6374696G20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
        "446F6C6F72756D2064697374696E637469GF20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
    ];

    // Every lower-case and upper-case hex digit decodes to its numeric value.
    #[test]
    fn test_hex_digit() {
        const LOWER: &[u8; 16] = b"0123456789abcdef";
        const UPPER: &[u8; 16] = b"0123456789ABCDEF";

        for (value, (&lower, &upper)) in LOWER.iter().zip(UPPER.iter()).enumerate() {
            assert_eq!(hex_digit(lower), value as u8);
            assert_eq!(hex_digit(upper), value as u8);
        }
    }

    // A full batch of digits (lower-case run followed by upper-case run)
    // decodes lane-by-lane to the values 0..=15, twice over.
    #[test]
    fn test_hex_digit_simd() {
        const HEX_DIGITS: [u8; DIGIT_BATCH_SIZE] = *b"0123456789abcdef0123456789ABCDEF";

        let mut expected = [0u8; DIGIT_BATCH_SIZE];
        for (lane, value) in expected.iter_mut().enumerate() {
            *value = (lane % 16) as u8;
        }

        assert_eq!(
            hex_digit_simd::<DIGIT_BATCH_SIZE>(Simd::from_array(HEX_DIGITS)),
            Simd::from_array(expected)
        );
    }

    // Scalar byte decoding: valid pairs decode to the expected byte through all
    // three entry points, and pairs with an invalid digit in EITHER position
    // are rejected.
    #[test]
    fn test_hex_byte() {
        const HEX_BYTES_VALID: &[([u8; 2], u8)] = &[
            (*b"ff", 0xff),
            (*b"00", 0x00),
            (*b"11", 0x11),
            (*b"ef", 0xef),
            (*b"fe", 0xfe),
            (*b"0f", 0x0f),
            (*b"f0", 0xf0),
            // upper-case and mixed-case digits
            (*b"AB", 0xab),
            (*b"cD", 0xcd),
        ];

        for (hb, b) in HEX_BYTES_VALID {
            assert_eq!(hex_byte(hb[0], hb[1]), Some(*b));

            assert_eq!(HexByteDecoderA::decode_unpacked(hb[0], hb[1]), *b as u16);

            assert_eq!(HexByteDecoderA::decode_packed(hb), *b as u16);
        }

        const HEX_BYTES_INVALID: &[[u8; 2]] = &[
            // invalid low digit
            *b"fg", *b"0g", *b"1g", *b"eg",
            // invalid high digit (flagged through a different bit of
            // `WIDE_INVALID_BIT` than an invalid low digit)
            *b"gf", *b"g0", *b"g1", *b"ge",
            // both digits invalid
            *b"gg",
        ];

        for hb in HEX_BYTES_INVALID {
            assert_eq!(hex_byte(hb[0], hb[1]), None, "{hb:?} should be rejected");
            assert_ne!(
                HexByteDecoderA::decode_unpacked(hb[0], hb[1]) & WIDE_INVALID_BIT,
                0,
                "{hb:?} should be flagged as invalid"
            );

            assert_ne!(
                HexByteDecoderA::decode_packed(hb) & WIDE_INVALID_BIT,
                0,
                "{hb:?} should be flagged as invalid"
            );
        }
    }

    // Batched byte decoding: a full batch of valid pairs decodes to the
    // expected bytes, and corrupting any single digit makes the whole batch
    // decode to `None`.
    #[test]
    fn test_hex_byte_simd() {
        const HEX_BYTES_VALID: [[u8; 2]; WIDE_BATCH_SIZE] = [
            *b"ff", *b"00", *b"11", *b"ef", *b"fe", *b"0f", *b"f0", *b"34", *b"ff", *b"00", *b"11",
            *b"ef", *b"fe", *b"0f", *b"f0", *b"34",
        ];
        const BYTES_VALID: [u8; WIDE_BATCH_SIZE] = [
            0xff, 0x00, 0x11, 0xef, 0xfe, 0x0f, 0xf0, 0x34, 0xff, 0x00, 0x11, 0xef, 0xfe, 0x0f,
            0xf0, 0x34,
        ];

        let hex_bytes = conv::u8x2_to_u8(HEX_BYTES_VALID);
        let bytes = Simd::from_array(BYTES_VALID);
        if_trace_simd! {
            println!("hex_bytes: {HEX_BYTES_VALID:02x?}");
            println!("hex_bytes: {hex_bytes:02x?}");
            println!("bytes: {BYTES_VALID:02x?}");
            println!("bytes: {bytes:04x?}");
        }
        assert_eq!(HexByteDecoderA::decode_simd(hex_bytes), Some(bytes));

        // Replace one digit at a time with a non-hex character; this covers
        // both the high-digit and low-digit position of every byte.
        for pos in 0..DIGIT_BATCH_SIZE {
            let mut corrupted = hex_bytes;
            corrupted[pos] = b'g';
            assert_eq!(
                HexByteDecoderA::decode_simd(corrupted),
                None,
                "invalid digit at position {pos} was not rejected"
            );
        }
    }

    // Shared body for the slice-decoding tests.
    //
    // `test_f!(boxed f)` checks a decoder `f` that takes `&[u8]` and returns
    // an `Option` of a boxed result: every entry of `SAMPLES` must decode to
    // its expected bytes, and every entry of `INVALID_SAMPLES` must yield
    // `None`.
    //
    // The internal `@` arm takes the decoder plus a function that maps a
    // reference to the decoded value to something comparable with `&[u8]`.
    macro_rules! test_f {
        (boxed $f:ident) => {
            test_f!(@ $f, Box::as_ref)
        };
        (@ $f:ident, $trans:expr) => {
            // Valid samples must round-trip to the original text.
            for (i, Sample { bytes, hex_bytes }) in SAMPLES.into_iter().enumerate() {
                let result = $f(hex_bytes.as_bytes());
                assert_eq!(
                    result.as_ref().map($trans),
                    Some(bytes.as_bytes()),
                    "Sample {i} ({hex_bytes:?} => {bytes:?}) did not decode correctly (expected Some)"
                );
            }

            // Invalid samples must be rejected.
            for (i, hex_bytes) in INVALID_SAMPLES.into_iter().enumerate() {
                let result = $f(hex_bytes.as_bytes());
                assert_eq!(
                    result.as_ref().map($trans),
                    None,
                    "Sample {i} ({hex_bytes:?}) did not decode correctly (expected None)"
                );
            }
        };
    }

    // Runs the shared valid/invalid sample checks against `hex_bytes_dyn`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_dyn_iter_option() {
        test_f!(boxed hex_bytes_dyn);
    }

    // Runs the shared valid/invalid sample checks against
    // `hex_bytes_dyn_unsafe`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_dyn_unsafe() {
        test_f!(boxed hex_bytes_dyn_unsafe);
    }

    // Runs the shared valid/invalid sample checks against
    // `hex_bytes_dyn_unsafe_iter`.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_dyn_unsafe_iter() {
        test_f!(boxed hex_bytes_dyn_unsafe_iter);
    }
}