WIP: Add encoder

This commit is contained in:
Michael Pfaff 2022-11-01 19:21:15 -04:00
parent c1197284af
commit 5a98826784
Signed by: michael
GPG Key ID: CF402C4A012AA9D4
6 changed files with 225 additions and 62 deletions

View File

@ -8,9 +8,9 @@ use alloc::{boxed::Box, vec::Vec};
use crate::{simd, util};
use simd::SimdTestAnd as _;
use simd::SimdBitwise as _;
use simd::{SIMD_WIDTH, if_trace_simd};
use simd::SimdTestAnd as _;
use simd::{if_trace_simd, SIMD_WIDTH};
use util::array_op;
@ -34,22 +34,24 @@ pub const INVALID_BIT: u8 = 0b1000_0000;
pub const WIDE_INVALID_BIT: u16 = 0b1000_1000_0000_0000;
const ASCII_DIGITS: [u8; 256] = {
array_op!(gen[256] |i| {
const DIGIT_MIN: u8 = '0' as u8;
const DIGIT_MAX: u8 = '9' as u8;
const LOWER_MIN: u8 = 'a' as u8;
const LOWER_MAX: u8 = 'f' as u8;
const UPPER_MIN: u8 = 'A' as u8;
const UPPER_MAX: u8 = 'F' as u8;
array_op!(
gen[256] | i | {
const DIGIT_MIN: u8 = '0' as u8;
const DIGIT_MAX: u8 = '9' as u8;
const LOWER_MIN: u8 = 'a' as u8;
const LOWER_MAX: u8 = 'f' as u8;
const UPPER_MIN: u8 = 'A' as u8;
const UPPER_MAX: u8 = 'F' as u8;
let i = i as u8;
match i {
DIGIT_MIN..=DIGIT_MAX => i - DIGIT_MIN,
LOWER_MIN..=LOWER_MAX => 10 + i - LOWER_MIN,
UPPER_MIN..=UPPER_MAX => 10 + i - UPPER_MIN,
_ => INVALID_BIT,
let i = i as u8;
match i {
DIGIT_MIN..=DIGIT_MAX => i - DIGIT_MIN,
LOWER_MIN..=LOWER_MAX => 10 + i - LOWER_MIN,
UPPER_MIN..=UPPER_MAX => 10 + i - UPPER_MIN,
_ => INVALID_BIT,
}
}
})
)
};
const __ASCII_DIGITS_SIMD: [u32; 256] = cast_u8_u32(ASCII_DIGITS);
@ -112,13 +114,13 @@ pub trait HexByteSimdDecoder {
pub struct HexByteDecoderA;
impl const HexByteDecoder for HexByteDecoderA {
// util::defer_impl! {
// => HexByteDecoderA;
//
// fn decode_unpacked(hi: u8, lo: u8) -> u16;
//
// fn decode_packed(hi_lo: &[u8; 2]) -> u16;
// }
// util::defer_impl! {
// => HexByteDecoderA;
//
// fn decode_unpacked(hi: u8, lo: u8) -> u16;
//
// fn decode_packed(hi_lo: &[u8; 2]) -> u16;
// }
#[inline(always)]
fn decode_unpacked(hi: u8, lo: u8) -> u16 {
@ -277,11 +279,11 @@ macro_rules! merge_hex_digits_into_bytes_inline {
}
impl HexByteSimdDecoder for HexByteDecoderA {
// util::defer_impl! {
// => HexByteDecoderA;
//
// fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>>;
// }
// util::defer_impl! {
// => HexByteDecoderA;
//
// fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>>;
// }
#[inline(always)]
fn decode_simd(hi_los: [u8; DIGIT_BATCH_SIZE]) -> Option<Simd<u8, WIDE_BATCH_SIZE>> {
@ -523,34 +525,7 @@ pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
mod test {
use super::*;
const BYTES: &str = "Donald J. Trump!";
const HEX_BYTES: &str = "446F6E616C64204A2E205472756D7021";
const LONG_BYTES: &str = "Dolorum distinctio ut earum quidem distinctio necessitatibus quam. Sit praesentium facere perspiciatis iure aut sunt et et. Adipisci enim rerum illum et officia nisi recusandae. Vitae doloribus ut quia ea unde consequuntur quae illum. Id eius harum est. Inventore ipsum ut sit ut vero consectetur.";
const LONG_HEX_BYTES: &str = "446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E";
struct Sample {
bytes: &'static str,
hex_bytes: &'static str,
}
const SAMPLES: &[Sample] = &[
Sample {
bytes: BYTES,
hex_bytes: HEX_BYTES,
},
Sample {
bytes: LONG_BYTES,
hex_bytes: LONG_HEX_BYTES,
},
];
const INVALID_SAMPLES: &[&str] = &[
"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722G",
"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E7365637465747572GE",
"446F6C6F72756D2064697374696E6374696G20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
"446F6C6F72756D2064697374696E637469GF20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
];
use crate::test::*;
#[test]
fn test_hex_digit() {

134
src/enc.rs Normal file
View File

@ -0,0 +1,134 @@
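//! SIMD-accelerated hex encoding.
//!
//! NOTE(review): the encoders in this module currently use a scalar table lookup only;
//! no SIMD code path is implemented in this file yet.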
use std::mem::MaybeUninit;
use crate::{simd, util};
use util::array_op;
/// Lowercase ASCII hex digits, indexed by nibble value (`0..=15`).
const HEX_CHARS_LOWER: [u8; 16] = *b"0123456789abcdef";
/// Uppercase ASCII hex digits, indexed by nibble value (`0..=15`).
const HEX_CHARS_UPPER: [u8; 16] = *b"0123456789ABCDEF";
/// C-style ternary: `select!(cond ? a : b)` expands to `if cond { a } else { b }`.
///
/// Two call forms are accepted:
/// * bare identifiers: `select!(flag ? yes : no)`
/// * parenthesised expressions: `select!((x < y) ? (x + 1) : (y + 1))`
macro_rules! select {
    ($cond:ident ? $on_true:ident : $on_false:ident) => {
        select!(@branch $cond, $on_true, $on_false)
    };
    (($cond:expr) ? ($on_true:expr) : ($on_false:expr)) => {
        select!(@branch $cond, $on_true, $on_false)
    };
    // Internal arm: both public forms end up as the same plain `if`/`else` expression.
    (@branch $cond:expr, $on_true:expr, $on_false:expr) => {
        if $cond {
            $on_true
        } else {
            $on_false
        }
    };
}
/// Scalar, table-driven encoder body.
///
/// Expands to a block that fills `$dst` (a buffer of `MaybeUninit<u8>`) two slots at a
/// time: for each byte of `$src` the high nibble's digit is written first, then the low
/// nibble's digit. `$UPPER` picks `HEX_CHARS_UPPER` when true, `HEX_CHARS_LOWER` otherwise.
///
/// The loop is driven by `$dst.len()`, so callers must size `$dst` at twice the length
/// of `$src`.
macro_rules! const_impl {
    ($UPPER:ident, $src:ident, $dst:ident) => {{
        // The digit table does not change inside the loop, so pick it once up front.
        let digits = select!($UPPER ? HEX_CHARS_UPPER : HEX_CHARS_LOWER);
        let out_len = $dst.len();
        let mut pos = 0;
        // A `while` loop (rather than an iterator) keeps the body usable in const contexts.
        while pos < out_len {
            // Output slots `pos` and `pos + 1` both belong to input byte `pos / 2`.
            let byte = $src[pos / 2];
            $dst[pos] = MaybeUninit::new(digits[(byte >> 4) as usize]);
            $dst[pos + 1] = MaybeUninit::new(digits[(byte & 0x0f) as usize]);
            pos += 2;
        }
    }};
}
/// Encoder body used by the [`Encode`] trait methods.
///
/// At present this simply forwards its three arguments, unchanged, to `const_impl!`.
macro_rules! common_impl {
    ($upper:ident, $input:ident, $output:ident) => {
        const_impl!($upper, $input, $output)
    };
}
/// Hex encoding of byte input, exposed as associated functions (no `self` receiver).
///
/// The sized variants return exactly `N * 2` output bytes for an `N`-byte input.
pub trait Encode {
/// Encodes the sized input on the stack.
///
/// Takes a reference to an `N`-byte array and returns a `[u8; N * 2]` by value.
fn enc_sized<const N: usize>(src: &[u8; N]) -> [u8; N * 2]
where
// Empty bound on the output array type; presumably required by `generic_const_exprs`
// so that `N * 2` may appear in the signature.
[u8; N * 2]:;
/// Encodes the sized input on the heap.
///
/// Takes a reference to an `N`-byte array and returns the `[u8; N * 2]` result in a `Box`.
fn enc_sized_heap<const N: usize>(src: &[u8; N]) -> Box<[u8; N * 2]>
where
[u8; N * 2]:;
/// Encodes the unsized input on the heap.
///
/// Accepts a byte slice of any length and returns a boxed slice.
fn enc_slice(src: &[u8]) -> Box<[u8]>;
}
/// Zero-sized hex encoder type.
///
/// `UPPER` selects the digit table used by the encoding routines: `true` picks
/// `HEX_CHARS_UPPER`, `false` (the default) picks `HEX_CHARS_LOWER`.
pub struct Encoder<const UPPER: bool = false>;
/// Table-lookup implementation of [`Encode`]; `UPPER` selects the digit case.
impl<const UPPER: bool> Encode for Encoder<UPPER> {
    /// Encodes `src` into a stack-allocated array of `N * 2` hex digits.
    #[inline]
    fn enc_sized<const N: usize>(src: &[u8; N]) -> [u8; N * 2]
    where
        [u8; N * 2]:,
    {
        let mut out: [MaybeUninit<u8>; N * 2] = MaybeUninit::uninit_array();
        common_impl!(UPPER, src, out);
        // SAFETY: the encoding loop writes slots `i` and `i + 1` for every even `i` below
        // `out.len()`, and `out.len()` is `N * 2`, so every element is initialized.
        unsafe { MaybeUninit::array_assume_init(out) }
    }

    /// Encodes `src` into a heap-allocated array of `N * 2` hex digits.
    #[inline]
    fn enc_sized_heap<const N: usize>(src: &[u8; N]) -> Box<[u8; N * 2]>
    where
        [u8; N * 2]:,
    {
        // SAFETY: the boxed value is an array of `MaybeUninit<u8>`, which is allowed to
        // hold uninitialized bytes, so treating the allocation as initialized is fine.
        let mut out: Box<[MaybeUninit<u8>; N * 2]> = unsafe { Box::new_uninit().assume_init() };
        common_impl!(UPPER, src, out);
        let raw = Box::into_raw(out);
        // SAFETY: `raw` came from `Box::into_raw` just above, `MaybeUninit<u8>` has the
        // same layout as `u8`, and the encoding loop has written every element.
        unsafe { Box::from_raw(raw.cast()) }
    }

    /// Encodes `src` into a heap-allocated slice of `src.len() * 2` hex digits.
    #[inline]
    fn enc_slice(src: &[u8]) -> Box<[u8]> {
        let mut out: Box<[MaybeUninit<u8>]> = Box::new_uninit_slice(src.len() * 2);
        common_impl!(UPPER, src, out);
        // SAFETY: the slice length is even and the encoding loop writes two slots per
        // step until it reaches that length, so every element is initialized.
        unsafe { out.assume_init() }
    }
}
impl<const UPPER: bool> Encoder<UPPER> {
    /// Encodes `src` into a stack-allocated array of `N * 2` hex digits using
    /// `const_impl!` directly.
    ///
    /// NOTE(review): despite its name, this function is not currently declared `const fn`.
    #[inline]
    pub fn enc_const<const N: usize>(src: &[u8; N]) -> [u8; N * 2]
    where
        [u8; N * 2]:,
    {
        let mut out: [MaybeUninit<u8>; N * 2] = MaybeUninit::uninit_array();
        const_impl!(UPPER, src, out);
        // SAFETY: `const_impl!` writes slots `i` and `i + 1` for every even `i` below
        // `out.len()`, and `out.len()` is `N * 2`, so every element is initialized.
        unsafe { MaybeUninit::array_assume_init(out) }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::*;

    /// Declares a `#[test]` function called `$name` that evaluates `$expr` twice:
    /// first with `$sb`/`$shb` bound to the short fixture pair (`BYTES`, `HEX_BYTES`),
    /// then with them bound to the long pair (`LONG_BYTES`, `LONG_HEX_BYTES`).
    macro_rules! for_each_sample {
        ($name:ident, |$sb:ident, $shb:ident| $expr:expr) => {
            #[test]
            fn $name() {
                {
                    let ($sb, $shb) = (BYTES, HEX_BYTES);
                    $expr;
                }
                {
                    let ($sb, $shb) = (LONG_BYTES, LONG_HEX_BYTES);
                    $expr;
                }
            }
        };
    }

    // The hex fixtures use uppercase digits, so the uppercase encoder is the one tested.
    type Enc = Encoder<true>;

    for_each_sample!(enc_const, |b, hb| assert_eq!(Enc::enc_const(b), *hb));
    for_each_sample!(enc_sized, |b, hb| assert_eq!(Enc::enc_sized(b), *hb));
    for_each_sample!(enc_sized_heap, |b, hb| assert_eq!(
        Enc::enc_sized_heap(b),
        Box::new(*hb)
    ));
    for_each_sample!(enc_slice, |b, hb| assert_eq!(
        Enc::enc_slice(b),
        Box::<[u8]>::from(&hb[..])
    ));
}

View File

@ -14,14 +14,16 @@
#![feature(const_maybe_uninit_uninit_array)]
#![cfg_attr(feature = "alloc", feature(new_uninit))]
#![feature(portable_simd)]
// ignores warning about `generic_const_exprs`
#![allow(incomplete_features)]
#[cfg(feature = "alloc")]
extern crate alloc;
pub(crate) mod util;
pub(crate) mod simd;
pub(crate) mod util;
pub(crate) mod test;
pub mod dec;
pub mod enc;

View File

@ -1,4 +1,4 @@
use core::simd::{LaneCount, Simd, SupportedLaneCount, SimdElement};
use core::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use crate::util::cast;
@ -39,7 +39,11 @@ pub trait IsSimd {
const LANES: usize;
}
impl<T, const LANES: usize> IsSimd for Simd<T, LANES> where LaneCount<LANES>: SupportedLaneCount, T: SimdElement {
impl<T, const LANES: usize> IsSimd for Simd<T, LANES>
where
LaneCount<LANES>: SupportedLaneCount,
T: SimdElement,
{
type Lane = T;
const LANES: usize = LANES;

38
src/test.rs Normal file
View File

@ -0,0 +1,38 @@
/// Converts a byte-array constant into a `&str`; usable in `const` items.
///
/// The bytes are validated with the checked `std::str::from_utf8`. If a fixture is ever
/// edited into invalid UTF-8, constant evaluation fails with the message below (or the
/// call panics when used at runtime) instead of silently producing an invalid `str`.
macro_rules! from_utf8 {
    ($bytes:ident) => {
        match std::str::from_utf8($bytes) {
            Ok(text) => text,
            Err(_) => panic!("byte constant is not valid UTF-8"),
        }
    };
}
/// Short sample: 16 raw ASCII bytes.
pub const BYTES: &[u8; 16] = b"Donald J. Trump!";
/// Uppercase hex digits for [`BYTES`] (32 ASCII bytes).
pub const HEX_BYTES: &[u8; 32] = b"446F6E616C64204A2E205472756D7021";
/// [`BYTES`] viewed as a `&str`.
pub const STR: &str = from_utf8!(BYTES);
/// [`HEX_BYTES`] viewed as a `&str`.
pub const HEX_STR: &str = from_utf8!(HEX_BYTES);
/// Long sample: 297 raw ASCII bytes of filler text.
pub const LONG_BYTES: &[u8; 297] = b"Dolorum distinctio ut earum quidem distinctio necessitatibus quam. Sit praesentium facere perspiciatis iure aut sunt et et. Adipisci enim rerum illum et officia nisi recusandae. Vitae doloribus ut quia ea unde consequuntur quae illum. Id eius harum est. Inventore ipsum ut sit ut vero consectetur.";
/// Uppercase hex digits for [`LONG_BYTES`] (594 ASCII bytes).
pub const LONG_HEX_BYTES: &[u8; 594] = b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E";
/// [`LONG_BYTES`] viewed as a `&str`.
pub const LONG_STR: &str = from_utf8!(LONG_BYTES);
/// [`LONG_HEX_BYTES`] viewed as a `&str`.
pub const LONG_HEX_STR: &str = from_utf8!(LONG_HEX_BYTES);
/// A string-typed fixture pair: raw text together with its hex-digit form.
pub struct Sample {
/// The raw (decoded) text.
pub bytes: &'static str,
/// The hex digits corresponding to `bytes`.
pub hex_bytes: &'static str,
}
/// The valid fixture pairs: the short sample followed by the long sample.
pub const SAMPLES: &[Sample] = &[
Sample {
bytes: STR,
hex_bytes: HEX_STR,
},
Sample {
bytes: LONG_STR,
hex_bytes: LONG_HEX_STR,
},
];
/// Hex strings that contain a non-hex character (`G`).
///
/// Each entry appears to be `LONG_HEX_BYTES` with a single digit replaced by `G`, placed
/// either at the very end or near the start, and in either the low or the high digit of
/// a two-digit pair.
pub const INVALID_SAMPLES: &[&str] = &[
// `G` as the final character (low digit of the last pair).
"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722G",
// `G` as the second-to-last character (high digit of the last pair).
"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E7365637465747572GE",
// `G` near the start, as the low digit of a pair (`6G`).
"446F6C6F72756D2064697374696E6374696G20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
// `G` near the start, as the high digit of a pair (`GF`).
"446F6C6F72756D2064697374696E637469GF20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
];

View File

@ -5,13 +5,23 @@ macro_rules! __array_op {
let mut out = std::mem::MaybeUninit::uninit_array();
let mut i = 0;
while i < $len {
out[i] = std::mem::MaybeUninit::new(match i { $i => $val });
out[i] = std::mem::MaybeUninit::new(match i {
$i => $val,
});
i += 1;
}
unsafe { std::mem::MaybeUninit::array_assume_init(out) }
}};
(map[$len:expr, $src:expr] |$i:pat_param, $s:pat_param| $val:expr) => {{
$crate::util::array_op!(gen[$len] |i| match i { $i => match $src[i] { $s => $val } })
$crate::util::array_op!(
gen[$len]
| i
| match i {
$i => match $src[i] {
$s => $val,
},
}
)
}};
}