This commit is contained in:
Michael Pfaff 2022-10-31 22:42:01 -04:00
parent 379fa71d78
commit ee3bdc43cf
Signed by: michael
GPG Key ID: CF402C4A012AA9D4
4 changed files with 556 additions and 233 deletions

View File

@@ -33,6 +33,7 @@ fn test_sized<const N: usize, const HEAP_ONLY: bool>(hex_bytes: &[u8; N * 2], by
where
[(); N * 2]:,
{
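// NOTE: this early return currently skips the remaining assertions in this helper.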
return;
test(hex_bytes, bytes);
if !HEAP_ONLY {
assert_eq!(
@@ -64,11 +65,6 @@ fn test(hex_bytes: &[u8], bytes: &[u8]) {
hex_bytes_dyn(hex_bytes).as_ref().map(Box::as_ref),
stringify!(hex_bytes_dyn)
);
assert_eq!(
Some(bytes),
hex_bytes_dyn_niched(hex_bytes).as_ref().map(Box::as_ref),
stringify!(hex_bytes_dyn_niched)
);
assert_eq!(
Some(bytes),
hex_bytes_dyn_unsafe_iter(hex_bytes)
@@ -115,7 +111,6 @@ const BENCH_UNSAFE: bool = true;
const BENCH_UNSAFE_ITER: bool = true;
const BENCH_UNSAFE_ITER_NICHED: bool = true;
const BENCH_NON_NICHED: bool = true;
const BENCH_NICHED: bool = true;
fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
if BENCH_UNSAFE {
@@ -139,11 +134,6 @@ fn benchmark(name: &str, bytes: &[u8], c: &mut Criterion) {
b.iter(|| hex_bytes_dyn(black_box(bytes)))
});
}
if BENCH_NICHED {
c.bench_function(name!(name, "dyn niched"), |b| {
b.iter(|| hex_bytes_dyn_niched(black_box(bytes)))
});
}
}
pub fn bench_16(c: &mut Criterion) {
@@ -243,6 +233,51 @@ impl<'a> std::fmt::Display for DisplayAsHexDigits<'a> {
}
}
pub fn bench_2k(c: &mut Criterion) {
const LEN: usize = 1024 * 2;
const LEN2: usize = LEN * 2;
let mut hex_bytes: [MaybeUninit<u8>; LEN2] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
let mut rng = rand::thread_rng();
for b in hex_bytes.iter_mut() {
*b = MaybeUninit::new(*HEX_CHARS.choose(&mut rng).unwrap());
}
let hex_bytes: [u8; LEN2] = unsafe { std::mem::transmute(hex_bytes) };
let bytes = match hex_bytes_dyn(hex_bytes.as_ref()) {
Some(b) => b,
None => {
panic!(
"Generated hex bytes were invalid: {}",
DisplayAsHexDigits(hex_bytes.as_ref())
);
}
};
test_sized::<LEN, false>(&hex_bytes, bytes.as_ref().try_into().unwrap());
benchmark_sized::<LEN, false>("2k", &hex_bytes, c);
}
pub fn bench_512k(c: &mut Criterion) {
const LEN: usize = 1024 * 512;
const LEN2: usize = LEN * 2;
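// LEN2 is 1 MiB here, so the input is generated on the heap and HEAP_ONLY = true skips
// the non-heap paths in test_sized/benchmark_sized.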
let mut hex_bytes: Box<[MaybeUninit<u8>; LEN2]> =
unsafe { std::mem::transmute(Box::<[u8; LEN2]>::new_uninit()) };
let mut rng = rand::thread_rng();
for b in hex_bytes.iter_mut() {
*b = MaybeUninit::new(*HEX_CHARS.choose(&mut rng).unwrap());
}
let hex_bytes: Box<[u8; LEN2]> = unsafe { std::mem::transmute(hex_bytes) };
let bytes = match hex_bytes_dyn(hex_bytes.as_ref()) {
Some(b) => b,
None => {
panic!(
"Generated hex bytes were invalid: {}",
DisplayAsHexDigits(hex_bytes.as_ref())
);
}
};
test_sized::<LEN, true>(&hex_bytes, bytes.as_ref().try_into().unwrap());
benchmark_sized::<LEN, true>("512k", &hex_bytes, c);
}
pub fn bench_1_6m(c: &mut Criterion) {
const LEN: usize = 1_600_000;
const LEN2: usize = LEN * 2;
@@ -379,7 +414,7 @@ pub fn bench_nano_hex_byte(c: &mut Criterion) {
bench_decoder::<HexByteDecoderB>(c, stringify!(HexByteDecoderB));
}
criterion_group!(decode_benches, bench_16, bench_256, bench_1_6m);
criterion_group!(decode_benches, bench_16, bench_256, bench_2k, bench_512k, bench_1_6m);
criterion_group!(micro_benches, bench_micro_hex_digit, bench_micro_hex_byte);
criterion_group!(nano_benches, bench_nano_hex_digit, bench_nano_hex_byte);
criterion_main!(decode_benches, micro_benches, nano_benches);

View File

@@ -29,6 +29,8 @@ pub const WIDE_BATCH_SIZE: usize = SIMD_WIDTH / 16;
/// The batch size used for the hex digits.
pub const DIGIT_BATCH_SIZE: usize = WIDE_BATCH_SIZE * 2;
const TRACE_SIMD: bool = false;
#[inline]
const fn alternating_mask<const N: usize>(first_bias: bool) -> [bool; N] {
let mut mask = [false; N];
@@ -82,6 +84,17 @@ const fn alternating_indices<const N: usize>(first_bias: bool) -> [usize; N] {
indices
}
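/// Element-wise narrowing of a `[usize; N]` to `[u8; N]`, written as a `const fn` loop
/// because `array::map` is not usable in `const` contexts.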
#[inline]
const fn cast_usize_u8<const N: usize>(arr: [usize; N]) -> [u8; N] {
let mut arr1 = [0u8; N];
let mut i = 0;
while i < arr.len() {
arr1[i] = arr[i] as u8;
i += 1;
}
arr1
}
const MSB_INDICES: [usize; DIGIT_BATCH_SIZE / 2] = alternating_indices(true);
const LSB_INDICES: [usize; DIGIT_BATCH_SIZE / 2] = alternating_indices(false);
@@ -112,6 +125,18 @@ const ASCII_DIGITS: [u8; 256] = {
digits
};
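// A `u32`-widened copy of `ASCII_DIGITS` so the 32-bit gather below (VPGATHERDD with a
// 4-byte stride) can index it directly.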
const __ASCII_DIGITS_SIMD: [u32; 256] = {
let mut digits = [0u32; 256];
let mut i = 0usize;
// cover the full 0..=255 range so the last entry (0xFF) is not left mapped to 0
while i < digits.len() {
digits[i] = ASCII_DIGITS[i] as u32;
i += 1;
}
digits
};
const ASCII_DIGITS_SIMD: *const i32 = &__ASCII_DIGITS_SIMD as *const u32 as *const i32;
/// Returns [`INVALID_BIT`] if invalid. Based on `char.to_digit()` in the stdlib.
#[inline]
pub const fn hex_digit(ascii: u8) -> u8 {
@@ -397,136 +422,248 @@ pub mod conv {
}
}
#[inline(always)]
const fn align_down_to<const N: usize>(n: usize) -> usize {
let shift = match N.checked_ilog2() {
Some(x) => x,
None => 0,
};
return n >> shift << shift;
}
#[inline(always)]
const fn align_up_to<const N: usize>(n: usize) -> usize {
let shift = match N.checked_ilog2() {
Some(x) => x,
None => 0,
};
return (n + (N - 1)) >> shift << shift;
}
macro_rules! decode_hex_bytes_non_vectored {
($i:ident, $ascii:ident, $bytes:ident, $o:expr) => {{
($i:ident, $ascii:ident, $bytes:ident) => {{
//let mut bad = 0u16;
let mut bad = 0u8;
while $i < $ascii.len() {
match unsafe { hex_byte(*$ascii.get_unchecked($i), *$ascii.get_unchecked($i + 1)) } {
Some(b) => unsafe { *$bytes.get_unchecked_mut($o + ($i >> 1)) = MaybeUninit::new(b) },
/*let b = HBD::decode_packed(unsafe { &*($ascii.as_ptr().add($i) as *const [u8; 2]) });
bad |= b;
unsafe { *$bytes.get_unchecked_mut($i >> 1) = MaybeUninit::new(b as u8) };*/
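// Decode both nibbles branch-free; an invalid digit sets INVALID_BIT in `bad`, which is
// checked once after the loop instead of per byte.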
let [hi, lo] = unsafe { *($ascii.as_ptr().add($i) as *const [u8; 2]) };
let lo = hex_digit(lo);
let hi = hex_digit(hi);
bad |= lo;
bad |= hi;
let b = (hi << 4) | lo;
unsafe { *$bytes.get_unchecked_mut($i >> 1) = MaybeUninit::new(b) };
/*match unsafe { hex_byte(*$ascii.get_unchecked($i), *$ascii.get_unchecked($i + 1)) } {
Some(b) => unsafe { *$bytes.get_unchecked_mut($i >> 1) = MaybeUninit::new(b) },
None => {
//println!("bad hex byte at {} ({}{})", $i, $ascii[$i] as char, $ascii[$i + 1] as char);
return false
}
}
}*/
$i += 2;
}
//if (bad & WIDE_INVALID_BIT) != 0 {
if (bad & INVALID_BIT) != 0 {
return false;
}
}};
}
/*simd::swizzle_indices!(MSB_INDICES = [
0, 2, 4, 6,
8, 10, 12, 14,
16, 18, 20, 22,
24, 26, 28, 30
], [_ . . . _ . . . _ . . . _ . . .]);
simd::swizzle_indices!(LSB_INDICES = [
1, 3, 5, 7,
9, 11, 13, 15,
17, 19, 21, 23,
25, 27, 29, 31
], [_ . . . _ . . . _ . . . _ . . .]);*/
#[inline(always)]
fn decode_hex_bytes_unchecked(ascii: &[u8], bytes: &mut [MaybeUninit<u8>]) -> bool {
// these checks should always be eliminated because they are performed more efficiently
// (sometimes statically) in the callers, but they provide a major safeguard against nasty
// memory safety issues.
debug_assert_eq!(
ascii.len() >> 1 << 1,
ascii.len(),
"len of ascii is not a multiple of 2"
);
if ascii.len() >> 1 << 1 != ascii.len() {
return false;
}
debug_assert_eq!(
ascii.len() >> 1,
bytes.len(),
"len of ascii is not twice that of bytes"
);
const VECTORED_A: bool = false;
const VECTORED_B: bool = false;
const VECTORED_C: bool = false;
if VECTORED_A {
if ascii.len() >> 1 != bytes.len() {
return false;
}
const VECTORED: bool = true;
if VECTORED {
#[cfg(target_arch = "x86")]
use std::arch::x86 as arch;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64 as arch;
let mut i = 0;
while i < align_down_to::<DIGIT_BATCH_SIZE>(ascii.len()) {
let buf = HBD::decode_simd(unsafe {
*(ascii.as_ptr().add(i) as *const [u8; DIGIT_BATCH_SIZE])
});
let buf = match buf {
Some(buf) => buf,
None => return false,
let mut bad = Mask::splat(false);
while i < util::align_down_to::<DIGIT_BATCH_SIZE>(ascii.len()) {
const GATHER_BATCH_SIZE: usize = DIGIT_BATCH_SIZE / 4;
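// Look up GATHER_BATCH_SIZE digits per gather (8 lanes with 256-bit SIMD); four gathers
// cover one DIGIT_BATCH_SIZE chunk of ASCII, and the 32-bit results are narrowed back to
// bytes and repacked below.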
let hex_digits = unsafe {
let hi_los = ascii.as_ptr().add(i) as *const [u8; GATHER_BATCH_SIZE];
if TRACE_SIMD {
println!("hi_los: {:x?}", *hi_los);
}
let a = *hi_los;
let b = *hi_los.add(1);
let c = *hi_los.add(2);
let d = *hi_los.add(3);
let f = |x| __ASCII_DIGITS_SIMD[x as usize];
if TRACE_SIMD {
println!(
"{:x?}, {:x?}, {:x?}, {:x?}",
a.map(f),
b.map(f),
c.map(f),
d.map(f)
);
}
let a = Simd::from_array(a);
let b = Simd::from_array(b);
let c = Simd::from_array(c);
let d = Simd::from_array(d);
let a = a.cast::<u32>();
let b = b.cast::<u32>();
let c = c.cast::<u32>();
let d = d.cast::<u32>();
if TRACE_SIMD {
println!("{a:x?}, {b:x?}, {c:x?}, {d:x?}");
}
let a = a.into();
let b = b.into();
let c = c.into();
let d = d.into();
let a = arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, a, 4);
let b = arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, b, 4);
let c = arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, c, 4);
let d = arch::_mm256_i32gather_epi32(ASCII_DIGITS_SIMD, d, 4);
let a = Simd::<u32, GATHER_BATCH_SIZE>::from(a).cast::<u8>();
let b = Simd::<u32, GATHER_BATCH_SIZE>::from(b).cast::<u8>();
let c = Simd::<u32, GATHER_BATCH_SIZE>::from(c).cast::<u8>();
let d = Simd::<u32, GATHER_BATCH_SIZE>::from(d).cast::<u8>();
if TRACE_SIMD {
println!("{a:x?}, {b:x?}, {c:x?}, {d:x?}");
}
// load the 64-bit integers into registers
let a = simd::load_u64_m128(util::cast(a));
let b = simd::load_u64_m128(util::cast(b));
let c = simd::load_u64_m128(util::cast(c));
let d = simd::load_u64_m128(util::cast(d));
{
let a = Simd::<u8, 16>::from(a);
let b = Simd::<u8, 16>::from(b);
let c = Simd::<u8, 16>::from(c);
let d = Simd::<u8, 16>::from(d);
if TRACE_SIMD {
println!("a,b,c,d: {a:x?}, {b:x?}, {c:x?}, {d:x?}");
}
}
// copy the second 64-bit integer into the upper half of xmm0 (lower half is the first 64-bit integer)
let ab = simd::merge_low_hi_m128(a, b);
// copy the fourth 64-bit integer into the upper half of xmm2 (lower half is the third 64-bit integer)
let cd = simd::merge_low_hi_m128(c, d);
{
let ab = Simd::<u8, 16>::from(ab);
let cd = Simd::<u8, 16>::from(cd);
if TRACE_SIMD {
println!("ab,cd: {ab:x?}, {cd:x?}");
}
}
// merge the xmm0 and xmm1 (ymm1) registers into ymm0
simd::merge_m128_m256(ab, cd)
};
let mut j = 0;
while j < DIGIT_BATCH_SIZE {
unsafe {
*bytes.get_unchecked_mut((i >> 1) + j) =
MaybeUninit::new(*buf.as_array().get_unchecked(j))
};
j += 1;
let hex_digits: Simd<u8, DIGIT_BATCH_SIZE> = hex_digits.into();
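// Accumulate lanes whose INVALID_BIT is set; validity is only checked once, after the
// loop, via `!bad.any()`.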
bad |= (hex_digits & simd::splat_n::<DIGIT_BATCH_SIZE>(INVALID_BIT))
.simd_ne(simd::splat_0::<DIGIT_BATCH_SIZE>());
if TRACE_SIMD {
println!("hex_digits: {hex_digits:x?}");
}
let hex_digits: arch::__m256i = hex_digits.into();
let msb: arch::__m128i;
let lsb: arch::__m128i;
const MSB_INDICES_B: arch::__m128i = unsafe { std::mem::transmute(cast_usize_u8(MSB_INDICES)) };
const LSB_INDICES_B: arch::__m128i = unsafe { std::mem::transmute(cast_usize_u8(LSB_INDICES)) };
//unsafe { println!("MSB_INDICES_B: {MSB_INDICES_B:?}"); }
//unsafe { println!("LSB_INDICES_B: {LSB_INDICES_B:?}"); }
unsafe {
//simd::swizzle!(ymm_reg, hex_digits, msb, data(ymm_reg) MSB_INDICES_B);
//simd::swizzle!(ymm_reg, hex_digits, lsb, data(ymm_reg) LSB_INDICES_B);
//msb = simd_swizzle!(hex_digits, MSB_INDICES);
//lsb = simd_swizzle!(hex_digits, LSB_INDICES);
msb = simd::extract_lo_bytes(hex_digits);
lsb = simd::extract_hi_bytes(hex_digits);
}
let msb: Simd<u8, WIDE_BATCH_SIZE> = msb.into();
let lsb: Simd<u8, WIDE_BATCH_SIZE> = lsb.into();
if TRACE_SIMD {
println!("msb: {msb:x?}");
println!("lsb: {lsb:x?}");
println!("| Packed | Msb | Lsb | |");
Simd::<u8, DIGIT_BATCH_SIZE>::from(hex_digits)
.to_array()
.chunks(2)
.zip(msb.to_array())
.zip(lsb.to_array())
.for_each(|((chunk, msb), lsb)| {
println!(
"| {chunk:02x?} | {msb:x?} | {lsb:x?} | {ok} |",
chunk = (chunk[0] as u16) << 4 | (chunk[1] as u16),
ok = if chunk[0] == msb && chunk[1] == lsb {
'✓'
} else {
'✗'
}
);
});
}
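// Recombine the nibbles: each decoded byte is (high digit << 4) | low digit.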
let buf = (msb << simd::splat_n::<WIDE_BATCH_SIZE>(4)) | lsb;
//let buf: arch::__m128i = unsafe { util::cast(buf) };
let buf: arch::__m128i = buf.into();
unsafe {
// vmovaps xmm0, xmmword ptr [rsi]
// vmovups xmmword ptr [rdi], xmm0
//core::arch::asm!("vmovdqu8 {}, [{}]", in(xmm_reg) buf, in(reg) bytes.as_mut_ptr().add(i >> 1) as *mut i8);
//let all: arch::__m128i = Mask::<i64, 2>::splat(true).to_int().into();
//core::arch::asm!("vpmaskmovq {}, {}, [{}]", in(xmm_reg) buf, in(xmm_reg) all, in(xmm_reg) bytes.as_mut_ptr().add(i >> 1) as *mut i8);
//core::arch::asm!("vpmaskmovq {}, {}, [{}]", in(xmm_reg) buf, in(xmm_reg) 0u64, in(xmm_reg) bytes.as_mut_ptr().add(i >> 1) as *mut i8);
// arch::_mm_storeu_epi8(bytes.as_mut_ptr().add(i >> 1) as *mut i8, buf)
//arch::_mm_maskstore_epi64(bytes.as_mut_ptr().add(i >> 1) as *mut i64, std::mem::transmute(!0u128), buf);
core::arch::asm!("vmovdqa [{}], {}", in(reg) bytes.as_mut_ptr().add(i >> 1) as *mut i8, in(xmm_reg) buf);
};
i += DIGIT_BATCH_SIZE;
}
decode_hex_bytes_non_vectored!(i, ascii, bytes, 0);
} else if VECTORED_B {
let (ascii_pre, ascii_simd, ascii_post) =
unsafe { ascii.align_to::<Simd<u8, DIGIT_BATCH_SIZE>>() };
debug_assert_eq!(ascii_pre.len() % 2, 0);
debug_assert_eq!(ascii_post.len() % 2, 0);
let mut i = 0;
decode_hex_bytes_non_vectored!(i, ascii_pre, bytes, 0);
let mut i = 0;
while i < ascii_simd.len() {
// this to_array and any subsequent from_array should be eliminated anyway
let buf = HBD::decode_simd(unsafe { ascii_simd.get_unchecked(i) }.to_array());
let buf = match buf {
Some(buf) => buf,
None => return false,
};
let mut j = 0;
let k = ascii_pre.len() + i * DIGIT_BATCH_SIZE;
while j < DIGIT_BATCH_SIZE {
unsafe {
*bytes.get_unchecked_mut(k + j) =
MaybeUninit::new(*buf.as_array().get_unchecked(j))
};
j += 1;
}
i += 1;
}
let mut i = 0;
let k = ascii.len() - ascii_post.len();
decode_hex_bytes_non_vectored!(i, ascii_post, bytes, k);
} else if VECTORED_C {
let mut i = 0;
while i < align_down_to::<DIGIT_BATCH_SIZE>(ascii.len()) {
let buf = HBD::decode_simd(unsafe {
*(ascii.as_ptr().add(i) as *const [u8; DIGIT_BATCH_SIZE])
});
let buf = match buf {
Some(buf) => buf,
None => return false,
};
let mut j = 0;
while j < DIGIT_BATCH_SIZE {
unsafe {
*bytes.get_unchecked_mut((i >> 1) + j) =
MaybeUninit::new(*buf.as_array().get_unchecked(j))
};
j += 1;
}
i += DIGIT_BATCH_SIZE;
}
decode_hex_bytes_non_vectored!(i, ascii, bytes, 0);
decode_hex_bytes_non_vectored!(i, ascii, bytes);
!bad.any()
} else {
let mut i = 0;
decode_hex_bytes_non_vectored!(i, ascii, bytes, 0);
decode_hex_bytes_non_vectored!(i, ascii, bytes);
true
}
true
}
/// Use of this function should be restricted to `const` contexts because it is not vectorized like
@@ -536,7 +673,7 @@ pub const fn hex_bytes_sized_const<const N: usize>(ascii: &[u8; N * 2]) -> Optio
if N == 0 {
Some([0u8; N])
} else {
let mut bytes = MaybeUninit::<u8>::uninit_array::<N>();
let mut bytes = MaybeUninit::uninit_array();
let mut i = 0;
while i < N * 2 {
if i >> 1 >= bytes.len() {
@@ -559,7 +696,7 @@ pub fn hex_bytes_sized<const N: usize>(ascii: &[u8; N * 2]) -> Option<[u8; N]> {
if N == 0 {
Some([0u8; N])
} else {
let mut bytes = MaybeUninit::<u8>::uninit_array::<N>();
let mut bytes = MaybeUninit::uninit_array();
if decode_hex_bytes_unchecked(ascii, &mut bytes) {
Some(unsafe { MaybeUninit::array_assume_init(bytes) })
} else {
@@ -573,7 +710,7 @@ pub fn hex_bytes_sized_heap<const N: usize>(ascii: &[u8; N * 2]) -> Option<Box<[
if N == 0 {
Some(Box::new([0u8; N]))
} else {
let mut bytes = unsafe { Box::<[MaybeUninit<u8>; N]>::new_uninit().assume_init() };
let mut bytes = unsafe { Box::<[_; N]>::new_uninit().assume_init() };
if decode_hex_bytes_unchecked(ascii, bytes.as_mut()) {
Some(unsafe { Box::from_raw(Box::into_raw(bytes) as *mut [u8; N]) })
} else {
@@ -602,13 +739,12 @@ pub fn hex_bytes_dyn_unsafe_iter(ascii: &[u8]) -> Option<Box<[u8]>> {
if len << 1 != ascii.len() {
return None;
}
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
for (i, o) in ascii
let mut bytes = Box::new_uninit_slice(len);
for (i, [msb, lsb]) in ascii
.array_chunks::<2>()
.map(|[msb, lsb]| hex_byte(*msb, *lsb))
.enumerate()
{
if let Some(b) = o {
if let Some(b) = hex_byte(*msb, *lsb) {
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b) };
} else {
return None;
@@ -624,15 +760,17 @@ pub fn hex_bytes_dyn_unsafe_iter_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
return None;
}
let mut bytes = Box::<[u8]>::new_uninit_slice(len);
for (i, b) in ascii
for (i, [hi, lo]) in ascii
.array_chunks::<2>()
.map(HBD::decode_packed)
.enumerate()
{
if b & WIDE_INVALID_BIT != 0 {
let lo = hex_digit(*lo);
let hi = hex_digit(*hi);
if (lo & INVALID_BIT) | (hi & INVALID_BIT) != 0 {
return None;
}
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b as u8) };
let b = (hi << 4) | lo;
unsafe { *bytes.get_unchecked_mut(i) = MaybeUninit::new(b) };
}
Some(unsafe { Box::<[_]>::assume_init(bytes) })
}
@@ -648,37 +786,19 @@ pub fn hex_bytes_dyn(ascii: &[u8]) -> Option<Box<[u8]>> {
.map(|v| v.into_boxed_slice())
}
#[inline]
pub fn hex_bytes_dyn_niched(ascii: &[u8]) -> Option<Box<[u8]>> {
let iter = ascii.array_chunks::<2>();
if iter.remainder().len() != 0 {
return None;
}
iter.map(HBD::decode_packed)
.map(|b| {
if b & WIDE_INVALID_BIT != 0 {
None
} else {
Some(b as u8)
}
})
.collect::<Option<Vec<u8>>>()
.map(|v| v.into_boxed_slice())
}
#[cfg(test)]
mod test {
use super::*;
const BYTES: &[u8] = b"Donald J. Trump!";
const HEX_BYTES: &[u8] = b"446F6E616C64204A2E205472756D7021";
const BYTES: &str = "Donald J. Trump!";
const HEX_BYTES: &str = "446F6E616C64204A2E205472756D7021";
const LONG_BYTES: &[u8] = b"Dolorum distinctio ut earum quidem distinctio necessitatibus quam. Sit praesentium facere perspiciatis iure aut sunt et et. Adipisci enim rerum illum et officia nisi recusandae. Vitae doloribus ut quia ea unde consequuntur quae illum. Id eius harum est. Inventore ipsum ut sit ut vero consectetur.";
const LONG_HEX_BYTES: &[u8] = b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E";
const LONG_BYTES: &str = "Dolorum distinctio ut earum quidem distinctio necessitatibus quam. Sit praesentium facere perspiciatis iure aut sunt et et. Adipisci enim rerum illum et officia nisi recusandae. Vitae doloribus ut quia ea unde consequuntur quae illum. Id eius harum est. Inventore ipsum ut sit ut vero consectetur.";
const LONG_HEX_BYTES: &str = "446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E";
struct Sample {
bytes: &'static [u8],
hex_bytes: &'static [u8],
bytes: &'static str,
hex_bytes: &'static str,
}
const SAMPLES: &[Sample] = &[
@@ -692,11 +812,11 @@ mod test {
},
];
const INVALID_SAMPLES: &[&[u8]] = &[
b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722G",
b"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E7365637465747572GE",
b"446F6C6F72756D2064697374696E6374696G20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
b"446F6C6F72756D2064697374696E637469GF20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
const INVALID_SAMPLES: &[&str] = &[
"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722G",
"446F6C6F72756D2064697374696E6374696F20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E7365637465747572GE",
"446F6C6F72756D2064697374696E6374696G20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
"446F6C6F72756D2064697374696E637469GF20757420656172756D2071756964656D2064697374696E6374696F206E65636573736974617469627573207175616D2E20536974207072616573656E7469756D20666163657265207065727370696369617469732069757265206175742073756E742065742065742E20416469706973636920656E696D20726572756D20696C6C756D206574206F666669636961206E697369207265637573616E6461652E20566974616520646F6C6F7269627573207574207175696120656120756E646520636F6E73657175756E747572207175616520696C6C756D2E204964206569757320686172756D206573742E20496E76656E746F726520697073756D20757420736974207574207665726F20636F6E73656374657475722E",
];
#[test]
@@ -809,10 +929,12 @@ mod test {
let hex_bytes = conv::u8x2_to_u8(HEX_BYTES_VALID);
let bytes = Simd::from_array(BYTES_VALID);
println!("hex_bytes: {HEX_BYTES_VALID:02x?}");
println!("hex_bytes: {hex_bytes:02x?}");
println!("bytes: {BYTES_VALID:02x?}");
println!("bytes: {bytes:04x?}");
if TRACE_SIMD {
println!("hex_bytes: {HEX_BYTES_VALID:02x?}");
println!("hex_bytes: {hex_bytes:02x?}");
println!("bytes: {BYTES_VALID:02x?}");
println!("bytes: {bytes:04x?}");
}
assert_eq!(HexByteDecoderA::decode_simd(hex_bytes), Some(bytes));
assert_eq!(HexByteDecoderB::decode_simd(hex_bytes), Some(bytes));
@@ -832,38 +954,48 @@ mod test {
}*/
}
fn test_f(f: fn(&[u8]) -> Option<Box<[u8]>>) {
for (i, Sample { bytes, hex_bytes }) in SAMPLES.into_iter().enumerate() {
let result = f(hex_bytes);
assert_eq!(
Some(*bytes),
result.as_ref().map(Box::as_ref),
"Sample {i} did not decode correctly"
);
}
macro_rules! test_f {
(boxed $f:ident) => {
test_f!(@ $f, Box::as_ref)
};
(@ $f:ident, $trans:expr) => {
for (i, Sample { bytes, hex_bytes }) in SAMPLES.into_iter().enumerate() {
let result = $f(hex_bytes.as_bytes());
assert_eq!(
Some(bytes.as_bytes()),
result.as_ref().map($trans),
"Sample {i} ({hex_bytes:?} => {bytes:?}) did not decode correctly (expected Some)"
);
}
for (i, hex_bytes) in INVALID_SAMPLES.into_iter().enumerate() {
let result = f(hex_bytes);
assert_eq!(
None,
result.as_ref().map(Box::as_ref),
"Sample {i} did not decode correctly"
);
}
for (i, hex_bytes) in INVALID_SAMPLES.into_iter().enumerate() {
let result = $f(hex_bytes.as_bytes());
assert_eq!(
None,
result.as_ref().map($trans),
"Sample {i} ({hex_bytes:?}) did not decode correctly (expected None)"
);
}
};
}
#[test]
fn test_dyn_iter_option() {
test_f(hex_bytes_dyn);
}
#[test]
fn test_dyn_iter_u16() {
test_f(hex_bytes_dyn_niched);
test_f!(boxed hex_bytes_dyn);
}
#[test]
fn test_dyn_unsafe() {
test_f(hex_bytes_dyn_unsafe);
test_f!(boxed hex_bytes_dyn_unsafe);
}
#[test]
fn test_dyn_unsafe_iter() {
test_f!(boxed hex_bytes_dyn_unsafe_iter);
}
#[test]
fn test_dyn_unsafe_iter_niched() {
test_f!(boxed hex_bytes_dyn_unsafe_iter_niched);
}
}

View File

@@ -2,88 +2,202 @@ use std::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::util::cast;
pub trait SimdSplatZero<const LANES: usize> {
fn splat_zero() -> Simd<u8, LANES>
where
LaneCount<LANES>: SupportedLaneCount;
}
pub trait SimdSplatN<const LANES: usize> {
fn splat_n(n: u8) -> Simd<u8, LANES>
where
LaneCount<LANES>: SupportedLaneCount;
}
pub struct SimdOps;
const W_128: usize = 128 / 8;
const W_256: usize = 256 / 8;
const W_512: usize = 512 / 8;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64 as arch;
#[cfg(target_arch = "arm")]
use std::arch::arm as arch;
#[cfg(target_arch = "wasm32")]
use std::arch::wasm32 as arch;
#[cfg(target_arch = "wasm64")]
use std::arch::wasm64 as arch;
#[cfg(target_arch = "x86")]
use std::arch::x86 as arch;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64 as arch;
macro_rules! specialized {
($LANES:ident, $trait:ident {
$(
fn $name:ident($( $argn:ident: $argt:ty ),*) -> $rt:ty $(where [ $( $where:tt )* ])? {
($(
$vis:vis fn $name:ident<$LANES:ident$(, $( $generics:tt )+)?>($( $argn:ident: $argt:ty ),*) -> $rt:ty $(where [ $( $where:tt )* ])? {
$(
$width:pat_param $( if $cfg:meta )? => $impl:expr
),+
$(,)?
}
)*
}) => {
impl<const $LANES: usize> $trait<$LANES> for SimdOps {
$(
#[inline(always)]
fn $name($( $argn: $argt ),*) -> $rt $( where $( $where )* )? {
// abusing const generics to specialize without the unsoundness of real specialization!
match $LANES {
$(
$( #[cfg( $cfg )] )?
$width => $impl
),+
}
)+) => {$(
#[inline(always)]
$vis fn $name<const $LANES: usize$(, $( $generics )+)?>($( $argn: $argt ),*) -> $rt $( where $( $where )* )? {
// abusing const generics to specialize without the unsoundness of real specialization!
match $LANES {
$(
$( #[cfg( $cfg )] )?
$width => $impl
),+
}
)*
}
};
}
}
)+};
}
specialized!(LANES, SimdSplatN {
fn splat_n(n: u8) -> Simd<u8, LANES> where [LaneCount<LANES>: SupportedLaneCount] {
W_128 if all(target_arch = "x86_64", target_feature = "sse2") => unsafe { cast(core::arch::x86_64::_mm_set1_epi8(n as i8)) },
W_128 if all(target_arch = "x86", target_feature = "sse2") => unsafe { cast(core::arch::x86::_mm_set1_epi8(n as i8)) },
W_256 if all(target_arch = "x86_64", target_feature = "avx") => unsafe { cast(core::arch::x86_64::_mm256_set1_epi8(n as i8)) },
W_256 if all(target_arch = "x86", target_feature = "avx") => unsafe { cast(core::arch::x86::_mm256_set1_epi8(n as i8)) },
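// VPBROADCASTB (AVX2) replicates the lowest byte of the source XMM register across every
// lane of the destination vector.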
macro_rules! set1 {
($arch:ident, $vec:ident, $reg:ident, $n:ident) => {{
let out: std::arch::$arch::$vec;
std::arch::asm!("vpbroadcastb {}, {}", lateout($reg) out, in(xmm_reg) cast::<_, std::arch::$arch::__m128i>($n));
out
}};
}
specialized! {
// TODO: special case https://www.felixcloutier.com/x86/vpbroadcastb:vpbroadcastw:vpbroadcastd:vpbroadcastq
pub fn splat_n<LANES>(n: u8) -> Simd<u8, LANES> where [
LaneCount<LANES>: SupportedLaneCount,
] {
W_128 if all(target_arch = "x86_64", target_feature = "avx2") => unsafe { cast(set1!(x86_64, __m128i, xmm_reg, n)) },
W_128 if all(target_arch = "x86", target_feature = "avx2") => unsafe { cast(set1!(x86, __m128i, xmm_reg, n)) },
W_256 if all(target_arch = "x86_64", target_feature = "avx2") => unsafe { cast(set1!(x86_64, __m256i, ymm_reg, n)) },
W_256 if all(target_arch = "x86", target_feature = "avx2") => unsafe { cast(set1!(x86, __m256i, ymm_reg, n)) },
// these are *terrible*. They compile to a bunch of MOVs and SETs
W_128 if all(target_arch = "x86_64", target_feature = "sse2", not(target_feature = "avx2")) => unsafe { cast(core::arch::x86_64::_mm_set1_epi8(n as i8)) },
W_128 if all(target_arch = "x86", target_feature = "sse2", not(target_feature = "avx2")) => unsafe { cast(core::arch::x86::_mm_set1_epi8(n as i8)) },
W_256 if all(target_arch = "x86_64", target_feature = "avx", not(target_feature = "avx2")) => unsafe { cast(core::arch::x86_64::_mm256_set1_epi8(n as i8)) },
W_256 if all(target_arch = "x86", target_feature = "avx", not(target_feature = "avx2")) => unsafe { cast(core::arch::x86::_mm256_set1_epi8(n as i8)) },
// I can't really test these, but they're documented as doing either a broadcast or the terrible approach mentioned above.
W_512 if all(target_arch = "x86_64", target_feature = "avx512f") => unsafe { cast(core::arch::x86_64::_mm512_set1_epi8(n as i8)) },
W_512 if all(target_arch = "x86", target_feature = "avx512f") => unsafe { cast(core::arch::x86::_mm512_set1_epi8(n as i8)) },
_ => Simd::splat(n),
}
});
specialized!(LANES, SimdSplatZero {
fn splat_zero() -> Simd<u8, LANES> where [LaneCount<LANES>: SupportedLaneCount] {
pub fn splat_0<LANES>() -> Simd<u8, LANES> where [
LaneCount<LANES>: SupportedLaneCount,
] {
// these are fine, they are supposed to XOR themselves to zero out.
W_128 if all(target_arch = "x86_64", target_feature = "sse2") => unsafe { cast(core::arch::x86_64::_mm_setzero_si128()) },
W_128 if all(target_arch = "x86", target_feature = "sse2") => unsafe { cast(core::arch::x86::_mm_setzero_si128()) },
W_256 if all(target_arch = "x86_64", target_feature = "avx") => unsafe { cast(core::arch::x86_64::_mm256_setzero_si256()) },
W_256 if all(target_arch = "x86", target_feature = "avx") => unsafe { cast(core::arch::x86::_mm256_setzero_si256()) },
W_512 if all(target_arch = "x86_64", target_feature = "avx512f") => unsafe { cast(core::arch::x86_64::_mm512_setzero_si512()) },
W_512 if all(target_arch = "x86", target_feature = "avx512f") => unsafe { cast(core::arch::x86::_mm512_setzero_si512()) },
_ => <Self as SimdSplatN<LANES>>::splat_n(0),
_ => splat_n(0),
}
});
}
/// Defines the indices used by [`swizzle`].
#[macro_export]
macro_rules! __swizzle_indices {
($name:ident = [$( $index:literal ),+] $( , [$( $padding:tt )+] )?) => {
std::arch::global_asm!(concat!(".", stringify!($name), ":")
$( , concat!("\n .byte ", stringify!($index)) )+
$( $( , $crate::util::subst!([$padding], ["\n .zero 1"]) )+ )?);
};
}
#[macro_export]
macro_rules! __swizzle {
/*(xmm_reg, $src:expr, $dest:expr, [$( $index:literal ),+] $( , [$( $padding:tt )+] )?) => {
$crate::simd::swizzle!(@ xmm_reg, $src, $dest, (xmmword) [$( $index ),+] $( , [$( $padding )+] )?)
};
(ymm_reg, $src:expr, $dest:expr, [$( $index:literal ),+] $( , [$( $padding:tt )+] )?) => {
$crate::simd::swizzle!(@ ymm_reg, $src, $dest, (ymmword) [$( $index ),+] $( , [$( $padding )+] )?)
};
(zmm_reg, $src:expr, $dest:expr, [$( $index:literal ),+] $( , [$( $padding:tt )+] )?) => {
$crate::simd::swizzle!(@ zmm_reg, $src, $dest, (zmmword) [$( $index ),+] $( , [$( $padding )+] )?)
};*/
(xmm_reg, $src:expr, $mode:ident $dest:expr, $indices:ident) => {
$crate::simd::swizzle!(@ xmm_reg, x, $src, $mode $dest, (xmmword) $indices)
};
(ymm_reg, $src:expr, $mode:ident $dest:expr, $indices:ident) => {
$crate::simd::swizzle!(@ ymm_reg, y, $src, $mode $dest, (ymmword) $indices)
};
(zmm_reg, $src:expr, $mode:ident $dest:expr, $indices:ident) => {
$crate::simd::swizzle!(@ zmm_reg, z, $src, $mode $dest, (zmmword) $indices)
};
($reg:ident, $src:expr, $dest:expr, mem $indices:expr) => {
std::arch::asm!("vpshufb {}, {}, [{}]", in($reg) $src, lateout($reg) $dest, in(reg) $indices);
};
($reg:ident, $src:expr, $dest:expr, data($indices_reg:ident) $indices:expr) => {
std::arch::asm!("vpshufb {}, {}, {}", in($reg) $src, lateout($reg) $dest, in($indices_reg) $indices);
};
//(@ $reg:ident, $src:expr, $dest:expr, ($indices_reg:ident) [$( $index:literal ),+] $( , [$( $padding:tt )+] )?) => {
(@ $reg:ident, $token:ident, $src:expr, $mode:ident $dest:expr, ($indices_reg:ident) $indices:ident) => {
std::arch::asm!(concat!("vpshufb {:", stringify!($token), "}, {:", stringify!($token), "}, ", stringify!($indices_reg), " ptr [rip + .", stringify!($indices), "]"), $mode($reg) $dest, in($reg) $src);
// std::arch::asm!("2:"
// $( , concat!("\n .byte ", stringify!($index)) )+
// $( $( , $crate::util::subst!([$padding], ["\n .zero 1"]) )+ )?
// , "\n3:\n", concat!(" vpshufb {}, {}, ", stringify!($indices_reg), " ptr [rip + 2b]"), in($reg) $src, lateout($reg) $dest)
};
// ($src:expr, $dest:expr, [$( $index:literal ),+] $( , [$( $padding:tt )+] )?) => {
// $crate::simd::swizzle!(@ $src, $dest, [$( stringify!($index) ),+] $( , [$( "\n ", subst!($padding, ""), "zero 1" )+] )?)
// };
// (@ $src:expr, $dest:expr, [$( $index:literal ),+] $( , [$( $padding:literal )+] )?) => {
// std::arch::asm!(r#"
// .indices:"#,
// $( "\n .byte ", $index ),+
// $( $( $padding ),+ )?
// r#"
// lsb:
// vpshufb {}, {}, xmmword ptr [rip + .indices]
// "#, in(xmm_reg) $src, lateout(xmm_reg) $dest)
// };
}
pub use __swizzle as swizzle;
pub use __swizzle_indices as swizzle_indices;
#[inline(always)]
pub fn splat_0<const LANES: usize>() -> Simd<u8, LANES>
where
LaneCount<LANES>: SupportedLaneCount,
{
<SimdOps as SimdSplatZero<LANES>>::splat_zero()
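/// Moves a `u64` into the low 64 bits of an XMM register via `vmovq`; the upper 64 bits
/// are zeroed.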
pub fn load_u64_m128(v: u64) -> arch::__m128i {
unsafe {
let out: _;
std::arch::asm!("vmovq {}, {}", lateout(xmm_reg) out, in(reg) v);
out
}
}
#[inline(always)]
pub fn splat_n<const LANES: usize>(n: u8) -> Simd<u8, LANES>
where
LaneCount<LANES>: SupportedLaneCount,
{
<SimdOps as SimdSplatN<LANES>>::splat_n(n)
pub fn merge_low_hi_m128(a: arch::__m128i, b: arch::__m128i) -> arch::__m128i {
unsafe {
// xmm0 = xmm1[0],xmm0[0]
let out: _;
std::arch::asm!("vpunpcklqdq {}, {}, {}", lateout(xmm_reg) out, in(xmm_reg) a, in(xmm_reg) b);
out
}
}
/// The args are in little endian order (first arg is lowest order)
pub fn merge_m128_m256(a: arch::__m128i, b: arch::__m128i) -> arch::__m256i {
unsafe {
let out: _;
std::arch::asm!("vinserti128 {}, {:y}, {}, 0x1", lateout(ymm_reg) out, in(ymm_reg) a, in(xmm_reg) b);
out
}
}
macro_rules! extract_lohi_bytes {
(($mask:expr, $op12:ident, $op3:ident), $in:ident) => {{
const MASK: arch::__m128i = unsafe { std::mem::transmute($mask) };
unsafe {
let out: _;
std::arch::asm!(
//concat!("vmovdqa {mask}, xmmword ptr [rip + .", stringify!($mask), "]"),
"vextracti128 {inter}, {input:y}, 1",
concat!(stringify!($op12), " {inter}, {inter}, {mask}"),
concat!(stringify!($op12), " {output:x}, {input:x}, {mask}"),
concat!(stringify!($op3), " {output:x}, {output:x}, {inter}"),
mask = in(xmm_reg) MASK, input = in(ymm_reg) $in, output = lateout(xmm_reg) out, inter = out(xmm_reg) _);
out
}
}};
}
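/// Extracts the even-indexed bytes (the low byte of each 16-bit lane) of a 256-bit vector
/// into a single 128-bit vector (`vpand` with 0x00FF per lane, then `vpackuswb`).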
#[inline(always)]
pub fn extract_lo_bytes(v: arch::__m256i) -> arch::__m128i {
extract_lohi_bytes!(([0xffu16; 8], vpand, vpackuswb), v)
}
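/// Extracts the odd-indexed bytes (the high byte of each 16-bit lane) of a 256-bit vector
/// into a single 128-bit vector (`vpshufb` per half, then `vpunpcklqdq`).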
#[inline(always)]
pub fn extract_hi_bytes(v: arch::__m256i) -> arch::__m128i {
extract_lohi_bytes!(([0x1u8, 0x3, 0x5, 0x7, 0x9, 0xb, 0xd, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0], vpshufb, vpunpcklqdq), v)
}

View File

@@ -53,3 +53,45 @@ pub unsafe fn cast<A, B>(a: A) -> B {
.b,
)
}
#[doc(hidden)]
#[macro_export]
macro_rules! __subst {
([$( $ignore:tt )*], [$( $use:tt )*]) => {
$( $use )*
};
}
pub use __subst as subst;
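/// Rounds `n` down to a multiple of `N`. `N` is assumed to be a power of two; for any
/// other `N` the shift trick aligns to the next lower power of two instead.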
#[inline(always)]
pub const fn align_down_to<const N: usize>(n: usize) -> usize {
let shift = match N.checked_ilog2() {
Some(x) => x,
None => 0,
};
return n >> shift << shift;
}
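/// Rounds `n` up to a multiple of `N`, with the same power-of-two assumption as
/// [`align_down_to`].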
#[inline(always)]
pub const fn align_up_to<const N: usize>(n: usize) -> usize {
let shift = match N.checked_ilog2() {
Some(x) => x,
None => 0,
};
return (n + (N - 1)) >> shift << shift;
}
#[cfg(test)]
mod test {
use super::*;
#[test]
pub fn test_align_down_to() {
assert_eq!(align_down_to::<8>(8), 8);
assert_eq!(align_down_to::<16>(8), 0);
assert_eq!(align_down_to::<16>(16), 16);
assert_eq!(align_down_to::<16>(15), 0);
assert_eq!(align_down_to::<16>(17), 16);
}
}