sailfish/sailfish/src/escape.rs

157 lines
5.3 KiB
Rust

use std::marker::PhantomData;
use array_vec::ArrayStr;
use super::Buffer;
/// Holds computed constants for common escaping scheme identity checks.
///
/// The type is used as a namespace: its only field is a private [`PhantomData`] marker, and the
/// associated constants on `CommonIdents<E>` each report whether a common class of characters
/// requires no escaping under the scheme `E`.
pub struct CommonIdents<E: Escape>(PhantomData<E>);
/// Widens every byte of an ASCII byte array into the `char` with the same code point.
///
/// Usable in constant contexts, so character tables can be built from byte-string literals.
///
/// # Panics
///
/// Panics if `s` contains a byte outside the ASCII range.
pub const fn ascii_chars<const N: usize>(s: &[u8; N]) -> [char; N] {
    assert!(s.is_ascii());
    let mut out = ['\0'; N];
    // `for` loops are not available in `const fn`, so fill the array with a manual countdown.
    let mut remaining = N;
    while remaining > 0 {
        remaining -= 1;
        out[remaining] = s[remaining] as char;
    }
    out
}
/// Reports whether the escaping scheme `E` leaves every character of `set` untouched.
///
/// Characters are checked in order with `E::is_identity`, stopping at the first one that may
/// need escaping. An empty `set` yields `true`.
pub const fn are_all_chars_identity<E: Escape + ~const EscapeMeta>(set: &[char]) -> bool {
    let mut all_identity = true;
    let mut idx = 0;
    // Iterators are not usable in `const fn`; the flag ends the scan on the first failure.
    while all_identity && idx < set.len() {
        all_identity = E::is_identity(set[idx]);
        idx += 1;
    }
    all_identity
}
// Character tables consumed by the identity checks on `CommonIdents`. Each is built from an
// ASCII byte-string literal by `ascii_chars` at compile time.

// The lowercase ASCII letters `a` through `z`.
const ALPHA_LOWERCASE_CHARS: &[char] = &ascii_chars(b"abcdefghijklmnopqrstuvwxyz");
// The uppercase ASCII letters `A` through `Z`.
const ALPHA_UPPERCASE_CHARS: &[char] = &ascii_chars(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ");
// The base-10 digits `0` through `9`.
const DIGIT_CHARS: &[char] = &ascii_chars(b"0123456789");
// The distinct letters that occur in the words `true` and `false`.
const BOOL_CHARS: &[char] = &ascii_chars(b"truefals");
impl<E> CommonIdents<E>
where
    E: Escape,
{
    /// Whether the lowercase ASCII letters `a`-`z` are all left unescaped by `E`.
    pub const ALPHA_LOWERCASE: bool = are_all_chars_identity::<E>(ALPHA_LOWERCASE_CHARS);

    /// Whether the uppercase ASCII letters `A`-`Z` are all left unescaped by `E`.
    pub const ALPHA_UPPERCASE: bool = are_all_chars_identity::<E>(ALPHA_UPPERCASE_CHARS);

    /// Whether the base-10 digits `0`-`9` are all left unescaped by `E`.
    pub const DIGITS: bool = are_all_chars_identity::<E>(DIGIT_CHARS);

    /// Whether the words `true` and `false` are left unescaped by `E`.
    pub const BOOLS: bool = are_all_chars_identity::<E>(BOOL_CHARS);

    /// Whether unsigned integers are left unescaped by `E`; they consist of digits only.
    pub const UINTS: bool = Self::DIGITS;

    /// Whether signed integers are left unescaped by `E`: digits plus the minus sign.
    pub const INTS: bool = Self::UINTS && are_all_chars_identity::<E>(&['-']);

    /// Whether floats (using [`ryu`]'s formatting) are left unescaped by `E`: the characters of
    /// a signed integer plus the decimal point and the exponent marker `e`.
    // NOTE(review): the letters of non-finite renderings such as `NaN` / `inf` are not part of
    // this check; confirm how the float renderer emits those values.
    pub const FLOATS: bool = Self::INTS && are_all_chars_identity::<E>(&['.', 'e']);
}
/// Constant metadata about an impl of [`Escape`].
///
/// Declared with `#[const_trait]` so that `is_identity` can be called from constant contexts,
/// as [`are_all_chars_identity`] does when computing the [`CommonIdents`] constants.
#[const_trait]
pub trait EscapeMeta {
/// Returns `true` if the escaping scheme will never map the given character, regardless of its
/// configuration.
///
/// This is an associated function without a `self` receiver, so the answer cannot depend on
/// the state of any particular escaper value.
fn is_identity(c: char) -> bool;
}
/// A scheme for escaping strings.
///
/// Implementors supply [`Escape::escape`], the per-character mapping; the provided methods apply
/// that mapping across a whole string. The [`EscapeMeta`] supertrait exposes the scheme's
/// compile-time identity check.
pub trait Escape: const EscapeMeta {
    /// The type of an escaped character.
    type Escaped: AsRef<str>;

    /// If the character needs to be escaped, does so and returns it as a string. Otherwise,
    /// returns `None`.
    fn escape(&self, c: char) -> Option<Self::Escaped>;

    /// Writes the `string` to the `buffer`, applying any necessary escaping.
    ///
    /// Runs of characters that need no escaping are copied as whole slices; each character for
    /// which [`Escape::escape`] returns `Some` is replaced by its escaped form.
    #[inline]
    fn escape_to_buf(&self, buffer: &mut Buffer, string: &str) {
        // The output is at least as long as the input whenever nothing shrinks, so reserve the
        // input length up front.
        buffer.reserve(string.len());

        // Byte offset of the first character that has not been written to `buffer` yet.
        let mut i = 0;
        for (j, c) in string.char_indices() {
            if let Some(rep) = self.escape(c) {
                // Flush the unescaped run preceding `c`, then the replacement for `c` itself.
                buffer.push_str(&string[i..j]);
                buffer.push_str(rep.as_ref());
                i = j + c.len_utf8();
            }
        }

        // Flush whatever follows the last escaped character. Without this, the tail of the
        // input (or the whole input, when nothing needs escaping) would be dropped.
        buffer.push_str(&string[i..]);
    }

    /// Writes the `string` to the `buffer`, applying any necessary escaping.
    ///
    /// The `String` is temporarily moved into a [`Buffer`] (leaving an empty string behind),
    /// escaped into via [`Escape::escape_to_buf`], and then moved back.
    ///
    /// # Examples
    ///
    /// ```
    /// use sailfish::{Escape, EscapeHtml};
    ///
    /// let mut buf = String::new();
    /// EscapeHtml.escape_to_string(&mut buf, "<h1>Hello, world!</h1>");
    /// assert_eq!(buf, "&lt;h1&gt;Hello, world!&lt;/h1&gt;");
    /// ```
    #[inline]
    fn escape_to_string(&self, buffer: &mut String, string: &str) {
        let mut buf = Buffer::from(std::mem::take(buffer));
        self.escape_to_buf(&mut buf, string);
        *buffer = buf.into_string();
    }
}
/// A scheme for escaping strings for safe insertion into JSON strings.
///
/// The scheme rewrites the double quote, the backslash, and the control characters
/// U+0000 through U+001F; every other character passes through unchanged.
pub struct EscapeJsonString;
impl const EscapeMeta for EscapeJsonString {
    /// A character is an identity unless it is the double quote, the backslash, or a control
    /// character in the range U+0000 through U+001F.
    #[inline]
    fn is_identity(c: char) -> bool {
        // U+0000 is the smallest `char`, so `c > U+001F` excludes the whole control range.
        c != '"' && c != '\\' && c > '\u{001F}'
    }
}
impl Escape for EscapeJsonString {
    /// Sized for the longest replacement this scheme produces: the six-byte `\u00XX` form.
    type Escaped = ArrayStr<6>;

    /// Escapes a single character for use inside a JSON string.
    ///
    /// * `"` becomes `\"` and `\` becomes `\\`.
    /// * Control characters U+0000 through U+001F become `\u00XX`, where `XX` is the code point
    ///   in uppercase hexadecimal. JSON requires exactly four hex digits after `\u`.
    /// * Every other character yields `None` and is emitted unchanged by the caller.
    #[inline]
    fn escape(&self, c: char) -> Option<Self::Escaped> {
        const HEX_DIGITS: [u8; 16] = *b"0123456789ABCDEF";

        match c {
            '"' => Some(ArrayStr::try_from(r#"\""#).unwrap()),
            '\\' => Some(ArrayStr::try_from(r"\\").unwrap()),
            '\u{0000}'..='\u{001F}' => {
                // The match arm bounds `c` to at most 0x1F, so the cast cannot truncate.
                let c = c as u8;
                // Start from the fixed four-byte prefix; only the low two hex digits vary.
                let mut s = ArrayStr::try_from(r"\u00").unwrap();
                // SAFETY: `s` holds 4 of its 6 bytes, so two unused slots remain (this assumes
                // `ArrayStr<N>` has a capacity of `N` bytes). Both slots are initialized before
                // the length is extended over them, and both receive ASCII hex digits, so the
                // contents remain valid UTF-8.
                unsafe {
                    let arr = s.data_mut();
                    arr.unused_mut()[0].write(HEX_DIGITS[usize::from(c >> 4)]);
                    arr.unused_mut()[1].write(HEX_DIGITS[usize::from(c & 0xF)]);
                    arr.set_len(arr.len() + 2);
                }
                Some(s)
            }
            _ => None,
        }
    }
}
#[cfg(test)]
mod tests {
use super::{CommonIdents, EscapeJsonString};
// Verifies that the JSON string scheme reports booleans, unsigned and signed integers, and
// floats as never needing escaping. The constants are computed at compile time, so this also
// exercises the const evaluation of `EscapeJsonString::is_identity`.
#[test]
fn check_idents() {
assert!(CommonIdents::<EscapeJsonString>::BOOLS);
assert!(CommonIdents::<EscapeJsonString>::UINTS);
assert!(CommonIdents::<EscapeJsonString>::INTS);
assert!(CommonIdents::<EscapeJsonString>::FLOATS);
}
}