minify-html/rust/main/src/minify/content.rs

use aho_corasick::{AhoCorasickBuilder, MatchKind};
use lazy_static::lazy_static;

use crate::ast::{NodeData, ScriptOrStyleLang};
use crate::cfg::Cfg;
use crate::common::gen::codepoints::TAG_NAME_CHAR;
use crate::common::pattern::Replacer;
use crate::common::spec::tag::whitespace::{
    get_whitespace_minification_for_tag, WhitespaceMinification,
};
use crate::common::whitespace::{collapse_whitespace, is_all_whitespace, left_trim, right_trim};
use crate::entity::encode::encode_entities;
use crate::minify::bang::minify_bang;
use crate::minify::comment::minify_comment;
use crate::minify::css::minify_css;
use crate::minify::doctype::minify_doctype;
use crate::minify::element::minify_element;
use crate::minify::instruction::minify_instruction;
use crate::minify::js::minify_js;

/// Builds the [`Replacer`] applied to text nodes before they are written out.
///
/// Every `<` that is immediately followed by a tag name character, `/`, `!`, or `?`
/// is rewritten to `&LT` (the following byte is kept as-is). Presumably this is so
/// that literal text cannot be re-read as the start of markup.
fn build_chevron_replacer() -> Replacer {
    // Bytes that, when they directly follow a `<`, trigger the replacement.
    // TODO Create single lookup.
    let followers: Vec<u8> = (0u8..128u8)
        .filter(|&c| TAG_NAME_CHAR[c] || matches!(c, b'/' | b'!' | b'?'))
        .collect();

    // `patterns[k]` and `replacements[k]` describe the same follower byte.
    let patterns: Vec<Vec<u8>> = followers.iter().map(|&c| vec![b'<', c]).collect();
    let replacements: Vec<Vec<u8>> = followers
        .iter()
        .map(|&c| vec![b'&', b'L', b'T', c])
        .collect();

    let matcher = AhoCorasickBuilder::new()
        .dfa(true)
        .match_kind(MatchKind::LeftmostLongest)
        .build(patterns);

    Replacer::new(matcher, replacements)
}

// Shared chevron replacer, constructed lazily on first access via `build_chevron_replacer`.
// `minify_content` runs every text node through it after entity encoding.
lazy_static! {
    static ref CHEVRON_REPLACER: Replacer = build_chevron_replacer();
}

/// Minifies a list of sibling nodes and appends the result to `out`.
///
/// Works in two passes:
/// 1. Whitespace in text nodes is trimmed/collapsed/destroyed according to the
///    whitespace rules looked up for `parent`, and each element is told the name of
///    the element that follows it (when no non-empty text sits in between).
/// 2. Every node is dispatched to its dedicated minifier, in order.
///
/// * `cfg` - minification settings, forwarded to the per-node minifiers.
/// * `out` - output buffer that receives the minified bytes.
/// * `descendant_of_pre` - forwarded to the whitespace rule lookup and to child elements.
/// * `parent` - name of the parent element; use an empty slice if there is none.
/// * `nodes` - the sibling nodes to minify; consumed by this call.
pub fn minify_content(
    cfg: &Cfg,
    out: &mut Vec<u8>,
    descendant_of_pre: bool,
    // Use empty slice if none.
    parent: &[u8],
    mut nodes: Vec<NodeData>,
) {
    let ws_rules = get_whitespace_minification_for_tag(parent, descendant_of_pre);
    let (collapse, destroy_whole, trim) =
        (ws_rules.collapse, ws_rules.destroy_whole, ws_rules.trim);

    // TODO Document or fix: even though bangs/comments/etc. don't affect layout, we don't collapse/destroy-whole/trim combined text nodes across bangs/comments/etc., as that's too complex and is ambiguous about which nodes should whitespace be deleted from.
    let mut seen_text_or_elem = false;
    // Position of the most recent element or text node that still had content when visited.
    let mut last_nonempty: Option<usize> = None;
    // Position of the most recent element or text node, empty or not.
    let mut last_any: Option<usize> = None;

    for idx in 0..nodes.len() {
        // Split so the current node and an earlier sibling can both be borrowed mutably.
        let (before, rest) = nodes.split_at_mut(idx);
        match &mut rest[0] {
            NodeData::Element { name, .. } => {
                if let Some(prev_idx) = last_nonempty {
                    // Only an element directly preceding this one (ignoring empty text,
                    // comments, etc.) learns about its next sibling element.
                    if let NodeData::Element {
                        next_sibling_element_name,
                        ..
                    } = &mut before[prev_idx]
                    {
                        debug_assert!(next_sibling_element_name.is_empty());
                        next_sibling_element_name.extend_from_slice(name);
                    }
                }
                seen_text_or_elem = true;
                last_nonempty = Some(idx);
                last_any = Some(idx);
            }
            NodeData::Text { value } => {
                if !seen_text_or_elem {
                    // The very first element-or-text node is this text node.
                    seen_text_or_elem = true;
                    if trim {
                        left_trim(value);
                    }
                }
                // The parser emits contiguous text as a single node, so the neighbours of a
                // text node (not counting comments/bangs/etc.) should be elements.
                // TODO debug_assert this and add tests.
                if destroy_whole && is_all_whitespace(value) {
                    value.clear();
                } else if collapse {
                    collapse_whitespace(value);
                }
                // Record positions only AFTER the text has been processed.
                last_any = Some(idx);
                if !value.is_empty() {
                    last_nonempty = Some(idx);
                }
            }
            _ => {}
        }
    }

    // Trim the tail of the final text node, if the last element-or-text node is text.
    // NOTE(review): if this trim empties the node, `last_nonempty` may still point at it;
    // confirm whether that is intended.
    if let (true, Some(tail_idx)) = (trim, last_any) {
        if let NodeData::Text { value } = &mut nodes[tail_idx] {
            right_trim(value);
        }
    }

    for (idx, node) in nodes.into_iter().enumerate() {
        match node {
            NodeData::Bang { code, ended } => minify_bang(cfg, out, &code, ended),
            NodeData::Comment { code, ended } => minify_comment(cfg, out, &code, ended),
            NodeData::Doctype { legacy, ended } => minify_doctype(cfg, out, &legacy, ended),
            NodeData::Element {
                attributes,
                children,
                closing_tag,
                name,
                namespace: child_ns,
                next_sibling_element_name,
            } => {
                // True when this element is the last non-empty element-or-text sibling.
                let is_last_nonempty = last_nonempty == Some(idx);
                minify_element(
                    cfg,
                    out,
                    descendant_of_pre,
                    child_ns,
                    parent,
                    &next_sibling_element_name,
                    is_last_nonempty,
                    &name,
                    attributes,
                    closing_tag,
                    children,
                )
            }
            NodeData::Instruction { code, ended } => minify_instruction(cfg, out, &code, ended),
            NodeData::ScriptOrStyleContent { code, lang } => match lang {
                ScriptOrStyleLang::CSS => minify_css(cfg, out, &code),
                // Data blocks are copied through untouched.
                ScriptOrStyleLang::Data => out.extend_from_slice(&code),
                ScriptOrStyleLang::JS => minify_js(cfg, out, &code),
            },
            NodeData::Text { value } => {
                // Encode entities first, then rewrite tag-like `<` sequences.
                let encoded = encode_entities(&value, false);
                out.extend_from_slice(&CHEVRON_REPLACER.replace_all(&encoded))
            }
        };
    }
}
Implement core minifier 2021-08-06 02:17:45 -04:00			`use aho_corasick::{AhoCorasickBuilder, MatchKind};`
			`use lazy_static::lazy_static;`

			`use crate::ast::{NodeData, ScriptOrStyleLang};`
			`use crate::cfg::Cfg;`
Formatting 2021-08-09 05:56:37 -04:00			`use crate::common::gen::codepoints::TAG_NAME_CHAR;`
			`use crate::common::pattern::Replacer;`
			`use crate::common::spec::tag::whitespace::{`
			`get_whitespace_minification_for_tag, WhitespaceMinification,`
			`};`
			`use crate::common::whitespace::{collapse_whitespace, is_all_whitespace, left_trim, right_trim};`
Move entity code 2021-08-08 04:46:51 -04:00			`use crate::entity::encode::encode_entities;`
Implement core minifier 2021-08-06 02:17:45 -04:00			`use crate::minify::bang::minify_bang;`
			`use crate::minify::comment::minify_comment;`
			`use crate::minify::css::minify_css;`
Implement c14n; minify doctypes; minify viewport tags 2021-08-09 12:56:48 -04:00			`use crate::minify::doctype::minify_doctype;`
Implement core minifier 2021-08-06 02:17:45 -04:00			`use crate::minify::element::minify_element;`
			`use crate::minify::instruction::minify_instruction;`
			`use crate::minify::js::minify_js;`

			`fn build_chevron_replacer() -> Replacer {`
			`let mut patterns = Vec::<Vec<u8>>::new();`
			`let mut replacements = Vec::<Vec<u8>>::new();`

Implement attr minification; various parser and minifier fixes 2021-08-06 07:56:54 -04:00			// Replace all `<` with a `&LT` if it's followed by a TAG_NAME_CHAR, `/`, `!`, or `?`.
Implement core minifier 2021-08-06 02:17:45 -04:00			`for c in 0u8..128u8 {`
Implement attr minification; various parser and minifier fixes 2021-08-06 07:56:54 -04:00			`// TODO Create single lookup.`
			`if TAG_NAME_CHAR[c] \|\| c == b'/' \|\| c == b'!' \|\| c == b'?' {`
Implement core minifier 2021-08-06 02:17:45 -04:00			`patterns.push(vec![b'<', c]);`
			`replacements.push(vec![b'&', b'L', b'T', c]);`
			`};`
rustfmt 2021-08-06 02:19:36 -04:00			`}`
Implement core minifier 2021-08-06 02:17:45 -04:00
			`Replacer::new(`
			`AhoCorasickBuilder::new()`
			`.dfa(true)`
			`.match_kind(MatchKind::LeftmostLongest)`
			`.build(patterns),`
			`replacements,`
			`)`
			`}`

			`lazy_static! {`
			`static ref CHEVRON_REPLACER: Replacer = build_chevron_replacer();`
			`}`

			`pub fn minify_content(`
			`cfg: &Cfg,`
			`out: &mut Vec<u8>,`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`descendant_of_pre: bool,`
Implement core minifier 2021-08-06 02:17:45 -04:00			`// Use empty slice if none.`
			`parent: &[u8],`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`mut nodes: Vec<NodeData>,`
Clippy suggestions 2021-08-06 09:18:45 -04:00			`) {`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`let &WhitespaceMinification {`
			`collapse,`
			`destroy_whole,`
			`trim,`
			`} = get_whitespace_minification_for_tag(parent, descendant_of_pre);`

			`// TODO Document or fix: even though bangs/comments/etc. don't affect layout, we don't collapse/destroy-whole/trim combined text nodes across bangs/comments/etc., as that's too complex and is ambiguous about which nodes should whitespace be deleted from.`
			`let mut found_first_text_or_elem = false;`
			`let mut index_of_last_nonempty_text_or_elem: isize = -1;`
			`let mut index_of_last_text_or_elem: isize = -1;`
Fix next sibling calc 2021-08-06 09:07:55 -04:00			`for i in 0..nodes.len() {`
			`let (previous_nodes, next_nodes) = nodes.split_at_mut(i);`
			`let n = &mut next_nodes[0];`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`match n {`
Fix next sibling calc 2021-08-06 09:07:55 -04:00			`NodeData::Element { name, .. } => {`
			`if index_of_last_nonempty_text_or_elem > -1 {`
More clippy suggestions 2021-08-06 09:23:05 -04:00			`if let NodeData::Element {`
			`next_sibling_element_name,`
			`..`
			`} = &mut previous_nodes[index_of_last_nonempty_text_or_elem as usize]`
			`{`
			`debug_assert!(next_sibling_element_name.is_empty());`
			`next_sibling_element_name.extend_from_slice(name);`
Fix next sibling calc 2021-08-06 09:07:55 -04:00			`};`
			`};`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`found_first_text_or_elem = true;`
			`index_of_last_nonempty_text_or_elem = i as isize;`
			`index_of_last_text_or_elem = i as isize;`
			`}`
			`NodeData::Text { value } => {`
			`if !found_first_text_or_elem {`
			`// This is the first element or text node, and it's a text node.`
			`found_first_text_or_elem = true;`
			`if trim {`
			`left_trim(value);`
			`};`
			`};`
			`// Our parser is guaranteed to output contiguous text as a single node,`
			`// so the adjacent nodes to a text node (not counting comments/bangs/etc.) should be elements.`
			`// TODO debug_assert this and add tests.`
			`if destroy_whole && is_all_whitespace(value) {`
			`value.clear();`
			`} else if collapse {`
			`collapse_whitespace(value);`
			`};`
			`// Set AFTER processing.`
			`index_of_last_text_or_elem = i as isize;`
			`if !value.is_empty() {`
			`index_of_last_nonempty_text_or_elem = i as isize;`
			`};`
			`}`
			`_ => {}`
			`};`
			`}`
			`if trim && index_of_last_text_or_elem > -1 {`
More clippy suggestions 2021-08-06 09:23:05 -04:00			`if let NodeData::Text { value } =`
			`nodes.get_mut(index_of_last_text_or_elem as usize).unwrap()`
			`{`
			`right_trim(value);`
Implement core minifier 2021-08-06 02:17:45 -04:00			`};`
rustfmt 2021-08-06 02:19:36 -04:00			`}`
Implement core minifier 2021-08-06 02:17:45 -04:00
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`for (i, c) in nodes.into_iter().enumerate() {`
Implement core minifier 2021-08-06 02:17:45 -04:00			`match c {`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`NodeData::Bang { code, ended } => minify_bang(cfg, out, &code, ended),`
			`NodeData::Comment { code, ended } => minify_comment(cfg, out, &code, ended),`
Implement c14n; minify doctypes; minify viewport tags 2021-08-09 12:56:48 -04:00			`NodeData::Doctype { legacy, ended } => minify_doctype(cfg, out, &legacy, ended),`
Implement core minifier 2021-08-06 02:17:45 -04:00			`NodeData::Element {`
			`attributes,`
			`children,`
			`closing_tag,`
			`name,`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`namespace: child_ns,`
Fix tag omission minification; implement entity reencoding minification 2021-08-06 08:53:33 -04:00			`next_sibling_element_name,`
			`} => minify_element(`
			`cfg,`
			`out,`
			`descendant_of_pre,`
			`child_ns,`
			`parent,`
			`&next_sibling_element_name,`
			`(i as isize) == index_of_last_nonempty_text_or_elem,`
			`&name,`
			`attributes,`
			`closing_tag,`
			`children,`
			`),`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`NodeData::Instruction { code, ended } => minify_instruction(cfg, out, &code, ended),`
Implement core minifier 2021-08-06 02:17:45 -04:00			`NodeData::ScriptOrStyleContent { code, lang } => match lang {`
Implement whitespace minification 2021-08-06 03:33:56 -04:00			`ScriptOrStyleLang::CSS => minify_css(cfg, out, &code),`
			`ScriptOrStyleLang::Data => out.extend_from_slice(&code),`
			`ScriptOrStyleLang::JS => minify_js(cfg, out, &code),`
Implement core minifier 2021-08-06 02:17:45 -04:00			`},`
Fix tag omission minification; implement entity reencoding minification 2021-08-06 08:53:33 -04:00			`NodeData::Text { value } => out`
			`.extend_from_slice(&CHEVRON_REPLACER.replace_all(&encode_entities(&value, false))),`
Implement core minifier 2021-08-06 02:17:45 -04:00			`};`
rustfmt 2021-08-06 02:19:36 -04:00			`}`
Implement core minifier 2021-08-06 02:17:45 -04:00			`}`