minify-html/archive/quoted.rs

131 lines
5.1 KiB
Rust

// Archived work-in-progress fragment: processes a delimited attribute value
// and then rewrites it in the output using the cheapest quoting.
//
// Visible stages:
// 1. Whitespace collapsing and quote counting (first loop).
// 2. Entity decoding (second loop).
// 3. Consuming the closing delimiter, then either returning
//    `AttrType::Unquoted` or wrapping the processed value in quotes and
//    encoding every occurrence of the chosen quote character inside it.
//
// NOTE(review): this does not stand alone as written. `c`, `proc`, `delimiter`,
// `should_collapse_and_trim_ws`, `last_char_was_whitespace`,
// `has_whitespace_after_processing`, `starts_with_quote` and both quote
// counters are used without being declared in this function, and the declared
// return type is `()` although the body uses `?` and returns
// `Ok(AttrType::...)`.
fn tmp() -> () {
// TODO
// NOTE(review): this loop contains no `break`, `return` or `?`, and `c` is
// never reassigned inside it, so as written it has no exit.
loop {
// The local boolean shadows the `is_whitespace` function from here on.
let is_whitespace = is_whitespace(c);
if should_collapse_and_trim_ws && is_whitespace {
// Character, after any entity decoding, is whitespace.
// Don't write whitespace.
// In order to collapse whitespace, only write one space
// character once the first non-whitespace character
// after a sequence of whitespace characters is reached.
last_char_was_whitespace = true;
proc.skip();
} else {
// Character, after any entity decoding, is not whitespace.
if last_char_was_whitespace {
// This is the first non-whitespace character after one or more whitespace
// character(s), so collapse whitespace by writing only one space.
proc.write(b' ');
has_whitespace_after_processing = true;
last_char_was_whitespace = false;
};
// Tally quote characters so the cheaper delimiter can be chosen later.
if c == b'"' {
count_double_quotation += 1;
} else if c == b'\'' {
count_single_quotation += 1;
} else if is_whitespace {
// `should_collapse_and_trim_ws` is false, so
// whitespace is written.
has_whitespace_after_processing = true;
};
// NOTE(review): `increment_count` is not defined in this fragment; its
// effect cannot be determined from here.
increment_count(c);
// NOTE(review): `processed_entity` is only declared in the second loop
// below, so it is not in scope at this point.
if !processed_entity {
// Don't need to accept if hb_unit_entity has
// already been called.
// (`hb_unit_entity` is presumably an earlier name for `process_entity`
// — confirm.)
proc.accept();
};
};
}
// Since it's not possible to optimise the delimiter quotes without
// knowing the complete value, mark the processed value in the output
// for post-processing later.
// NOTE(review): the mark is taken after the first loop here; if that loop is
// what writes the value to the output, the mark would need to be taken before
// it — confirm the intended order.
let proc_value_start = proc.data.get_out_pos();
// NOTE(review): `is_first_char` is assigned below but never read in this
// function.
let mut is_first_char = true;
// NOTE(review): the only way out of this loop is the `?` on the
// `process_entity` call; there is no `break`.
loop {
let processed_entity = c == b'&';
if processed_entity {
// Characters will be consumed by hb_unit_entity, but they will never be '\'', '"', or
// whitespace, as the function only consumes characters that could form a well formed
// entity. See the function for more details.
// TODO Handle bad char
let decoded = process_entity(proc)?;
// A decoded value up to 0x7f replaces `c`; anything larger, or `None`,
// sets `c` to 0xff (presumably a placeholder that can never compare equal
// to a quote or whitespace byte — confirm).
match decoded {
Some(e) => if e <= 0x7f { c = e as u8; } else { c = 0xff; },
None => c = 0xff,
};
}
is_first_char = false;
};
// NOTE(review): because of the `+ 1`, this is at least 1 whenever the output
// position has not moved before `proc_value_start`, so the `proc_length > 0`
// check below cannot fail in that case. If `get_out_pos` returns the next
// write position, the `+ 1` over-counts by one — confirm.
let proc_length = proc.data.get_out_pos() + 1 - proc_value_start;
// Presumably requires the next character to be the closing delimiter and
// drops it without writing it — confirm against `match_char`/`discard`.
proc.match_char(delimiter).require()?.discard();
// Technically, the specification states that values may only be
// unquoted if they don't contain ["'`=<>]. However, browsers seem to
// interpret characters after `=` and before the nearest whitespace as
// an unquoted value, so long as no quote immediately follows `=`. If a
// value cannot be unquoted, use the one that appears the least and
// therefore requires the least amount of encoding. Prefer double quotes
// to single quotes if it's a tie.
// `quote_to_encode` is used both as the delimiter written around the value
// and as the character replaced by `quote_encoded` inside it.
let quote_to_encode;
let quote_encoded;
let amount_of_quotes_to_encode;
if proc_length > 0 && !has_whitespace_after_processing && !starts_with_quote {
// No need to do any further processing; processed value is
// already in unquoted form.
return Ok(AttrType::Unquoted);
} else if count_single_quotation < count_double_quotation {
// Fewer single quotes: delimit with `'` and encode the single quotes.
quote_to_encode = b'\'';
quote_encoded = ENCODED_SINGLE_QUOTE;
amount_of_quotes_to_encode = count_single_quotation;
} else {
// Fewer double quotes, or a tie: delimit with `"` and encode the double
// quotes.
quote_to_encode = b'"';
quote_encoded = ENCODED_DOUBLE_QUOTE;
amount_of_quotes_to_encode = count_double_quotation;
}
// TODO Improve; avoid direct memory access; clean API.
// Final length: two delimiters, plus the processed value with every quote to
// encode replaced by its encoded form.
let post_length = 2 + proc_length - amount_of_quotes_to_encode + (amount_of_quotes_to_encode * quote_encoded.len());
// Where the post-processed output should start in the output array.
let out_start = proc_value_start;
// Index of the last character of the processed value.
let proc_end = out_start + proc_length - 1;
// Index of the last character of the post-processed value.
let post_end = out_start + post_length - 1;
// `reader` walks the processed value backwards; `writer` walks the final
// position range backwards.
let mut reader = proc_end;
let mut writer = post_end;
// Closing delimiter goes in the last slot.
proc.data.set_out_char_at(writer, quote_to_encode);
writer -= 1;
// To prevent overwriting data when encoding quotes, post-process output
// in reverse. Loop condition is checked at end of loop instead of
// before to prevent underflow. WARNING: This code directly uses and
// manipulates struct members of `proc`, which in general should be
// avoided.
loop {
// NOTE(review): `reader` is derived from the output position
// (`proc_value_start` comes from `get_out_pos`), yet the character is read
// with `get_src_char_at` — confirm whether this should read the output.
let c = proc.data.get_src_char_at(reader);
if c == quote_to_encode {
// Move back by the encoded length, then place the encoded form in the
// slots just after the new `writer` position.
writer -= quote_encoded.len();
proc.data.replace_out_slice(writer + 1, quote_encoded);
} else {
proc.data.set_out_char_at(writer, c);
writer -= 1;
}
// Break before decrementing to prevent underflow.
if reader == out_start {
break;
}
reader -= 1;
}
// This must be done after previous loop to prevent overwriting data.
// Opening delimiter; `writer` equals `out_start` here when the length
// arithmetic above holds.
proc.data.set_out_char_at(writer, quote_to_encode);
// Continue output just past the closing delimiter.
proc.data.set_out_pos(post_end + 1);
Ok(AttrType::Quoted)
}