use crate::harness::suites::html5lib_tests::{get_test_cases, TestCase, TestToken, TestTokenList};
use crate::harness::{Input, TestFixture};
use lol_html::errors::RewritingError;
use lol_html::html_content::{DocumentEnd, TextType};
use lol_html::test_utils::Output;
use lol_html::{
    LocalName, LocalNameHash, Namespace, SharedEncoding, SharedMemoryLimiter,
    StartTagHandlingResult, Token, TokenCaptureFlags, TransformController, TransformStream,
    TransformStreamSettings,
};
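
/// Asserts that `$actual == $expected`, printing both values along with the
/// tokenizer state and the test input on failure.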
macro_rules! expect_eql {
($actual:expr, $expected:expr, $state:expr, $input:expr, $msg:expr) => {
assert!(
$actual == $expected,
"{}\n\
actual: {:#?}\n\
expected: {:#?}\n\
state: {:?}\n\
input: {:?}\n\
",
$msg,
$actual,
$expected,
$state,
$input,
);
};
}
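
/// Like `expect_eql!`, but asserts an arbitrary boolean condition.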
macro_rules! expect {
($actual:expr, $state:expr, $input:expr, $msg:expr) => {
assert!(
$actual,
"{}\n\
state: {:?}\n\
input: {:?}\n\
",
$msg, $state, $input,
);
};
}
type TokenHandler<'h> = Box<dyn FnMut(&mut Token) + 'h>;
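
/// A minimal `TransformController` for tests: it keeps the capture flags fixed
/// for the whole stream and forwards every captured token to `token_handler`.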
pub struct TestTransformController<'h> {
token_handler: TokenHandler<'h>,
capture_flags: TokenCaptureFlags,
}
impl<'h> TestTransformController<'h> {
pub fn new(token_handler: TokenHandler<'h>, capture_flags: TokenCaptureFlags) -> Self {
TestTransformController {
token_handler,
capture_flags,
}
}
}
impl TransformController for TestTransformController<'_> {
fn initial_capture_flags(&self) -> TokenCaptureFlags {
self.capture_flags
}
fn handle_start_tag(&mut self, _: LocalName, _: Namespace) -> StartTagHandlingResult {
Ok(self.capture_flags)
}
fn handle_end_tag(&mut self, _: LocalName) -> TokenCaptureFlags {
self.capture_flags
}
fn handle_token(&mut self, token: &mut Token) -> Result<(), RewritingError> {
(self.token_handler)(token);
Ok(())
}
fn handle_end(&mut self, _: &mut DocumentEnd) -> Result<(), RewritingError> {
Ok(())
}
fn should_emit_content(&self) -> bool {
true
}
}
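
/// Feeds `input` chunk by chunk through a `TransformStream` driven by a
/// `TestTransformController` and returns the serialized output on success.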
pub fn parse(
input: &Input,
capture_flags: TokenCaptureFlags,
initial_text_type: TextType,
last_start_tag_name_hash: LocalNameHash,
    token_handler: TokenHandler<'_>,
) -> Result<String, RewritingError> {
let encoding = input
.encoding()
.expect("Input should be initialized before parsing");
let mut output = Output::new(encoding.into());
let transform_controller = TestTransformController::new(token_handler, capture_flags);
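    // Cap the parser's internal buffering at 2 KiB; exceeding the limit during
    // `write`/`end` surfaces as a `RewritingError`.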
let memory_limiter = SharedMemoryLimiter::new(2048);
let mut transform_stream = TransformStream::new(TransformStreamSettings {
transform_controller,
output_sink: |chunk: &[u8]| output.push(chunk),
preallocated_parsing_buffer_size: 0,
memory_limiter,
encoding: SharedEncoding::new(encoding),
strict: true,
});
let parser = transform_stream.parser();
parser.set_last_start_tag_name_hash(last_start_tag_name_hash);
parser.switch_text_type(initial_text_type);
for chunk in input.chunks() {
transform_stream.write(chunk)?;
}
transform_stream.end()?;
Ok(output.into())
}
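
/// Keeps only the expected tokens of the kinds that the given capture flags
/// would actually capture.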
fn filter_tokens(tokens: &[TestToken], capture_flags: TokenCaptureFlags) -> Vec<TestToken> {
tokens
.iter()
.filter(|t| match t {
TestToken::Doctype { .. } if capture_flags.contains(TokenCaptureFlags::DOCTYPES) => {
true
}
TestToken::StartTag { .. }
if capture_flags.contains(TokenCaptureFlags::NEXT_START_TAG) =>
{
true
}
TestToken::EndTag { .. } if capture_flags.contains(TokenCaptureFlags::NEXT_END_TAG) => {
true
}
TestToken::Comment(_) if capture_flags.contains(TokenCaptureFlags::COMMENTS) => true,
TestToken::Text(_) if capture_flags.contains(TokenCaptureFlags::TEXT) => true,
_ => false,
})
.cloned()
.collect()
}
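
/// Concatenates runs of adjacent text tokens into single tokens, so that
/// differences in text chunking don't cause spurious mismatches.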
fn fold_text_tokens(tokens: Vec<TestToken>) -> Vec<TestToken> {
tokens.into_iter().fold(Vec::default(), |mut res, t| {
if let TestToken::Text(ref text) = t {
if let Some(TestToken::Text(last)) = res.last_mut() {
*last += text;
return res;
}
}
res.push(t);
res
})
}
pub struct TokenCapturingTests;
impl TokenCapturingTests {
fn run_test_case(
test: &TestCase,
initial_text_type: TextType,
last_start_tag_name_hash: LocalNameHash,
) {
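        // Replay the same input under each capture configuration: every token
        // kind in isolation, all of them at once, and none at all.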
[
TokenCaptureFlags::all(),
TokenCaptureFlags::NEXT_START_TAG,
TokenCaptureFlags::NEXT_END_TAG,
TokenCaptureFlags::TEXT,
TokenCaptureFlags::COMMENTS,
TokenCaptureFlags::DOCTYPES,
TokenCaptureFlags::empty(),
]
.iter()
.cloned()
.for_each(|capture_flags| {
let mut expected_tokens = filter_tokens(&test.expected_tokens, capture_flags);
let mut token_list = TestTokenList::default();
let parsing_result = parse(
&test.input,
capture_flags,
initial_text_type,
last_start_tag_name_hash,
Box::new(|t| token_list.push(t)),
);
            let mut actual_tokens: Vec<TestToken> = token_list.into();
            // NOTE: text is a special case: it's impossible to achieve the same
            // text chunk layout as in the test data without the surrounding tokens
            // (in the test data all character tokens that are not separated by other
            // tokens get concatenated, ignoring any non-token lexemes like `<![CDATA[`
            // in between). Therefore, whenever text is captured we fold adjacent
            // text tokens on both the expected and the actual side into single
            // tokens before comparing.
            if capture_flags.contains(TokenCaptureFlags::TEXT) {
                actual_tokens = fold_text_tokens(actual_tokens);
                expected_tokens = fold_text_tokens(expected_tokens);
            }

            match parsing_result {
                Ok(output) => {
expect_eql!(
actual_tokens,
expected_tokens,
initial_text_type,
test.input,
format!("Token mismatch (capture: {:#?})", capture_flags)
);
expect_eql!(
output,
test.input.as_str(),
initial_text_type,
test.input,
format!(
"Serialized output doesn't match original input (capture: {:#?})",
capture_flags
)
);
}
Err(_) => {
expect!(
test.expected_bailout.is_some(),
initial_text_type,
test.input,
format!("Unexpected bailout (capture: {:#?})", capture_flags)
);
}
}
});
}
}
impl TestFixture for TokenCapturingTests {
    fn test_cases() -> Vec<TestCase> {
get_test_cases()
}
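
    /// Runs the test case once per initial tokenizer state declared in the
    /// test data, using the test's declared last start tag name.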
fn run(test: &TestCase) {
for cs in &test.initial_states {
Self::run_test_case(
test,
TextType::from(cs.as_str()),
test.last_start_tag.as_str().into(),
);
}
}
}
test_fixture!(TokenCapturingTests);