// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use html5ever::driver::ParseOpts; use html5ever::serialize::{Serialize, SerializeOpts, Serializer, TraversalScope}; use html5ever::tendril::{SliceExt, StrTendril, TendrilSink}; use html5ever::tokenizer::{TagKind, Token, TokenSink, TokenSinkResult, Tokenizer}; use html5ever::{parse_document, parse_fragment, serialize, QualName}; use markup5ever::{local_name, namespace_url, ns}; use markup5ever_rcdom::{RcDom, SerializableHandle}; use std::cell::RefCell; use std::io; struct Tokens(RefCell>); impl TokenSink for Tokens { type Handle = (); fn process_token(&self, token: Token, _: u64) -> TokenSinkResult<()> { self.0.borrow_mut().push(token); TokenSinkResult::Continue } } impl Serialize for Tokens { fn serialize(&self, serializer: &mut S, _: TraversalScope) -> io::Result<()> where S: Serializer, { for t in self.0.borrow().iter() { match &t { // TODO: check whether this is an IE conditional comment or a spec comment Token::TagToken(tag) => { let name = QualName::new( None, "http://www.w3.org/1999/xhtml".into(), tag.name.as_ref().into(), ); match tag.kind { TagKind::StartTag => serializer.start_elem( name, tag.attrs.iter().map(|at| (&at.name, &at.value[..])), )?, TagKind::EndTag => serializer.end_elem(name)?, } }, Token::DoctypeToken(dt) => { if let Some(name) = &dt.name { serializer.write_doctype(name)? } }, Token::CommentToken(chars) => serializer.write_comment(chars)?, Token::CharacterTokens(chars) => serializer.write_text(chars)?, Token::NullCharacterToken | &Token::EOFToken => {}, Token::ParseError(e) => println!("parse error: {e:#?}"), } } Ok(()) } } fn tokenize_and_serialize(input: StrTendril) -> StrTendril { let input = { let q = ::html5ever::tokenizer::BufferQueue::default(); q.push_front(input); q }; let tokenizer = Tokenizer::new(Tokens(RefCell::new(vec![])), Default::default()); let _ = tokenizer.feed(&input); tokenizer.end(); let mut output = ::std::io::Cursor::new(vec![]); serialize( &mut output, &tokenizer.sink, SerializeOpts { create_missing_parent: true, ..Default::default() }, ) .unwrap(); StrTendril::try_from_byte_slice(&output.into_inner()).unwrap() } fn parse_and_serialize(input: StrTendril) -> StrTendril { let dom = parse_fragment( RcDom::default(), ParseOpts::default(), QualName::new(None, ns!(html), local_name!("body")), vec![], ) .one(input); let inner: SerializableHandle = dom.document.children.borrow()[0].clone().into(); let mut result = vec![]; serialize(&mut result, &inner, Default::default()).unwrap(); StrTendril::try_from_byte_slice(&result).unwrap() } macro_rules! test_fn { ($f:ident, $name:ident, $input:expr, $output:expr) => { #[test] fn $name() { assert_eq!($output, &*$f($input.to_tendril())); } }; // Shorthand for $output = $input ($f:ident, $name:ident, $input:expr) => { test_fn!($f, $name, $input, $input); }; } macro_rules! test { ($($t:tt)*) => { test_fn!(parse_and_serialize, $($t)*); }; } macro_rules! test_no_parse { ($($t:tt)*) => { test_fn!(tokenize_and_serialize, $($t)*); }; } test!(empty, r#""#); test!(fuzz, "Hello, World!

"#); test!( misnest, r#"

Hello!

, World!"#, r#"

Hello!

, World!"# ); test!(attr_literal, r#""#); test!(attr_escape_amp, r#""#); test!( attr_escape_amp_2, r#""#, r#""# ); test!( attr_escape_nbsp, "", r#""# ); test!( attr_escape_quot, r#""#, r#""# ); test!( attr_escape_several, r#""#, r#""# ); test!(text_literal, r#"

"'"

"#); test!(text_escape_amp, r#"

&

"#); test!(text_escape_amp_2, r#"

&

"#, r#"

&

"#); test!(text_escape_nbsp, "

x\u{a0}y

", r#"

x y

"#); test!(text_escape_lt, r#"

<

"#); test!(text_escape_gt, r#"

>

"#); test!(text_escape_gt2, r#"

>

"#, r#"

>

"#); test!( script_literal, r#""# ); test!( style_literal, r#""# ); test!(xmp_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"#); test!( iframe_literal, r#""# ); test!( noembed_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"# ); test!( noframes_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"# ); test!(pre_lf_0, "
foo bar
"); test!(pre_lf_1, "
\nfoo bar
", "
foo bar
"); test!(pre_lf_2, "
\n\nfoo bar
", "
\nfoo bar
"); test!(textarea_lf_0, ""); test!( textarea_lf_1, "", "" ); test!( textarea_lf_2, "", "" ); test!(listing_lf_0, "foo bar"); test!( listing_lf_1, "\nfoo bar", "foo bar" ); test!( listing_lf_2, "\n\nfoo bar", "\nfoo bar" ); test!(comment_1, r#"

hi

"#); test!(comment_2, r#"

hi

"#); test!(comment_3, r#"

hi

"#); test!(comment_4, r#"

hi

"#); // FIXME: test serialization of qualified tag/attribute names that can't be // parsed from HTML test!(attr_ns_1, r#""#); test!(attr_ns_2, r#""#); test!(attr_ns_3, r#""#); test!(attr_ns_4, r#""#); test_no_parse!(malformed_tokens, r#"foo
"#); #[test] fn doctype() { let dom = parse_document(RcDom::default(), ParseOpts::default()).one(""); dom.document.children.borrow_mut().truncate(1); // Remove let mut result = vec![]; let document: SerializableHandle = dom.document.clone().into(); serialize(&mut result, &document, Default::default()).unwrap(); assert_eq!(String::from_utf8(result).unwrap(), ""); } #[test] fn deep_tree() { let parser = parse_fragment( RcDom::default(), ParseOpts::default(), QualName::new(None, ns!(html), local_name!("div")), vec![], ); let src = "".repeat(60_000); let dom = parser.one(src); let opts = SerializeOpts::default(); let mut ret_val = Vec::new(); let document: SerializableHandle = dom.document.clone().into(); serialize(&mut ret_val, &document, opts) .expect("Writing to a string shouldn't fail (expect on OOM)"); }