// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use html5ever::driver::ParseOpts; use html5ever::serialize::{Serialize, SerializeOpts, Serializer, TraversalScope}; use html5ever::tendril::{SliceExt, StrTendril, TendrilSink}; use html5ever::tokenizer::{TagKind, Token, TokenSink, TokenSinkResult, Tokenizer}; use html5ever::{parse_document, parse_fragment, serialize, QualName}; use markup5ever::{local_name, namespace_url, ns}; use markup5ever_arcdom::{ArcDom, SerializableHandle}; use std::io; struct Tokens(Vec); impl TokenSink for Tokens { type Handle = (); fn process_token(&mut self, token: Token, _: u64) -> TokenSinkResult<()> { self.0.push(token); TokenSinkResult::Continue } } impl Serialize for Tokens { fn serialize(&self, serializer: &mut S, _: TraversalScope) -> io::Result<()> where S: Serializer, { for t in self.0.iter() { match t { // TODO: check whether this is an IE conditional comment or a spec comment &Token::TagToken(ref tag) => { let name = QualName::new( None, "http://www.w3.org/1999/xhtml".into(), tag.name.as_ref().into(), ); match tag.kind { TagKind::StartTag => serializer.start_elem( name, tag.attrs.iter().map(|at| (&at.name, &at.value[..])), )?, TagKind::EndTag => serializer.end_elem(name)?, } } &Token::DoctypeToken(ref dt) => match dt.name { Some(ref name) => serializer.write_doctype(&name)?, None => {} }, &Token::CommentToken(ref chars) => serializer.write_comment(&chars)?, &Token::CharacterTokens(ref chars) => serializer.write_text(&chars)?, &Token::NullCharacterToken | &Token::EOFToken => {} &Token::ParseError(ref e) => println!("parse error: {:#?}", e), } } Ok(()) } } fn tokenize_and_serialize(input: StrTendril) -> StrTendril { let mut input = { let mut q = ::html5ever::tokenizer::BufferQueue::new(); q.push_front(input.into()); q }; let mut tokenizer = Tokenizer::new(Tokens(vec![]), Default::default()); let _ = tokenizer.feed(&mut input); tokenizer.end(); let mut output = ::std::io::Cursor::new(vec![]); serialize( &mut output, &tokenizer.sink, SerializeOpts { create_missing_parent: true, ..Default::default() }, ) .unwrap(); StrTendril::try_from_byte_slice(&output.into_inner()).unwrap() } fn parse_and_serialize(input: StrTendril) -> StrTendril { let dom = parse_fragment( ArcDom::default(), ParseOpts::default(), QualName::new(None, ns!(html), local_name!("body")), vec![], ) .one(input); let inner: SerializableHandle = dom.document.children.borrow()[0].clone().into(); let mut result = vec![]; serialize(&mut result, &inner, Default::default()).unwrap(); StrTendril::try_from_byte_slice(&result).unwrap() } macro_rules! test_fn { ($f:ident, $name:ident, $input:expr, $output:expr) => { #[test] fn $name() { assert_eq!($output, &*$f($input.to_tendril())); } }; // Shorthand for $output = $input ($f:ident, $name:ident, $input:expr) => { test_fn!($f, $name, $input, $input); }; } macro_rules! test { ($($t:tt)*) => { test_fn!(parse_and_serialize, $($t)*); }; } macro_rules! test_no_parse { ($($t:tt)*) => { test_fn!(tokenize_and_serialize, $($t)*); }; } test!(empty, r#""#); test!(fuzz, "Hello, World!

"#); test!( misnest, r#"

Hello!

, World!"#, r#"

Hello!

, World!"# ); test!(attr_literal, r#""#); test!(attr_escape_amp, r#""#); test!( attr_escape_amp_2, r#""#, r#""# ); test!( attr_escape_nbsp, "", r#""# ); test!( attr_escape_quot, r#""#, r#""# ); test!( attr_escape_several, r#""#, r#""# ); test!(text_literal, r#"

"'"

"#); test!(text_escape_amp, r#"

&

"#); test!(text_escape_amp_2, r#"

&

"#, r#"

&

"#); test!(text_escape_nbsp, "

x\u{a0}y

", r#"

x y

"#); test!(text_escape_lt, r#"

<

"#); test!(text_escape_gt, r#"

>

"#); test!(text_escape_gt2, r#"

>

"#, r#"

>

"#); test!( script_literal, r#""# ); test!( style_literal, r#""# ); test!(xmp_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"#); test!( iframe_literal, r#""# ); test!( noembed_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"# ); test!( noframes_literal, r#"(x & 1) < 2; y > "foo" + 'bar'"# ); test!(pre_lf_0, "
foo bar
"); test!(pre_lf_1, "
\nfoo bar
", "
foo bar
"); test!(pre_lf_2, "
\n\nfoo bar
", "
\nfoo bar
"); test!(textarea_lf_0, ""); test!( textarea_lf_1, "", "" ); test!( textarea_lf_2, "", "" ); test!(listing_lf_0, "foo bar"); test!( listing_lf_1, "\nfoo bar", "foo bar" ); test!( listing_lf_2, "\n\nfoo bar", "\nfoo bar" ); test!(comment_1, r#"

hi

"#); test!(comment_2, r#"

hi

"#); test!(comment_3, r#"

hi

"#); test!(comment_4, r#"

hi

"#); // FIXME: test serialization of qualified tag/attribute names that can't be // parsed from HTML test!(attr_ns_1, r#""#); test!(attr_ns_2, r#""#); test!(attr_ns_3, r#""#); test!(attr_ns_4, r#""#); test_no_parse!(malformed_tokens, r#"foo
"#); #[test] fn doctype() { let dom = parse_document(ArcDom::default(), ParseOpts::default()).one(""); dom.document.children.borrow_mut().truncate(1); // Remove let mut result = vec![]; let document: SerializableHandle = dom.document.clone().into(); serialize(&mut result, &document, Default::default()).unwrap(); assert_eq!(String::from_utf8(result).unwrap(), ""); } #[test] fn deep_tree() { let parser = parse_fragment( ArcDom::default(), ParseOpts::default(), QualName::new(None, ns!(html), local_name!("div")), vec![], ); let src = String::from("".repeat(60_000)); let dom = parser.one(src); let opts = SerializeOpts::default(); let mut ret_val = Vec::new(); let document: SerializableHandle = dom.document.clone().into(); serialize(&mut ret_val, &document, opts) .expect("Writing to a string shouldn't fail (expect on OOM)"); }