// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 or the MIT license
// , at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
extern crate html5ever;
use std::cell::Cell;
use std::io;
use html5ever::tendril::*;
use html5ever::tokenizer::BufferQueue;
use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken};
use html5ever::tokenizer::{
ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
};
#[derive(Clone)]
struct TokenPrinter {
in_char_run: Cell,
}
impl TokenPrinter {
fn is_char(&self, is_char: bool) {
match (self.in_char_run.get(), is_char) {
(false, true) => print!("CHAR : \""),
(true, false) => println!("\""),
_ => (),
}
self.in_char_run.set(is_char);
}
fn do_char(&self, c: char) {
self.is_char(true);
print!("{}", c.escape_default().collect::());
}
}
impl TokenSink for TokenPrinter {
type Handle = ();
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
match token {
CharacterTokens(b) => {
for c in b.chars() {
self.do_char(c);
}
},
NullCharacterToken => self.do_char('\0'),
TagToken(tag) => {
self.is_char(false);
// This is not proper HTML serialization, of course.
match tag.kind {
StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name),
EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name),
}
for attr in tag.attrs.iter() {
print!(
" \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
attr.name.local, attr.value
);
}
if tag.self_closing {
print!(" \x1b[31m/\x1b[0m");
}
println!(">");
},
ParseError(err) => {
self.is_char(false);
println!("ERROR: {}", err);
},
_ => {
self.is_char(false);
println!("OTHER: {:?}", token);
},
}
TokenSinkResult::Continue
}
}
/// In this example we implement the TokenSink trait in such a way that each token is printed.
/// If a there's an error while processing a token it is printed as well.
fn main() {
let sink = TokenPrinter {
in_char_run: Cell::new(false),
};
// Read HTML from standard input
let mut chunk = ByteTendril::new();
io::stdin().read_to_tendril(&mut chunk).unwrap();
let input = BufferQueue::default();
input.push_back(chunk.try_reinterpret().unwrap());
let tok = Tokenizer::new(
sink,
TokenizerOpts {
profile: true,
..Default::default()
},
);
let _ = tok.feed(&input);
assert!(input.is_empty());
tok.end();
tok.sink.is_char(false);
}