// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. #[macro_use] extern crate html5ever; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::io; use html5ever::parse_document; use html5ever::tendril::*; use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::{Attribute, ExpandedName, QualName}; struct Sink { next_id: Cell, names: RefCell>, } impl Sink { fn get_id(&self) -> usize { let id = self.next_id.get(); self.next_id.set(id + 2); id } } /// By implementing the TreeSink trait we determine how the data from the tree building step /// is processed. In this case the DOM elements are written into the "names" hashmap. /// /// For deeper understating of each function go to the TreeSink declaration. impl TreeSink for Sink { type Handle = usize; type Output = Self; type ElemName<'a> = ExpandedName<'a>; fn finish(self) -> Self { self } fn get_document(&self) -> usize { 0 } fn get_template_contents(&self, target: &usize) -> usize { if let Some(expanded_name!(html "template")) = self.names.borrow().get(target).map(|n| n.expanded()) { target + 1 } else { panic!("not a template element") } } fn same_node(&self, x: &usize, y: &usize) -> bool { x == y } fn elem_name(&self, target: &usize) -> ExpandedName { self.names .borrow() .get(target) .expect("not an element") .expanded() } fn create_element(&self, name: QualName, _: Vec, _: ElementFlags) -> usize { let id = self.get_id(); // N.B. We intentionally leak memory here to minimize the implementation complexity // of this example code. A real implementation would either want to use a real // real DOM tree implentation, or else use an arena as the backing store for // memory used by the parser. self.names .borrow_mut() .insert(id, Box::leak(Box::new(name))); id } fn create_comment(&self, _text: StrTendril) -> usize { self.get_id() } #[allow(unused_variables)] fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize { unimplemented!() } fn append_before_sibling(&self, _sibling: &usize, _new_node: NodeOrText) {} fn append_based_on_parent_node( &self, _element: &usize, _prev_element: &usize, _new_node: NodeOrText, ) { } fn parse_error(&self, _msg: Cow<'static, str>) {} fn set_quirks_mode(&self, _mode: QuirksMode) {} fn append(&self, _parent: &usize, _child: NodeOrText) {} fn append_doctype_to_document(&self, _: StrTendril, _: StrTendril, _: StrTendril) {} fn add_attrs_if_missing(&self, target: &usize, _attrs: Vec) { assert!(self.names.borrow().contains_key(target), "not an element"); } fn remove_from_parent(&self, _target: &usize) {} fn reparent_children(&self, _node: &usize, _new_parent: &usize) {} fn mark_script_already_started(&self, _node: &usize) {} } /// In this example we implement the TreeSink trait which takes each parsed elements and insert /// it to a hashmap, while each element is given a numeric id. fn main() { let sink = Sink { next_id: Cell::new(1), names: RefCell::new(HashMap::new()), }; // Read HTML from the standard input and parse it let stdin = io::stdin(); parse_document(sink, Default::default()) .from_utf8() .read_from(&mut stdin.lock()) .unwrap(); }