use html5ever::data::{C1_REPLACEMENTS, NAMED_ENTITIES};
use lol_html::html_content::TextType;
use std::char;
use std::iter::Peekable;
use std::str::Chars;
pub fn to_null_decoded(s: &str) -> String {
Decoder::new(s).unsafe_null().run()
}
pub fn decode_attr_value(s: &str) -> String {
Decoder::new(s).unsafe_null().attr_entities().run()
}
pub fn decode_text(text: &str, text_type: TextType) -> String {
let mut decoder = Decoder::new(text);
if text_type.should_replace_unsafe_null_in_text() {
decoder = decoder.unsafe_null();
}
if text_type.allows_html_entities() {
decoder = decoder.text_entities();
}
decoder.run()
}
#[derive(PartialEq, Eq)]
enum Entities {
None,
Text,
Attribute,
}
struct Decoder<'a> {
chars: Peekable>,
result: String,
null: bool,
entities: Entities,
}
impl<'a> Decoder<'a> {
fn next_if_char(&mut self, expected: char) -> bool {
self.next_if(|c| c == expected).is_some()
}
fn next_if(&mut self, f: impl Fn(char) -> bool) -> Option {
self.next_opt(|c| if f(c) { Some(c) } else { None })
}
fn next_opt(&mut self, f: impl Fn(char) -> Option) -> Option {
let opt = self.chars.peek().cloned().and_then(f);
if opt.is_some() {
self.chars.next();
}
opt
}
fn decode_numeric_entity(&mut self, radix: u32) -> bool {
if let Some(mut code) = self.next_opt(|c| c.to_digit(radix)) {
while let Some(digit) = self.next_opt(|c| c.to_digit(radix)) {
if code < 0x0010_FFFF {
code = code * radix + digit;
}
}
self.result.push(
match code {
0x00 => None,
0x80..=0x9F => {
C1_REPLACEMENTS[(code - 0x80) as usize].or_else(|| char::from_u32(code))
}
_ => char::from_u32(code),
}
.unwrap_or('\u{FFFD}'),
);
self.next_if_char(';');
true
} else {
self.result += "";
false
}
}
fn decode_named_entity(&mut self) {
let mut name_buf = String::new();
let mut name_match = ('&' as u32, 0, 0);
while let Some(&c) = self.chars.peek() {
name_buf.push(c);
if let Some(&m) = NAMED_ENTITIES.get(&name_buf[..]) {
self.chars.next();
if m.0 != 0 {
if c != ';' && self.entities == Entities::Attribute {
if let Some('A'..='Z' | 'a'..='z' | '0'..='9' | '=') = self.chars.peek() {
continue;
}
}
name_match = (m.0, m.1, name_buf.len());
}
} else {
name_buf.pop();
break;
}
}
self.result.push(char::from_u32(name_match.0).unwrap());
if name_match.1 != 0 {
self.result.push(char::from_u32(name_match.1).unwrap());
}
self.result += &name_buf[name_match.2..];
}
fn decode_entity(&mut self) {
if self.next_if_char('#') {
if let Some(x) = self.next_if(|c| c == 'x' || c == 'X') {
if !self.decode_numeric_entity(16) {
self.result.push(x);
}
} else {
self.decode_numeric_entity(10);
}
} else {
self.decode_named_entity();
}
}
fn decode_cr(&mut self) {
self.result.push('\n');
self.next_if_char('\n');
}
pub fn new(src: &'a str) -> Self {
Decoder {
chars: src.chars().peekable(),
result: String::with_capacity(src.len()),
null: false,
entities: Entities::None,
}
}
pub fn unsafe_null(mut self) -> Self {
self.null = true;
self
}
pub fn text_entities(mut self) -> Self {
self.entities = Entities::Text;
self
}
pub fn attr_entities(mut self) -> Self {
self.entities = Entities::Attribute;
self
}
pub fn run(mut self) -> String {
while let Some(c) = self.chars.next() {
match c {
'\r' => {
self.decode_cr();
}
'\0' if self.null => {
self.result.push('\u{FFFD}');
}
'&' if self.entities != Entities::None => {
self.decode_entity();
}
_ => {
self.result.push(c);
}
}
}
self.result
}
}