#!/usr/bin/env rust-script //! This is a regular crate doc comment, but it also contains a partial //! Cargo manifest. Note the use of a *fenced* code block, and the //! `cargo` "language". //! //! ```cargo //! [dependencies] //! heck = "0.4.1" //! scraper = "0.16.0" //! ureq = "2.6.2" //! itertools = "0.10.5" //! ``` use heck::{AsKebabCase, ToSnakeCase, ToUpperCamelCase}; use itertools::Itertools; use scraper::{Element, ElementRef, Html, Selector}; use std::{collections::BTreeMap, io::Write}; fn main() { let agent = ureq::agent(); let resp = agent .get("https://developer.mozilla.org/en-US/docs/Web/HTML/Element") .call() .unwrap(); let html = resp.into_string().unwrap(); let document = Html::parse_document(&html); let selector = Selector::parse("td:first-child > a[href^='/en-US/docs/Web/HTML/Element/']:only-child") .unwrap(); let mut elems = Vec::new(); let global_attrs = BTreeMap::from_iter(get_global_attrs(false)); let owned_global_attrs = BTreeMap::from_iter(get_global_attrs(true)); let mut buf = String::from( "// generated by gen.rs + rustfmt - not in a build.rs because HTML tags don't change too often //! An auto-generated crate containing all HTML tags and their attributes. //! This crate is generated from the [MDN HTML element reference](https://developer.mozilla.org/en-US/docs/Web/HTML/Element). //! //! The `Owned` variants are the same as the `` variants, but without lifetimes. #![no_std] #[cfg(feature = \"alloc\")] extern crate alloc; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum AttributeValue<'life> { Str(&'life str), Bool(bool), } impl<'life> core::convert::From<&'life str> for AttributeValue<'life> { fn from(s: &'life str) -> Self { AttributeValue::Str(s) } } impl<'life> core::convert::From for AttributeValue<'life> { fn from(b: bool) -> Self { AttributeValue::Bool(b) } } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg(feature = \"alloc\")] pub enum AttributeValueOwned { #[cfg(not(feature = \"tendril\"))] Str(alloc::string::String), #[cfg(feature = \"tendril\")] Str(tendril::Tendril), Bool(bool), } #[cfg(all(feature = \"alloc\", not(feature = \"tendril\")))] impl core::convert::From for AttributeValueOwned { fn from(s: alloc::string::String) -> Self { AttributeValueOwned::Str(s) } } #[cfg(all(feature = \"alloc\", feature = \"tendril\"))] impl core::convert::From> for AttributeValueOwned { fn from(s: tendril::Tendril) -> Self { AttributeValueOwned::Str(s) } } #[cfg(feature = \"alloc\")] impl core::convert::From for AttributeValueOwned { fn from(b: bool) -> Self { AttributeValueOwned::Bool(b) } } ", ) .into_bytes(); for e in document.select(&selector) { let url = format!( "https://developer.mozilla.org{}", e.value().attr("href").unwrap() ); // the name without the brackets let name = e.text().next().unwrap(); let name = &name[1..name.len() - 1]; let name = name.to_upper_camel_case(); get_and_write_elem( &agent, &url, name, global_attrs.clone(), owned_global_attrs.clone(), &mut elems, &mut buf, ); } for name in ["H1", "H2", "H3", "H4", "H5", "H6"] { get_and_write_elem( &agent, "https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Heading_Elements", name.to_string(), global_attrs.clone(), owned_global_attrs.clone(), &mut elems, &mut buf, ); } { let doc = "/// An unknown element.".to_string(); let tag_name_doc = "The tag name of the element.".to_string(); write_elem( doc.clone(), "Unknown".to_string(), &{ let mut attrs = global_attrs.clone(); attrs.insert( "tag_name".to_string(), ( tag_name_doc.clone(), "AttributeValue<'life>".to_string(), false, ), ); attrs }, false, false, &mut buf, ); write_elem( doc, "Unknown".to_string(), &{ let mut attrs = owned_global_attrs.clone(); attrs.insert( "tag_name".to_string(), ( tag_name_doc.clone(), "AttributeValueOwned".to_string(), false, ), ); attrs }, false, true, &mut buf, ); elems.push(("Unknown".to_string(), false)); } write_elem_enum(&elems, &global_attrs, false, &mut buf); write_elem_enum(&elems, &owned_global_attrs, true, &mut buf); std::fs::write("src/lib.rs", buf).unwrap(); std::process::Command::new("rustfmt") .arg("src/lib.rs") .status() .unwrap(); } fn get_global_attrs(owned: bool) -> Vec<(String, (String, String, bool))> { let agent = ureq::agent(); let resp = agent .get("https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes") .call() .unwrap(); let html = resp.into_string().unwrap(); let document = Html::parse_document(&html); let selector = Selector::parse("dl").unwrap(); let mut dls = document.select(&selector); let mut attrs = dl_to_attrs(dls.next().unwrap(), owned); attrs.extend(dl_to_attrs(dls.next().unwrap(), owned)); // there's a weird note seperating the two dl's attrs.push(( "extra".to_string(), ( "/// Extra attributes of the element. /// This is a map of attribute names to their values, and the attribute names are in lowercase." .to_string(), if owned { "alloc::collections::BTreeMap" } else { "alloc::collections::BTreeMap<&'life str, AttributeValue<'life>>" } .to_string(), true, ), )); attrs } fn get_attrs(document: &Html, owned: bool) -> Vec<(String, (String, String, bool))> { let selector = Selector::parse(".section-content > dl").unwrap(); if let Some(dl) = document.select(&selector).next() { dl_to_attrs(dl, owned) } else { Vec::new() } } // fn get_aria_attrs() -> Vec<(String, String)> { // let agent = ureq::agent(); // // scrape MDN for all the elements // let resp = agent // .get("https://developer.mozilla.org/en-US/docs/Web/Accessibility/ARIA/Attributes") // .call() // .unwrap(); // let html = resp.into_string().unwrap(); // let document = Html::parse_document(&html); // let selector = Selector::parse( // "td:first-child > a[href^='/en-US/docs/Web/Accessibility/ARIA/Attributes/']:only-child", // ) // .unwrap(); // } fn get_mdn_doc(document: &Html, url: &str) -> String { let mut summary = document .select(&Selector::parse(".main-page-content > .section-content > p").unwrap()) .map(|e| e.inner_html()) .collect::>(); if summary.len() == 0 { summary = document .select( &Selector::parse( ".main-page-content > section[aria-labelledby='summary'] > .section-content", ) .unwrap(), ) .map(|e| e.inner_html()) .collect::>(); } let summary = summary .join("\n\n") .replace("
", "\n\n") .replace('\n', "\n/// "); format!("/// {}\n///\n/// More information: <{url}>", summary) } fn dl_to_attrs(dl: ElementRef, owned: bool) -> Vec<(String, (String, String, bool))> { let mut attrs = Vec::new(); for e in dl .children() .filter_map(ElementRef::wrap) .filter(|e| e.value().name() == "dt") { let name = e.text().next().unwrap(); let desc = e .next_sibling_element() .unwrap() .inner_html() .replace("
", "\n\n") .replace('\n', "\n/// "); let name = name.to_snake_case(); let (ty, special) = match name.as_str() { "data" => ( if owned { "alloc::collections::BTreeMap" } else { "alloc::collections::BTreeMap<&'life str, AttributeValue<'life>>" }, true, ), _ => ( if owned { "AttributeValueOwned" } else { "AttributeValue<'life>" }, false, ), }; attrs.push(( if ["type", "loop", "async", "for", "as"].contains(&&*name) { format!("{name}_") } else { name }, (desc, ty.to_string(), special), )); } attrs } fn write_elem( doc: String, name: String, attrs: &BTreeMap, deprecated: bool, owned: bool, buf: &mut Vec, ) { writeln!( buf, "{0} {1} {2} #[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct {name}{4}{3} {{ {5} }} #[allow(deprecated)] {2} impl{3} {name}{4}{3} {{ /// Get the tag name of the element. /// This is the same as the name of the struct, in kebab-case. pub fn tag() -> &'static str {{ \"{kebab}\" }} /// Sets an attribute of the element. /// This sets the attribute of the struct. If the attribute is not a known attribute, it is added to the `extra` map. /// If the `alloc` feature is disabled, this function will silently fail. /// /// # Note /// This only works when the attribute is lowercase. pub fn set_attr(&mut self, name: &{6} str, value: {7}) {{ match name {{ {8}, #[cfg(feature = \"alloc\")] _ => {{ #[allow(clippy::useless_conversion)] self.extra.insert(name.into(), value.into()); }} #[cfg(not(feature = \"alloc\"))] _ => {{}} }} }} }}", doc, if deprecated { "#[deprecated]" } else { "" }, if owned { "#[cfg(feature = \"alloc\")]" } else { "" }, if owned { "" } else { "<'life>" }, if owned { "Owned" } else { "" }, attrs .iter() .format_with(",\n/// ", |(name, (desc, ty, special)), f| f(&format_args!( "{desc} {} pub {name}: {}", if *special { "#[cfg(feature = \"alloc\")]" } else { "" }, if *special { ty.to_string() } else { format!("core::option::Option<{}>", ty) }, ))), if owned { "" } else { " 'life" }, if owned { "impl core::convert::Into" } else { "impl core::convert::Into>" }, attrs .iter() .filter(|(_, (_, _, special))| !special) .format_with(",\n", |(name, _), f| f(&format_args!( "\"{name}\" => self.{name} = Some(value.into())", ))), kebab = AsKebabCase(name.clone()), ) .unwrap(); } fn write_elem_enum( elems: &Vec<(String, bool)>, global_attrs: &BTreeMap, owned: bool, buf: &mut Vec, ) { writeln!( buf, "#[allow(deprecated)] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] {} pub enum Element{} {{ {} }}", if owned { "#[cfg(feature = \"alloc\")]" } else { "" }, if owned { "Owned" } else { "<'life>" }, elems .iter() .format_with(",\n", |(e, dep), f| f(&format_args!( "{} {e}({e}{})", if *dep { "#[deprecated]" } else { "" }, if owned { "Owned" } else { "<'life>" }, ))) ) .unwrap(); writeln!( buf, "#[allow(deprecated)] {0} impl{1} Element{2}{1} {{ /// Gets an element from a lowercase tag name. pub fn from_tag(tag: &str) -> Self {{ match tag {{ {3}, _ => Self::default(), }} }} /// Gets the tag name of the element. pub fn tag(&self) -> &'static str {{ match self {{ {4}, }} }} /// Sets an attribute of the element. /// This sets the attribute of the struct. If the attribute is not a known attribute, it is added to the `extra` map. /// If the `alloc` feature is disabled, this function will silently fail. /// /// # Note /// This only works when the attribute is lowercase. pub fn set_attr(&mut self, name: &{5} str, value: {6}) {{ match self {{ {7}, }} }} }}", if owned { "#[cfg(feature = \"alloc\")]" } else { "" }, if owned { "" } else { "<'life>" }, if owned { "Owned" } else { "" }, elems.iter().format_with(",\n", |(e, _), f| f(&format_args!( "\"{}\" => Self::{e}({e}{}::default())", AsKebabCase(e), if owned { "Owned" } else { "" }, ))), elems.iter().format_with(",\n", |(e, _), f| f(&format_args!( "Self::{e}(_) => {e}::tag()", ))), if owned { "" } else { " 'life" }, if owned { "impl core::convert::Into" } else { "impl core::convert::Into>" }, elems.iter().format_with(",\n", |(e, _), f| f(&format_args!( "Self::{e}(e) => e.set_attr(name, value)", ))), ) .unwrap(); writeln!( buf, "#[allow(deprecated)] {0} impl{1} Element{2}{1} {{ {3} {4} }}", if owned { "#[cfg(feature = \"alloc\")]" } else { "" }, if owned { "" } else { "<'life>" }, if owned { "Owned" } else { "" }, global_attrs .iter() .format_with("\n", |(name, (desc, ty, special)), f| f(&format_args!( "{desc} {} pub fn {name}(&self) -> {}{}{}{} {{ match self {{ {} }} }}", if *special || owned { "#[cfg(feature = \"alloc\")]" } else { "" }, if *special { "" } else { "core::option::Option<" }, if *special || owned { "&" } else { "" }, ty, if *special { "" } else { ">" }, elems.iter().format_with(",", |(e, _), f| f(&format_args!( "Self::{e}(e) => {}e.{name}{}", if *special { "&" } else { "" }, if !*special && owned { ".as_ref()" } else { "" } ))) ))), global_attrs .iter() .filter(|(_, (_, _, special))| !special) .format_with("\n", |(name, (desc, ty, special)), f| f(&format_args!( "{desc} {} pub fn set_{name}(&mut self, val: {ty}) {{ match self {{ {} }}; }}", if *special || owned { "#[cfg(feature = \"alloc\")]" } else { "" }, elems.iter().format_with(",", |(e, _), f| f(&format_args!( "Self::{e}(e) => e.{name}.replace(val)", ))) ))) ) .unwrap(); writeln!( buf, "#[allow(deprecated)] {0} impl{1} Default for Element{2}{1} {{ fn default() -> Self {{ Self::Unknown(Unknown{2}::default()) }} }}", if owned { "#[cfg(feature = \"alloc\")]" } else { "" }, if owned { "" } else { "<'life>" }, if owned { "Owned" } else { "" }, ) .unwrap(); } fn get_and_write_elem( agent: &ureq::Agent, url: &str, name: String, global_attrs: BTreeMap, owned_global_attrs: BTreeMap, elems: &mut Vec<(String, bool)>, mut buf: &mut Vec, ) { let resp = agent.get(&url).call().unwrap(); let html = resp.into_string().unwrap(); let document = Html::parse_document(&html); let deprecated = document .select( &Selector::parse(".main-page-content > .section-content > .notecard.deprecated") .unwrap(), ) .count() != 0; elems.push((name.clone(), deprecated)); let mut attrs = global_attrs; attrs.extend(get_attrs(&document, false)); write_elem( get_mdn_doc(&document, &url), name.clone(), &attrs, deprecated, false, &mut buf, ); let mut attrs = owned_global_attrs; attrs.extend(get_attrs(&document, true)); write_elem( get_mdn_doc(&document, &url), name, &attrs, deprecated, true, buf, ); }