use std::env; use std::fs; use std::io::{BufRead, BufReader, BufWriter, Write}; use std::path::Path; use phf_codegen::Map; use quote::quote; /* This build script contains a "parser" for the USB ID database. * "Parser" is in scare-quotes because it's really a line matcher with a small amount * of context needed for pairing nested entities (e.g. devices) with their parents (e.g. vendors). */ // these are the definitions for the generated maps that will be written to the source file const VENDOR_PROLOGUE: &str = "static USB_IDS: phf::Map = "; const CLASS_PROLOGUE: &str = "static USB_CLASSES: phf::Map = "; const AUDIO_TERMINAL_PROLOGUE: &str = "static USB_AUDIO_TERMINALS: phf::Map = "; const HID_ID_PROLOGUE: &str = "static USB_HID_IDS: phf::Map = "; const HID_R_PROLOGUE: &str = "static USB_HID_R_TYPES: phf::Map = "; const BIAS_PROLOGUE: &str = "static USB_BIASES: phf::Map = "; const PHY_PROLOGUE: &str = "static USB_PHYS: phf::Map = "; const HUT_PROLOGUE: &str = "static USB_HUTS: phf::Map = "; const LANG_PROLOGUE: &str = "static USB_LANGS: phf::Map = "; const HID_CC_PROLOGUE: &str = "static USB_HID_CCS: phf::Map = "; const TERMINAL_PROLOGUE: &str = "static USB_VIDEO_TERMINALS: phf::Map = "; trait CgEntry { fn id(&self) -> T; } struct CgVendor { id: u16, name: String, devices: Vec, } struct CgDevice { id: u16, name: String, interfaces: Vec, } struct CgClass { id: u8, name: String, sub_classes: Vec, } type CgSubClass = CgParentType; struct CgParentType { id: T, name: String, children: Vec, } impl> CgEntry for CgParentType { fn id(&self) -> T { self.id } } struct CgType { id: T, name: String, } impl CgEntry for CgType { fn id(&self) -> T { self.id } } type CgInterface = CgType; type CgProtocol = CgType; type CgAtType = CgType; type CgHidType = CgType; type CgRType = CgType; type CgRBiasType = CgType; type CgPhyType = CgType; type CgHidUsage = CgType; type CgHut = CgParentType; type CgDialect = CgType; type CgLang = CgParentType; type CgCountryCode = CgType; type CgTerminalType = CgType; /// Parser state parses only the type for the current section, this is because some /// parsers are ambiguous without context; device.interface == subclass.protocol for example. enum ParserState { Vendors(Map, Option, u16), Classes(Map, Option, u8), AtType(Map, Option), HidType(Map, Option), RType(Map, Option), BiasType(Map, Option), PhyType(Map, Option), HutType(Map, Option), Lang(Map, Option), CountryCode(Map, Option), TerminalType(Map, Option), } impl ParserState { /// Return the prologue string for the current state; the type definition fn prologue_str(&self) -> &'static str { match self { ParserState::Vendors(_, _, _) => VENDOR_PROLOGUE, ParserState::Classes(_, _, _) => CLASS_PROLOGUE, ParserState::AtType(_, _) => AUDIO_TERMINAL_PROLOGUE, ParserState::HidType(_, _) => HID_ID_PROLOGUE, ParserState::RType(_, _) => HID_R_PROLOGUE, ParserState::BiasType(_, _) => BIAS_PROLOGUE, ParserState::PhyType(_, _) => PHY_PROLOGUE, ParserState::HutType(_, _) => HUT_PROLOGUE, ParserState::Lang(_, _) => LANG_PROLOGUE, ParserState::CountryCode(_, _) => HID_CC_PROLOGUE, ParserState::TerminalType(_, _) => TERMINAL_PROLOGUE, } } /// Emit any pending entries to the map fn emit(&mut self) { match self { ParserState::Vendors(m, Some(vendor), _) => { m.entry(vendor.id, "e!(#vendor).to_string()); } ParserState::Classes(m, Some(class), _) => { m.entry(class.id, "e!(#class).to_string()); } ParserState::AtType(m, Some(t)) | ParserState::TerminalType(m, Some(t)) => { m.entry(t.id(), "e!(#t).to_string()); } ParserState::HidType(m, Some(t)) | ParserState::RType(m, Some(t)) | ParserState::BiasType(m, Some(t)) | ParserState::CountryCode(m, Some(t)) | ParserState::PhyType(m, Some(t)) => { m.entry(t.id(), "e!(#t).to_string()); } ParserState::HutType(m, Some(t)) => { m.entry(t.id, "e!(#t).to_string()); } ParserState::Lang(m, Some(t)) => { m.entry(t.id, "e!(#t).to_string()); } _ => {} } } /// Detects the next state based on the header line /// /// Not very efficient but since it only checks # lines and required length it is not terrible fn next_from_header(&mut self, line: &str, output: &mut impl Write) -> Option { if line.len() < 7 || !line.starts_with('#') { return None; } match &line[..7] { "# C cla" => { self.finalize(output); Some(ParserState::Classes(Map::::new(), None, 0u8)) } "# AT te" => { self.finalize(output); Some(ParserState::AtType(Map::::new(), None)) } "# HID d" => { self.finalize(output); Some(ParserState::HidType(Map::::new(), None)) } "# R ite" => { self.finalize(output); Some(ParserState::RType(Map::::new(), None)) } "# BIAS " => { self.finalize(output); Some(ParserState::BiasType(Map::::new(), None)) } "# PHY i" => { self.finalize(output); Some(ParserState::PhyType(Map::::new(), None)) } "# HUT h" => { self.finalize(output); Some(ParserState::HutType(Map::::new(), None)) } "# L lan" => { self.finalize(output); Some(ParserState::Lang(Map::::new(), None)) } "# HCC c" => { self.finalize(output); Some(ParserState::CountryCode(Map::::new(), None)) } "# VT te" => { self.finalize(output); Some(ParserState::TerminalType(Map::::new(), None)) } _ => None, } } /// Process a line of input for the current state fn process(&mut self, line: &str) { if line.is_empty() || line.starts_with('#') { return; } // Switch parser state based on line prefix and current state // this relies on ordering of classes and types in the file... match self { ParserState::Vendors(m, ref mut curr_vendor, ref mut curr_device_id) => { if let Ok((name, id)) = parser::vendor(line) { if let Some(cv) = curr_vendor { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new vendor as the current vendor. *curr_vendor = Some(CgVendor { id, name: name.into(), devices: vec![], }); // We should always have a current vendor; failure here indicates a malformed input. } else { let curr_vendor = curr_vendor .as_mut() .expect("No parent vendor whilst parsing vendors"); if let Ok((name, id)) = parser::device(line) { curr_vendor.devices.push(CgDevice { id, name: name.into(), interfaces: vec![], }); *curr_device_id = id; } else if let Ok((name, id)) = parser::interface(line) { let curr_device = curr_vendor .devices .iter_mut() .find(|d| d.id == *curr_device_id) .expect("No parent device whilst parsing interfaces"); curr_device.interfaces.push(CgInterface { id, name: name.into(), }); } } } ParserState::Classes(m, ref mut curr_class, ref mut curr_class_id) => { if let Ok((name, id)) = parser::class(line) { if let Some(cv) = curr_class { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *curr_class = Some(CgClass { id, name: name.into(), sub_classes: vec![], }); } else { let curr_class = curr_class .as_mut() .expect("No parent class whilst parsing classes"); if let Ok((name, id)) = parser::sub_class(line) { curr_class.sub_classes.push(CgSubClass { id, name: name.into(), children: vec![], }); *curr_class_id = id; } else if let Ok((name, id)) = parser::protocol(line) { let curr_device = curr_class .sub_classes .iter_mut() .find(|d| d.id == *curr_class_id) .expect("No parent sub-class whilst parsing protocols"); curr_device.children.push(CgProtocol { id, name: name.into(), }); } } } ParserState::AtType(m, ref mut current) => { let (name, id) = parser::audio_terminal_type(line).expect("Invalid audio terminal line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgAtType { id, name: name.into(), }); } ParserState::HidType(m, ref mut current) => { let (name, id) = parser::hid_type(line).expect("Invalid hid type line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgHidType { id, name: name.into(), }); } ParserState::RType(m, ref mut current) => { let (name, id) = parser::hid_item_type(line).expect("Invalid hid item type line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgRType { id, name: name.into(), }); } ParserState::BiasType(m, ref mut current) => { let (name, id) = parser::bias_type(line).expect("Invalid bias type line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgRBiasType { id, name: name.into(), }); } ParserState::PhyType(m, ref mut current) => { let (name, id) = parser::phy_type(line).expect("Invalid phy type line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgPhyType { id, name: name.into(), }); } ParserState::HutType(m, ref mut current) => { if let Ok((name, id)) = parser::hut_type(line) { if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgHut { id, name: name.into(), children: vec![], }); } else { let curr_hut = current.as_mut().expect("No parent hut whilst parsing huts"); if let Ok((name, id)) = parser::hid_usage_name(line) { curr_hut.children.push(CgHidUsage { id, name: name.into(), }); } } } ParserState::Lang(m, ref mut current) => { if let Ok((name, id)) = parser::language(line) { if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgLang { id, name: name.into(), children: vec![], }); } else { let curr_lang = current .as_mut() .expect("No parent lang whilst parsing langs"); if let Ok((name, id)) = parser::dialect(line) { curr_lang.children.push(CgDialect { id, name: name.into(), }); } } } ParserState::CountryCode(m, ref mut current) => { let (name, id) = parser::country_code(line).expect("Invalid country code line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgCountryCode { id, name: name.into(), }); } ParserState::TerminalType(m, ref mut current) => { let (name, id) = parser::terminal_type(line).expect("Invalid terminal type line"); if let Some(cv) = current { m.entry(cv.id, "e!(#cv).to_string()); } // Set our new class as the current class. *current = Some(CgTerminalType { id, name: name.into(), }); } } } /// Emit the prologue and map to the output file. /// /// Should only be called once per state, used before switching. fn finalize(&mut self, output: &mut impl Write) { // Emit any pending contained within self.emit(); // Write the prologue writeln!(output, "{}", self.prologue_str()).unwrap(); // And the map itself match self { ParserState::Vendors(m, _, _) => { writeln!(output, "{};", m.build()).unwrap(); } ParserState::Classes(m, _, _) => { writeln!(output, "{};", m.build()).unwrap(); } ParserState::AtType(m, _) | ParserState::TerminalType(m, _) => { writeln!(output, "{};", m.build()).unwrap(); } ParserState::HidType(m, _) | ParserState::RType(m, _) | ParserState::BiasType(m, _) | ParserState::CountryCode(m, _) | ParserState::PhyType(m, _) => { writeln!(output, "{};", m.build()).unwrap(); } ParserState::HutType(m, _) => { writeln!(output, "{};", m.build()).unwrap(); } ParserState::Lang(m, _) => { writeln!(output, "{};", m.build()).unwrap(); } } } /// Return the next state for the current state based on the standard ordering of the file /// /// Not as robust as the next_from_header but at lot less overhead. The issue is reliably detecting the end of a section; # comments are not reliable as there are some '# typo?' strings #[allow(dead_code)] fn next(&mut self, output: &mut impl Write) -> Option { self.finalize(output); match self { ParserState::Vendors(_, _, _) => { Some(ParserState::Classes(Map::::new(), None, 0u8)) } ParserState::Classes(_, _, _) => Some(ParserState::AtType(Map::::new(), None)), ParserState::AtType(_, _) => Some(ParserState::HidType(Map::::new(), None)), ParserState::HidType(_, _) => Some(ParserState::RType(Map::::new(), None)), ParserState::RType(_, _) => Some(ParserState::BiasType(Map::::new(), None)), ParserState::BiasType(_, _) => Some(ParserState::PhyType(Map::::new(), None)), ParserState::PhyType(_, _) => Some(ParserState::HutType(Map::::new(), None)), ParserState::HutType(_, _) => Some(ParserState::Lang(Map::::new(), None)), ParserState::Lang(_, _) => Some(ParserState::CountryCode(Map::::new(), None)), ParserState::CountryCode(_, _) => { Some(ParserState::TerminalType(Map::::new(), None)) } ParserState::TerminalType(_, _) => None, } } } #[allow(clippy::redundant_field_names)] fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); let src_path = Path::new("src/usb.ids"); let dest_path = Path::new(&out_dir).join("usb_ids.cg.rs"); let input = { let f = fs::File::open(src_path).unwrap(); BufReader::new(f) }; let mut output = { let f = fs::File::create(dest_path).unwrap(); BufWriter::new(f) }; // Parser state machine starts with vendors (first in file) let mut parser_state: ParserState = ParserState::Vendors(Map::::new(), None, 0u16); #[allow(clippy::lines_filter_map_ok)] for line in input.lines().flatten() { // Check for a state change based on the header comments if let Some(next_state) = parser_state.next_from_header(&line, &mut output) { parser_state = next_state; } // Process line for current parser parser_state.process(&line); } // Last call for last parser in file parser_state.finalize(&mut output); println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-changed=src/usb.ids"); } mod parser { use std::num::ParseIntError; use nom::bytes::complete::{tag, take}; use nom::character::complete::{hex_digit1, tab}; use nom::combinator::{all_consuming, map_parser, map_res}; use nom::sequence::{delimited, terminated}; use nom::IResult; fn id(size: usize, from_str_radix: F) -> impl Fn(&str) -> IResult<&str, T> where F: Fn(&str, u32) -> Result, { move |input| { map_res(map_parser(take(size), all_consuming(hex_digit1)), |input| { from_str_radix(input, 16) })(input) } } pub fn vendor(input: &str) -> IResult<&str, u16> { let id = id(4, u16::from_str_radix); terminated(id, tag(" "))(input) } pub fn device(input: &str) -> IResult<&str, u16> { let id = id(4, u16::from_str_radix); delimited(tab, id, tag(" "))(input) } pub fn interface(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("\t\t"), id, tag(" "))(input) } pub fn class(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("C "), id, tag(" "))(input) } pub fn sub_class(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tab, id, tag(" "))(input) } pub fn protocol(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("\t\t"), id, tag(" "))(input) } pub fn audio_terminal_type(input: &str) -> IResult<&str, u16> { let id = id(4, u16::from_str_radix); delimited(tag("AT "), id, tag(" "))(input) } pub fn hid_type(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("HID "), id, tag(" "))(input) } pub fn hid_item_type(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("R "), id, tag(" "))(input) } pub fn bias_type(input: &str) -> IResult<&str, u8> { let id = id(1, u8::from_str_radix); delimited(tag("BIAS "), id, tag(" "))(input) } pub fn phy_type(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("PHY "), id, tag(" "))(input) } pub fn hut_type(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("HUT "), id, tag(" "))(input) } pub fn hid_usage_name(input: &str) -> IResult<&str, u16> { let id = id(3, u16::from_str_radix); delimited(tab, id, tag(" "))(input) } pub fn language(input: &str) -> IResult<&str, u16> { let id = id(4, u16::from_str_radix); delimited(tag("L "), id, tag(" "))(input) } pub fn dialect(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tab, id, tag(" "))(input) } pub fn country_code(input: &str) -> IResult<&str, u8> { let id = id(2, u8::from_str_radix); delimited(tag("HCC "), id, tag(" "))(input) } pub fn terminal_type(input: &str) -> IResult<&str, u16> { let id = id(4, u16::from_str_radix); delimited(tag("VT "), id, tag(" "))(input) } } impl quote::ToTokens for CgVendor { fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { let CgVendor { id: vendor_id, name, devices, } = self; let devices = devices.iter().map(|CgDevice { id, name, interfaces }| { quote!{ Device { vendor_id: #vendor_id, id: #id, name: #name, interfaces: &[#(#interfaces),*] } } }); tokens.extend(quote! { Vendor { id: #vendor_id, name: #name, devices: &[#(#devices),*] } }); } } impl quote::ToTokens for CgClass { fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { let CgClass { id: class_id, name, sub_classes, } = self; let sub_classes = sub_classes.iter().map(|CgSubClass { id, name, children }| { quote! { SubClass { class_id: #class_id, id: #id, name: #name, protocols: &[#(#children),*] } } }); tokens.extend(quote! { Class { id: #class_id, name: #name, sub_classes: &[#(#sub_classes),*] } }); } } impl quote::ToTokens for CgParentType { fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { let CgParentType { id, name, children } = self; tokens.extend(quote! { UsbIdWithChildren { id: #id, name: #name, children: &[#(#children),*] } }); } } impl quote::ToTokens for CgType { fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { let CgType { id, name } = self; tokens.extend(quote! { UsbId { id: #id, name: #name } }); } }