use indexmap::IndexMap; use proc_macro2::{Ident, Literal, Span}; use quote::quote; use std::collections::HashMap; use std::env; use std::error::Error; use std::fs; use std::path::Path; const LANGUAGES: &str = include_str!("./languages.yaml"); const MINIMUM_PRIORITY: u8 = 0; const MAXIMUM_PRIORITY: u8 = 100; const DEFAULT_PRIORITY: u8 = 50; /// Converts `languages.yaml` to minified JSON and writes it to /// `languages.json`. fn main() -> Result<(), Box> { // TODO This looks messy, and can use cleanup. let languages: IndexMap = serde_yaml::from_str(LANGUAGES)?; let languages_target_dir = Path::new(&env::var("OUT_DIR")?).join("languages"); fs::create_dir_all(&languages_target_dir)?; struct LanguageDefinition { variant: Ident, /// See `Category`. category: Ident, name: Literal, color_hex: Literal, color_rgb: (Literal, Literal, Literal), nerd_font_glyph: Option, priority: Literal, extensions: Vec, filenames: Vec, interpreters: Vec, patterns: Vec, heuristics: Vec, } let language_definitions: Vec<_> = languages .iter() .map(|(language_name, language_attrs)| { let language_attrs = language_attrs .as_object() .expect("language attributes to be an object"); let variant = rustify_language_name(language_name); let variant = Ident::new(&variant, Span::call_site()); let category = language_attrs["category"] .as_str() .expect("category to be a string"); let category = match category { "data" => "Data", "markup" => "Markup", "programming" => "Programming", "prose" => "Prose", "query" => "Query", unknown => unreachable!("Category {unknown}"), }; let category = Ident::new(category, Span::call_site()); let name = Literal::string(language_name); let color = language_attrs["color"] .as_str() .expect("color to be a string"); let color_hex = Literal::string(color); let color_rgb = { let color = color.strip_prefix('#').expect("'#' prefix"); assert_eq!(color.len(), 6, "Expected 6 characters"); let channels = u32::from_str_radix(color, 16).expect("valid hex string"); let r = ((channels >> 
16) & 0xFF) as u8; let g = ((channels >> 8) & 0xFF) as u8; let b = (channels & 0xFF) as u8; ( Literal::u8_suffixed(r), Literal::u8_suffixed(g), Literal::u8_suffixed(b), ) }; let nerd_font_glyph = language_attrs.get("nerd-font-glyph").map(|glyph| { let glyph = glyph.as_str().expect("nerd font glyph to be a string"); Literal::string(glyph) }); let priority = language_attrs .get("priority") .map(|priority| { let priority = priority.as_u64().expect("priority to be a number"); assert!( priority >= MINIMUM_PRIORITY.into() && priority <= MAXIMUM_PRIORITY.into(), "priority to be between {MINIMUM_PRIORITY} and {MAXIMUM_PRIORITY}" ); priority as u8 }) .unwrap_or(DEFAULT_PRIORITY); let priority = Literal::u8_unsuffixed(priority); let matchers = language_attrs["matchers"] .as_object() .expect("matchers to be an object"); let extensions = matchers .get("extensions") .map(|extensions| { extensions .as_array() .expect("extensions to be an array") .to_owned() }) .unwrap_or_default() .iter() .map(|extension| { extension .as_str() .expect("extension to be a string") .to_string() }) .collect(); let filenames = matchers .get("filenames") .map(|filenames| { filenames .as_array() .expect("filenames to be an array") .to_owned() }) .unwrap_or_default() .iter() .map(|filename| { filename .as_str() .expect("filename to be a string") .to_string() }) .collect(); let interpreters = matchers .get("interpreters") .map(|interpreters| { interpreters .as_array() .expect("interpreters to be an array") .to_owned() }) .unwrap_or_default() .iter() .map(|interpreter| { interpreter .as_str() .expect("interpreter to be a string") .to_string() }) .collect(); let patterns = matchers .get("patterns") .map(|patterns| { patterns .as_array() .expect("patterns to be an array") .to_owned() }) .unwrap_or_default() .iter() .map(|pattern| { pattern .as_str() .expect("pattern to be a string") .to_string() }) .collect(); let heuristics = language_attrs .get("heuristics") .map(|heuristics| { heuristics .as_array() 
.expect("heuristics to be an array") .to_owned() .iter() .map(|heuristic| { heuristic .as_str() .expect("heuristic to be a string") .to_string() }) .collect() }) .unwrap_or_default(); LanguageDefinition { variant, category, name, color_hex, color_rgb, nerd_font_glyph, priority, extensions, filenames, interpreters, patterns, heuristics, } }) .collect(); let variants = language_definitions.iter().map(|def| &def.variant); let language = quote! { /// The type of language. Returned by language detection. #[non_exhaustive] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum Language { #(#variants,)* } }; fs::write( languages_target_dir.join("language.rs"), language.to_string(), )?; let category_mappings = language_definitions.iter().map( |LanguageDefinition { variant, category, .. }| { quote! { Self::#variant => Category::#category } }, ); let category_mixin = quote! { impl Language { /// Gets the category of the language. pub const fn category(&self) -> Category { match self { #(#category_mappings ,)* } } } }; fs::write( languages_target_dir.join("category_mixin.rs"), category_mixin.to_string(), )?; let name_mappings = language_definitions .iter() .map(|LanguageDefinition { variant, name, .. }| { quote! { Self::#variant => #name } }); let name_mixin = quote! { impl Language { /// Gets the name of the language. pub const fn name(&self) -> &'static str { match self { #(#name_mappings ,)* } } } }; fs::write( languages_target_dir.join("name_mixin.rs"), name_mixin.to_string(), )?; let reverse_variant_mappings = language_definitions .iter() .map(|LanguageDefinition { variant, .. }| { let variant_name = variant.to_string(); quote! { #variant_name => Some(Self::#variant) } }); let parse_variant_mixin = quote! { impl Language { /// Converts a variant's name back to the language. 
fn parse_variant(name: &str) -> Option { match name { #(#reverse_variant_mappings ,)* _ => None, } } } }; fs::write( languages_target_dir.join("parse_variant_mixin.rs"), parse_variant_mixin.to_string(), )?; let color_hex_mappings = language_definitions.iter().map( |LanguageDefinition { variant, color_hex, .. }| { quote! { Self::#variant => #color_hex } }, ); let color_hex_mixin = quote! { impl Language { /// Gets the color associated with the language. pub const fn color(&self) -> &'static str { match self { #(#color_hex_mappings ,)* } } } }; fs::write( languages_target_dir.join("color_hex_mixin.rs"), color_hex_mixin.to_string(), )?; let color_rgb_mappings = language_definitions.iter().map( |LanguageDefinition { variant, color_rgb: (r, g, b), .. }| { quote! { Self::#variant => (#r, #g, #b) } }, ); let color_rgb_mixin = quote! { impl Language { /// Gets the RGB color associated with the language. const fn color_rgb(&self) -> (u8, u8, u8) { match self { #(#color_rgb_mappings ,)* } } } }; fs::write( languages_target_dir.join("color_rgb_mixin.rs"), color_rgb_mixin.to_string(), )?; let nerd_font_glyph_mappings = language_definitions.iter().filter_map( |LanguageDefinition { variant, nerd_font_glyph, .. }| { nerd_font_glyph.as_ref().map(|glyph| { quote! { Self::#variant => Some(#glyph) } }) }, ); let nerd_font_glyph_mixin = quote! { impl Language { /// Gets the Nerd Font glyph associated with the language. pub const fn nerd_font_glyph(&self) -> Option<&'static str> { match self { #(#nerd_font_glyph_mappings ,)* _ => None, } } } }; fs::write( languages_target_dir.join("nerd_font_glyph_mixin.rs"), nerd_font_glyph_mixin.to_string(), )?; let priority_mappings = language_definitions.iter().map( |LanguageDefinition { variant, priority, .. }| { quote! { Self::#variant => #priority } }, ); let priority_mixin = quote! { impl Language { /// Gets the priority of the language. This is useful for sorting languages /// when multiple languages are detected. 
pub const fn priority(&self) -> u8 { match self { #(#priority_mappings ,)* } } } }; fs::write( languages_target_dir.join("priority_mixin.rs"), priority_mixin.to_string(), )?; let extension_to_langs: HashMap<_, Vec<_>> = language_definitions.iter().fold( HashMap::new(), |map, LanguageDefinition { variant, extensions, .. }| { extensions.iter().fold(map, |mut map, extension| { map.entry(extension.clone()) .or_default() .push(variant.clone()); map }) }, ); let extension_to_langs_mappings = extension_to_langs.iter().map(|(extension, langs)| { quote! { #extension => vec![#(Self::#langs),*] } }); let from_extension_mixin = quote! { impl Language { /// Gets languages by extension. pub fn from_extension(extension: &str) -> Vec { match extension { #(#extension_to_langs_mappings ,)* _ => vec![], } } } }; fs::write( languages_target_dir.join("from_extension_mixin.rs"), from_extension_mixin.to_string(), )?; let filenames_to_langs: HashMap<_, Vec<_>> = language_definitions.iter().fold( HashMap::new(), |map, LanguageDefinition { variant, filenames, .. }| { filenames.iter().fold(map, |mut map, filename| { map.entry(filename.clone()) .or_default() .push(variant.clone()); map }) }, ); let filenames_to_langs_mappings = filenames_to_langs.iter().map(|(filename, langs)| { quote! { #filename => vec![#(Self::#langs),*] } }); let from_filename_mixin = quote! { impl Language { /// Gets languages by filename. pub fn from_filename(filename: &str) -> Vec { match filename { #(#filenames_to_langs_mappings ,)* _ => vec![], } } } }; fs::write( languages_target_dir.join("from_filename_mixin.rs"), from_filename_mixin.to_string(), )?; let interpreters_to_langs: HashMap<_, Vec<_>> = language_definitions.iter().fold( HashMap::new(), |map, LanguageDefinition { variant, interpreters, .. 
}| { interpreters.iter().fold(map, |mut map, interpreter| { map.entry(interpreter.clone()) .or_default() .push(variant.clone()); map }) }, ); let interpreter_to_langs_mappings = interpreters_to_langs.iter().map(|(interpreter, langs)| { quote! { #interpreter => vec![#(Self::#langs),*] } }); let from_interpreter_mixin = quote! { impl Language { /// Gets languages by interpreter (typically found as part of a shebang). pub fn from_interpreter(interpreter: &str) -> Vec { match interpreter { #(#interpreter_to_langs_mappings ,)* _ => vec![], } } } }; fs::write( languages_target_dir.join("from_interpreter_mixin.rs"), from_interpreter_mixin.to_string(), )?; let glob_matchers = language_definitions .iter() .filter(|def| !def.patterns.is_empty()) .map( |LanguageDefinition { variant, patterns, .. }| { quote! { ( vec![#(#patterns),*], Language::#variant, ) } }, ); let glob_mappings_mixin = quote! { impl Language { /// Gets the mappings used to map a glob to its language. fn glob_mappings() -> Vec<(Vec<&'static str>, Self)> { vec![#(#glob_matchers),*] } } }; fs::write( languages_target_dir.join("glob_mappings_mixin.rs"), glob_mappings_mixin.to_string(), )?; let heuristic_tuples = language_definitions .iter() .filter(|language_definition| !language_definition.heuristics.is_empty()) .map( |LanguageDefinition { variant, heuristics, .. }| { quote! { (Self::#variant, vec![#(#heuristics),*]) } }, ); let heuristic_mappings_mixin = quote! { impl Language { /// Gets the heuristics used to determine a language. fn heuristic_mappings() -> Vec<(Self, Vec<&'static str>)> { vec![#(#heuristic_tuples ,)*] } } }; fs::write( languages_target_dir.join("heuristic_mappings_mixin.rs"), heuristic_mappings_mixin.to_string(), )?; Ok(()) } /// Converts a language name to a valid Rust identifier to be used as an enum /// variant. 
///
/// # Examples
///
/// - `".Env"` -> `"Dotenv"`
/// - `"C++"` -> `"CPlusPlus"`
/// - `"C#"` -> `"CSharp"`
/// - `"HTML"` -> `"Html"`
/// - `"JavaScript"` -> `"Javascript"`
/// - `"Batch File"` -> `"BatchFile"`
///
/// Leading, trailing, and consecutive spaces are ignored, so `"Foo - Bar"`
/// becomes `"FooBar"`.
///
/// # Panics
///
/// Panics if upper-casing the first character of a word yields more than one
/// character (for example `ß` becomes `SS`).
fn rustify_language_name(name: &str) -> String {
    let name = asciiify_language_name(name);
    let mut identifier = String::with_capacity(name.len());
    for word in name.split(' ') {
        let mut chars = word.chars();
        // NOTE Empty words come from leading, trailing, or consecutive spaces
        // (which the ASCII replacements can introduce); they contribute
        // nothing to the identifier, so they are skipped.
        let first_char = match chars.next() {
            Some(c) => c.to_uppercase(),
            None => continue,
        };
        // NOTE If there is a special character like ß it will become SS, but
        // that should never happen.
        assert!(first_char.len() == 1);
        identifier.extend(first_char);
        identifier.extend(chars.flat_map(char::to_lowercase));
    }
    identifier
}

/// Replaces special characters in a language name with their ASCII
/// equivalents.
///
/// `-` and `'` become a space, `+` becomes `" Plus"`, and `#` becomes
/// `" Sharp"`. Every replacement starts with a space so that it is treated as
/// a word boundary by [`rustify_language_name`].
fn asciiify_language_name(name: &str) -> String {
    // NOTE .Env is a special case because the special character is at the beginning
    // and it should be one word.
    if name == ".Env" {
        return "Dotenv".to_string();
    }
    let mut ascii = String::with_capacity(name.len());
    for c in name.chars() {
        match c {
            '-' | '\'' => ascii.push(' '),
            '+' => ascii.push_str(" Plus"),
            '#' => ascii.push_str(" Sharp"),
            other => ascii.push(other),
        }
    }
    ascii
}