# WARNING: This file is generated by build.rs

[package]
name = "langsan"
version = "0.0.10"
authors = ["Michael de Gans"]
categories = ["text-processing"]
edition = "2021"
homepage = "https://github.com/mdegans/langsan"
keywords = ["sanitization", "language", "model"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/mdegans/langsan"
description = "A library for sanitizing language model input and output."

[dependencies]
# Optional: only pulled in when the `serde` feature below is enabled.
serde = { version = "1", features = ["derive"], optional = true }

[build-dependencies]
serde = { version = "1", features = ["derive"] }
serde_json = "1"
static_assertions = "1"

[dev-dependencies]
serde_json = "1"

[features]
default = []
cow = []
verbose = []
# Enables the optional `serde` dependency declared under [dependencies].
serde = ["dep:serde"]

# Languages
# Each language feature turns on the Unicode range feature(s) it lists.
english = []
spanish = ["latin-1-supplement"]
french = ["latin-1-supplement"]
german = ["latin-1-supplement"]
italian = ["latin-1-supplement"]
dutch = ["latin-1-supplement"]
portuguese = ["latin-1-supplement"]
russian = ["cyrillic"]
emoji = [
    "miscellaneous-symbols-and-pictographs",
    "emoticons-emoji",
    "ornamental-dingbats",
    "transport-and-map-symbols",
    "alchemical-symbols",
    "geometric-shapes-extended",
    "supplemental-arrows-c",
    "supplemental-symbols-and-pictographs",
    "chess-symbols",
    "symbols-and-pictographs-extended-a",
    "symbols-for-legacy-computing",
]

# Unicode ranges. Note that whitespace and basic-latin are enabled by default.
# "tags" are included for completeness' sake but very much not recommended for use.
# NOTE(review): no `tags` feature is visible in the list below - confirm
# whether the generator still emits it or whether this comment is stale.
latin-1-supplement = []
latin-extended-a = []
latin-extended-b = []
ipa-extensions = []
spacing-modifier-letters = []
combining-diacritical-marks = []
greek-and-coptic = []
cyrillic = []
cyrillic-supplement = []
armenian = []
hebrew = []
arabic = []
syriac = []
arabic-supplement = []
thaana = []
nko = []
samaritan = []
mandaic = []
syriac-supplement = []
arabic-extended-b = []
arabic-extended-a = []
devanagari = []
bengali = []
gurmukhi = []
gujarati = []
oriya = []
tamil = []
telugu = []
kannada = []
malayalam = []
sinhala = []
thai = []
lao = []
tibetan = []
myanmar = []
georgian = []
hangul-jamo = []
ethiopic = []
ethiopic-supplement = []
cherokee = []
unified-canadian-aboriginal-syllabics = []
ogham = []
runic = []
tagalog = []
hanunoo = []
buhid = []
tagbanwa = []
khmer = []
mongolian = []
unified-canadian-aboriginal-syllabics-extended = []
limbu = []
tai-le = []
new-tai-lue = []
khmer-symbols = []
buginese = []
tai-tham = []
combining-diacritical-marks-extended = []
balinese = []
sundanese = []
batak = []
lepcha = []
ol-chiki = []
cyrillic-extended-c = []
georgian-extended = []
sundanese-supplement = []
vedic-extensions = []
phonetic-extensions = []
phonetic-extensions-supplement = []
combining-diacritical-marks-supplement = []
latin-extended-additional = []
greek-extended = []
general-punctuation = []
superscripts-and-subscripts = []
currency-symbols = []
combining-diacritical-marks-for-symbols = []
letterlike-symbols = []
number-forms = []
arrows = []
mathematical-operators = []
miscellaneous-technical = []
control-pictures = []
optical-character-recognition = []
enclosed-alphanumerics = []
box-drawing = []
block-elements = []
geometric-shapes = []
miscellaneous-symbols = []
dingbats = []
miscellaneous-mathematical-symbols-a = []
supplemental-arrows-a = []
braille-patterns = []
supplemental-arrows-b = []
miscellaneous-mathematical-symbols-b = []
supplemental-mathematical-operators = []
miscellaneous-symbols-and-arrows = []
glagolitic = []
latin-extended-c = []
coptic = []
georgian-supplement = []
tifinagh = []
ethiopic-extended = []
cyrillic-extended-a = []
supplemental-punctuation = []
cjk-radicals-supplement = []
kangxi-radicals = []
ideographic-description-characters = []
cjk-symbols-and-punctuation = []
hiragana = []
katakana = []
bopomofo = []
hangul-compatibility-jamo = []
kanbun = []
bopomofo-extended = []
cjk-strokes = []
katakana-phonetic-extensions = []
enclosed-cjk-letters-and-months = []
cjk-compatibility = []
cjk-unified-ideographs-extension-a = []
yijing-hexagram-symbols = []
cjk-unified-ideographs = []
yi-syllables = []
yi-radicals = []
lisu = []
vai = []
cyrillic-extended-b = []
bamum = []
modifier-tone-letters = []
latin-extended-d = []
syloti-nagri = []
common-indic-number-forms = []
phags-pa = []
saurashtra = []
devanagari-extended = []
kayah-li = []
rejang = []
hangul-jamo-extended-a = []
javanese = []
myanmar-extended-b = []
cham = []
myanmar-extended-a = []
tai-viet = []
meetei-mayek-extensions = []
ethiopic-extended-a = []
latin-extended-e = []
cherokee-supplement = []
meetei-mayek = []
hangul-syllables = []
hangul-jamo-extended-b = []
high-surrogates = []
low-surrogates = []
cjk-compatibility-ideographs = []
alphabetic-presentation-forms = []
arabic-presentation-forms-a = []
variation-selectors = []
vertical-forms = []
combining-half-marks = []
cjk-compatibility-forms = []
small-form-variants = []
arabic-presentation-forms-b = []
halfwidth-and-fullwidth-forms = []
specials = []
linear-b-syllabary = []
linear-b-ideograms = []
aegean-numbers = []
ancient-greek-numbers = []
ancient-symbols = []
phaistos-disc = []
lycian = []
carian = []
coptic-epact-numbers = []
old-italic = []
gothic = []
old-permic = []
ugaritic = []
old-persian = []
deseret = []
shavian = []
osmanya = []
osage = []
elbasan = []
caucasian-albanian = []
vithkuqi = []
linear-a = []
latin-extended-f = []
cypriot-syllabary = []
imperial-aramaic = []
palmyrene = []
nabataean = []
hatran = []
phoenician = []
lydian = []
meroitic-hieroglyphs = []
meroitic-cursive = []
kharoshthi = []
old-south-arabian = []
old-north-arabian = []
manichaean = []
avestan = []
inscriptional-parthian = []
inscriptional-pahlavi = []
psalter-pahlavi = []
old-turkic = []
old-hungarian = []
hanifi-rohingya = []
rumi-numeral-symbols = []
yezidi = []
arabic-extended-c = []
old-sogdian = []
sogdian = []
old-uyghur = []
sinhala-archaic-numbers = []
mongolian-supplement = []
dogra = []
warang-citi = []
dives-akuru = []
nandinagari = []
zanabazar-square = []
unified-canadian-aboriginal-syllabics-extended-a = []
devanagari-extended-a = []
masaram-gondi = []
gunjala-gondi = []
lisu-supplement = []
tamil-supplement = []
cuneiform = []
cuneiform-numbers-and-punctuation = []
early-dynastic-cuneiform = []
cypro-minoan = []
egyptian-hieroglyphs = []
egyptian-hieroglyph-format-controls = []
anatolian-hieroglyphs = []
bamum-supplement = []
ideographic-symbols-and-punctuation = []
tangut = []
tangut-components = []
khitan-small-script = []
tangut-supplement = []
kana-extended-b = []
kana-supplement = []
kana-extended-a = []
small-kana-extension = []
shorthand-format-controls = []
znamenny-musical-notation = []
byzantine-musical-symbols = []
musical-symbols = []
ancient-greek-musical-notation = []
kaktovik-numerals = []
mayan-numerals = []
tai-xuan-jing-symbols = []
counting-rod-numerals = []
mathematical-alphanumeric-symbols = []
sutton-signwriting = []
latin-extended-g = []
glagolitic-supplement = []
cyrillic-extended-d = []
nyiakeng-puachue-hmong = []
nag-mundari = []
ethiopic-extended-b = []
mende-kikakui = []
indic-siyaq-numbers = []
ottoman-siyaq-numbers = []
arabic-mathematical-alphabetic-symbols = []
mahjong-tiles = []
domino-tiles = []
playing-cards = []
enclosed-alphanumeric-supplement = []
enclosed-ideographic-supplement = []
miscellaneous-symbols-and-pictographs = []
emoticons-emoji = []
ornamental-dingbats = []
transport-and-map-symbols = []
alchemical-symbols = []
geometric-shapes-extended = []
supplemental-arrows-c = []
supplemental-symbols-and-pictographs = []
chess-symbols = []
symbols-and-pictographs-extended-a = []
symbols-for-legacy-computing = []
cjk-unified-ideographs-extension-b = []
cjk-unified-ideographs-extension-c = []
cjk-unified-ideographs-extension-d = []
cjk-unified-ideographs-extension-e = []
cjk-unified-ideographs-extension-f = []
cjk-unified-ideographs-extension-i = []
cjk-compatibility-ideographs-supplement = []
cjk-unified-ideographs-extension-g = []
cjk-unified-ideographs-extension-h = []
variation-selectors-supplement = []