use std::{borrow::Cow, fmt::Display, hash::Hash}; #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)] #[serde(untagged)] pub enum NGram { Unigram(String), Bigram(String, String), } impl PartialEq for NGram { fn eq(&self, other: &Self) -> bool { match (self, other) { (NGram::Unigram(self_token), NGram::Unigram(other_token)) => self_token == other_token, ( NGram::Bigram(self_token_a, self_token_b), NGram::Bigram(other_token_a, other_token_b), ) => self_token_a == other_token_a && self_token_b == other_token_b, _ => false, } } } impl Hash for NGram { fn hash(&self, state: &mut H) { match self { NGram::Unigram(token) => { 0usize.hash(state); token.hash(state) } NGram::Bigram(token_a, token_b) => { 1usize.hash(state); token_a.hash(state); token_b.hash(state); } } } } impl Display for NGram { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { NGram::Unigram(token) => { write!(f, "{}", token) } NGram::Bigram(token_a, token_b) => { write!(f, "{} {}", token_a, token_b) } } } } #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialOrd, Ord)] pub enum NGramRef<'a> { Unigram(Cow<'a, str>), Bigram(Cow<'a, str>, Cow<'a, str>), } impl<'a> PartialEq for NGramRef<'a> { fn eq(&self, other: &Self) -> bool { match (self, other) { (NGramRef::Unigram(self_token), NGramRef::Unigram(other_token)) => { self_token == other_token } ( NGramRef::Bigram(self_token_a, self_token_b), NGramRef::Bigram(other_token_a, other_token_b), ) => self_token_a == other_token_a && self_token_b == other_token_b, _ => false, } } } impl<'a> Hash for NGramRef<'a> { fn hash(&self, state: &mut H) { match self { NGramRef::Unigram(token) => { 0usize.hash(state); token.hash(state) } NGramRef::Bigram(token_a, token_b) => { 1usize.hash(state); token_a.hash(state); token_b.hash(state); } } } } impl<'a> indexmap::Equivalent for NGramRef<'a> { fn equivalent(&self, key: &NGram) -> bool { match (self, key) { (NGramRef::Unigram(unigram_ref), NGram::Unigram(unigram)) => unigram_ref == unigram, (NGramRef::Bigram(bigram_a_ref, bigram_b_ref), NGram::Bigram(bigram_a, bigram_b)) => { bigram_a_ref == bigram_a && bigram_b_ref == bigram_b } _ => false, } } } impl<'a> NGramRef<'a> { pub fn to_ngram(&self) -> NGram { match self { NGramRef::Unigram(token) => NGram::Unigram(token.as_ref().to_owned()), NGramRef::Bigram(token_a, token_b) => { NGram::Bigram(token_a.as_ref().to_owned(), token_b.as_ref().to_owned()) } } } } #[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub enum NGramType { Unigram, Bigram, }