Crates.io | fcsd |
lib.rs | fcsd |
version | 0.2.0 |
source | src |
created_at | 2021-09-24 11:38:25.269699 |
updated_at | 2022-03-27 11:21:49.799482 |
description | Front-coding string dictionary. |
homepage | https://github.com/kampersanda/fcsd |
repository | https://github.com/kampersanda/fcsd |
max_upload_size | |
id | 455837 |
size | 42,129 |
This is a Rust library to store an indexed set of strings and support fast queries. The data structure is a plain front-coded string dictionary described in Martínez-Prieto et al., Practical compressed string dictionaries, INFOSYS 2016.
The n strings in the set are indexed with integers from [0..n-1], assigned in lexicographical order.

use fcsd::Set;
// Input string keys should be sorted and unique.
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
// Builds an indexed set. As the assertions below show, each key's index is its
// position in the sorted input (ICDM = 0, ..., SIGMOD = 4).
// NOTE(review): `unwrap()` presumably panics if the input violates the
// sorted/unique requirement -- confirm against the crate's API docs.
let set = Set::new(keys).unwrap();
assert_eq!(set.len(), keys.len());
// Gets indexes associated with given keys.
// A key that is stored yields `Some(index)`; a key that is not stored yields `None`.
let mut locator = set.locator();
assert_eq!(locator.run(b"ICML"), Some(1));
assert_eq!(locator.run(b"SIGMOD"), Some(4));
assert_eq!(locator.run(b"SIGSPATIAL"), None);
// Decodes string keys from given indexes.
// The decoded key is compared here against a byte vector (`Vec<u8>`).
let mut decoder = set.decoder();
assert_eq!(decoder.run(0), b"ICDM".to_vec());
assert_eq!(decoder.run(3), b"SIGKDD".to_vec());
// Enumerates indexes and keys stored in the set.
// Items are `(index, key bytes)` pairs, yielded in increasing index order;
// the iterator returns `None` once all keys have been visited.
let mut iter = set.iter();
assert_eq!(iter.next(), Some((0, b"ICDM".to_vec())));
assert_eq!(iter.next(), Some((1, b"ICML".to_vec())));
assert_eq!(iter.next(), Some((2, b"SIGIR".to_vec())));
assert_eq!(iter.next(), Some((3, b"SIGKDD".to_vec())));
assert_eq!(iter.next(), Some((4, b"SIGMOD".to_vec())));
assert_eq!(iter.next(), None);
// Enumerates indexes and keys starting with a prefix.
// Only the keys beginning with "SIG" are yielded, each with the same index
// it has in the full set.
let mut iter = set.predictive_iter(b"SIG");
assert_eq!(iter.next(), Some((2, b"SIGIR".to_vec())));
assert_eq!(iter.next(), Some((3, b"SIGKDD".to_vec())));
assert_eq!(iter.next(), Some((4, b"SIGMOD".to_vec())));
assert_eq!(iter.next(), None);
// Serialization / Deserialization
// The set is written into an in-memory byte buffer; the number of bytes
// written equals `size_in_bytes()`.
let mut data = Vec::<u8>::new();
set.serialize_into(&mut data).unwrap();
assert_eq!(data.len(), set.size_in_bytes());
// Reading the bytes back produces a set reporting the same size.
let other = Set::deserialize_from(&data[..]).unwrap();
assert_eq!(data.len(), other.size_in_bytes());
This library is free software provided under the MIT License.