haoxue-dict

Crates.io: haoxue-dict
lib.rs: haoxue-dict
version: 0.1.7
source: src
created_at: 2023-12-22 18:32:35.28574
updated_at: 2024-09-07 12:10:36.150746
description: Chinese dictionary and word segmenter
homepage
repository
max_upload_size
id: 1078579
size: 14,293,044
David Himmelstrup (lemmih)

documentation

README

haoxue-dict

A Chinese dictionary and word segmenter.

Dictionary usage

use haoxue_dict::DICTIONARY;

let entry = DICTIONARY.get_entry("你好").unwrap();
assert_eq!(entry.simplified(), "你好");
assert_eq!(entry.pinyin(), "ni3 hao3");
assert_eq!(prettify_pinyin::prettify(entry.pinyin()), "nǐ hǎo");

use haoxue_dict::DICTIONARY;

let entry = DICTIONARY.get_entry("们").unwrap();
assert_eq!(entry.traditional(), "們");
assert_eq!(entry.pinyin(), "men5");
assert_eq!(prettify_pinyin::prettify(entry.pinyin()), "men");

use haoxue_dict::DICTIONARY;

// 们 is more common than 大学
assert!(DICTIONARY.frequency("们") > DICTIONARY.frequency("大学"));

Segmenter usage

use haoxue_dict::{DICTIONARY, DictEntry};
use either::Either;

let segments = DICTIONARY.segment("明天我会去图书馆。")
                .iter()
                .map(|segment| segment.map_left(DictEntry::simplified))
                .collect::<Vec<_>>();
assert_eq!(segments, vec![
    Either::Left("明天"),
    Either::Left("我"),
    Either::Left("会"),
    Either::Left("去"),
    Either::Left("图书馆"),
    Either::Right("。")
]);

Feature flags

  • embed-dict: Embed the dictionary in the binary. This is the default feature and adds about 12.4 MiB to the binary size.
Commit count: 0

cargo fmt