| Crates.io | wordcutw |
| lib.rs | wordcutw |
| version | 0.5.0 |
| created_at | 2021-04-23 02:59:34.024871+00 |
| updated_at | 2025-08-01 17:32:52.276573+00 |
| description | A C-interface wrapper for Wordcut - a Lao/Thai word segmentation/breaking library |
| homepage | https://codeberg.org/mekong-lang/wordcutw |
| repository | https://codeberg.org/mekong-lang/wordcutw |
| max_upload_size | |
| id | 388399 |
| size | 47,379 |
A C-interface wrapper for Wordcut - a Lao/Thai word segmentation/breaking library
# Fetch the wordcutw sources over SSH.
git clone git@github.com:veer66/wordcutw.git
# Work from the root of the cloned repository.
cd wordcutw
# Build with Cargo's release profile; artifacts are written under target/release.
cargo build --release
# Copy the built shared library into /usr/local/lib (requires root privileges).
sudo cp target/release/libwordcutw.so /usr/local/lib
#include <stdio.h>
#include <stdlib.h>
#include "wordcutw.h"
/*
 * Usage example for the wordcutw C interface.
 *
 * Loads a dictionary, then segments the same sample text three ways:
 *   1. as an array of TextRange values (fields s and e),
 *   2. as an array of token strings,
 *   3. as a single string with "---" inserted as the delimiter.
 *
 * Every result is released with the matching call used by the library's
 * example (delete_text_ranges, delete_strings, free, delete_wordcut).
 *
 * Returns 0 on success, EXIT_FAILURE if the dictionary handle is NULL.
 */
int
main(void)
{
    Wordcut *wordcut = wordcut_new_with_dict("../test_data/thai.txt");
    /* NOTE(review): assumes a failed load is reported as NULL - confirm
       against wordcutw.h. The check is harmless if it never triggers. */
    if (wordcut == NULL)
    {
        fprintf(stderr, "Cannot load dictionary ../test_data/thai.txt\n");
        return EXIT_FAILURE;
    }

    size_t range_count = 0;
    TextRange *text_ranges = wordcut_into_text_ranges(wordcut, "ลากา", &range_count);
    printf("COUNT = %zu\n", range_count);
    if (text_ranges != NULL)
    {
        /* Print only the ranges that were actually returned; indexing [0]
           and [1] unconditionally reads out of bounds when the segmenter
           yields fewer than two ranges. */
        for (size_t r = 0; r < range_count; r++)
        {
            printf("R%zu %zu_%zu\n", r, text_ranges[r].s, text_ranges[r].e);
        }
        delete_text_ranges(text_ranges, range_count);
    }

    size_t string_count = 0;
    char **tokenized_strings = wordcut_into_strings(wordcut, "ลากา", &string_count);
    if (tokenized_strings != NULL)
    {
        for (size_t i = 0; i < string_count; i++)
        {
            printf("String #%zu: %s\n", i, tokenized_strings[i]);
        }
        delete_strings(tokenized_strings, string_count);
    }

    char *tokenized_text = wordcut_put_delimiters(wordcut, "ลากา", "---");
    if (tokenized_text != NULL)
    {
        /* Passing NULL to %s is undefined behavior, hence the guard. */
        printf("Tokenized text = %s\n", tokenized_text);
        /* NOTE(review): released with free() exactly as in the original
           example; confirm the library allocates this string with the C
           allocator rather than requiring a dedicated release function. */
        free(tokenized_text);
    }

    delete_wordcut(wordcut);
    return 0;
}