Crates.io | strizer |
lib.rs | strizer |
version | 0.1.0 |
source | src |
created_at | 2021-04-14 06:03:19.641977 |
updated_at | 2021-04-14 06:03:19.641977 |
description | minimal and fast library for text tokenization |
homepage | https://github.com/aleics/strizer |
repository | https://github.com/aleics/strizer |
max_upload_size | |
id | 383656 |
size | 22,291 |
strizer is a minimal and fast library for text tokenization.
Add this to your `Cargo.toml`:
[dependencies]
strizer = "0.1.0"
use std::fs::File;
use std::io::BufReader;
use strizer::{StreamTokenizer, Token, TokenKind};
fn main() -> std::io::Result<()> {
    // Open the log file and wrap it in a buffered reader; a failure to open
    // the file is propagated to the caller through `?`.
    let mut reader = BufReader::new(File::open("log.txt")?);

    // Tokenize the buffered stream and keep only the items whose text slice
    // is exactly "ERROR", then count how many of them there are.
    let error_count = StreamTokenizer::new(&mut reader, &[])
        .filter(|(_, _, slice)| slice == "ERROR")
        .count();

    println!("number of error logs: {}", error_count);

    Ok(())
}
use strizer::StringTokenizer;
fn main() -> std::io::Result<()> {
    // Tokenize an in-memory string and count the items the tokenizer yields;
    // the total is reported below as the number of words.
    let input = "hello world";
    let token_count = StringTokenizer::new(input, &[]).count();

    println!("number of words: {}", token_count);

    Ok(())
}