| Crates.io | utf8-rune |
| lib.rs | utf8-rune |
| version | 0.0.2 |
| created_at | 2025-07-09 03:31:07.404098+00 |
| updated_at | 2025-07-10 05:42:51.257046+00 |
| description | Lightweight crate that aims at being a building block for libraries that work with UTF-8 data. This crate provides the struct Rune which can be thought of in some cases as a drop-in replacement for Rust's char type. This crate also provides a few low-level tools to work with raw pointers of bytes and work with a sequence of bytes to produce valid UTF-8 data. The idea of Rune both borrows from and expands Golang's notion of rune such that rather than representing one 32-bit integer, each `utf8_rune::Rune` represents a set of bytes that, when displayed together, represent a single visible UTF-8 character. |
| homepage | https://github.com/gabrielfalcao/utf8-rune |
| repository | |
| max_upload_size | |
| id | 1744095 |
| size | 121,179 |
Lightweight crate that aims at being a building block for libraries that work with UTF-8 data.
This crate provides the struct Rune which can be thought of in some cases as a drop-in replacement for Rust's char type.
This crate also provides a few low-level tools to work with raw pointers of bytes and work with a sequence of bytes to produce valid UTF-8 data.
The idea of Rune both borrows from and expands Golang's notion of rune
such that rather than representing one 32-bit integer, each
utf8_rune::Rune represents a set of bytes that, when displayed
together, represent a single visible UTF-8 character.
utf8_rune::Rune
use utf8_rune::Rune;
let rune = Rune::new("π©π»βπ");
assert_eq!(rune.len(), 15);
assert_eq!(rune.as_str(), "π©π»βπ");
assert_eq!(rune.as_bytes(), "π©π»βπ".as_bytes());
assert_eq!(rune.as_bytes(), *&rune);
utf8_rune::Runes
use utf8_rune::Runes;
let parts = Runes::new("π©π»βπππΏπ§π½βππ¨βππΆοΈπΉππ₯β€οΈβπ₯β€οΈβπ©Ή");
assert_eq!(
parts
.to_vec()
.iter()
.map(|rune| rune.to_string())
.collect::<Vec<String>>(),
vec![
"π©π»βπ",
"ππΏ",
"π§π½βπ",
"π¨βπ",
"πΆοΈ",
"πΉ",
"π",
"π₯",
"β€οΈβπ₯",
"β€οΈβπ©Ή",
]
);
use utf8_rune::Runes;
let runes = Runes::new("πππ»ππΌππ½ππΎππΏ");
assert_eq!(runes.rune_indexes(), vec![
(0, 4),
(4, 8),
(12, 8),
(20, 8),
(28, 8),
(36, 8),
]);
assert_eq!(runes.len(), 6);
assert_eq!(runes[0], "π");
assert_eq!(runes[1], "ππ»");
assert_eq!(runes[2], "ππΌ");
assert_eq!(runes[3], "ππ½");
assert_eq!(runes[4], "ππΎ");
assert_eq!(runes[5], "ππΏ");
utf8_rune::RuneParts
use utf8_rune::{RuneParts, Rune, Runes};
let parts = RuneParts::new("πππ»ππΌππ½ππΎππΏ");
assert_eq!(parts.len(), 44);
assert_eq!(parts.as_str(), "πππ»ππΌππ½ππΎππΏ");
assert_eq!(parts.as_bytes(), "πππ»ππΌππ½ππΎππΏ".as_bytes());
let runes = parts.into_runes();
assert_eq!(runes.len(), 6);
assert_eq!(runes[0], "π");
assert_eq!(runes[1], "ππ»");
assert_eq!(runes[2], "ππΌ");
assert_eq!(runes[3], "ππ½");
assert_eq!(runes[4], "ππΎ");
assert_eq!(runes[5], "ππΏ");
utf8_rune::heuristic
use utf8_rune::get_rune_cutoff_at_index;
let bytes = "π©π»βπππΏπ§π½βππ¨βππΆοΈπΉππ₯β€οΈβπ₯β€οΈβπ©Ή".as_bytes();
let length = bytes.len();
let ptr = bytes.as_ptr();
let index = 56;
let cutoff = get_rune_cutoff_at_index(ptr, length, index).unwrap();
assert_eq!(std::str::from_utf8(&bytes[index..cutoff]), Ok("πΉ"));