utf8-rune

Crates.ioutf8-rune
lib.rsutf8-rune
version0.0.2
created_at2025-07-09 03:31:07.404098+00
updated_at2025-07-10 05:42:51.257046+00
descriptionLightweight crate that aims at being a building block for libraries that work with UTF-8 data. This crate provides the struct Rune, which can be thought of in some cases as a drop-in replacement for Rust's char type. This crate also provides a few low-level tools to work with raw pointers of bytes and work with a sequence of bytes to produce valid UTF-8 data. The idea of Rune both borrows from and expands Golang's notion of rune such that, rather than representing one 32-bit integer, each `utf8_rune::Rune` represents a set of bytes that, when displayed together, represent a single visible UTF-8 character.
homepagehttps://github.com/gabrielfalcao/utf8-rune
repository
max_upload_size
id1744095
size121,179
Gabriel Falcão (gabrielfalcao)

documentation

https://docs.rs/utf8-rune

README

UTF8 Rune

Lightweight crate that aims at being a building block for libraries that work with UTF-8 data.

This crate provides the struct Rune, which can be thought of in some cases as a drop-in replacement for Rust's char type.

This crate also provides a few low-level tools to work with raw pointers of bytes and work with a sequence of bytes to produce valid UTF-8 data.

The idea of Rune both borrows from and expands Golang's notion of rune such that, rather than representing one 32-bit integer, each utf8_rune::Rune represents a set of bytes that, when displayed together, represent a single visible UTF-8 character.

Examples

utf8_rune::Rune

use utf8_rune::Rune;

let rune = Rune::new("👩🏻‍🚒");

assert_eq!(rune.len(), 15);
assert_eq!(rune.as_str(), "👩🏻‍🚒");
assert_eq!(rune.as_bytes(), "👩🏻‍🚒".as_bytes());
assert_eq!(rune.as_bytes(), *&rune);

utf8_rune::Runes

use utf8_rune::Runes;

let parts = Runes::new("👩🏻‍🚒👌🏿🧑🏽‍🚒👨‍🚒🌶️🎹💔🔥❤️‍🔥❤️‍🩹");

assert_eq!(
    parts
        .to_vec()
        .iter()
        .map(|rune| rune.to_string())
        .collect::<Vec<String>>(),
    vec![
        "👩🏻‍🚒",
        "👌🏿",
        "🧑🏽‍🚒",
        "👨‍🚒",
        "🌶️",
        "🎹",
        "💔",
        "🔥",
        "❤️‍🔥",
        "❤️‍🩹",
    ]
);
use utf8_rune::Runes;

let runes = Runes::new("👌👌🏻👌🏼👌🏽👌🏾👌🏿");

assert_eq!(runes.rune_indexes(), vec![
    (0, 4),
    (4, 8),
    (12, 8),
    (20, 8),
    (28, 8),
    (36, 8),
]);

assert_eq!(runes.len(), 6);
assert_eq!(runes[0], "👌");
assert_eq!(runes[1], "👌🏻");
assert_eq!(runes[2], "👌🏼");
assert_eq!(runes[3], "👌🏽");
assert_eq!(runes[4], "👌🏾");
assert_eq!(runes[5], "👌🏿");

utf8_rune::RuneParts

use utf8_rune::{RuneParts, Rune, Runes};

let parts = RuneParts::new("👌👌🏻👌🏼👌🏽👌🏾👌🏿");

assert_eq!(parts.len(), 44);
assert_eq!(parts.as_str(), "👌👌🏻👌🏼👌🏽👌🏾👌🏿");
assert_eq!(parts.as_bytes(), "👌👌🏻👌🏼👌🏽👌🏾👌🏿".as_bytes());

let runes = parts.into_runes();

assert_eq!(runes.len(), 6);
assert_eq!(runes[0], "👌");
assert_eq!(runes[1], "👌🏻");
assert_eq!(runes[2], "👌🏼");
assert_eq!(runes[3], "👌🏽");
assert_eq!(runes[4], "👌🏾");
assert_eq!(runes[5], "👌🏿");

utf8_rune::heuristic

use utf8_rune::get_rune_cutoff_at_index;

let bytes = "👩🏻‍🚒👌🏿🧑🏽‍🚒👨‍🚒🌶️🎹💔🔥❤️‍🔥❤️‍🩹".as_bytes();
let length = bytes.len();
let ptr = bytes.as_ptr();

let index = 56;
let cutoff = get_rune_cutoff_at_index(ptr, length, index).unwrap();
assert_eq!(std::str::from_utf8(&bytes[index..cutoff]), Ok("🎹"));
Commit count: 0

cargo fmt