Crates.io | base256u |
lib.rs | base256u |
version | 1.1.1 |
source | src |
created_at | 2022-06-17 20:33:00.098449 |
updated_at | 2024-02-21 18:48:39.031378 |
description | Simple mapping between bytes and Unicode codepoints |
homepage | |
repository | https://codeberg.org/Taywee/base256u |
max_upload_size | |
id | 608181 |
size | 17,947 |
Just a simple Rust crate and CLI program to map between bytes and Unicode
glyphs. Includes reference printable-ASCII-preserved Unicode (papu) encoder
and decoder functions, as well as emoji ones. The papu encoding preserves
all text that is already only printable ASCII characters, and all the other bytes
map to single-codepoint non-combining printable glyphs, skipping odd things like
`NBSP` (no-break space) and `SHY` (soft hyphen).
$ cargo install base256u-cli
...
$ python -c 'import sys; sys.stdout.buffer.write(bytes(list(range(256))))' | base256u
°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJK
LMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~§ĀāĂăĄąĆćĈĉĊċČčĎďĐđĒēĔĕĖė
ĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıĲĳĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţ
ŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ
$ python -c 'import sys; sys.stdout.buffer.write(bytes(list(range(256))))' | base256u -ew 36
🌰🌱🌲🌳🌴🌵🌶🌷🌸🌹🌺🌻🌼🌽🌾🌿🍀🍁🍂🍃🍄🍅🍆🍇🍈🍉🍊🍋🍌🍍🍎🍏🍐🍑🍒🍓
🍔🍕🍖🍗🍘🍙🍚🍛🍜🍝🍞🍟🍠🍡🍢🍣🍤🍥🍦🍧🍨🍩🍪🍫🍬🍭🍮🍯🍰🍱🍲🍳🍴🍵🍶🍷
🍸🍹🍺🍻🍼🍽🍾🍿🐀🐁🐂🐃🐄🐅🐆🐇🐈🐉🐊🐋🐌🐍🐎🐏🐐🐑🐒🐓🐔🐕🐖🐗🐘🐙🐚🐛
🐜🐝🐞🐟🐠🐡🐢🐣🐤🐥🐦🐧🐨🐩🐪🐫🐬🐭🐮🐯🐰🐱🐲🐳🐴🐵🐶🐷🐸🐹🐺🐻🐼🐽🐾🐿
😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟😠😡😢😣
😤😥😦😧😨😩😪😫😬😭😮😯😰😱😲😳😴😵😶😷😸😹😺😻😼😽😾😿🙀🙁🙂🙃🙄🙅🙆🙇
🙈🙉🙊🙋🙌🙍🙎🙏🤐🤑🤒🤓🤔🤕🤖🤗🤘🤙🤚🤛🤜🤝🤞🤟🤠🤡🤢🤣🤤🤥🤦🤧🤨🤩🤪🤫
🤬🤭🤮🤯
$ echo Pack my box with five dozen liquor jugs. | base256u
Pack my box with five dozen liquor jugs.º
$ echo Pack my box with five dozen liquor jugs. | base256u | base256u -d
Pack my box with five dozen liquor jugs.
$ echo Pack my box with five dozen liquor jugs. | base256u -e
🐀🐑🐓🐛🍐🐝🐩🍐🐒🐟🐨🍐🐧🐙🐤🐘🍐🐖🐙🐦🐕🍐🐔🐟🐪🐕🐞🍐🐜🐙🐡🐥🐟🐢🍐🐚🐥🐗🐣🍞🌺
$ echo Pack my box with five dozen liquor jugs. | base256u -e | base256u -de
Pack my box with five dozen liquor jugs.
$ dd if=/dev/urandom bs=36 count=1 2>/dev/null | base256u
B^$Éıŷ/āűũļşÎ%Ăż'żşAtiŮqHtÀŔwĸÂWŲŋŪķ
$ dd if=/dev/urandom bs=36 count=1 2>/dev/null | base256u -e
😵😟🌻🤖🍮🤢😗🐣😡🍦🐖🐾🍺🐊🌻🍽🐹🍳😣🐈🙌🐿🙎🍪🤙🤝🍫🐰🐮🙈😆🍗🍍🤨🐺🍗
You can find the documentation in the usual place.
Using this crate is as simple as `use base256u::{Decode, Encode};` and then
calling the `base256u()` method, or `base256u_papu()` to get the default papu
encoding.
use crate::{Decode, Encode};
/// Verifies the papu encoder: every byte value maps to its expected glyph, and
/// printable ASCII text comes out unchanged.
#[test]
fn encoding() {
    // All 256 byte values, in ascending order, must produce exactly 256 distinct
    // glyphs. U+0103..U+0105 and U+0132..U+0133 are written as escapes because
    // text-normalizing tools tend to rewrite them (e.g. into `¹`, which already
    // stands for byte 9 earlier in this same string, or into ASCII "IJ"/"ij"),
    // which would silently break the one-to-one mapping being asserted here.
    let encoded: String = (u8::MIN..=u8::MAX).base256u_papu().collect();
    assert_eq!(
        encoded,
        "°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~§ĀāĂ\u{103}\u{104}\u{105}ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİı\u{132}\u{133}ĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ"
    );

    // A sentence made only of printable ASCII must round-trip to itself.
    // `.iter()` states the by-reference iteration explicitly; `.into_iter()` on
    // a `&[u8; N]` yields the same `&u8` items but reads as if it consumed the array.
    let encoded: String = b"Pack my box with five dozen liquor jugs."
        .iter()
        .copied()
        .base256u_papu()
        .collect();
    assert_eq!(encoded, "Pack my box with five dozen liquor jugs.");
}
/// Verifies the papu decoder: the full glyph table decodes back to every byte
/// value in order, unmapped characters decode to `None`, and printable ASCII
/// text decodes to its own bytes.
#[test]
fn decoding() {
    // The 256 mapped glyphs followed by two characters the test expects to be
    // unmapped (U+019D and U+0149). U+0103..U+0105, U+0132..U+0133 and U+0149
    // are written as escapes because text-normalizing tools tend to rewrite
    // them into other characters (or into several characters), which would
    // change both the decoded values and the number of items produced.
    let decoded: Vec<Option<u8>> = "°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~§ĀāĂ\u{103}\u{104}\u{105}ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİı\u{132}\u{133}ĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƝ\u{149}"
        .chars()
        .base256u_papu()
        .collect();

    // Expected result: `Some(byte)` for each of the 256 byte values, then one
    // `None` for each of the two trailing unmapped characters.
    let mut expected: Vec<Option<u8>> = (u8::MIN..=u8::MAX).map(Some).collect();
    expected.push(None);
    expected.push(None);
    assert_eq!(decoded, expected);

    // Collecting into `Option<Vec<u8>>` yields `None` if any character fails to
    // decode, so a failure panics here with a descriptive message.
    let decoded: Vec<u8> = "Pack my box with five dozen liquor jugs."
        .chars()
        .base256u_papu()
        .collect::<Option<Vec<u8>>>()
        .expect("every printable ASCII character should decode to a byte");
    assert_eq!(
        String::from_utf8(decoded).unwrap(),
        "Pack my box with five dozen liquor jugs."
    );
}