Crates.io | add_space |
lib.rs | add_space |
version | 0.1.12 |
created_at | 2025-08-15 10:39:40.104123+00 |
updated_at | 2025-08-22 04:51:47.563782+00 |
description | add_space |
homepage | https://github.com/i18n-site/rust/tree/dev/add_space |
repository | https://github.com/i18n-site/rust.git |
max_upload_size | |
id | 1796548 |
size | 37,849 |
use unicode_script::{Script, UnicodeScript};
/// Classifies a single character into the coarse [`State`] category that
/// `add_space` uses to decide where spaces are inserted.
///
/// The checks run in this order and the first match wins:
///
/// 1. Unicode whitespace → [`State::Space`].
/// 2. An ASCII digit, or a character whose Unicode script is Han, Hiragana,
///    Katakana, Thai, Lao, Khmer, Myanmar or Tibetan → [`State::Char`].
/// 3. A character from the punctuation lists below, or a non-ASCII emoji →
///    [`State::Punctuation`].
/// 4. Anything else → [`State::Letter`]. This includes the ASCII brackets
///    `[ ] ( ) { }`.
pub fn state(c: char) -> State {
    // ASCII, general and CJK punctuation.
    //
    // ASCII `(` and `)` are intentionally NOT listed: `add_space` pushes every
    // opening bracket on its stack but only pops the matching closer inside
    // its `State::Letter` arm, so parentheses have to be classified the same
    // way as `[ ] { }` or an opened `(` would never be closed.
    const PUNCTUATION: &str = r##"!"#%\'*+,-.:;<=>?@^`·—‘’“”…、。「」『』"##;

    if c.is_whitespace() {
        return State::Space;
    }

    let is_spaced_script = matches!(
        c.script(),
        Script::Han
            | Script::Hiragana
            | Script::Katakana
            | Script::Thai
            | Script::Lao
            | Script::Khmer
            | Script::Myanmar
            | Script::Tibetan
    );
    if is_spaced_script || c.is_ascii_digit() {
        return State::Char;
    }

    // Fullwidth forms used in CJK text. Written as escapes so the code points
    // stay unambiguous even if the file passes through a width-folding tool.
    let is_fullwidth_punctuation = matches!(
        c,
        '\u{FF01}' // FULLWIDTH EXCLAMATION MARK
            | '\u{FF08}' // FULLWIDTH LEFT PARENTHESIS
            | '\u{FF09}' // FULLWIDTH RIGHT PARENTHESIS
            | '\u{FF0C}' // FULLWIDTH COMMA
            | '\u{FF1A}' // FULLWIDTH COLON
            | '\u{FF1B}' // FULLWIDTH SEMICOLON
            | '\u{FF1F}' // FULLWIDTH QUESTION MARK
    );

    // `len_utf8() > 1` limits the emoji test to non-ASCII characters, so no
    // ASCII character can be classified as punctuation through the emoji
    // check (NOTE(review): presumably because ASCII digits, `#` and `*` carry
    // the Unicode Emoji property — confirm).
    let is_wide_emoji = c.len_utf8() > 1 && unic_emoji_char::is_emoji(c);

    if PUNCTUATION.contains(c) || is_fullwidth_punctuation || is_wide_emoji {
        return State::Punctuation;
    }

    State::Letter
}
/// Records `c` on `stack` when it is an opening bracket (`[`, `(` or `{`).
///
/// Every other character leaves `stack` untouched.
fn push_stack(c: char, stack: &mut Vec<char>) {
    let is_opening_bracket = matches!(c, '[' | '(' | '{');
    if is_opening_bracket {
        stack.push(c);
    }
}
/// Coarse character category produced by `state` and consumed by `add_space`.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so the value can be
/// used as a map/set key and in APIs that require total equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum State {
    /// A whitespace character.
    Space,
    /// A character from one of the scripts `state` singles out (Han, Kana,
    /// Thai, ...) or an ASCII digit.
    Char,
    /// Any character that falls into none of the other categories.
    Letter,
    /// A punctuation mark listed in `state`, or a non-ASCII emoji.
    Punctuation,
}
/// Returns a copy of `txt` with ASCII spaces inserted at selected boundaries
/// between character categories (see `state`).
///
/// Every input character is copied to the output in order; the function only
/// ever adds `' '` characters. The first character is copied as-is. For each
/// following character the rules are:
///
/// * **Escapes** – the character directly after a backslash is copied
///   verbatim. It is not classified, does not touch the bracket stack and
///   does not become the "previous" character for later decisions.
/// * **`State::Char`** – a space is inserted before it when the previous
///   character was a `State::Letter` other than an opening bracket `[({`.
/// * **`State::Letter`** – while at least one opening bracket is pending on
///   the stack, no space is inserted (and a closer matching the top of the
///   stack pops it). With an empty stack a space is inserted when the
///   previous character was a `State::Char`, or was one of `,?!…`, or was
///   `.` and the current character is uppercase.
/// * **`State::Space` / `State::Punctuation`** – copied unchanged.
///
/// An empty input yields an empty string.
pub fn add_space(txt: impl AsRef<str>) -> String {
    let txt = txt.as_ref();
    // The output is never shorter than the input, so reserve that much once.
    let mut r = String::with_capacity(txt.len());
    let mut iter = txt.chars();
    if let Some(c) = iter.next() {
        r.push(c);
        let mut is_escape = c == '\\';
        let mut pre = state(c);
        let mut pre_c = c;
        // Opening brackets seen so far that have not been closed yet.
        let mut stack = Vec::new();
        push_stack(c, &mut stack);
        for c in iter {
            if is_escape {
                // Escaped character: emit verbatim and keep `pre`/`pre_c`
                // pointing at the state before the escape sequence.
                is_escape = false;
                r.push(c);
                continue;
            }
            let s = state(c);
            push_stack(c, &mut stack);
            // Detect the escape character here, independent of the category
            // `state` assigned to it. Previously this was only done in the
            // `Letter` arm, which a backslash classified as punctuation never
            // reaches, so escapes only worked at the very start of the text.
            is_escape = c == '\\';
            match s {
                State::Char => {
                    if pre == State::Letter && !"[({".contains(pre_c) {
                        r.push(' ');
                    }
                    r.push(c);
                }
                State::Letter => {
                    if let Some(stack_last) = stack.last() {
                        // Inside brackets: never add spaces; just close the
                        // innermost bracket when `c` is its matching closer.
                        if matches!((stack_last, c), ('[', ']') | ('(', ')') | ('{', '}')) {
                            stack.pop();
                        }
                    } else if (!is_escape && pre == State::Char)
                        || (",?!…".contains(pre_c))
                        || (pre_c == '.' && c.is_uppercase())
                    {
                        r.push(' ');
                    }
                    r.push(c);
                }
                State::Space | State::Punctuation => r.push(c),
            }
            pre = s;
            pre_c = c;
        }
    }
    r
}
This project is an open-source component of i18n.site ⋅ Internationalization Solution.
i18 : Markdown Command Line Translation Tool
The translation perfectly maintains the Markdown format.
It recognizes file changes and only translates the modified files.
The translated Markdown content is editable; if you modify the original text and translate it again, manually edited translations will not be overwritten (as long as the original text has not been changed).
i18n.site : Markdown Multi-language Static Site Generator
Optimized for a better reading experience
本项目为 i18n.site ⋅ 国际化解决方案 的开源组件。
翻译能够完美保持 Markdown 的格式。能识别文件的修改,仅翻译有变动的文件。
Markdown 翻译内容可编辑;如果你修改原文并再次机器翻译,手动修改过的翻译不会被覆盖(如果这段原文没有被修改)。
i18n.site : Markdown 多语言静态站点生成器 为阅读体验而优化。