add_space

Crates.ioadd_space
lib.rsadd_space
version0.1.12
created_at2025-08-15 10:39:40.104123+00
updated_at2025-08-22 04:51:47.563782+00
descriptionadd_space
homepagehttps://github.com/i18n-site/rust/tree/dev/add_space
repositoryhttps://github.com/i18n-site/rust.git
max_upload_size
id1796548
size37,849
i18n.site (i18nsite)

documentation

README

add_space

use unicode_script::{Script, UnicodeScript};

pub fn state(c: char) -> State {
  if c.is_whitespace() {
    return State::Space;
  }
  if matches!(
    c.script(),
    Script::Han
      | Script::Hiragana
      | Script::Katakana
      | Script::Thai
      | Script::Lao
      | Script::Khmer
      | Script::Myanmar
      | Script::Tibetan
  ) || ('0'..='9').contains(&c)
  {
    return State::Char;
  }
  if r##"!"#%\'*+,-.:<=>?@^`·—‘’“”…、。「」『』!,:?;()"##.contains(c)
    || (c.len_utf8() > 1 && unic_emoji_char::is_emoji(c))
  {
    return State::Punctuation;
  }

  State::Letter
}

fn push_stack(c: char, stack: &mut Vec<char>) {
  if "[({".contains(c) {
    stack.push(c);
  }
}

#[derive(PartialEq, Debug, Copy, Clone)]
pub enum State {
  Space,
  Char,
  Letter,
  Punctuation,
}

pub fn add_space(txt: impl AsRef<str>) -> String {
  let txt = txt.as_ref();
  let mut r = String::new();
  let mut iter = txt.chars();

  if let Some(c) = iter.next() {
    r.push(c);
    let mut is_escape = c == '\\';
    let mut pre = state(c);
    let mut pre_c = c;
    let mut stack = Vec::new();

    push_stack(c, &mut stack);
    for c in iter {
      if is_escape {
        is_escape = false;
        r.push(c);
        continue;
      }
      let s = state(c);
      push_stack(c, &mut stack);
      match s {
        State::Char => {
          if pre == State::Letter && !"[({".contains(pre_c) {
            r.push(' ');
          }
          r.push(c);
        }
        State::Letter => {
          is_escape = c == '\\';
          if let Some(stack_last) = stack.last() {
            if matches!((stack_last, c), ('[', ']') | ('(', ')') | ('{', '}')) {
              stack.pop();
            }
          } else if (!is_escape && pre == State::Char)
            || (",?!…".contains(pre_c))
            || (pre_c == '.' && c.is_uppercase())
          {
            r.push(' ');
          }
          r.push(c);
        }
        _ => r.push(c),
      }
      pre = s;
      pre_c = c;
    }
  }
  r
}

About

This project is an open-source component of i18n.site ⋅ Internationalization Solution.

关于

本项目为 i18n.site ⋅ 国际化解决方案 的开源组件。

Commit count: 68

cargo fmt