| Crates.io | shlesha |
| lib.rs | shlesha |
| version | 0.5.1 |
| created_at | 2025-06-30 10:44:03.445241+00 |
| updated_at | 2025-07-24 13:06:39.726149+00 |
| description | High-performance extensible transliteration library with hub-and-spoke architecture |
| homepage | |
| repository | https://github.com/udapaana/shlesha |
| max_upload_size | |
| id | 1731685 |
| size | 1,296,862 |
A transliteration library for Sanskrit and Indic scripts built on a schema-driven architecture, with compile-time optimization and runtime schema loading.
Setup command:
./scripts/quick-start.sh
This sets up everything: Rust environment, Python bindings, WASM support, and runs all tests.
For detailed setup instructions, see DEVELOPER_SETUP.md.
Documentation: See DOCUMENTATION_INDEX.md for guides and references.
Converters are generated at compile-time from declarative schemas:
# schemas/slp1.yaml - Generates optimized SLP1 converter
metadata:
name: "slp1"
script_type: "roman"
description: "Sanskrit Library Phonetic Basic"
target: "iso15919"
mappings:
vowels:
"A": "ā"
"I": "ī"
"U": "ū"
# ... more mappings
# schemas/bengali.yaml - Generates optimized Bengali converter
metadata:
name: "bengali"
script_type: "brahmic"
description: "Bengali/Bangla script"
mappings:
vowels:
"অ": "अ" # Bengali A → Devanagari A
"আ": "आ" # Bengali AA → Devanagari AA
# ... more mappings
The build system automatically generates highly optimized converters:
# Build output showing schema processing
warning: Processing YAML schemas...
warning: Generating optimized converters with Handlebars templates...
warning: Created 18 schema-generated converters with O(1) lookups
The system determines the conversion path:
// Direct passthrough - zero conversion cost
transliterator.transliterate("धर्म", "devanagari", "devanagari")?; // instant
// Single hub - one conversion
transliterator.transliterate("धर्म", "devanagari", "iso")?; // deva→iso
// Cross-hub - optimized path
transliterator.transliterate("dharma", "itrans", "bengali")?; // itrans→iso→deva→bengali
Supported scripts (name and aliases in parentheses):
- Devanagari (devanagari, deva) - Sanskrit, Hindi, Marathi
- Bengali (bengali, bn) - Bengali/Bangla script
- Tamil (tamil, ta) - Tamil script
- Telugu (telugu, te) - Telugu script
- Gujarati (gujarati, gu) - Gujarati script
- Kannada (kannada, kn) - Kannada script
- Malayalam (malayalam, ml) - Malayalam script
- Odia (odia, od) - Odia/Oriya script
- Gurmukhi (gurmukhi, pa) - Punjabi script
- Sinhala (sinhala, si) - Sinhala script
- Sharada (sharada, shrd) - Historical script of Kashmir, crucial for Vedic manuscripts
- Tibetan (tibetan, tibt, bo) - Important for Buddhist Vedic transmission
- Thai (thai, th) - Adapted from Grantha for Buddhist Vedic texts
- ISO 15919 (iso15919, iso) - International standard
- ITRANS (itrans) - Indian languages TRANSliteration
- SLP1 (slp1) - Sanskrit Library Phonetic Basic
- Harvard-Kyoto (harvard_kyoto, hk) - ASCII-based scheme
- Velthuis (velthuis) - TeX-compatible scheme
- WX (wx) - ASCII-based notation
- IAST (iast) - International Alphabet of Sanskrit Transliteration
- Kolkata (kolkata) - Regional romanization scheme
- Grantha (grantha) - Classical Sanskrit script
use shlesha::Shlesha;
let transliterator = Shlesha::new();
// High-performance cross-script conversion
let result = transliterator.transliterate("धर्म", "devanagari", "gujarati")?;
println!("{}", result); // "ધર્મ"
// Roman to Indic conversion
let result = transliterator.transliterate("dharmakṣetra", "iast", "tamil")?;
println!("{}", result); // "தர்மக்ஷேத்ர"
// Schema-generated converters in action
let result = transliterator.transliterate("Darmakzetra", "slp1", "iast")?;
println!("{}", result); // "dharmakṣetra"
import shlesha
# Create transliterator with all schema-generated converters
transliterator = shlesha.Shlesha()
# Fast schema-based conversion
result = transliterator.transliterate("ধর্ম", "bengali", "telugu")
print(result) # "ధర్మ"
# Performance with metadata tracking
result = transliterator.transliterate_with_metadata("धर्मkr", "devanagari", "iast")
print(f"Output: {result.output}") # "dharmakr"
print(f"Unknown tokens: {len(result.metadata.unknown_tokens)}")
# Runtime extensibility
scripts = shlesha.get_supported_scripts()
print(f"Supports {len(scripts)} scripts: {scripts}")
# Schema-generated high-performance conversion
shlesha transliterate --from slp1 --to devanagari "Darmakzetra"
# Output: धर्मक्षेत्र
# Cross-script conversion via dual hubs
shlesha transliterate --from itrans --to tamil "dharma"
# Output: தர்ம
# List all schema-generated + hand-coded scripts
shlesha scripts
# Output: bengali, devanagari, gujarati, harvard_kyoto, iast, iso15919, itrans, ...
import init, { WasmShlesha } from './pkg/shlesha.js';
async function demo() {
await init();
const transliterator = new WasmShlesha();
// Schema-generated converter performance in browser
const result = transliterator.transliterate("કર્મ", "gujarati", "devanagari");
console.log(result); // "कर्म"
// Runtime script discovery
const scripts = transliterator.listSupportedScripts();
console.log(`${scripts.length} scripts available`);
}
Shlesha supports runtime schema loading across all APIs to add custom scripts without recompilation.
use shlesha::Shlesha;
let mut transliterator = Shlesha::new();
// Load custom schema from YAML content
let custom_schema = r#"
metadata:
name: "my_custom_script"
script_type: "roman"
has_implicit_a: false
description: "My custom transliteration scheme"
target: "iso15919"
mappings:
vowels:
"a": "a"
"e": "ē"
consonants:
"k": "k"
"t": "ṭ"
"#;
// Load the schema at runtime
transliterator.load_schema_from_string(custom_schema, "my_custom_script")?;
// Use immediately without recompilation
let result = transliterator.transliterate("kate", "my_custom_script", "devanagari")?;
println!("{}", result); // "कटे"
// Schema management
let info = transliterator.get_schema_info("my_custom_script").unwrap();
println!("Loaded {} with {} mappings", info.name, info.mapping_count);
import shlesha
transliterator = shlesha.Shlesha()
# Load schema from YAML string
yaml_content = """
metadata:
name: "custom_script"
script_type: "roman"
has_implicit_a: false
description: "Custom transliteration"
target: "iso15919"
mappings:
vowels:
"a": "a"
consonants:
"k": "k"
"""
# Runtime loading
transliterator.load_schema_from_string(yaml_content, "custom_script")
# Immediate usage
result = transliterator.transliterate("ka", "custom_script", "devanagari")
print(result) # "क"
# Schema info
info = transliterator.get_schema_info("custom_script")
print(f"Script: {info['name']}, Mappings: {info['mapping_count']}")
# Schema management
transliterator.remove_schema("custom_script")
transliterator.clear_runtime_schemas()
import init, { WasmShlesha } from './pkg/shlesha.js';
async function loadCustomScript() {
await init();
const transliterator = new WasmShlesha();
// Define custom schema
const yamlContent = `
metadata:
name: "custom_script"
script_type: "roman"
has_implicit_a: false
description: "Custom script"
target: "iso15919"
mappings:
vowels:
"a": "a"
consonants:
"k": "k"
`;
// Load at runtime
transliterator.loadSchemaFromString(yamlContent, "custom_script");
// Use immediately
const result = transliterator.transliterate("ka", "custom_script", "devanagari");
console.log(result); // "क"
// Get schema information
const info = transliterator.getSchemaInfo("custom_script");
console.log(`Name: ${info.name}, Mappings: ${info.mapping_count}`);
}
Development & Testing
// Test schema variations quickly
transliterator.load_schema_from_string(variant_a, "test_a")?;
transliterator.load_schema_from_string(variant_b, "test_b")?;
// Compare results immediately
Dynamic Applications
# User uploads custom transliteration scheme
user_schema = request.files['schema'].read().decode('utf-8')
transliterator.load_schema_from_string(user_schema, user_id)
# Use immediately in application
Configuration-Driven Systems
// Load schemas from configuration
config.schemas.forEach(schema => {
transliterator.loadSchemaFromString(schema.content, schema.name);
});
Shlesha uses a hub-and-spoke architecture with schema-generated converters, trading some performance for extensibility compared to direct conversion approaches.
| Aspect | Shlesha | Vidyut |
|---|---|---|
| Performance | Hub-based | Direct conversion |
| Extensibility | Runtime schemas | Compile-time only |
| Script Support | 15+ (easily expandable) | Limited |
| Architecture | Hub-and-spoke | Direct conversion |
| Bindings | Rust/Python/WASM/CLI | Rust only |
Adding support for new scripts with schemas:
# schemas/new_script.yaml
metadata:
name: "NewScript"
description: "Description of the script"
unicode_block: "NewScript"
has_implicit_vowels: true
mappings:
vowels:
- source: "𑀅" # New script character
target: "अ" # Devanagari equivalent
# ... add more mappings
# Rebuild to include new script
cargo build
# New script automatically available!
Converters are generated using Handlebars templates for consistency:
{{!-- templates/indic_converter.hbs --}}
/// {{metadata.description}} converter generated from schema
pub struct {{pascal_case metadata.name}}Converter {
{{snake_case metadata.name}}_to_deva_map: HashMap<char, char>,
deva_to_{{snake_case metadata.name}}_map: HashMap<char, char>,
}
impl {{pascal_case metadata.name}}Converter {
pub fn new() -> Self {
// Generated O(1) lookup tables
let mut {{snake_case metadata.name}}_to_deva = HashMap::new();
{{#each character_mappings}}
{{snake_case ../metadata.name}}_to_deva.insert('{{this.source}}', '{{this.target}}');
{{/each}}
// ... template continues
}
}
# Test schema-generated converters maintain performance
cargo test --lib
# Verify all conversions work
cargo test comprehensive_bidirectional_tests
# Performance benchmarks
cargo run --example shlesha_vs_vidyut_benchmark
# Default: Schema-generated + hand-coded converters
cargo build
# Development mode with schema recompilation
cargo build --features "schema-dev"
# Minimal build (hand-coded only)
cargo build --no-default-features --features "hand-coded-only"
# All features (Python + WASM + CLI)
cargo build --features "python,wasm,cli"
let mut transliterator = Shlesha::new();
// Load additional schemas at runtime (future feature)
transliterator.load_schema("path/to/new_script.yaml")?;
// Schema registry access
let scripts = transliterator.list_supported_scripts();
println!("Dynamically loaded: {:?}", scripts);
// Track unknown characters and conversion details
let result = transliterator.transliterate_with_metadata("धर्मkr", "devanagari", "iast")?;
if let Some(metadata) = result.metadata {
println!("Conversion: {} → {}", metadata.source_script, metadata.target_script);
for unknown in metadata.unknown_tokens {
println!("Unknown '{}' at position {}", unknown.token, unknown.position);
}
}
// Schema-aware script properties
let registry = ScriptConverterRegistry::default();
// Indic scripts have implicit vowels
assert!(registry.script_has_implicit_vowels("bengali").unwrap());
assert!(registry.script_has_implicit_vowels("devanagari").unwrap());
// Roman schemes don't
assert!(!registry.script_has_implicit_vowels("itrans").unwrap());
assert!(!registry.script_has_implicit_vowels("slp1").unwrap());
// Fine-grained control over conversion paths
let hub = Hub::new();
// Direct hub operations
let iso_text = hub.deva_to_iso("धर्म")?; // Devanagari → ISO
let deva_text = hub.iso_to_deva("dharma")?; // ISO → Devanagari
// Cross-hub conversion with metadata
let result = hub.deva_to_iso_with_metadata("धर्म")?;
# Generate documentation
cargo doc --open
# Run all examples
cargo run --example shlesha_vs_vidyut_benchmark
cargo run --example roman_allocation_analysis
# Performance testing
cargo bench
Shlesha uses an automated release system for publishing to package registries:
# Guided release process
./scripts/release.sh
# Python (PyPI)
pip install shlesha
# WASM (npm)
npm install shlesha-wasm
# Rust (crates.io)
cargo add shlesha
See DEPLOYMENT.md for complete release documentation.
Contributions are welcome. The schema-driven architecture simplifies adding new scripts.
See CONTRIBUTING.md for detailed guidelines.
This project is licensed under the MIT License - see the LICENSE file for details.