| Crates.io | lnmp-codec |
| lib.rs | lnmp-codec |
| version | 0.5.16 |
| created_at | 2025-11-19 11:12:53.604262+00 |
| updated_at | 2025-12-19 10:12:32.536333+00 |
| description | Parser and encoder for LNMP (LLM Native Minimal Protocol) text format |
| homepage | |
| repository | https://github.com/lnmplang/lnmp-protocol |
| max_upload_size | |
| id | 1939911 |
| size | 1,185,740 |
Parser and encoder implementations for LNMP (LLM Native Minimal Protocol) v0.5 text format and binary format.
FID Registry: All examples in this documentation use official Field IDs from
registry/fids.yaml.
Maintainers note: when shipping codec changes, remember to run the workspace Release workflow (see
CONTRIBUTING.md) so the meta crate picks up the new version.
- Type hints (`:i`, `:f`, `:b`, `:s`, `:sa`, `:r`, `:ra`)
- `IntArray`, `FloatArray`, `BoolArray` handling in parsing/encoding
- `LnmpProfile` (Loose, Standard, Strict) for validation and canonical enforcement
- `lnmp-sfe` dictionaries during parse/encode to map values to canonical equivalents
- `lnmp-sanitize` for LLM-facing inputs
- Round-trip stability: `parse(encode(parse(x))) == parse(encode(x))`
use lnmp_codec::{Parser, Encoder};
// Parse LNMP text
let input = "F12=14532\nF7=1\nF23=[admin,dev]";
let mut parser = Parser::new(input).unwrap();
let record = parser.parse_record().unwrap();
// Encode to canonical format
let encoder = Encoder::new();
let output = encoder.encode(&record);
// Output: F7=1\nF12=14532\nF23=[admin,dev] (sorted by FID)
use lnmp_codec::{Parser, Encoder};
use lnmp_sfe::SemanticDictionary;
// Build a dictionary: map Admin/ADMIN -> admin for field 23
let mut dict = SemanticDictionary::new();
dict.add_equivalence(23, "Admin".to_string(), "admin".to_string());
// Parse with dictionary (applies equivalence during parse)
let mut parser = Parser::with_config(
"F23=[Admin]",
lnmp_codec::config::ParserConfig {
semantic_dictionary: Some(dict.clone()),
..Default::default()
},
)
.unwrap();
let record = parser.parse_record().unwrap();
// Encode with the same dictionary (ensures canonical output)
let encoder = Encoder::with_config(
lnmp_codec::config::EncoderConfig::new().with_semantic_dictionary(dict),
);
let output = encoder.encode(&record);
assert_eq!(output, "F23=[admin]");
use lnmp_codec::binary::{BinaryEncoder, BinaryDecoder};
// Encode text to binary
let text = "F7=1\nF12=14532\nF23=[admin,dev]";
let encoder = BinaryEncoder::new();
let binary = encoder.encode_text(text).unwrap();
// Decode binary to text
let decoder = BinaryDecoder::new();
let decoded_text = decoder.decode_to_text(&binary).unwrap();
// Output: F7=1\nF12=14532\nF23=[admin,dev] (canonical format)
// Round-trip conversion maintains data integrity
assert_eq!(text, decoded_text);
use lnmp_codec::{Parser, Encoder};
// Parse nested record (F70=nested_data from registry)
let input = "F70={F12=1;F7=1}";
let mut parser = Parser::new(input).unwrap();
let record = parser.parse_record().unwrap();
// Parse record array (F71=record_list from registry)
let input = "F71=[{F20=alice},{F20=bob}]";
let mut parser = Parser::new(input).unwrap();
let record = parser.parse_record().unwrap();
// Encode with checksums
use lnmp_codec::EncoderConfig;
let config = EncoderConfig {
enable_checksums: true,
..Default::default()
};
let encoder = Encoder::with_config(config);
let output = encoder.encode(&record);
// Output: F12=14532#36AAE667 (with checksum)
use lnmp_codec::{Parser, TextInputMode, ParsingMode};
use lnmp_codec::binary::BinaryEncoder;
let messy = r#"F20=hello "world"; F40 = 3.14;F30=00042"#;
// Parser profile geared for LLM output
let mut parser = Parser::with_config(
messy,
lnmp_codec::config::ParserConfig {
text_input_mode: TextInputMode::Lenient,
mode: ParsingMode::Loose,
normalize_values: true,
..Default::default()
},
).unwrap();
let record = parser.parse_record().unwrap();
// Binary encoder also provides lenient/strict helpers
let encoder = BinaryEncoder::new();
let bytes = encoder.encode_text_llm_profile(messy).unwrap();
// For M2M strict flows use `Parser::new_strict` or `encode_text_strict_profile`.
Fields are always sorted by FID, ensuring consistent output:
let mut record = LnmpRecord::new();
record.add_field(LnmpField { fid: 30, value: LnmpValue::Int(3) }); // F30=count
record.add_field(LnmpField { fid: 7, value: LnmpValue::Int(1) }); // F7=is_active
record.add_field(LnmpField { fid: 12, value: LnmpValue::Int(2) }); // F12=user_id
let encoder = Encoder::new();
let output = encoder.encode(&record);
// Output: F7=1\nF12=2\nF30=3 (sorted by FID)
[...]
Note: Text parsing now honors typed hints—
`:ia`, `:fa`, and `:ba` force integer, float, and boolean arrays respectively. Without a hint, the parser treats the array as `StringArray`, mirroring the binary encoder/decoder semantics.
Optional type annotations for explicit typing:
use lnmp_codec::{Encoder, EncoderConfig};
let config = EncoderConfig {
include_type_hints: true,
canonical: true,
};
let encoder = Encoder::with_config(config);
let output = encoder.encode(&record);
// Output: F12:i=14532\nF5:f=3.14\nF7:b=1
use lnmp_codec::{Parser, ParsingMode};
// Loose mode (default): accepts format variations
let mut parser = Parser::new("F20=test;F7=1").unwrap(); // Unsorted, semicolons OK
// Strict mode: requires canonical format
let mut parser = Parser::with_mode("F7=1\nF20=test", ParsingMode::Strict).unwrap();
// Strict input mode (no sanitizer)
let mut strict_input_parser = Parser::new_strict("F7=1\nF20=test").unwrap();
Parse and encode hierarchical data:
use lnmp_codec::{Parser, Encoder};
// Nested record: F70={F12=1;F7=1} (F70=nested_data)
let input = "F70={F12=1;F7=1}";
let mut parser = Parser::new(input).unwrap();
let record = parser.parse_record().unwrap();
// Record array: F71=[{F20=alice},{F20=bob}] (F71=record_list, F20=name)
let input = "F71=[{F20=alice},{F20=bob}]";
let mut parser = Parser::new(input).unwrap();
let record = parser.parse_record().unwrap();
// Deep nesting (F70=nested_data)
let input = "F70={F20=user;F70={F30=nested;F31=data}}";
let mut parser = Parser::new(input).unwrap();
let record = parser.parse_record().unwrap();
- `tests/compliance/rust` contains the cross-language suite for strict flows.
- `tests/compliance/rust/test-cases-lenient.yaml` mirrors the shared sanitizer behavior (auto-quote, comment trimming, nested repairs).
- Run `cargo test -p lnmp-codec --tests test-driver -- --nocapture` to execute both strict and lenient suites.
The lenient path uses the `lnmp-sanitize` crate under the hood so SDKs (Rust/TS/Go/Python) can apply identical repair logic before calling strict parsers.
| Profile | Parser Config | Binary Encoder | Intended Use |
|---|---|---|---|
| LLM-facing | text_input_mode = Lenient, mode = ParsingMode::Loose, normalize_values = true | encode_text_llm_profile | Repair user/LLM text before strict parsing |
| M2M strict | Parser::new_strict() or ParserConfig { text_input_mode = Strict, mode = ParsingMode::Strict } | encode_text_strict_profile | Deterministic machine-to-machine pipelines |
- Profile helpers (`Parser::new_lenient`, `Parser::new_strict`, binary profile methods).
- `LLMProfile` (Lenient+Loose) for agent/model traffic and `M2MProfile` (Strict+Strict) for canonical pipelines.
- Lenient parsing is backed by `lnmp-sanitize`, ensuring identical repairs across languages.
Nested Structure Rules:
- Nested records: `{...}` syntax with semicolon separators
- Record arrays: `[{...},{...}]` syntax
Enable checksums for drift prevention:
use lnmp_codec::{Encoder, EncoderConfig};
let config = EncoderConfig {
enable_checksums: true,
..Default::default()
};
let encoder = Encoder::with_config(config);
let output = encoder.encode(&record);
// Output: F12:i=14532#36AAE667
// Parse and validate checksums
use lnmp_codec::{Parser, ParserConfig};
let config = ParserConfig {
validate_checksums: true,
..Default::default()
};
let mut parser = Parser::with_config(input, config).unwrap();
let record = parser.parse_record().unwrap(); // Validates checksums
Canonical value transformations:
use lnmp_codec::{ValueNormalizer, NormalizationConfig};
let config = NormalizationConfig {
string_case: StringCaseRule::Lower,
remove_trailing_zeros: true,
..Default::default()
};
let normalizer = ValueNormalizer::new(config);
// Normalizes: true → 1, -0.0 → 0.0, 3.140 → 3.14
let normalized = normalizer.normalize(&value);
Synonym recognition:
use lnmp_codec::EquivalenceMapper;
let mut mapper = EquivalenceMapper::new();
mapper.add_mapping(7, "yes".to_string(), "1".to_string());
mapper.add_mapping(7, "true".to_string(), "1".to_string());
// Maps "yes" → "1" for field 7
let canonical = mapper.map(7, "yes"); // Some("1")
v0.3 canonical format:
- Checksum suffix `#XXXXXXXX` when enabled
pub struct EncoderConfig {
pub canonical: bool, // Use canonical format
pub include_type_hints: bool, // Add type hints
pub enable_checksums: bool, // Append SC32 checksums (v0.3)
pub normalization_config: Option<NormalizationConfig>, // Value normalization (v0.3)
pub equivalence_mapper: Option<EquivalenceMapper>, // Synonym mapping (v0.3)
}
pub struct ParserConfig {
pub mode: ParsingMode, // Strict or Loose
pub validate_checksums: bool, // Validate SC32 checksums (v0.3)
pub equivalence_mapper: Option<EquivalenceMapper>, // Synonym mapping (v0.3)
}
pub struct NormalizationConfig {
pub string_case: StringCaseRule, // Lower, Upper, None
pub float_precision: Option<usize>, // Decimal places
pub remove_trailing_zeros: bool, // Remove trailing zeros
}
v0.3 is backward compatible with v0.2. New features:
| Feature | v0.2 | v0.3 |
|---|---|---|
| Nested structures | Not supported | Supported |
| Checksums | Not supported | Optional SC32 |
| Value normalization | Not supported | Configurable |
| Equivalence mapping | Not supported | Configurable |
| Type hints | :i, :f, :b, :s, :sa | + :r, :ra |
- `NestedRecord` and `NestedArray` variants
// v0.2 code (still works)
let encoder = Encoder::new();
// v0.3 code with new features
let config = EncoderConfig {
enable_checksums: true,
normalization_config: Some(NormalizationConfig::default()),
..Default::default()
};
let encoder = Encoder::with_config(config);
┌─────────┬─────────┬─────────────┬──────────────────────┐
│ VERSION │ FLAGS │ ENTRY_COUNT │ ENTRIES... │
│ (1 byte)│(1 byte) │ (VarInt) │ (variable) │
└─────────┴─────────┴─────────────┴──────────────────────┘
Each entry contains:
┌──────────┬──────────┬──────────────────┐
│ FID │ THTAG │ VALUE │
│ (2 bytes)│ (1 byte) │ (variable) │
└──────────┴──────────┴──────────────────┘
use lnmp_codec::binary::BinaryEncoder;
use lnmp_core::{LnmpRecord, LnmpField, LnmpValue};
let mut record = LnmpRecord::new();
record.add_field(LnmpField {
fid: 7,
value: LnmpValue::Bool(true),
});
record.add_field(LnmpField {
fid: 12,
value: LnmpValue::Int(14532),
});
let encoder = BinaryEncoder::new();
let binary = encoder.encode(&record).unwrap();
// Binary format: [0x04, 0x00, 0x02, ...] (version, flags, entry count, entries)
use lnmp_codec::binary::{BinaryEncoder, BinaryDecoder, EncoderConfig, DecoderConfig};
// Encoder configuration
let encoder_config = EncoderConfig::new()
.with_validate_canonical(true)
.with_sort_fields(true);
let encoder = BinaryEncoder::with_config(encoder_config);
// Decoder configuration
let decoder_config = DecoderConfig::new()
.with_validate_ordering(true) // Enforce canonical field order
.with_strict_parsing(true); // Detect trailing data
let decoder = BinaryDecoder::with_config(decoder_config);
The decode_view() API enables zero-copy parsing for high-throughput scenarios like routing, filtering, and logging. Instead of allocating owned values, it borrows directly from the input buffer.
| Use Case | API | Reason |
|---|---|---|
| Routing/Filtering | decode_view() | Decision based on field values without full parse |
| Logging/Monitoring | decode_view() | Extract trace ID, timestamps without allocation |
| Proxying/Forwarding | decode_view() | Inspect headers, forward payload unchanged |
| Processing/Storage | decode() | Need to mutate, persist, or own the data |
Based on cargo bench --bench zero_copy_bench (v0.5.15):
| Payload Type | decode() | decode_view() | Speedup |
|---|---|---|---|
| Small (3 fields, ~22 bytes) | ~248 ns | ~92 ns | 2.70x |
| Medium (strings + ints, ~100 bytes) | ~610 ns | ~164 ns | 3.71x |
| Large (10KB string + embedding) | ~18.3 μs | ~2.1 μs | 8.91x |
Throughput Comparison:
| Scenario | Standard | Zero-Copy | Improvement |
|---|---|---|---|
| Small records | 0.09 GiB/s | 0.24 GiB/s | +170% |
| Medium records | 0.16 GiB/s | 0.59 GiB/s | +271% |
| Large records | 0.12 GiB/s | 1.04 GiB/s | +791% |
Batch Processing (1000 records):
use lnmp_codec::binary::BinaryDecoder;
use lnmp_core::LnmpValueView;
let decoder = BinaryDecoder::new();
let bytes = vec![...]; // From network/file
// Create zero-copy view (borrows from 'bytes')
let view = decoder.decode_view(&bytes).unwrap();
// Access fields without allocation
for field in view.fields() {
match &field.value {
LnmpValueView::String(s) => {
println!("String: {}", s); // s is &str (borrowed!)
}
LnmpValueView::Embedding(raw) => {
println!("Embedding: {} bytes", raw.len()); // raw is &[u8]
}
_ => {}
}
}
use lnmp_codec::binary::BinaryDecoder;
use lnmp_core::LnmpValueView;
let decoder = BinaryDecoder::new();
// Process 1M+ messages/sec without allocation
for payload in incoming_messages {
let view = decoder.decode_view(&payload)?;
// Zero-copy field inspection
if let Some(field) = view.get_field(50) { // F50: status
match &field.value {
LnmpValueView::String(status) if *status == "critical" => {
route_to_llm(&payload)?;
}
_ => route_locally(&payload)?
}
}
}
use lnmp_codec::binary::BinaryDecoder;
use lnmp_core::LnmpValueView;
fn extract_trace_id(bytes: &[u8]) -> Option<&str> {
let decoder = BinaryDecoder::new();
let view = decoder.decode_view(bytes).ok()?;
// Zero-copy trace ID access (F80)
if let Some(field) = view.get_field(80) {
if let LnmpValueView::String(trace_id) = &field.value {
return Some(trace_id); // Returns &str (zero-copy!)
}
}
None
}
// Usage in HTTP middleware
let trace_id = extract_trace_id(&request_body)?;
println!("Trace-ID: {}", trace_id); // No allocation
| Type | Zero-Copy | Notes |
|---|---|---|
| String | ✅ Full | Returns &str borrowed from input |
| StringArray | ✅ Full | Returns Vec<&str> (only refs allocated) |
| Embedding | ✅ Lazy | Returns &[u8] raw bytes (parse on demand) |
| Int/Float/Bool | ✅ Natural | Scalars copied (4-8 bytes, negligible) |
| IntArray | ❌ Allocates | VarInt encoding requires parse → Vec<i64> |
| FloatArray | ❌ Allocates | VarInt length + values → Vec<f64> |
| BoolArray | ❌ Allocates | Byte-per-bool → Vec<bool> |
| Nested | ⚠️ Partial | Currently allocates (future: zero-copy traversal) |
Why IntArray allocates:
VarInt encoding stores integers as variable-length sequences. To access [1, 2, 3], the decoder must parse each VarInt, which requires allocation. Future versions will support packed (fixed-width) arrays for zero-copy access.
See examples/zero_copy_routing.rs for a complete routing example with benchmarks.
cargo run -p lnmp-codec --example zero_copy_routing
Expected Output:
=== Zero-Copy Routing Demo ===
Critical message → ROUTE_TO_LLM
Normal message → ROUTE_LOCALLY
=== Performance (100k iterations) ===
Standard decode: 210ms (2.10 μs/iter)
Zero-copy view: 90ms (0.90 μs/iter)
Speedup: 2.33x faster
Before (Standard decode):
let record = decoder.decode(&bytes)?;
if let Some(field) = record.get_field(50) {
match &field.value {
LnmpValue::String(s) => process(s), // s is String (owned)
_ => {}
}
}
After (Zero-copy view):
let view = decoder.decode_view(&bytes)?;
if let Some(field) = view.get_field(50) {
match &field.value {
LnmpValueView::String(s) => process(s), // s is &str (borrowed)
_ => {}
}
}
Key Difference:
- `LnmpValue::String(String)` → `LnmpValueView::String(&str)`
- Call `s.to_string()` when an owned `String` is needed
Use `decode_view()` for maximum throughput. Use `decode()` when you need to own/mutate the data.
Query and synchronize FID registries between peers at runtime:
use lnmp_codec::binary::{SchemaNegotiator, FidDefinition, FidDefStatus, TypeTag};
// Create registry-aware negotiator
let mut negotiator = SchemaNegotiator::v0_5()
.with_registry_version("1.0.0".into());
// Request peer's FID registry
let request = negotiator.request_registry(None);
// Handle registry response
let fids = vec![
FidDefinition {
fid: 12,
name: "user_id".into(),
type_tag: TypeTag::Int,
status: FidDefStatus::Active,
since: "0.1.0".into(),
},
];
negotiator.handle_registry_response("1.0.0".into(), fids);
// Check FID support
if negotiator.peer_supports_fid(12) {
println!("Peer understands user_id field");
}
New Message Types:
- `RequestRegistry` - Query peer FID definitions
- `RegistryResponse` - Full registry response
- `RegistryDelta` - Incremental sync
Validate fields before encoding:
use lnmp_codec::{Encoder, EncoderConfig};
use lnmp_core::registry::{embedded_registry, ValidationMode};
let config = EncoderConfig::new()
.with_fid_registry(embedded_registry())
.with_fid_validation_mode(ValidationMode::Error);
let encoder = Encoder::with_config(config);
let result = encoder.encode_validated(&record); // Returns error on invalid FID
This crate includes several examples in the examples/ directory:
Run examples with:
cargo run --example parse_simple -p lnmp-codec
cargo run --example encode_with_hints -p lnmp-codec
See the root examples/ directory for integration examples and v0.4 binary format demos.
MIT OR Apache-2.0