| Crates.io | minillmlib |
| lib.rs | minillmlib |
| version | 0.2.2 |
| created_at | 2025-12-13 20:37:57.217199+00 |
| updated_at | 2026-01-24 00:14:17.642116+00 |
| description | A minimalist, async-first Rust library for LLM interactions with streaming support |
| homepage | https://github.com/qfeuilla/MiniLLMLibRS |
| repository | https://github.com/qfeuilla/MiniLLMLibRS |
| max_upload_size | |
| id | 1983387 |
| size | 356,606 |
A minimalist, async-first Rust library for LLM interactions with streaming support.
`ChatNode` provides a tree-based conversation structure with branching, `detach()`, `merge()`, tree iterators (depth-first, breadth-first, leaves), and `{placeholders}` in messages.

Add to your Cargo.toml:
[dependencies]
minillmlib = "0.2"
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
use minillmlib::{ChatNode, GeneratorInfo};
#[tokio::main]
async fn main() -> minillmlib::Result<()> {
// Load .env and configure logging
minillmlib::init();
// Create a generator for OpenRouter
let generator = GeneratorInfo::openrouter("google/gemini-2.0-flash-lite-001");
// Start a conversation
let root = ChatNode::root("You are a helpful assistant.");
let response = root.chat("Hello!", &generator).await?;
println!("Assistant: {}", response.text().unwrap_or_default());
Ok(())
}
Set your API key in a .env file or environment:
OPENROUTER_API_KEY=sk-or-v1-your-key-here
# Or for direct OpenAI:
OPENAI_API_KEY=sk-your-key-here
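If you'd rather not rely on the default environment variables, the key can also be attached through the GeneratorInfo builder methods listed further below (`.with_api_key(...)` / `.with_api_key_from_env(...)`); a minimal sketch, where the variable name is just an example:

```rust
use minillmlib::GeneratorInfo;

// Read the key from a custom environment variable instead of the default one.
let generator = GeneratorInfo::openrouter("google/gemini-2.0-flash-lite-001")
    .with_api_key_from_env("MY_OPENROUTER_KEY");
```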
use minillmlib::{ChatNode, GeneratorInfo, CompletionParameters, NodeCompletionParameters};
let generator = GeneratorInfo::openrouter("anthropic/claude-3.5-sonnet");
let root = ChatNode::root("You are helpful.");
let user = root.add_user("What is 2+2?");
// With custom parameters
let params = NodeCompletionParameters::new()
.with_params(
CompletionParameters::new()
.with_temperature(0.0)
.with_max_tokens(100)
);
let response = user.complete(&generator, Some(&params)).await?;
println!("{}", response.text().unwrap());
let root = ChatNode::root("You are helpful.");
let user = root.add_user("Tell me a story.");
let mut stream = user.complete_streaming(&generator, None).await?;
while let Some(chunk) = stream.next_chunk().await {
print!("{}", chunk?.delta);
}
let root = ChatNode::root("You are helpful.");
// First turn
let response1 = root.chat("My name is Alice.", &generator).await?;
// Second turn - context is preserved
let response2 = response1.chat("What's my name?", &generator).await?;
// Response will mention "Alice"
use minillmlib::{ChatNode, GeneratorInfo, ImageData, MessageContent, Message, Role};
let generator = GeneratorInfo::openrouter("google/gemini-2.0-flash-lite-001");
let image = ImageData::from_file("./image.jpg")?;
let content = MessageContent::with_images("Describe this image.", &[image]);
let root = ChatNode::root("You are helpful.");
let user = root.add_child(ChatNode::new(Message {
role: Role::User,
content,
name: None,
tool_call_id: None,
tool_calls: None,
}));
let response = user.complete(&generator, None).await?;
use minillmlib::{AudioData, MessageContent};
let audio = AudioData::from_file("./audio.mp3")?;
let content = MessageContent::with_audio("Transcribe this audio.", &[audio]);
let params = NodeCompletionParameters::new()
.with_parse_json(true) // Enable JSON repair
.with_crash_on_refusal(true) // Retry if no valid JSON
.with_retry(3); // Number of retries
let response = user.complete(&generator, Some(&params)).await?;
// response.text() will contain valid, repaired JSON
let params = NodeCompletionParameters::new()
.with_retry(5)
.with_exp_back_off(true)
.with_back_off_time(1.0) // Start with 1 second
.with_max_back_off(30.0) // Max 30 seconds
.with_crash_on_empty(true); // Retry on empty responses
// Force the model to start its response with specific text
let params = NodeCompletionParameters::new()
.with_force_prepend("Score: ");
// Response will start with "Score: " followed by the model's completion
use minillmlib::{CompletionParameters, ProviderSettings};
let provider = ProviderSettings::new()
.sort_by_throughput() // or .sort_by_price()
.deny_data_collection()
.with_ignore(vec!["SambaNova".to_string()]); // Exclude providers
let params = CompletionParameters::new()
.with_provider(provider);
// Pass arbitrary parameters to the API
let params = CompletionParameters::new()
.with_extra("custom_param", serde_json::json!(42))
.with_extra("another", serde_json::json!({"nested": "value"}));
use minillmlib::{pretty_messages, format_conversation, PrettyPrintConfig};
let root = ChatNode::root("You are helpful.");
let user = root.add_user("Hello");
let assistant = user.add_assistant("Hi there!");
// Default formatting
let pretty = format_conversation(&assistant);
// Output: "SYSTEM: You are helpful.\n\nUSER: Hello\n\nASSISTANT: Hi there!"
// Custom formatting
let config = PrettyPrintConfig::new("[SYS] ", "\n[USR] ", "\n[AST] ");
let pretty = pretty_messages(&assistant, Some(&config));
use minillmlib::ChatNode;
// Create a reusable prompt template
let root = ChatNode::root("You are {bot_name}, a {style} assistant.");
root.set_format_kwarg("bot_name", "Claude");
root.set_format_kwarg("style", "helpful");
let user = root.add_user("Hi {bot_name}!");
// Get formatted messages with placeholders replaced
let formatted = user.formatted_thread();
// Messages now contain "You are Claude, a helpful assistant." etc.
use minillmlib::ChatNode;
// Build a conversation
let root = ChatNode::root("You are helpful.");
root.set_format_kwarg("name", "Alice");
let user = root.add_user("Hello {name}!");
let assistant = user.add_assistant("Hi there!");
// Save to JSON file
assistant.save_thread("conversation.json")?;
// Load from JSON file (returns root and leaf)
let (loaded_root, loaded_leaf) = ChatNode::from_thread_file("conversation.json")?;
// Or load from JSON string
let json = r#"{"prompts": [{"role": "system", "content": "Hello"}], "required_kwargs": {}}"#;
let (root, leaf) = ChatNode::from_thread_json(json)?;
use minillmlib::ChatNode;
// Navigate to root from any node
let root = some_deep_node.get_root();
// Detach a subtree
let subtree = node.detach(); // node is now a new root
// Merge trees
let merged = tree1_leaf.merge(&tree2_leaf); // tree2's root becomes child of tree1_leaf
// Iterate over tree
for node in root.iter_depth_first() {
println!("{}", node.text().unwrap_or_default());
}
// Get all leaves
let leaves = root.iter_leaves();
// Count nodes
let count = root.node_count();
use minillmlib::{ChatNode, GeneratorInfo, NodeCompletionParameters, CostInfo};
use std::sync::{Arc, Mutex};
let generator = GeneratorInfo::openrouter("google/gemini-2.0-flash-lite-001");
// Track costs across multiple requests
let total_cost = Arc::new(Mutex::new(0.0));
let cost_tracker = total_cost.clone();
let params = NodeCompletionParameters::new()
.with_openrouter_cost_tracking()
.with_cost_callback(move |info: CostInfo| {
*cost_tracker.lock().unwrap() += info.cost;
println!("Request cost: {} credits", info.cost);
println!("Tokens: {} prompt, {} completion",
info.prompt_tokens, info.completion_tokens);
});
let root = ChatNode::root("You are helpful.");
let user = root.add_user("Hello!");
let response = user.complete(&generator, Some(&params)).await?;
println!("Total spent: {} credits", *total_cost.lock().unwrap());
| Type | Description |
|---|---|
| `ChatNode` | A node in the conversation tree |
| `GeneratorInfo` | LLM provider configuration |
| `CompletionParameters` | Generation parameters (temperature, max_tokens, etc.) |
| `NodeCompletionParameters` | Per-request settings (retry, JSON parsing, cost tracking, etc.) |
| `Message` | A single message with role and content |
| `MessageContent` | Text or multimodal content |
| `ThreadData` | Serializable conversation thread with format kwargs |
| `CostInfo` | Cost and token usage information from completions |
| `CostTrackingType` | Cost tracking mode (None, OpenRouter) |
// Pre-configured providers
GeneratorInfo::openrouter(model) // OpenRouter API
GeneratorInfo::openai(model) // OpenAI API
GeneratorInfo::anthropic(model) // Anthropic API
GeneratorInfo::custom(name, url, model) // Custom endpoint
// Builder methods
.with_api_key(key)
.with_api_key_from_env("ENV_VAR")
.with_header(name, value)
.with_vision()
.with_audio()
.with_max_context(length)
.with_default_params(params)
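These builders chain onto any of the constructors above; a sketch of a fully configured custom endpoint (the name, URL, model, header, and key variable are all placeholder values, not library defaults):

```rust
use minillmlib::{CompletionParameters, GeneratorInfo};

// Every literal below ("my-proxy", the URL, the header, the model) is illustrative.
let generator = GeneratorInfo::custom("my-proxy", "https://llm.example.com/v1", "my-model")
    .with_api_key_from_env("MY_PROXY_KEY")
    .with_header("X-Org-Id", "research")
    .with_vision()
    .with_max_context(128_000)
    .with_default_params(CompletionParameters::new().with_temperature(0.2));
```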
| Parameter | Type | Default | Description |
|---|---|---|---|
| `max_tokens` | `Option<u32>` | `4096` | Maximum tokens to generate |
| `temperature` | `Option<f32>` | `0.7` | Sampling temperature |
| `top_p` | `Option<f32>` | `None` | Nucleus sampling |
| `top_k` | `Option<u32>` | `None` | Top-k sampling |
| `stop` | `Option<Vec<String>>` | `None` | Stop sequences |
| `seed` | `Option<u64>` | `None` | Random seed |
| `provider` | `Option<ProviderSettings>` | `None` | OpenRouter provider routing |
| `extra` | `Option<HashMap>` | `None` | Custom parameters |
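A sketch combining several of these fields; `with_temperature`, `with_max_tokens`, and `with_extra` appear in the examples above, while `with_top_p`, `with_stop`, and `with_seed` are assumed to follow the same `with_*` builder naming:

```rust
use minillmlib::CompletionParameters;

let params = CompletionParameters::new()
    .with_temperature(0.2)
    .with_max_tokens(512)
    // The next three builder names are assumed from the field names above.
    .with_top_p(0.9)
    .with_stop(vec!["\n\n".to_string()])
    .with_seed(42)
    .with_extra("logprobs", serde_json::json!(true));
```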
| Parameter | Type | Default | Description |
|---|---|---|---|
| `system_prompt` | `Option<String>` | `None` | Override system prompt |
| `parse_json` | `bool` | `false` | Parse/repair JSON response |
| `force_prepend` | `Option<String>` | `None` | Force response prefix |
| `retry` | `u32` | `4` | Retry attempts |
| `exp_back_off` | `bool` | `false` | Exponential backoff |
| `back_off_time` | `f64` | `1.0` | Initial backoff (seconds) |
| `max_back_off` | `f64` | `15.0` | Max backoff (seconds) |
| `crash_on_refusal` | `bool` | `false` | Error if no JSON |
| `crash_on_empty_response` | `bool` | `false` | Error if empty |
| `cost_tracking` | `CostTrackingType` | `None` | Enable cost tracking |
| `cost_callback` | `Option<CostCallback>` | `None` | Callback for cost info |
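Per-request settings compose with the generation parameters above through `.with_params(...)`, as in the earlier examples; a sketch, where `with_system_prompt` is assumed from the `system_prompt` field:

```rust
use minillmlib::{CompletionParameters, NodeCompletionParameters};

let params = NodeCompletionParameters::new()
    // Assumed builder name, mirroring the `system_prompt` field in the table.
    .with_system_prompt("Answer with valid JSON only.")
    .with_parse_json(true)
    .with_retry(3)
    .with_exp_back_off(true)
    .with_params(CompletionParameters::new().with_temperature(0.0));
```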
| Parameter | Description |
|---|---|
| `order` | Ordered list of providers to try |
| `sort` | Sort by: "price", "throughput", "latency" |
| `ignore` | Providers to exclude |
| `data_collection` | "allow" or "deny" |
| `allow_fallbacks` | Allow fallback providers |
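The `order` and `allow_fallbacks` fields are not covered by the earlier provider example; a sketch that pins an explicit provider order, assuming `with_order` and `with_allow_fallbacks` follow the same builder naming as the methods shown above:

```rust
use minillmlib::ProviderSettings;

let provider = ProviderSettings::new()
    // Assumed builder names, mirroring the `order` and `allow_fallbacks` fields.
    .with_order(vec!["Anthropic".to_string(), "OpenAI".to_string()])
    .with_allow_fallbacks(false)
    .deny_data_collection();
```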
The library includes a CLI for JSON repair:
# Repair JSON from file
minillmlib-cli input.json
# Repair JSON from stdin
echo '{"key": "value",}' | minillmlib-cli
# Run all tests (unit + integration)
cargo test
# Run only unit tests (fast, no API calls)
cargo test --lib
# Run integration tests (requires API key)
cargo test --test integration_tests
# Run with output
cargo test -- --nocapture
MIT License - see LICENSE for details.
Contributions are welcome! Please feel free to submit a Pull Request.