| Crates.io | llm-optimizer-api-grpc |
| lib.rs | llm-optimizer-api-grpc |
| version | 0.1.1 |
| created_at | 2025-11-11 02:51:47.05044+00 |
| updated_at | 2025-11-11 02:51:47.05044+00 |
| description | High-performance gRPC API with streaming support |
| homepage | https://github.com/globalbusinessadvisors/llm-auto-optimizer |
| repository | https://github.com/globalbusinessadvisors/llm-auto-optimizer |
| max_upload_size | |
| id | 1926636 |
| size | 308,199 |
A production-ready, enterprise-grade gRPC API implementation for the LLM Auto Optimizer.
The server exposes services covering:

- Optimization operations with all RPC patterns (unary, server streaming, client streaming, and bidirectional streaming)
- Configuration management
- Real-time metrics and analytics
- External integration management
- Health checks and readiness probes
- Administrative operations
Starting a server:

```rust
use llm_optimizer_api_grpc::{GrpcServer, GrpcServerConfig, TlsConfig};
use std::path::PathBuf;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Initialize tracing
    tracing_subscriber::fmt::init();

    // Configure server
    let mut config = GrpcServerConfig::default();
    config.addr = "0.0.0.0:50051".parse()?;

    // Optional: enable TLS
    config.tls = Some(TlsConfig {
        cert_path: PathBuf::from("certs/server.crt"),
        key_path: PathBuf::from("certs/server.key"),
        ca_cert_path: Some(PathBuf::from("certs/ca.crt")),
        require_client_cert: false,
    });

    // Create and start server
    let server = GrpcServer::new(config).await?;
    server.serve().await?;
    Ok(())
}
```
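The crate ships a `health.proto` (see the protobuf definitions below). If the service follows the standard `grpc.health.v1` protocol, readiness can be probed with the `tonic-health` client. A minimal sketch, assuming the standard protocol and the address configured above:

```rust
use tonic_health::pb::health_client::HealthClient;
use tonic_health::pb::HealthCheckRequest;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let mut health = HealthClient::connect("http://localhost:50051").await?;
    // An empty service name queries the overall server status.
    let response = health
        .check(HealthCheckRequest { service: String::new() })
        .await?;
    println!("Serving status: {:?}", response.into_inner().status);
    Ok(())
}
```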
Calling the service with a unary RPC:

```rust
use llm_optimizer_api_grpc::proto::optimization::*;
use std::str::FromStr;
use tonic::metadata::MetadataValue;
use tonic::transport::Channel;
use tonic::Request;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Connect to server
    let channel = Channel::from_static("http://localhost:50051")
        .connect()
        .await?;
    let mut client = optimization_service_client::OptimizationServiceClient::new(channel);

    // Create optimization request
    let request = CreateOptimizationRequest {
        strategy: OptimizationStrategy::CostPerformanceScoring as i32,
        target_services: vec!["service-1".to_string()],
        changes: vec![ConfigurationChange {
            parameter: "model".to_string(),
            old_value: "claude-3-opus".to_string(),
            new_value: "claude-3-haiku".to_string(),
            change_type: ChangeType::Replace as i32,
        }],
        rationale: "Reduce costs while maintaining quality".to_string(),
        expected_impact: Some(ExpectedImpact {
            cost_reduction_pct: 50.0,
            quality_delta_pct: -5.0,
            latency_delta_pct: 10.0,
            confidence: 0.85,
        }),
        constraints: vec![],
        auto_deploy: false,
    };

    // Add authentication token
    let mut request = Request::new(request);
    let token = "your-jwt-token";
    request.metadata_mut().insert(
        "authorization",
        MetadataValue::from_str(&format!("Bearer {}", token))?,
    );

    // Call service
    let response = client.create_optimization(request).await?;
    println!("Created optimization: {:?}", response.into_inner());
    Ok(())
}
```
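Per-call deadlines attach the same way as metadata; tonic propagates them to the server as the `grpc-timeout` header. A short sketch, where `message` stands in for the `CreateOptimizationRequest` built above:

```rust
use std::time::Duration;
use tonic::Request;

// Fail the call if the server has not answered within five seconds.
let mut request = Request::new(message);
request.set_timeout(Duration::from_secs(5));
let response = client.create_optimization(request).await?;
```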
Server streaming lets a client subscribe to optimization events as they happen:

```rust
use futures::StreamExt;

// Subscribe to optimization events
let request = SubscribeOptimizationEventsRequest {
    decision_ids: vec!["opt-123".to_string()],
    status_filter: vec![],
};

let mut stream = client
    .subscribe_optimization_events(request)
    .await?
    .into_inner();

// Process events as they arrive
while let Some(event) = stream.next().await {
    let event = event?;
    println!("Received event: {:?}", event);
}
```
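Event streams stay open until one side closes them. To bound how long the client waits between events, `stream.next()` can be wrapped in tokio's timeout; a sketch:

```rust
use std::time::Duration;
use tokio::time::timeout;

// Stop listening if no event arrives within 30 seconds.
while let Ok(Some(event)) = timeout(Duration::from_secs(30), stream.next()).await {
    println!("Received event: {:?}", event?);
}
```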
Client streaming batches several requests into a single call:

```rust
use tokio_stream::iter;

// Create multiple optimizations in a batch
let requests = vec![
    CreateOptimizationRequest { /* ... */ },
    CreateOptimizationRequest { /* ... */ },
    CreateOptimizationRequest { /* ... */ },
];

let stream = iter(requests);
let response = client.batch_create_optimizations(stream).await?;
println!("Created {} optimizations", response.into_inner().successful);
```
Bidirectional streaming drives an interactive optimization session:

```rust
use futures::StreamExt;
use tokio::sync::mpsc;

// Create bidirectional channel
let (tx, rx) = mpsc::channel(100);
let outbound = tokio_stream::wrappers::ReceiverStream::new(rx);

// Start session
let mut inbound = client
    .optimization_session(outbound)
    .await?
    .into_inner();

// Send query
tx.send(OptimizationSessionMessage {
    message: Some(optimization_session_message::Message::Query(
        OptimizationQuery {
            target_services: vec!["service-1".to_string()],
            focus_areas: vec!["cost".to_string()],
            context: Default::default(),
        },
    )),
})
.await?;

// Receive suggestions
while let Some(message) = inbound.next().await {
    let message = message?;
    if let Some(optimization_session_message::Message::Suggestion(suggestion)) = message.message {
        println!("Received suggestion: {}", suggestion.explanation);

        // Send feedback
        tx.send(OptimizationSessionMessage {
            message: Some(optimization_session_message::Message::Feedback(
                OptimizationFeedback {
                    suggestion_id: suggestion.suggestion_id,
                    accepted: true,
                    feedback_text: "Looks good!".to_string(),
                    adjustments: Default::default(),
                },
            )),
        })
        .await?;
    }
}
```
The API uses JWT tokens for authentication. Include the token in the authorization metadata:
```rust
let mut request = Request::new(your_request);
request.metadata_mut().insert(
    "authorization",
    MetadataValue::from_str(&format!("Bearer {}", token))?,
);
```
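Instead of attaching the header by hand on every call, the token can be injected automatically with a client interceptor, a pattern tonic supports on generated clients:

```rust
use tonic::metadata::MetadataValue;
use tonic::{Request, Status};

let token: MetadataValue<_> = format!("Bearer {}", token).parse()?;
let mut client = optimization_service_client::OptimizationServiceClient::with_interceptor(
    channel,
    move |mut req: Request<()>| {
        // Attach the bearer token to every outgoing request.
        req.metadata_mut().insert("authorization", token.clone());
        Ok(req)
    },
);
```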
Tokens can be generated server-side with the built-in `TokenManager`:

```rust
use llm_optimizer_api_grpc::{TokenManager, Permission};

let manager = TokenManager::new("your-secret", "llm-optimizer".to_string());
let token = manager.generate_token(
    "user-123",
    vec!["read".to_string(), "write".to_string()],
)?;
```
Development TLS certificates can be generated with OpenSSL:

```bash
# Generate CA
openssl req -x509 -newkey rsa:4096 -days 365 -nodes \
  -keyout ca.key -out ca.crt \
  -subj "/CN=LLM Optimizer CA"

# Generate server certificate
openssl req -newkey rsa:4096 -nodes \
  -keyout server.key -out server.csr \
  -subj "/CN=localhost"

openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key \
  -CAcreateserial -out server.crt -days 365
```
Enable client certificate verification:
```rust
config.tls = Some(TlsConfig {
    cert_path: PathBuf::from("certs/server.crt"),
    key_path: PathBuf::from("certs/server.key"),
    ca_cert_path: Some(PathBuf::from("certs/ca.crt")),
    require_client_cert: true, // Enable mTLS
});
```
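On the client side, tonic's `ClientTlsConfig` presents a client certificate and trusts the CA. A sketch; the `client.crt`/`client.key` pair is hypothetical and would be issued by the same CA as the server certificate (requires tonic's `tls` feature):

```rust
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Identity};

let ca = std::fs::read_to_string("certs/ca.crt")?;
// Hypothetical client certificate and key, signed by the CA above.
let cert = std::fs::read_to_string("certs/client.crt")?;
let key = std::fs::read_to_string("certs/client.key")?;

let tls = ClientTlsConfig::new()
    .ca_certificate(Certificate::from_pem(ca))
    .identity(Identity::from_pem(cert, key))
    .domain_name("localhost"); // must match the server certificate's CN

let channel = Channel::from_static("https://localhost:50051")
    .tls_config(tls)?
    .connect()
    .await?;
```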
Configure rate limits per service:
```rust
use llm_optimizer_api_grpc::interceptors::RateLimitConfig;

config.rate_limit = RateLimitConfig {
    requests_per_second: 100,
    burst_size: 10,
    per_user: true,
};
```
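Calls that exceed the budget fail with `RESOURCE_EXHAUSTED` (see the error table below). A minimal client-side backoff sketch, assuming `message` is a `CreateOptimizationRequest` like the one built earlier:

```rust
use std::time::Duration;
use tonic::Code;

// Retry with exponential backoff while the server reports RESOURCE_EXHAUSTED.
let mut delay = Duration::from_millis(100);
let response = loop {
    match client.create_optimization(message.clone()).await {
        Ok(resp) => break resp,
        Err(status) if status.code() == Code::ResourceExhausted => {
            tokio::time::sleep(delay).await;
            delay = (delay * 2).min(Duration::from_secs(5));
        }
        Err(status) => return Err(status.into()),
    }
};
```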
The API uses tracing for structured logging:
```rust
tracing_subscriber::fmt()
    .with_env_filter("info,llm_optimizer_api_grpc=debug")
    .json()
    .init();
```
Prometheus metrics are also exposed for scraping.
OpenTelemetry integration for distributed tracing:
```rust
use opentelemetry_otlp::WithExportConfig;

// install_batch builds the pipeline, registers the tracer provider
// globally, and returns a tracer for creating spans.
let tracer = opentelemetry_otlp::new_pipeline()
    .tracing()
    .with_exporter(
        opentelemetry_otlp::new_exporter()
            .tonic()
            .with_endpoint("http://localhost:4317"),
    )
    .install_batch(opentelemetry_sdk::runtime::Tokio)?;
```
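To route the server's `tracing` spans through this exporter, the returned tracer can be attached as a subscriber layer; a sketch assuming the `tracing-opentelemetry` crate:

```rust
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;

// Bridge tracing spans into the OpenTelemetry pipeline configured above.
tracing_subscriber::registry()
    .with(tracing_opentelemetry::layer().with_tracer(tracer))
    .with(tracing_subscriber::fmt::layer())
    .init();
```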
All .proto files are located in the proto/ directory:

- `common.proto` - Common types and enums
- `optimization.proto` - Optimization service
- `config.proto` - Configuration service
- `metrics.proto` - Metrics service
- `integrations.proto` - Integration service
- `health.proto` - Health service
- `admin.proto` - Admin service

All errors are mapped to appropriate gRPC status codes:
| API Error | gRPC Code |
|---|---|
| NotFound | NOT_FOUND |
| InvalidArgument | INVALID_ARGUMENT |
| PermissionDenied | PERMISSION_DENIED |
| Unauthenticated | UNAUTHENTICATED |
| RateLimitExceeded | RESOURCE_EXHAUSTED |
| Internal | INTERNAL |
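Clients can branch on these codes directly; a sketch reusing the unary client from earlier:

```rust
use tonic::Code;

match client.create_optimization(request).await {
    Ok(response) => println!("Created: {:?}", response.into_inner()),
    Err(status) if status.code() == Code::Unauthenticated => {
        // Token missing or expired: refresh and retry.
        eprintln!("authentication failed: {}", status.message());
    }
    Err(status) if status.code() == Code::ResourceExhausted => {
        // Rate limited: back off (see the rate limiting section above).
        eprintln!("rate limited: {}", status.message());
    }
    Err(status) => eprintln!("RPC failed: {}", status),
}
```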
Run tests:

```bash
cargo test --package llm-optimizer-api-grpc
```

Run with coverage:

```bash
cargo tarpaulin --package llm-optimizer-api-grpc
```
Benchmarks were run on an M1 Mac.
Licensed under Apache-2.0.

See the main repository's CONTRIBUTING.md for contribution guidelines.