| Crates.io | tower-resilience-bulkhead |
| lib.rs | tower-resilience-bulkhead |
| version | 0.5.2 |
| created_at | 2025-10-08 20:49:00.97308+00 |
| updated_at | 2026-01-02 19:47:23.816561+00 |
| description | Bulkhead pattern for Tower services - resource isolation and concurrency limiting |
| homepage | |
| repository | https://github.com/joshrotenberg/tower-resilience |
| max_upload_size | |
| id | 1874572 |
| size | 75,280 |
A comprehensive resilience and fault-tolerance toolkit for Tower services, inspired by Resilience4j. Includes circuit breaker, bulkhead, retry with backoff, rate limiting, and more.
[dependencies]
tower-resilience = "0.4"
tower = "0.5"
use tower::ServiceBuilder;
use tower_resilience::prelude::*;
let circuit_breaker = CircuitBreakerLayer::builder()
.failure_rate_threshold(0.5)
.build();
let service = ServiceBuilder::new()
.layer(circuit_breaker.for_request::<()>())
.layer(BulkheadLayer::builder()
.max_concurrent_calls(10)
.build())
.service(my_service);
Note: Use for_request::<T>() with the request type T your service handles so the circuit breaker can plug into ServiceBuilder. The layer.layer(service) method still works when you need direct control over the service value.
Prevent cascading failures by opening the circuit when error rate exceeds threshold:
use tower_resilience_circuitbreaker::CircuitBreakerLayer;
use std::time::Duration;
let layer = CircuitBreakerLayer::<String, ()>::builder()
.name("api-circuit")
.failure_rate_threshold(0.5) // Open at 50% failure rate
.sliding_window_size(100) // Track last 100 calls
.wait_duration_in_open(Duration::from_secs(60)) // Stay open 60s
.on_state_transition(|from, to| {
println!("Circuit breaker: {:?} -> {:?}", from, to);
})
.build();
let service = layer.layer(my_service);
Full examples: circuitbreaker.rs | circuitbreaker_fallback.rs | circuitbreaker_health_check.rs
Limit concurrent requests to prevent resource exhaustion:
use tower_resilience_bulkhead::BulkheadLayer;
use std::time::Duration;
let layer = BulkheadLayer::builder()
.name("worker-pool")
.max_concurrent_calls(10) // Max 10 concurrent
.max_wait_duration(Some(Duration::from_secs(5))) // Wait up to 5s
.on_call_permitted(|concurrent| {
println!("Request permitted (concurrent: {})", concurrent);
})
.on_call_rejected(|max| {
println!("Request rejected (max: {})", max);
})
.build();
let service = layer.layer(my_service);
Full examples: bulkhead.rs | bulkhead_advanced.rs
Enforce timeouts on operations with configurable cancellation:
use tower_resilience_timelimiter::TimeLimiterLayer;
use std::time::Duration;
let layer = TimeLimiterLayer::builder()
.timeout_duration(Duration::from_secs(30))
.cancel_running_future(true) // Cancel on timeout
.on_timeout(|| {
println!("Operation timed out!");
})
.build();
let service = layer.layer(my_service);
Full examples: timelimiter.rs | timelimiter_example.rs
Retry failed requests with exponential backoff and jitter:
use tower_resilience_retry::RetryLayer;
use std::time::Duration;
let layer = RetryLayer::<(), MyError>::builder()
.max_attempts(5)
.exponential_backoff(Duration::from_millis(100))
.on_retry(|attempt, delay| {
println!("Retrying (attempt {}, delay {:?})", attempt, delay);
})
.on_success(|attempts| {
println!("Success after {} attempts", attempts);
})
.build();
let service = layer.layer(my_service);
Full examples: retry.rs | retry_example.rs
Control request rate to protect downstream services:
use tower_resilience_ratelimiter::RateLimiterLayer;
use std::time::Duration;
let layer = RateLimiterLayer::builder()
.limit_for_period(100) // 100 requests
.refresh_period(Duration::from_secs(1)) // per second
.timeout_duration(Duration::from_millis(500)) // Wait up to 500ms
.on_permit_acquired(|wait| {
println!("Request permitted (waited {:?})", wait);
})
.build();
let service = layer.layer(my_service);
Full examples: ratelimiter.rs | ratelimiter_example.rs
Cache responses to reduce load on expensive operations:
use tower_resilience_cache::{CacheLayer, EvictionPolicy};
use std::time::Duration;
let layer = CacheLayer::builder()
.max_size(1000)
.ttl(Duration::from_secs(300)) // 5 minute TTL
.eviction_policy(EvictionPolicy::Lru) // LRU, LFU, or FIFO
.key_extractor(|req: &Request| req.id.clone())
.on_hit(|| println!("Cache hit!"))
.on_miss(|| println!("Cache miss"))
.build();
let service = layer.layer(my_service);
Full examples: cache.rs | cache_example.rs
Provide fallback responses when the primary service fails:
use tower_resilience_fallback::FallbackLayer;
// Return a static fallback value on error
let layer = FallbackLayer::<Request, Response, MyError>::value(
Response::default()
);
// Or compute fallback from the error
let layer = FallbackLayer::<Request, Response, MyError>::from_error(|err| {
Response::error_response(err)
});
// Or use a backup service
let layer = FallbackLayer::<Request, Response, MyError>::service(|req| async {
backup_service.call(req).await
});
let service = layer.layer(primary_service);
Reduce tail latency by firing backup requests after a delay:
use tower_resilience_hedge::HedgeLayer;
use std::time::Duration;
// Fire a hedge request if primary takes > 100ms
let layer = HedgeLayer::builder()
.delay(Duration::from_millis(100))
.max_hedged_attempts(2)
.build();
// Or fire all requests in parallel (no delay)
let layer = HedgeLayer::<(), String, MyError>::builder()
.no_delay()
.max_hedged_attempts(3)
.build();
let service = layer.layer(my_service);
Automatically reconnect on connection failures with configurable backoff:
use tower_resilience_reconnect::{ReconnectLayer, ReconnectConfig, ReconnectPolicy};
use std::time::Duration;
let layer = ReconnectLayer::new(
ReconnectConfig::builder()
.policy(ReconnectPolicy::exponential(
Duration::from_millis(100), // Start at 100ms
Duration::from_secs(5), // Max 5 seconds
))
.max_attempts(10)
.retry_on_reconnect(true) // Retry request after reconnecting
.connection_errors_only() // Only reconnect on connection errors
.on_state_change(|from, to| {
println!("Connection: {:?} -> {:?}", from, to);
})
.build()
);
let service = layer.layer(my_service);
Full examples: reconnect.rs | reconnect_basic.rs | reconnect_custom_policy.rs
Proactive health monitoring with intelligent resource selection:
use tower_resilience_healthcheck::{HealthCheckWrapper, HealthStatus, SelectionStrategy};
use std::time::Duration;
// Create wrapper with multiple resources
let wrapper = HealthCheckWrapper::builder()
.with_context(primary_db, "primary")
.with_context(secondary_db, "secondary")
.with_checker(|db| async move {
match db.ping().await {
Ok(_) => HealthStatus::Healthy,
Err(_) => HealthStatus::Unhealthy,
}
})
.with_interval(Duration::from_secs(5))
.with_selection_strategy(SelectionStrategy::RoundRobin)
.build();
// Start background health checking
wrapper.start().await;
// Get a healthy resource
if let Some(db) = wrapper.get_healthy().await {
// Use healthy database
}
Note: Health Check is not a Tower layer - it's a wrapper pattern for managing multiple resources with automatic failover.
Full examples: basic.rs
Deduplicate concurrent identical requests (singleflight pattern):
use tower_resilience_coalesce::CoalesceLayer;
use tower::ServiceBuilder;
// Coalesce by request ID - concurrent requests for same ID share one execution
let layer = CoalesceLayer::new(|req: &Request| req.id.clone());
let service = ServiceBuilder::new()
.layer(layer)
.service(my_service);
// Use with cache to prevent stampede on cache miss
let service = ServiceBuilder::new()
.layer(cache_layer) // Check cache first
.layer(coalesce_layer) // Coalesce cache misses
.service(backend);
Use cases:
Note: Response and error types must implement Clone to be shared with all waiters.
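A minimal sketch of what that requirement looks like (the `Response` and `FetchError` types here are hypothetical, for illustration only):

```rust
// Hypothetical types: deriving Clone lets CoalesceLayer hand the single
// computed result (or error) to every coalesced waiter.
#[derive(Clone, Debug)]
struct Response {
    id: String,
    body: Vec<u8>,
}

#[derive(Clone, Debug)]
struct FetchError {
    message: String,
}
```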
Delegate request processing to dedicated executors for parallel execution:
use tower_resilience_executor::ExecutorLayer;
use tower::ServiceBuilder;
// Use a dedicated runtime for CPU-heavy work
let compute_runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(8)
.thread_name("compute")
.build()
.unwrap();
let layer = ExecutorLayer::new(compute_runtime.handle().clone());
// Or use the current runtime
let layer = ExecutorLayer::current();
let service = ServiceBuilder::new()
.layer(layer)
.service(my_service);
Use cases:
Dynamically adjust concurrency limits based on observed latency and error rates:
use tower_resilience_adaptive::{AdaptiveLimiterLayer, Aimd, Vegas};
use tower::ServiceBuilder;
use std::time::Duration;
// AIMD: Classic TCP-style congestion control
// Increases limit on success, decreases on failure/high latency
let layer = AdaptiveLimiterLayer::new(
Aimd::builder()
.initial_limit(10)
.min_limit(1)
.max_limit(100)
.increase_by(1) // Add 1 on success
.decrease_factor(0.5) // Halve on failure
.latency_threshold(Duration::from_millis(100))
.build()
);
// Vegas: More stable, uses RTT to estimate queue depth
let layer = AdaptiveLimiterLayer::new(
Vegas::builder()
.initial_limit(10)
.alpha(3) // Increase when queue < 3
.beta(6) // Decrease when queue > 6
.build()
);
let service = ServiceBuilder::new()
.layer(layer)
.service(my_service);
Use cases:
Full examples: adaptive.rs
Inject failures and latency to test your resilience patterns:
use tower_resilience_chaos::ChaosLayer;
use std::time::Duration;
let chaos = ChaosLayer::<String, std::io::Error>::builder()
.name("test-chaos")
.error_rate(0.1) // 10% of requests fail
.error_fn(|_req| std::io::Error::new(
std::io::ErrorKind::Other, "chaos!"
))
.latency_rate(0.2) // 20% delayed
.min_latency(Duration::from_millis(50))
.max_latency(Duration::from_millis(200))
.seed(42) // Deterministic chaos
.build();
let service = chaos.layer(my_service);
WARNING: Only use in development/testing environments. Never in production.
Full examples: chaos.rs | chaos_example.rs
ResilienceError<E> provides a unified error type for composed layers:
use tower_resilience_core::ResilienceError;
type ServiceError = ResilienceError<AppError>;
let service = ServiceBuilder::new()
.layer(timeout_layer)
.layer(circuit_breaker.for_request::<()>())
.layer(bulkhead)
.service(my_service);
// Check error types
if err.is_timeout() { /* ... */ }
if err.is_rate_limited() { /* ... */ }
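A sketch of how these checks might be used at a call site (assumes a mutable binding to the composed stack above; `request` and the handling in each arm are placeholders):

```rust
use tower::{Service, ServiceExt};

// Sketch only: `service` is the composed stack built above, declared `mut`,
// and `request` is a value of its request type.
match service.ready().await?.call(request).await {
    Ok(response) => {
        // Normal path: use the response.
    }
    Err(err) if err.is_timeout() => {
        // The time limiter fired: serve a degraded response.
    }
    Err(err) if err.is_rate_limited() => {
        // Ask the caller to back off and retry later.
    }
    Err(err) => return Err(err),
}
```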
Stack multiple patterns for comprehensive resilience:
use tower::ServiceBuilder;
// Client-side: timeout -> circuit breaker -> retry
let client = ServiceBuilder::new()
.layer(timeout_layer)
.layer(circuit_breaker_layer.for_request::<()>())
.layer(retry_layer)
.service(http_client);
// Server-side: rate limit -> bulkhead -> timeout
let server = ServiceBuilder::new()
.layer(rate_limiter_layer)
.layer(bulkhead_layer)
.layer(timeout_layer)
.service(handler);
For comprehensive guidance on composing patterns effectively, see:
Happy path overhead (no failures triggered):
| Pattern | Overhead |
|---|---|
| Retry (no retries) | ~80-100 ns |
| Time Limiter | ~107 ns |
| Rate Limiter | ~124 ns |
| Bulkhead | ~162 ns |
| Cache (hit) | ~250 ns |
| Circuit Breaker (closed) | ~298 ns |
cargo bench --bench happy_path_overhead
cargo run --example circuitbreaker
cargo run --example bulkhead
cargo run --example retry
See examples/ for more.
cargo test --test stress -- --ignored
Minimum supported Rust version: 1.64.0 (matches Tower)
Licensed under either of:
at your option.
Contributions are welcome! Please see the contributing guidelines for more information.