| Crates.io | swe-bench-adapter |
| lib.rs | swe-bench-adapter |
| version | 0.2.0 |
| created_at | 2025-06-30 16:46:37.490328+00 |
| updated_at | 2025-06-30 16:46:37.490328+00 |
| description | SWE-Bench adapter for ruv-swarm orchestration system |
| homepage | |
| repository | https://github.com/ruv-fann/ruv-swarm |
| max_upload_size | |
| id | 1732095 |
| size | 479,489 |
A high-performance adapter for integrating SWE-Bench evaluation with the ruv-swarm orchestration system and Claude Code CLI.
The adapter consists of five main components:
- Instance Loader (`loader.rs`) — loads SWE-Bench instances from disk
- Prompt Generator (`prompts.rs`) — builds Claude Code prompts via `ClaudePromptGenerator`
- Evaluation Engine (`evaluation.rs`) — runs single and batch evaluations
- Benchmarking (`benchmarking.rs`) — measures performance, driven by `BenchmarkConfig`
- Stream Parser (`stream_parser.rs`) — parses Claude output streams (`MetricsCollector`, `StreamAnalyzer`)

A minimal end-to-end example:

```rust
use swe_bench_adapter::{SWEBenchAdapter, SWEBenchConfig};
use ruv_swarm_agents::{Agent, AgentCapability, AgentId};
use anyhow::Result; // assumed; the original snippet's `Result<()>` is unqualified

#[tokio::main]
async fn main() -> Result<()> {
    // Initialize adapter with default configuration
    let config = SWEBenchConfig::default();
    let adapter = SWEBenchAdapter::new(config).await?;

    // Create an agent capable of code generation
    let agent = Agent::new(
        AgentId::new(),
        "claude-coder",
        vec![AgentCapability::CodeGeneration],
    );

    // Evaluate a single instance
    let report = adapter.evaluate_instance("django-12345", &agent).await?;
    println!("Evaluation completed: {}", report.passed);
    println!("Execution time: {:?}", report.execution_time);

    Ok(())
}
```
```rust
// Evaluate multiple instances in parallel
let instance_ids = vec![
    "django-12345".to_string(),
    "flask-67890".to_string(),
    "numpy-11111".to_string(),
];
let agents = vec![agent1, agent2, agent3];

let batch_report = adapter
    .evaluate_batch(
        instance_ids,
        agents,
        true, // parallel execution
    )
    .await?;

println!("Success rate: {:.2}%", batch_report.success_rate * 100.0);
```
Prompt generation can be configured independently of evaluation:

```rust
use swe_bench_adapter::{ClaudePromptGenerator, PromptConfig, PromptStyle};

let config = PromptConfig {
    max_tokens: 6000,
    include_test_hints: true,
    include_context_files: true,
    template_style: PromptStyle::Expert,
};

let generator = ClaudePromptGenerator::new(config);
let prompt = generator.generate_prompt(&instance)?;
println!("Generated prompt ({} tokens):\n{}", prompt.token_count, prompt.content);
```
The stream parser turns raw Claude output into structured metrics:

```rust
use swe_bench_adapter::stream_parser::{MetricsCollector, StreamAnalyzer};

// Single-stream collection
let mut collector = MetricsCollector::new();
let metrics = collector.parse_stream(&claude_output)?;
println!("Tool calls: {}", metrics.tool_calls);
println!("File operations: {:?}", metrics.file_operations);

// Multi-stream analysis
let mut analyzer = StreamAnalyzer::new();
let mut rx = analyzer.add_stream("claude-1".to_string());

// Process output as it arrives
analyzer.process("claude-1", &output_chunk)?;

// Get a global summary across all streams
let summary = analyzer.get_global_summary();
println!("Active streams: {}", summary.active_streams);
```
Every option can also be set explicitly:

```rust
use std::path::PathBuf;
use std::time::Duration;
use swe_bench_adapter::{
    BenchmarkConfig, EvalConfig, PromptConfig, PromptStyle, SWEBenchConfig,
};

let config = SWEBenchConfig {
    instances_path: PathBuf::from("./swe-bench-instances"),
    memory_path: PathBuf::from("./swe-bench-memory"),
    prompt_config: PromptConfig {
        max_tokens: 4000,
        include_test_hints: true,
        include_context_files: true,
        template_style: PromptStyle::ClaudeCode,
    },
    eval_config: EvalConfig {
        timeout: Duration::from_secs(300),
        test_command: "pytest".to_string(),
        sandbox_enabled: true,
        max_retries: 3,
    },
    benchmark_config: BenchmarkConfig {
        iterations: 10,
        warm_up: 3,
        measure_memory: true,
        profile_enabled: false,
    },
};
```
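The explicit configuration is then passed to the constructor exactly as in the quick-start example:

```rust
// Build the adapter from the explicit configuration above.
let adapter = SWEBenchAdapter::new(config).await?;
```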
The adapter is optimized for high-throughput evaluation: batches can be executed in parallel across agents, and the built-in benchmarking component measures iteration timing and memory use on standard hardware. Generated prompts are tailored specifically to the Claude Code CLI, and errors are handled comprehensively, with per-instance timeouts and retries configured through `EvalConfig`; a caller-side sketch follows.
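For example, failures can be logged per instance without aborting a whole run. A minimal sketch, assuming `evaluate_instance` returns an `anyhow`-style `Result` as in the quick-start example (the concrete error type is not shown in this README):

```rust
use anyhow::Result;
use ruv_swarm_agents::Agent;
use swe_bench_adapter::SWEBenchAdapter;

// Evaluate several instances, logging failures instead of aborting.
async fn run_all(adapter: &SWEBenchAdapter, agent: &Agent) -> Result<()> {
    for id in ["django-12345", "flask-67890", "numpy-11111"] {
        match adapter.evaluate_instance(id, agent).await {
            Ok(report) if report.passed => {
                println!("{id}: passed in {:?}", report.execution_time);
            }
            Ok(_) => println!("{id}: completed, but tests failed"),
            // Assumed anyhow-style error; `{e:#}` prints the full error chain.
            Err(e) => eprintln!("{id}: evaluation error: {e:#}"),
        }
    }
    Ok(())
}
```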
Contributions are welcome! Please ensure:

- all tests pass (`cargo test`)
- benchmarks build and run (`cargo bench`)
- code is formatted (`cargo fmt`)
- lints are clean (`cargo clippy`)

This project is licensed under the MIT OR Apache-2.0 license.