| Crates.io | voirs-conversion |
| lib.rs | voirs-conversion |
| version | 0.1.0-alpha.1 |
| created_at | 2025-09-21 04:23:59.523097+00 |
| updated_at | 2025-09-21 04:23:59.523097+00 |
| description | Real-time voice conversion system for VoiRS |
| homepage | https://github.com/cool-japan/voirs |
| repository | https://github.com/cool-japan/voirs |
| max_upload_size | |
| id | 1848432 |
| size | 2,640,746 |
Real-time Voice Conversion and Audio Transformation System
This crate provides real-time voice conversion capabilities including speaker conversion, age/gender transformation, voice morphing, and streaming voice conversion for live applications.
use voirs_conversion::prelude::*;
#[tokio::main]
async fn main() -> Result<()> {
// Load source and target audio samples
let source_audio = load_audio("source_speaker.wav").await?;
let target_samples = vec![
load_audio("target_sample1.wav").await?,
load_audio("target_sample2.wav").await?,
];
// Create voice converter
let converter = VoiceConverter::builder()
.with_conversion_type(ConversionType::SpeakerToSpeaker)
.with_quality_mode(QualityMode::Balanced)
.build().await?;
// Define conversion target
let target = ConversionTarget::from_samples(target_samples)?;
// Perform conversion
let request = ConversionRequest::new(source_audio, target)
.with_preserve_prosody(true)
.with_preserve_emotion(true);
let result = converter.convert(request).await?;
// Save converted audio
result.save_audio("converted_voice.wav").await?;
println!("Conversion quality: {:.2}", result.quality_score);
Ok(())
}
use voirs_conversion::prelude::*;
use tokio::time::{interval, Duration};
#[tokio::main]
async fn main() -> Result<()> {
// Create real-time converter
let config = RealtimeConfig::builder()
.with_chunk_size(1024) // 64ms at 16kHz
.with_lookahead(512) // 32ms lookahead
.with_max_latency(50) // 50ms max latency
.build()?;
let converter = RealtimeConverter::new(config).await?;
// Set conversion target
let target_voice = load_target_voice("target.json").await?;
converter.set_target(target_voice).await?;
// Process audio stream
let mut audio_stream = create_audio_stream().await?;
let mut timer = interval(Duration::from_millis(64));
while let Some(audio_chunk) = audio_stream.next().await {
timer.tick().await;
// Convert audio chunk in real-time
let converted_chunk = converter
.process_chunk(audio_chunk)
.await?;
// Output converted audio
output_audio_chunk(converted_chunk).await?;
}
Ok(())
}
use voirs_conversion::transforms::*;
// Create age transformation
let age_transform = AgeTransform::builder()
.target_age(25) // Target age
.current_age(45) // Estimated current age
.naturalness(0.8) // Preserve naturalness
.build()?;
// Create gender transformation
let gender_transform = GenderTransform::builder()
.target_gender(Gender::Female)
.source_gender(Gender::Male)
.pitch_shift_method(PitchShiftMethod::PhaseVocoder)
.formant_adjustment(true)
.build()?;
// Apply transformations
let audio = load_audio("input.wav").await?;
let aged_audio = age_transform.apply(&audio).await?;
let gender_converted = gender_transform.apply(&aged_audio).await?;
use voirs_conversion::transforms::*;
// Create voice morpher
let morpher = VoiceMorpher::new();
// Define morph targets with weights
let morph_targets = vec![
(load_voice_characteristics("voice_a.json").await?, 0.4),
(load_voice_characteristics("voice_b.json").await?, 0.6),
];
// Morph voice characteristics
let target_characteristics = morpher
.morph_characteristics(&morph_targets)
.await?;
// Apply morphing to audio
let morphed_audio = morpher
.apply_morphing(&input_audio, &target_characteristics)
.await?;
use voirs_conversion::types::*;
// Speaker-to-speaker conversion
let speaker_config = ConversionConfig::builder()
.conversion_type(ConversionType::SpeakerToSpeaker)
.preserve_prosody(true)
.preserve_emotion(true)
.quality_mode(QualityMode::HighQuality)
.build()?;
// Age transformation
let age_config = ConversionConfig::builder()
.conversion_type(ConversionType::AgeTransformation {
target_age: 30,
naturalness_weight: 0.8,
})
.build()?;
// Real-time conversion
let realtime_config = ConversionConfig::builder()
.conversion_type(ConversionType::RealtimeConversion)
.max_latency_ms(50)
.chunk_size(1024)
.build()?;
use voirs_conversion::processing::*;
// Create processing pipeline
let pipeline = ProcessingPipeline::builder()
.add_stage(PreprocessingStage::NoiseReduction)
.add_stage(ConversionStage::FeatureExtraction)
.add_stage(ConversionStage::VoiceConversion)
.add_stage(PostprocessingStage::QualityEnhancement)
.with_parallel_processing(true)
.build()?;
// Configure audio buffer
let buffer_config = AudioBufferConfig {
sample_rate: 16000,
channels: 1,
chunk_size: 1024,
overlap: 256,
window_type: WindowType::Hann,
};
use voirs_conversion::cross_domain::*;
// Convert between different audio domains
let converter = CrossDomainConverter::builder()
.source_domain(AudioDomain::Telephone) // 8kHz, compressed
.target_domain(AudioDomain::Studio) // 48kHz, high quality
.with_super_resolution(true)
.with_noise_suppression(true)
.build().await?;
let enhanced_audio = converter
.convert_domain(&low_quality_audio)
.await?;
use voirs_conversion::batch::*;
// Process multiple files in batch
let batch_processor = BatchProcessor::builder()
.with_parallel_jobs(4)
.with_progress_reporting(true)
.build()?;
let batch_request = BatchConversionRequest {
input_files: vec![
"file1.wav".to_string(),
"file2.wav".to_string(),
"file3.wav".to_string(),
],
output_directory: "converted/".to_string(),
conversion_config: conversion_config,
};
let results = batch_processor
.process_batch(batch_request)
.await?;
for result in results {
println!("Processed: {} -> {} (quality: {:.2})",
result.input_file, result.output_file, result.quality_score);
}
use voirs_conversion::quality::*;
// Assess conversion quality
let assessor = ConversionQualityAssessor::new().await?;
let quality_metrics = assessor.assess(
&original_audio,
&converted_audio,
&target_characteristics
).await?;
println!("Overall quality: {:.2}", quality_metrics.overall_score);
println!("Target similarity: {:.2}", quality_metrics.target_similarity);
println!("Naturalness: {:.2}", quality_metrics.naturalness);
println!("Artifact level: {:.2}", quality_metrics.artifact_score);
| Configuration | Latency | RTF | CPU Usage | Memory |
|---|---|---|---|---|
| Low Quality | 25ms | 0.15× | 15% | 200MB |
| Balanced | 35ms | 0.25× | 25% | 400MB |
| High Quality | 50ms | 0.40× | 35% | 600MB |
| Ultra Quality | 100ms | 0.60× | 50% | 800MB |
use voirs_conversion::performance::*;
// Performance monitoring
let monitor = PerformanceMonitor::new();
// Optimize for your use case
let config = ConversionConfig::builder()
.optimization_target(OptimizationTarget::Latency) // or Quality, Throughput
.hardware_acceleration(true)
.memory_limit(2_000_000_000) // 2GB
.build()?;
// Monitor performance
monitor.start_monitoring();
let result = converter.convert(request).await?;
let metrics = monitor.get_metrics();
println!("Processing time: {:.2}s", metrics.processing_time);
println!("Memory usage: {:.1}MB", metrics.peak_memory_mb);
println!("CPU usage: {:.1}%", metrics.avg_cpu_usage);
use voirs_conversion::quality::*;
// Detect conversion artifacts
let artifact_detector = ArtifactDetector::new();
let artifacts = artifact_detector.detect(&converted_audio).await?;
for artifact in artifacts {
println!("Artifact type: {:?}", artifact.artifact_type);
println!("Severity: {:.2}", artifact.severity);
println!("Time range: {:.2}s - {:.2}s",
artifact.start_time, artifact.end_time);
}
use voirs_conversion::adaptive::*;
// Adaptive quality based on input characteristics
let adaptive_converter = AdaptiveConverter::builder()
.with_quality_threshold(0.8)
.with_automatic_adjustment(true)
.build().await?;
// Converter automatically adjusts parameters
let result = adaptive_converter.convert(request).await?;
println!("Used configuration: {:?}", result.used_config);
println!("Adaptation reason: {:?}", result.adaptation_reason);
# Run voice conversion tests
cargo test --package voirs-conversion
# Run real-time processing tests
cargo test --package voirs-conversion realtime
# Run transformation tests
cargo test --package voirs-conversion transforms
# Run quality assessment tests
cargo test --package voirs-conversion quality
# Run performance benchmarks
cargo bench --package voirs-conversion
use voirs_conversion::cloning::*;
// Integration with voice cloning
let cloning_adapter = CloningIntegrationAdapter::new();
let cloned_voice = cloning_adapter
.adapt_cloned_voice(&cloned_voice_model)
.await?;
let converter = VoiceConverter::builder()
.with_cloned_voice_target(cloned_voice)
.build().await?;
use voirs_conversion::acoustic::*;
// Direct acoustic model integration
let acoustic_converter = AcousticModelConverter::new();
let converted_features = acoustic_converter
.convert_acoustic_features(&features, &target_speaker)
.await?;
See the examples/ directory for comprehensive usage examples:
- voice_conversion_example.rs - Basic conversion
- realtime_conversion.rs - Streaming conversion
- batch_conversion.rs - Batch processing
- age_gender_transform.rs - Transformation effects

Licensed under either of Apache License 2.0 or MIT License at your option.
Part of the VoiRS neural speech synthesis ecosystem.