| Crates.io | scirs2-cluster |
| lib.rs | scirs2-cluster |
| version | 0.1.0-beta.2 |
| created_at | 2025-04-12 19:44:11.186627+00 |
| updated_at | 2025-09-20 08:52:57.590423+00 |
| description | Clustering algorithms module for SciRS2 (scirs2-cluster) |
| homepage | |
| repository | https://github.com/cool-japan/scirs |
| max_upload_size | |
| id | 1631184 |
| size | 2,772,030 |
A comprehensive clustering module for the SciRS2 scientific computing library in Rust. This crate provides production-ready implementations of various clustering algorithms with a focus on performance, SciPy compatibility, and idiomatic Rust code.
🎯 Version 0.1.0-beta.2 is the first beta release and is ready for production use. The crate covers the following areas:
- Vector Quantization
- Hierarchical Clustering
- Density-Based Clustering
- Other Algorithms
- Evaluation Metrics
Add this to your Cargo.toml:
[dependencies]
scirs2-cluster = "0.1.0-beta.2"
ndarray = "0.15"
To enable optimizations through the core module, add feature flags:
[dependencies]
scirs2-cluster = { version = "0.1.0-beta.2", features = ["parallel", "simd"] }
use ndarray::Array2;
use scirs2_cluster::vq::{kmeans, KMeansOptions, KMeansInit};
// Create a dataset
let data = Array2::from_shape_vec((6, 2), vec![
1.0, 2.0,
1.2, 1.8,
0.8, 1.9,
3.7, 4.2,
3.9, 3.9,
4.2, 4.1,
]).unwrap();
// Configure K-means
let options = KMeansOptions {
init_method: KMeansInit::KMeansPlusPlus,
max_iter: 300,
..Default::default()
};
// Run k-means with k=2
let (centroids, labels) = kmeans(data.view(), 2, Some(options)).unwrap();
println!("Centroids: {:?}", centroids);
println!("Cluster assignments: {:?}", labels);
use scirs2_cluster::vq::{kmeans2, MinitMethod, MissingMethod, whiten};
// Whiten the data for better clustering
let whitened_data = whiten(&data).unwrap();
// Run kmeans2 with different initialization methods
let (centroids, labels) = kmeans2(
whitened_data.view(),
3, // k clusters
Some(10), // iterations
Some(1e-4), // threshold
Some(MinitMethod::PlusPlus), // K-means++ initialization
Some(MissingMethod::Warn), // warn on empty clusters
Some(true), // check finite values
Some(42), // random seed
).unwrap();
use scirs2_cluster::vq::{minibatch_kmeans, MiniBatchKMeansOptions};
// Configure mini-batch K-means
let options = MiniBatchKMeansOptions {
batch_size: 1024,
max_iter: 100,
..Default::default()
};
// Run clustering on a large dataset (`large_data` is assumed to be a 2-D array defined elsewhere)
let (centroids, labels) = minibatch_kmeans(large_data.view(), 5, Some(options)).unwrap();
use ndarray::Array2;
use scirs2_cluster::hierarchy::{linkage, fcluster, LinkageMethod};
// Create a dataset
let data = Array2::from_shape_vec((6, 2), vec![
1.0, 2.0,
1.2, 1.8,
0.8, 1.9,
3.7, 4.2,
3.9, 3.9,
4.2, 4.1,
]).unwrap();
// Calculate linkage matrix using Ward's method
let linkage_matrix = linkage(data.view(), LinkageMethod::Ward, None).unwrap();
// Form flat clusters by cutting the dendrogram
let num_clusters = 2;
let labels = fcluster(&linkage_matrix, num_clusters, None).unwrap();
println!("Cluster assignments: {:?}", labels);
use scirs2_cluster::metrics::{silhouette_score, davies_bouldin_score, calinski_harabasz_score};
// Evaluate clustering quality
let silhouette = silhouette_score(data.view(), labels.view()).unwrap();
let db_score = davies_bouldin_score(data.view(), labels.view()).unwrap();
let ch_score = calinski_harabasz_score(data.view(), labels.view()).unwrap();
println!("Silhouette score: {}", silhouette);
println!("Davies-Bouldin score: {}", db_score);
println!("Calinski-Harabasz score: {}", ch_score);
use ndarray::Array2;
use scirs2_cluster::density::{dbscan, labels};
// Create a dataset with clusters and noise
let data = Array2::from_shape_vec((8, 2), vec![
1.0, 2.0, // Cluster 1
1.5, 1.8, // Cluster 1
1.3, 1.9, // Cluster 1
5.0, 7.0, // Cluster 2
5.1, 6.8, // Cluster 2
5.2, 7.1, // Cluster 2
0.0, 10.0, // Noise
10.0, 0.0, // Noise
]).unwrap();
// Run DBSCAN with eps=0.8 and min_samples=2
let cluster_labels = dbscan(data.view(), 0.8, 2, None).unwrap();
// Count noise points
let noise_count = cluster_labels.iter().filter(|&&label| label == labels::NOISE).count();
println!("Cluster assignments: {:?}", cluster_labels);
println!("Number of noise points: {}", noise_count);
This project is dual-licensed; you can choose to use either license. See the LICENSE file for details.
Contributions are welcome! Please see the project's CONTRIBUTING.md file for guidelines.