Crates.io | batch-processing |
lib.rs | batch-processing |
version | 0.1.17 |
source | src |
created_at | 2024-04-02 03:30:25.164584 |
updated_at | 2024-10-15 18:21:50.535963 |
description | A batch library for processing a list of items in parallel |
homepage | |
repository | https://github.com/luiz-eduardo14/batch-processing |
max_upload_size | |
id | 1193183 |
size | 92,888 |
A batch library for processing a list of items in parallel.
The idea is to process a large mass of data with low memory usage and high performance. The concept involves processing a list of items in parallel, with a maximum number of items being processed at the same time.
This is a simple example of batch processing using asynchronous programming: it reads a CSV file and inserts the data into a database.
let step_builder: AsyncComplexStepBuilder<Result<StringRecord, csv_async::Error>, CarPrice> = AsyncComplexStepBuilder::get("csv_transfer".to_string())
.reader(
Box::new(move || {
let csv_file = csv_path.clone();
return Box::pin(async move {
let csv_file = tokio::fs::File::open(csv_file).await.expect("Error opening file");
let reader = csv_async::AsyncReader::from_reader(csv_file);
let records = reader.into_records();
let stream: Box<dyn Stream<Item=Result<StringRecord, csv_async::Error>> + Send + Unpin>
= Box::new(records);
stream
});
})
).processor(
Box::new(
|csv_line: Result<StringRecord, csv_async::Error>| {
let car_price = csv_line.unwrap();
return Box::pin(
async move {
let car_price = CarPrice {
id: None,
year: car_price.get(1).unwrap().parse::<i32>().ok(),
make: car_price.get(2).map(|s| Some(s.to_string())).unwrap_or(None),
model: car_price.get(3).map(|s| Some(s.to_string())).unwrap_or(None),
trim: car_price.get(4).map(|s| Some(s.to_string())).unwrap_or(None),
body: car_price.get(5).map(|s| Some(s.to_string())).unwrap_or(None),
transmission: car_price.get(6).map(|s| Some(s.to_string())).unwrap_or(None),
vin: car_price.get(7).map(|s| Some(s.to_string())).unwrap_or(None),
state: car_price.get(8).map(|s| Some(s.to_string())).unwrap_or(None),
condition: car_price.get(9).map(|s| s.parse::<i32>().ok()).unwrap_or(None),
odometer: car_price.get(10).map(|s| s.parse::<i32>().ok()).unwrap_or(None),
color: car_price.get(11).map(|s| Some(s.to_string())).unwrap_or(None),
interior: car_price.get(12).map(|s| Some(s.to_string())).unwrap_or(None),
seller: car_price.get(13).map(|s| Some(s.to_string())).unwrap_or(None),
nmr: car_price.get(14).map(|s| s.parse::<i32>().ok()).unwrap_or(None),
sellingprice: car_price.get(15).map(|s| s.parse::<i32>().ok()).unwrap_or(None),
saledate: None,
};
car_price
}
);
}
)
).writer(
Box::new(
move |vec_car_price: Vec<CarPrice>| {
let pool = Arc::clone(&pool);
let all_memory_usage = Arc::clone(&all_memory_usage);
return Box::pin(
async move {
let mut conn = pool.get().await.expect("Error getting connection");
insert_into(car_prices::table)
.values(vec_car_price)
.execute(&mut conn)
.await
.expect("Error inserting data");
let current_mem = PEAK_ALLOC.current_usage_as_mb() as i32;
all_memory_usage.lock().await.push(current_mem);
}
);
}
)
).chunk_size(2000);
let step = step_builder.build();
step.run().await;
docker-compose up -d
to start the database, then run

cargo test -- --ignored

to run the integration tests, and finally run

docker-compose down
to stop the database