// Random hyperparameter search for sbr's implicit LSTM model: load an interactions
// dataset, make a user-based train/test split, repeatedly fit models with random
// hyperparameters, and persist the MRR results (sorted ascending by test MRR) to JSON.

#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(unused_imports)]

extern crate csv;
extern crate rand;
extern crate sbr;
extern crate wyrm;
#[macro_use]
extern crate serde_derive;
extern crate serde;
extern crate serde_json;

use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufReader, Read};
use std::collections::HashSet;
use std::time::{Duration, Instant};

use sbr::data::{user_based_split, CompressedInteractions, Interaction, Interactions};
use sbr::evaluation::mrr_score;
use sbr::models::lstm;

/// A single row of the Goodbooks-10k ratings CSV.
#[derive(Deserialize, Serialize)]
struct GoodbooksInteraction {
    user_id: usize,
    book_id: usize,
    rating: usize,
}

/// Load the Goodbooks ratings CSV, using the row index as a pseudo-timestamp,
/// and keep the first million interactions after sorting by user.
fn load_goodbooks(path: &str) -> Interactions {
    let mut reader = csv::Reader::from_path(path).unwrap();
    let mut interactions: Vec<Interaction> = reader
        .deserialize::<GoodbooksInteraction>()
        .map(|x| x.unwrap())
        .enumerate()
        .map(|(i, x)| Interaction::new(x.user_id, x.book_id, i))
        .collect();
    interactions.sort_by_key(|x| x.user_id());

    Interactions::from(interactions[..1_000_000].to_owned())
}

/// Build a small synthetic dataset for smoke-testing the pipeline.
fn load_dummy() -> Interactions {
    let num_users = 100;
    let num_items = 50;

    let mut interactions = Vec::new();

    for user in 0..num_users {
        for item in 0..num_items {
            interactions.push(Interaction::new(user, 1000 + item, item));
        }
    }

    Interactions::from(interactions)
}

/// Outcome of a single hyperparameter-search trial.
#[derive(Debug, Serialize, Deserialize)]
struct Result {
    test_mrr: f32,
    train_mrr: f32,
    elapsed: Duration,
    hyperparameters: lstm::Hyperparameters,
}

/// Load pre-processed Movielens interactions and subsample 100k of them at random.
fn load_movielens(path: &str) -> Interactions {
    let mut reader = csv::Reader::from_path(path).unwrap();
    let interactions: Vec<Interaction> = reader.deserialize().map(|x| x.unwrap()).collect();

    let interactions = rand::seq::sample_slice(&mut rand::thread_rng(), &interactions, 100_000);

    Interactions::from(interactions)
}

/// Build a model from the given hyperparameters and fit it on the training set.
fn fit(train: &CompressedInteractions, hyper: lstm::Hyperparameters) -> lstm::ImplicitLSTMModel {
    let mut model = hyper.build();
    model.fit(train).unwrap();
    model
}

fn main() {
    // let data = load_goodbooks("ratings.csv");
    let data = load_movielens("data.csv");
    // let mut data = load_dummy();

    let mut rng = rand::thread_rng();

    // Hold out 20% of users for evaluation.
    let (train, test) = user_based_split(&data, &mut rng, 0.2);
    let train = train.to_compressed();
    let test = test.to_compressed();

    println!(
        "Train {} {} {}",
        train.num_users(),
        train.num_items(),
        data.len()
    );

    for _ in 0..1000 {
        // Reload any previously saved results so the search can resume across runs.
        let mut results: Vec<Result> = File::open("lstm_results.json")
            .map(|file| serde_json::from_reader(&file).unwrap())
            .unwrap_or(Vec::new());

        let hyper = lstm::Hyperparameters::random(data.num_items(), &mut rng);
        println!("Running {:#?}", &hyper);

        let start = Instant::now();
        let model = fit(&train, hyper.clone());

        let result = Result {
            train_mrr: mrr_score(&model, &train).unwrap(),
            test_mrr: mrr_score(&model, &test).unwrap(),
            elapsed: start.elapsed(),
            hyperparameters: hyper,
        };

        println!("{:#?}", result);

        // Keep only non-NaN results, sorted ascending so the best trial is last.
        if !result.test_mrr.is_nan() {
            results.push(result);
            results.sort_by(|a, b| a.test_mrr.partial_cmp(&b.test_mrr).unwrap());
        }

        println!("Best result: {:#?}", results.last());

        // Persist all results after every trial.
        File::create("lstm_results.json")
            .map(|file| serde_json::to_writer_pretty(&file, &results).unwrap())
            .unwrap();
    }
}