//! Completion streaming using Callback API
//!
//! In this example, you should see a streamed text output from your local LLM,
//! just like you saw on ChatGPT =)

use std::io::Write;

use ollama_rest::{models::generate::{GenerationRequest, GenerationResponse}, Ollama};
use serde_json::json;

#[tokio::main]
async fn main() {
    // Make sure Ollama serves at 127.0.0.1:11434
    let ollama = Ollama::default();

    let request = serde_json::from_value::<GenerationRequest>(json!({
        "model": "llama3.2:1b",
        "prompt": "Why is the sky blue?",
    })).unwrap();

    let final_res = ollama.generate(
        &request,
        Some(|res: &GenerationResponse| {
            if !res.done {
                print!("{}", res.response);

                // Flush stdout for each word to allow realtime output
                std::io::stdout().flush().unwrap();
            }
        })
    ).await.unwrap();

    println!("\n\nFinal response:");
    println!("{final_res:?}");
}