//! Completion streaming using the Stream API
//!
//! In this example, you should see streamed text output from your local LLM,
//! just like you saw on ChatGPT =)

use std::io::Write;

use futures::StreamExt;
use ollama_rest::{models::generate::GenerationRequest, Ollama};
use serde_json::json;

#[tokio::main]
async fn main() {
    // Make sure Ollama is serving at 127.0.0.1:11434
    let ollama = Ollama::default();

    let request = serde_json::from_value::<GenerationRequest>(json!({
        "model": "llama3.2:1b",
        "prompt": "Why is the sky blue?",
    }))
    .unwrap();

    let mut stream = ollama.generate_streamed(&request).await.unwrap();

    while let Some(Ok(res)) = stream.next().await {
        if !res.done {
            print!("{}", res.response);
            // Flush stdout after each chunk so output appears in real time
            std::io::stdout().flush().unwrap();
        }
    }

    println!();
}
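
// A rough sketch of the Cargo.toml dependencies this example assumes, inferred
// from the imports above (crate versions are illustrative, not pinned):
//
// [dependencies]
// ollama-rest = "*"   // assumption: any release exposing Ollama::generate_streamed
// tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
// futures = "0.3"
// serde_json = "1"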