cerebras-rs

version: 0.0.2
description: High-performance Rust SDK for Cerebras Inference API - Low-latency AI model inference powered by Cerebras Wafer-Scale Engines
repository: https://github.com/cerebras/cerebras-rs
documentation: https://docs.rs/cerebras-rs
owner: Nikolas Yanek-Chrones (nikothomas)

README

Cerebras Rust SDK


High-performance Rust SDK for the Cerebras Inference API, providing low-latency AI model inference powered by Cerebras Wafer-Scale Engines and CS-3 systems.

Installation

Add this to your Cargo.toml:

[dependencies]
cerebras-rs = "0.0.2"

Quick Start

use cerebras_rs::prelude::*;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    // Create client from environment variable CEREBRAS_API_KEY
    let client = Client::from_env()?;
    
    // Create a chat completion
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .system_message("You are a helpful assistant")
        .user_message("What is the capital of France?")
        .temperature(0.7)
        .build();
    
    let response = client.chat_completion(request).await?;
    if let Some(choices) = &response.choices {
        if let Some(choice) = choices.first() {
            if let Some(message) = &choice.message {
                println!("{}", message.content);
            }
        }
    }
    
    Ok(())
}

Authentication

Set your API key as an environment variable:

export CEREBRAS_API_KEY="your-api-key-here"

Or create a client with an explicit API key:

use cerebras_rs::Client;

let client = Client::new("your-api-key-here");

Available Models

The SDK supports all Cerebras models; a runtime selection sketch follows the list:

  • ModelIdentifier::Llama4Scout17b16eInstruct - Llama 4 Scout 17B
  • ModelIdentifier::Llama3Period18b - Llama 3.1 8B
  • ModelIdentifier::Llama3Period370b - Llama 3.3 70B
  • ModelIdentifier::Qwen332b - Qwen 3 32B
  • ModelIdentifier::DeepseekR1DistillLlama70b - Deepseek R1 Distill Llama 70B
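
If the model name comes from configuration at runtime, one option is to map strings onto the identifiers above. A minimal sketch (the string keys are illustrative, not an official mapping):

use cerebras_rs::prelude::*;

// Hypothetical helper: map a config string to a ModelIdentifier variant.
fn model_from_str(name: &str) -> Option<ModelIdentifier> {
    match name {
        "llama-3.1-8b" => Some(ModelIdentifier::Llama3Period18b),
        "llama-3.3-70b" => Some(ModelIdentifier::Llama3Period370b),
        "qwen-3-32b" => Some(ModelIdentifier::Qwen332b),
        _ => None,
    }
}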

Examples

Chat Completion with Builder Pattern

use cerebras_rs::prelude::*;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .system_message("You are a helpful math tutor")
        .user_message("What is 15 + 27?")
        .temperature(0.3)
        .max_tokens(100)
        .build();

    let response = client.chat_completion(request).await?;
    if let Some(choices) = &response.choices {
        if let Some(choice) = choices.first() {
            if let Some(message) = &choice.message {
                println!("{}", message.content);
            }
        }
    }
    
    Ok(())
}

Streaming Responses

use cerebras_rs::prelude::*;
use futures_util::StreamExt;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .user_message("Tell me a story")
        .stream(true)
        .build();

    let mut stream = client.chat_completion_stream(request).await?;

    while let Some(chunk) = stream.next().await {
        match chunk {
            Ok(chunk) => {
                if let Some(choices) = &chunk.choices {
                    if let Some(choice) = choices.first() {
                        if let Some(delta) = &choice.delta {
                            if let Some(content) = &delta.content {
                                print!("{}", content);
                                // Flush stdout so tokens appear as they arrive,
                                // not only when a newline is printed.
                                std::io::Write::flush(&mut std::io::stdout())?;
                            }
                        }
                    }
                }
            }
            Err(e) => eprintln!("Error: {}", e),
        }
    }
    
    Ok(())
}

Function Calling

use cerebras_rs::prelude::*;
use cerebras_rs::models::{Tool, FunctionDefinition, tool::Type};
use serde_json::json;
use std::collections::HashMap;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    
    // Parameters are a JSON-Schema-style map; this assumes the generated
    // type is HashMap<String, serde_json::Value>.
    let mut params = HashMap::new();
    params.insert("type".to_string(), json!("object"));
    params.insert(
        "properties".to_string(),
        json!({ "location": { "type": "string", "description": "City name" } }),
    );

    let weather_tool = Tool {
        r#type: Some(Type::Function),
        function: Some(FunctionDefinition {
            name: "get_weather".to_string(),
            description: Some("Get current weather".to_string()),
            parameters: Some(params),
        }),
    };

    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .user_message("What's the weather in New York?")
        .tool(weather_tool)
        .build();

    let response = client.chat_completion(request).await?;

    if let Some(choices) = &response.choices {
        if let Some(choice) = choices.first() {
            if let Some(message) = &choice.message {
                if let Some(tool_calls) = &message.tool_calls {
                    for call in tool_calls {
                        println!("Function: {}", call.name.as_ref().unwrap_or(&"".to_string()));
                        println!("Arguments: {}", call.arguments.as_ref().unwrap_or(&"".to_string()));
                    }
                }
            }
        }
    }
    
    Ok(())
}
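
The SDK returns the call; executing it is your code's job. A minimal local dispatch sketch, assuming the arguments string is JSON (the handler and its stubbed result are hypothetical):

use serde_json::Value;

// Hypothetical handler: route a tool call to local logic.
fn dispatch(name: &str, arguments: &str) -> Option<String> {
    let args: Value = serde_json::from_str(arguments).ok()?;
    match name {
        "get_weather" => {
            let location = args.get("location")?.as_str()?;
            // Stubbed result; a real handler would call a weather service.
            Some(format!("Weather in {}: sunny, 22C", location))
        }
        _ => None,
    }
}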

Text Completion

use cerebras_rs::prelude::*;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    
    let request = CompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .prompt("Once upon a time")
        .max_tokens(100)
        .temperature(0.8)
        .build();

    let response = client.completion(request).await?;
    if let Some(choices) = &response.choices {
        if let Some(choice) = choices.first() {
            if let Some(text) = &choice.text {
                println!("{}", text);
            }
        }
    }
    
    Ok(())
}

Error Handling

The SDK surfaces API failures as typed Error variants, so you can match on specific conditions such as rate limiting and authentication errors:

use cerebras_rs::{Client, ChatCompletionRequest, ModelIdentifier, Error};

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .user_message("Hello")
        .build();
    
    match client.chat_completion(request).await {
        Ok(response) => {
            if let Some(choices) = &response.choices {
                if let Some(choice) = choices.first() {
                    if let Some(message) = &choice.message {
                        println!("Success: {}", message.content);
                    }
                }
            }
        }
        Err(Error::RateLimit(retry_after)) => {
            println!("Rate limited. Retry after {} seconds", retry_after);
        }
        Err(Error::Authentication) => {
            println!("Invalid API key");
        }
        Err(e) => {
            println!("Error: {}", e);
        }
    }
    
    Ok(())
}
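
A common follow-up is to retry on rate limits. A minimal sketch, assuming ChatCompletionRequest implements Clone and the retry_after value is a number of seconds:

use cerebras_rs::{Client, ChatCompletionRequest, ModelIdentifier, Error};
use std::time::Duration;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .user_message("Hello")
        .build();

    for attempt in 1..=3 {
        match client.chat_completion(request.clone()).await {
            Ok(response) => {
                // Use the response, then stop retrying.
                let _ = response;
                break;
            }
            // Back off and try again on rate limits (assumes seconds).
            Err(Error::RateLimit(retry_after)) if attempt < 3 => {
                tokio::time::sleep(Duration::from_secs(retry_after as u64)).await;
            }
            Err(e) => return Err(e.into()),
        }
    }

    Ok(())
}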

Advanced Features

Custom Configuration

use cerebras_rs::{Client, Configuration, ChatCompletionRequest, ModelIdentifier};

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let mut config = Configuration::new();
    config.base_path = "https://custom-endpoint.example.com".to_string();
    config.bearer_access_token = Some("your-api-key".to_string());

    let client = Client::with_configuration(config);
    
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .user_message("Hello")
        .build();
    
    let response = client.chat_completion(request).await?;
    // Process response...
    
    Ok(())
}

Response Metadata

use cerebras_rs::prelude::*;

#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let client = Client::from_env()?;
    let request = ChatCompletionRequest::builder(ModelIdentifier::Llama3Period18b)
        .user_message("Hello")
        .build();
    
    let response = client.chat_completion(request).await?;

    // Token usage
    if let Some(usage) = &response.usage {
        println!("Prompt tokens: {}", usage.prompt_tokens.unwrap_or(0));
        println!("Completion tokens: {}", usage.completion_tokens.unwrap_or(0));
        println!("Total tokens: {}", usage.total_tokens.unwrap_or(0));
    }

    // Timing information
    if let Some(time_info) = &response.time_info {
        println!("Queue time: {:.3}s", time_info.queue_time.unwrap_or(0.0));
        println!("Prompt time: {:.3}s", time_info.prompt_time.unwrap_or(0.0));
        println!("Completion time: {:.3}s", time_info.completion_time.unwrap_or(0.0));
        println!("Total time: {:.3}s", time_info.total_time.unwrap_or(0.0));
    }
    
    Ok(())
}
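
Combining the two blocks above gives a rough decode throughput, using the same fields:

// Continuing from the example above: completion tokens per second.
if let (Some(usage), Some(time_info)) = (&response.usage, &response.time_info) {
    let tokens = usage.completion_tokens.unwrap_or(0) as f64;
    let secs = time_info.completion_time.unwrap_or(0.0);
    if secs > 0.0 {
        println!("Throughput: {:.1} tokens/s", tokens / secs);
    }
}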

More Examples

Check out the examples/ directory for more comprehensive examples:

  • chat_completion.rs - Various chat completion scenarios
  • streaming.rs - Streaming response handling
  • function_calling.rs - Tool use and function calling

Run examples with:

cargo run --example chat_completion
cargo run --example streaming
cargo run --example function_calling

Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

License

This project is licensed under the MIT License - see the LICENSE file for details.
