Crates.io | llm_models |
lib.rs | llm_models |
version | 0.0.1 |
source | src |
created_at | 2024-10-10 16:57:41.706429 |
updated_at | 2024-10-10 16:57:41.706429 |
description | Load and Download LLM Models, Metadata, and Tokenizers |
homepage | https://github.com/shelbyJenkins/llm_client |
repository | https://github.com/shelbyJenkins/llm_client |
max_upload_size | |
id | 1404048 |
size | 26,942,569 |
This crate is part of the llm_client project.
Everything you need for GGUF models. The GgufLoader
wraps the loaders for convenience. All loaders return a LocalLlmModel
which contains the tokenizer, metadata, chat template, and anything else that can be extracted from the GGUF.
let model: LocalLlmModel = GgufLoader::default()
.llama3_1_8b_instruct()
.preset_with_available_vram_gb(48) // Load the largest quant that will fit in your vram
.load()?;
Load GGUF models from Hugging Face.
let model: LocalLlmModel = GgufLoader::default()
.hf_quant_file_url("https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
.load()?;
Load GGUF models from local storage.
let model: LocalLlmModel = GgufLoader::default()
.local_quant_file_path("/root/.cache/huggingface/hub/models--bartowski--Meta-Llama-3.1-8B-Instruct-GGUF/blobs/9da71c45c90a821809821244d4971e5e5dfad7eb091f0b8ff0546392393b6283")
.load()?;
assert_eq!(ApiLlmModel::gpt_4_o(), ApiLlmModel {
model_id: "gpt-4o".to_string(),
context_length: 128000,
cost_per_m_in_tokens: 5.00,
max_tokens_output: 4096,
cost_per_m_out_tokens: 15.00,
tokens_per_message: 3,
tokens_per_name: 1,
tokenizer: Arc<LlmTokenizer>,
})
let tok = LlmTokenizer::new_tiktoken("gpt-4o"); // Get a Tiktoken tokenizer
let tok = LlmTokenizer::new_from_tokenizer_json("path/to/tokenizer.json"); // From local path
let tok = LlmTokenizer::new_from_hf_repo(hf_token, "meta-llama/Meta-Llama-3-8B-Instruct"); // From repo
// From LocalLlmModel or ApiLlmModel
let tok = model.model_base.tokenizer;
The OpenAiModelTrait, GgufLoaderTrait, AnthropicModelTrait, and HfTokenTrait traits are used for loading models.