use gguf::GgufLoader;
use llm_models::local_model::*;

fn main() {
    // Using a quantized GGUF file hosted on Hugging Face.
    let _model = GgufLoader::default()
        .hf_quant_file_url("https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
        .load()
        .unwrap();

    // By default we attempt to extract everything we need from the GGUF file.
    // If you need to specify the tokenizer or chat template to use, you can add an HF repo to load from.
    let _model = GgufLoader::default()
        .hf_quant_file_url("https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
        .hf_config_repo_id("meta-llama/Meta-Llama-3-8B-Instruct")
        .load()
        .unwrap();

    // model.local_model_path can now be used to load the model into the inference engine.
}
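As a rough illustration of that last comment (assuming, as it implies, that the loaded model exposes a `local_model_path` field holding a filesystem path, e.g. a `std::path::PathBuf`), the resolved path could then be passed to whatever inference engine you use:

fn main() {
    // Minimal sketch: load the GGUF file, then read back the on-disk path.
    // `local_model_path` is assumed here to be a std::path::PathBuf, per the comment above.
    let model = GgufLoader::default()
        .hf_quant_file_url("https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
        .load()
        .unwrap();

    // Hand this path to your inference engine's model-loading API.
    println!("GGUF model available at: {}", model.local_model_path.display());
}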