use gguf::GgufLoader;
use llm_models::local_model::*;

fn main() {
    // Using only a local quant file path.
    let _model = GgufLoader::default()
        .local_quant_file_path("/root/.cache/huggingface/hub/models--bartowski--Meta-Llama-3.1-8B-Instruct-GGUF/blobs/9da71c45c90a821809821244d4971e5e5dfad7eb091f0b8ff0546392393b6283")
        .load()
        .unwrap();

    // By default we attempt to extract everything we need from the GGUF file.
    // If you need to specify the tokenizer or chat template to use, you can add a local_config_path.
    let _model = GgufLoader::default()
        .local_quant_file_path("/root/.cache/huggingface/hub/models--bartowski--Meta-Llama-3.1-8B-Instruct-GGUF/blobs/9da71c45c90a821809821244d4971e5e5dfad7eb091f0b8ff0546392393b6283")
        .local_config_path("/workspaces/test/llm_utils/src/models/local_model/gguf/preset/llama/llama3_1_8b_instruct/config.json")
        .load()
        .unwrap();

    // model.local_model_path can now be used to load the model into the inference engine.
}
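// Minimal sketch (an assumption, not part of this crate): "loading the model into
// the inference engine" usually just means handing the engine the on-disk GGUF path.
// `local_model_path` is assumed here to be a `PathBuf`, and `llama-server` with its
// `--model` flag stands in for whichever llama.cpp-style engine you actually run,
// e.g. `load_into_engine(&model.local_model_path).spawn()`.
fn load_into_engine(gguf_path: &std::path::Path) -> std::process::Command {
    // The engine only needs the path to the quantized GGUF file.
    let mut cmd = std::process::Command::new("llama-server");
    cmd.arg("--model").arg(gguf_path);
    cmd
}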