Crates.io | llm_prompt |
lib.rs | llm_prompt |
version | 0.0.1 |
source | src |
created_at | 2024-10-10 16:56:11.603505 |
updated_at | 2024-10-10 16:56:11.603505 |
description | LLM Prompting |
homepage | https://github.com/shelbyJenkins/llm_client |
repository | https://github.com/shelbyJenkins/llm_client |
max_upload_size | |
id | 1404045 |
size | 45,701 |
This crate is part of the llm_client crate.
use std::collections::HashMap;

use llm_prompt::*;
// OpenAI format: configured with the model's per-message and per-name token values plus its tokenizer
let tokenizer = model.model_base.tokenizer.clone();
let prompt: LlmPrompt = LlmPrompt::new_openai_prompt(
    Some(model.tokens_per_message),
    Some(model.tokens_per_name),
    tokenizer,
);
// Chat Template format: configured with the model's template string, its special tokens,
// an optional base generation prefix, and its tokenizer
let template = &model.chat_template;
let prompt: LlmPrompt = LlmPrompt::new_chat_template_prompt(
    &template.chat_template,
    &template.bos_token,
    &template.eos_token,
    template.unk_token.as_deref(),
    template.base_generation_prefix.as_deref(),
    model.model_base.tokenizer.clone(),
);
// There are three types of 'messages': system, user, and assistant.
// Each add_*_message() call below is chained directly into a content setter.
// System messages
prompt.add_system_message().set_content("You are a nice robot");
// User messages
prompt.add_user_message().set_content("Hello");
// Assistant messages (LLM responses)
prompt.add_assistant_message().set_content("Well how do you do?");
// All message types share the same functions; see prompting::PromptMessage for more.
// Besides set_content, content can also be appended or prepended:
prompt.add_system_message().append_content(final_rule_set);
prompt.add_system_message().prepend_content(starting_rule_set);
// Set a generation prefix: the LLM continues the response from this text,
// e.g. completing it as 'Don't you think that is... cool?'
// Only the Chat Template format supports this.
prompt.set_generation_prefix("Don't you think that is...");
// Get total tokens in prompt
let total_prompt_tokens: u32 = prompt.get_total_prompt_tokens();
// Get the chat-template-formatted prompt, as a string and as tokens
let chat_template_prompt: String = prompt.get_built_prompt_string();
let chat_template_prompt_as_tokens: Vec<u32> = prompt.get_built_prompt_as_tokens();
// Get the OpenAI-formatted prompt (the format used for both OpenAI and Anthropic)
let openai_prompt: Vec<HashMap<String, String>> = prompt.get_built_prompt_hashmap();
// Validate the max_tokens requested for a generation.
// If it exceeds the model's limits, max_tokens is reduced to a safe value.
// If using a GGUF model use either model.context_length or the ctx_size of the server
let output_limit = model.max_tokens_output;
// Safety tokens
let safety_tokens = 10;
let actual_request_tokens = check_and_get_max_tokens(
    model.context_length,
    output_limit,
    total_prompt_tokens,
    safety_tokens,
    requested_max_tokens,
)?;
`LlmPrompt` requires a tokenizer. You can use the `llm_models` crate's tokenizer, or implement the `PromptTokenizer` trait on your own tokenizer:
// Implements the PromptTokenizer trait for LlmTokenizer so it can serve as the
// tokenizer that LlmPrompt requires.
// NOTE(review): both methods forward to a same-named method on `self`. This presumably
// resolves to inherent LlmTokenizer methods (Rust prefers inherent methods over trait
// methods); if no such inherent methods exist, these calls would recurse into
// themselves — confirm against the LlmTokenizer definition.
impl PromptTokenizer for LlmTokenizer {
// Forwards `input` to self.tokenize and returns the resulting Vec<u32>.
fn tokenize(&self, input: &str) -> Vec<u32> {
self.tokenize(input)
}
// Forwards `str` to self.count_tokens and returns the resulting u32 count.
// Note: the parameter is named `str`, which shadows nothing at runtime but reads
// like the primitive type name.
fn count_tokens(&self, str: &str) -> u32 {
self.count_tokens(str)
}
}