// A WASI interface dedicated to performing inferencing for Large Language Models.
interface llm {
    /// A Large Language Model.
    type inferencing-model = string;

    /// Inference request parameters
    record inferencing-params {
        /// The maximum tokens that should be inferred.
        ///
        /// Note: the backing implementation may return fewer tokens.
        max-tokens: u32,
        /// The amount the model should avoid repeating tokens.
        repeat-penalty: float32,
        /// The number of tokens the model should apply the repeat penalty to.
        repeat-penalty-last-n-token-count: u32,
        /// The randomness with which the next token is selected.
        temperature: float32,
        /// The number of possible next tokens the model will choose from.
        top-k: u32,
        /// The probability total of next tokens the model will choose from.
        top-p: float32
    }

    /// The set of errors which may be raised by functions in this interface
    variant error {
        model-not-supported,
        runtime-error(string),
        invalid-input(string)
    }

    /// An inferencing result
    record inferencing-result {
        /// The text generated by the model
        // TODO: this should be a stream
        text: string,
        /// Usage information about the inferencing request
        usage: inferencing-usage
    }

    /// Usage information related to the inferencing result
    record inferencing-usage {
        /// Number of tokens in the prompt
        prompt-token-count: u32,
        /// Number of tokens generated by the inferencing operation
        generated-token-count: u32
    }

    /// Perform inferencing using the provided model and prompt with the given optional params
    infer: func(model: inferencing-model, prompt: string, params: option<inferencing-params>) -> result<inferencing-result, error>;

    /// The model used for generating embeddings
    type embedding-model = string;

    /// Generate embeddings for the supplied list of text
    generate-embeddings: func(model: embedding-model, text: list<string>) -> result<embeddings-result, error>;

    /// Result of generating embeddings
    record embeddings-result {
        /// The embeddings generated by the request
        embeddings: list<list<float32>>,
        /// Usage related to the embeddings generation request
        usage: embeddings-usage
    }

    /// Usage related to an embeddings generation request
    record embeddings-usage {
        /// Number of tokens in the prompt
        prompt-token-count: u32,
    }
}
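
// Illustrative only (not part of the original interface): a minimal world
// sketch showing how a guest component might import `llm` so that generated
// bindings expose `infer` and `generate-embeddings`. The world name
// `llm-client` is an assumption made for this example.
world llm-client {
    import llm;
}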