// A WASI interface dedicated to performing inferencing for Large Language Models.
interface llm {
    /// A Large Language Model.
    type inferencing-model = string;

    /// Inference request parameters
    record inferencing-params {
        /// The maximum tokens that should be inferred.
        ///
        /// Note: the backing implementation may return fewer tokens.
        max-tokens: u32,
        /// The amount the model should avoid repeating tokens.
        repeat-penalty: float32,
        /// The number of tokens the model should apply the repeat penalty to.
        repeat-penalty-last-n-token-count: u32,
        /// The randomness with which the next token is selected.
        temperature: float32,
        /// The number of possible next tokens the model will choose from.
        top-k: u32,
        /// The probability total of next tokens the model will choose from.
        top-p: float32
    }

    /// The set of errors which may be raised by functions in this interface
    variant error {
        model-not-supported,
        runtime-error(string),
        invalid-input(string)
    }

    /// An inferencing result
    record inferencing-result {
        /// The text generated by the model
        // TODO: this should be a stream
        text: string,
        /// Usage information about the inferencing request
        usage: inferencing-usage
    }

    /// Usage information related to the inferencing result
    record inferencing-usage {
        /// Number of tokens in the prompt
        prompt-token-count: u32,
        /// Number of tokens generated by the inferencing operation
        generated-token-count: u32
    }

    /// Perform inferencing using the provided model and prompt with the given optional params
    infer: func(model: inferencing-model, prompt: string, params: option<inferencing-params>) -> result<inferencing-result, error>;

    /// The model used for generating embeddings
    type embedding-model = string;

    /// Generate embeddings for the supplied list of text
    generate-embeddings: func(model: embedding-model, text: list<string>) -> result<embeddings-result, error>;

    /// Result of generating embeddings
    record embeddings-result {
        /// The embeddings generated by the request
        embeddings: list<list<float32>>,
        /// Usage related to the embeddings generation request
        usage: embeddings-usage
    }

    /// Usage related to an embeddings generation request
    record embeddings-usage {
        /// Number of tokens in the prompt
        prompt-token-count: u32,
    }
}
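
// Illustrative only (not part of the original interface): a minimal world
// sketch showing how a guest component might import `llm` so that generated
// bindings expose `infer` and `generate-embeddings`. The world name
// `llm-client` is an assumption made for this example.
world llm-client {
    import llm;
}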