#pragma once #include "replica_pool.h" #include "models/language_model.h" namespace ctranslate2 { // Generator is the high-level class for running generation with language models. // It supports parallel and asynchronous generation. class Generator : public ReplicaPool { public: using ReplicaPool::ReplicaPool; std::vector> generate_batch_async(const std::vector>& start_tokens, const GenerationOptions& options = GenerationOptions(), const size_t max_batch_size = 0, const BatchType batch_type = BatchType::Examples); std::vector> score_batch_async(const std::vector>& tokens, const ScoringOptions& options = ScoringOptions(), const size_t max_batch_size = 0, const BatchType batch_type = BatchType::Examples); std::future forward_batch_async(std::vector> tokens, const bool return_log_probs); std::future forward_batch_async(std::vector> ids, const bool return_log_probs); std::future forward_batch_async(StorageView ids, StorageView lengths, const bool return_log_probs); }; }