# Notes:
# - do not submit pull requests to add new models; this list will be updated in batches with new releases.

# Links:
# - https://platform.openai.com/docs/models
# - https://openai.com/pricing
# - https://platform.openai.com/docs/api-reference/chat
- platform: openai
  models:
    - name: gpt-4o
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-2024-05-13
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: chatgpt-4o-latest
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-mini
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    - name: o1-preview
      max_input_tokens: 128000
      max_output_tokens: 32768
      input_price: 15
      output_price: 60
      no_stream: true
      no_system_message: true
    - name: o1-mini
      max_input_tokens: 128000
      max_output_tokens: 65536
      input_price: 3
      output_price: 12
      no_stream: true
      no_system_message: true
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: text-embedding-3-large
      type: embedding
      input_price: 0.13
      max_tokens_per_chunk: 8191
      default_chunk_size: 3000
      max_batch_size: 100
    - name: text-embedding-3-small
      type: embedding
      input_price: 0.02
      max_tokens_per_chunk: 8191
      default_chunk_size: 3000
      max_batch_size: 100

# Links:
# - https://ai.google.dev/models/gemini
# - https://ai.google.dev/pricing
# - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
- platform: gemini
  models:
    - name: gemini-1.5-pro-latest
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-8b-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.0-pro-latest
      max_input_tokens: 30720
      max_output_tokens: 2048
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: text-embedding-004
      type: embedding
      input_price: 0
      output_price: 0
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 100

# Links:
# - https://docs.anthropic.com/claude/docs/models-overview
# - https://docs.anthropic.com/claude/reference/messages-streaming
- platform: claude
  models:
    - name: claude-3-5-sonnet-latest
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true

# Links:
# - https://docs.mistral.ai/getting-started/models/models_overview/
# - https://mistral.ai/technology/#pricing
# - https://docs.mistral.ai/api/
- platform: mistral
  models:
    - name: mistral-large-latest
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistral-small-latest
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
      supports_function_calling: true
    - name: codestral-latest
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
    - name: ministral-8b-latest
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
      supports_function_calling: true
    - name: ministral-3b-latest
      max_input_tokens: 128000
      input_price: 0.04
      output_price: 0.04
      supports_function_calling: true
    - name: open-mistral-nemo
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_function_calling: true
    - name: open-codestral-mamba
      max_input_tokens: 256000
      input_price: 0.25
      output_price: 0.25
    - name: pixtral-12b-2409
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_vision: true
    - name: mistral-embed
      type: embedding
      max_input_tokens: 8092
      input_price: 0.1
      max_tokens_per_chunk: 8092
      default_chunk_size: 2000

# Links:
# - https://docs.ai21.com/docs/jamba-15-models
# - https://www.ai21.com/pricing
# - https://docs.ai21.com/reference/jamba-15-api-ref
- platform: ai21
  models:
    - name: jamba-1.5-large
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: jamba-1.5-mini
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true

# Links:
# - https://docs.cohere.com/docs/command-r-plus
# - https://cohere.com/pricing
# - https://docs.cohere.com/reference/chat
- platform: cohere
  models:
    - name: command-r-plus-08-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: command-r-plus
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: command-r-08-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    - name: command-r
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    - name: embed-english-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: embed-multilingual-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: rerank-english-v3.0
      type: reranker
      max_input_tokens: 4096
    - name: rerank-multilingual-v3.0
      type: reranker
      max_input_tokens: 4096

# Links:
# - https://docs.x.ai/docs#models
# - https://cohere.com/pricing
# - https://docs.x.ai/api/endpoints#chat-completions
- platform: xai
  models:
    - name: grok-beta
      max_input_tokens: 131072
      input_price: 5
      output_price: 10

# Links:
# - https://docs.perplexity.ai/guides/model-cards
# - https://docs.perplexity.ai/guides/pricing
# - https://docs.perplexity.ai/api-reference/chat-completions
- platform: perplexity
  models:
    - name: llama-3.1-sonar-huge-128k-online
      max_input_tokens: 127072
      input_price: 5
      output_price: 5
    - name: llama-3.1-sonar-large-128k-online
      max_input_tokens: 127072
      input_price: 1
      output_price: 1
    - name: llama-3.1-sonar-large-128k-chat
      max_input_tokens: 131072
      input_price: 1
      output_price: 1
    - name: llama-3.1-sonar-small-128k-online
      max_input_tokens: 127072
      input_price: 0.2
      output_price: 0.2
    - name: llama-3.1-sonar-small-128k-chat
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
    - name: llama-3.1-70b-instruct
      max_input_tokens: 131072
      input_price: 1
      output_price: 1
    - name: llama-3.1-8b-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2

# Links:
# - https://console.groq.com/docs/models
# - https://console.groq.com/docs/api-reference#chat
- platform: groq
  models:
    - name: llama-3.1-70b-versatile
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: llama-3.1-8b-instant
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: llama-3.2-90b-vision-preview
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: llama-3.2-11b-vision-preview
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: llama-3.2-3b-preview
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
    - name: llama-3.2-1b-preview
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
    - name: gemma2-9b-it
      max_input_tokens: 8192
      input_price: 0
      output_price: 0
      supports_function_calling: true

# Links:
# - https://ollama.com/library
# - https://github.com/ollama/ollama/blob/main/docs/openai.md
- platform: ollama
  models:
    - name: llama3.1
      max_input_tokens: 128000
      supports_function_calling: true
    - name: llama3.2
      max_input_tokens: 128000
      supports_function_calling: true
    - name: gemma2
      max_input_tokens: 8192
    - name: qwen2.5
      max_input_tokens: 128000
      supports_function_calling: true
    - name: phi3.5
      max_input_tokens: 128000
    - name: nemotron-mini
      max_input_tokens: 128000
      supports_function_calling: true
    - name: mistral-small
      max_input_tokens: 128000
      supports_function_calling: true
    - name: deepseek-coder-v2
      max_input_tokens: 32768
    - name: nomic-embed-text
      type: embedding
      max_tokens_per_chunk: 8192
      default_chunk_size: 1000
      max_batch_size: 50

# Links:
# - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models
# - https://cloud.google.com/vertex-ai/generative-ai/pricing
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
- platform: vertexai
  models:
    - name: gemini-1.5-pro-002
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 1.25
      output_price: 3.75
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-002
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0.01875
      output_price: 0.075
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.0-pro-002
      max_input_tokens: 24568
      max_output_tokens: 8192
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
    - name: claude-3-5-sonnet-v2@20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-sonnet@20240620
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-opus@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku@20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: mistral-large@2407
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistral-nemo@2407
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_function_calling: true
    - name: codestral@2405
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
    - name: text-embedding-004
      type: embedding
      max_input_tokens: 20000
      input_price: 0.025
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 5
    - name: text-multilingual-embedding-002
      type: embedding
      max_input_tokens: 20000
      input_price: 0.2
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 5

# Links:
# - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
# - https://aws.amazon.com/bedrock/pricing/
# - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
# - https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html
- platform: bedrock
  models:
    - name: anthropic.claude-3-5-sonnet-20241022-v2:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-opus-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-sonnet-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-haiku-20240307-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: meta.llama3-1-405b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 5.32
      output_price: 16
      supports_function_calling: true
    - name: meta.llama3-1-70b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.99
      output_price: 0.99
      supports_function_calling: true
    - name: meta.llama3-1-8b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.22
      output_price: 0.22
      supports_function_calling: true
    - name: us.meta.llama3-2-90b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 2
      output_price: 2
      supports_function_calling: true
      supports_vision: true
    - name: us.meta.llama3-2-11b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.35
      output_price: 0.35
      supports_function_calling: true
      supports_vision: true
    - name: us.meta.llama3-2-3b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.15
      output_price: 0.15
    - name: us.meta.llama3-2-1b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.1
      output_price: 0.1
    - name: mistral.mistral-large-2407-v1:0
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: cohere.command-r-plus-v1:0
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
      supports_function_calling: true
    - name: cohere.command-r-v1:0
      max_input_tokens: 128000
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: cohere.embed-english-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: cohere.embed-multilingual-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: ai21.jamba-1-5-large-v1:0
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: ai21.jamba-1-5-mini-v1:0
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true

# Links:
# - https://developers.cloudflare.com/workers-ai/models/
# - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
- platform: cloudflare
  models:
    - name: '@cf/meta/llama-3.1-70b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.1-8b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.2-11b-vision-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.2-3b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.2-1b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/baai/bge-large-en-v1.5'
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
# - https://huggingface.co/models?other=text-generation-inference
# - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
- platform: huggingface
  models:
    - name: NousResearch/Hermes-3-Llama-3.1-8B
      max_input_tokens: 8192
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: mistralai/Mistral-Small-Instruct-2409
      max_input_tokens: 128000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: mistralai/Mistral-Nemo-Instruct-2407
      max_input_tokens: 128000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0
      output_price: 0

# Links:
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
- platform: ernie
  models:
    - name: ernie-4.0-turbo-8k-preview
      max_input_tokens: 8192
      input_price: 2.8
      output_price: 8.4
      supports_function_calling: true
    - name: ernie-4.0-8k-preview
      max_input_tokens: 8192
      input_price: 4.2
      output_price: 12.6
      supports_function_calling: true
    - name: ernie-3.5-8k-preview
      max_input_tokens: 8192
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: ernie-speed-pro-128k
      max_input_tokens: 128000
      input_price: 0.042
      output_price: 0.084
    - name: bge_large_zh
      type: embedding
      input_price: 0.07
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 16
    - name: bge_large_en
      type: embedding
      input_price: 0.07
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 16
    - name: bce_reranker_base
      type: reranker
      max_input_tokens: 1024
      input_price: 0.07

# Links:
# - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
# - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api
- platform: qianwen
  models:
    - name: qwen-max-latest
      max_input_tokens: 30720
      max_output_tokens: 8192
      input_price: 2.8
      output_price: 8.4
      supports_function_calling: true
    - name: qwen-plus-latest
      max_input_tokens: 128000
      max_output_tokens: 8192
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: qwen-turbo-latest
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.042
      output_price: 0.084
      supports_function_calling: true
    - name: qwen-coder-turbo-latest
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.28
      output_price: 0.84
      supports_function_calling: true
    - name: qwen-long
      max_input_tokens: 1000000
      input_price: 0.07
      output_price: 0.28
    - name: qwen-vl-max
      input_price: 2.8
      output_price: 2.8
      supports_vision: true
    - name: qwen-vl-plus
      input_price: 1.12
      output_price: 1.12
      supports_vision: true
    - name: text-embedding-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 6
    - name: text-embedding-v2
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 2048
      default_chunk_size: 2000
      max_batch_size: 25

# Links:
# - https://platform.moonshot.cn/docs/intro
# - https://platform.moonshot.cn/docs/pricing/chat
# - https://platform.moonshot.cn/docs/api/chat
- platform: moonshot
  models:
    - name: moonshot-v1-8k
      max_input_tokens: 8000
      input_price: 1.68
      output_price: 1.68
      supports_function_calling: true
    - name: moonshot-v1-32k
      max_input_tokens: 32000
      input_price: 3.36
      output_price: 3.36
      supports_function_calling: true
    - name: moonshot-v1-128k
      max_input_tokens: 128000
      input_price: 8.4
      output_price: 8.4
      supports_function_calling: true

# Links:
# - https://api-docs.deepseek.com/quick_start/pricing
# - https://platform.deepseek.com/api-docs/api/create-chat-completion
- platform: deepseek
  models:
    - name: deepseek-chat
      max_input_tokens: 32768
      max_output_tokens: 4096
      input_price: 0.14
      output_price: 0.28
      supports_function_calling: true

# Links:
# - https://open.bigmodel.cn/dev/howuse/model
# - https://open.bigmodel.cn/pricing
# - https://open.bigmodel.cn/dev/api#glm-4
- platform: zhipuai
  models:
    - name: glm-4-plus
      max_input_tokens: 128000
      input_price: 7
      output_price: 7
      supports_function_calling: true
    - name: glm-4-alltools
      max_input_tokens: 128000
      input_price: 14
      output_price: 14
      supports_function_calling: true
    - name: glm-4-0520
      max_input_tokens: 128000
      input_price: 14
      output_price: 14
      supports_function_calling: true
    - name: glm-4-long
      max_input_tokens: 1000000
      input_price: 0.14
      output_price: 0.14
      supports_function_calling: true
    - name: glm-4-flash
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: glm-4v-plus
      max_input_tokens: 8192
      input_price: 1.4
      output_price: 1.4
      supports_vision: true
    - name: embedding-3
      type: embedding
      max_input_tokens: 8192
      input_price: 0.07
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000

# Links:
# - https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9
# - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion
- platform: lingyiwanwu
  models:
    - name: yi-large
      max_input_tokens: 32768
      input_price: 2.8
      output_price: 2.8
    - name: yi-large-fc
      max_input_tokens: 32768
      input_price: 2.8
      output_price: 2.8
      supports_function_calling: true
    - name: yi-large-rag
      max_input_tokens: 16384
      input_price: 3.5
      output_price: 3.5
    - name: yi-large-turbo
      max_input_tokens: 16384
      input_price: 1.68
      output_price: 1.68
    - name: yi-medium-200k
      max_input_tokens: 200000
      input_price: 1.68
      output_price: 1.68
    - name: yi-lightning
      max_input_tokens: 16384
      input_price: 0.14
      output_price: 0.14
    - name: yi-vision
      max_input_tokens: 16384
      input_price: 0.84
      output_price: 0.84
      supports_vision: true

# Links:
# - https://github.com/marketplace/models
- platform: github
  models:
    - name: gpt-4o
      max_input_tokens: 128000
      supports_function_calling: true
    - name: gpt-4o-mini
      max_input_tokens: 128000
      supports_function_calling: true
    - name: text-embedding-3-large
      type: embedding
      max_tokens_per_chunk: 8191
      default_chunk_size: 3000
      max_batch_size: 100
    - name: text-embedding-3-small
      type: embedding
      max_tokens_per_chunk: 8191
      default_chunk_size: 3000
      max_batch_size: 100
    - name: meta-llama-3.1-405b-instruct
      max_input_tokens: 128000
    - name: meta-llama-3.1-70b-instruct
      max_input_tokens: 128000
    - name: meta-llama-3.1-8b-instruct
      max_input_tokens: 128000
    - name: meta-llama-3.2-90b-vision-instruct
      max_input_tokens: 8192
      supports_vision: true
    - name: meta-llama-3.2-11b-vision-instruct
      max_input_tokens: 8192
      supports_vision: true
    - name: mistral-large-2407
      max_input_tokens: 128000
      supports_function_calling: true
    - name: mistral-nemo
      max_input_tokens: 128000
      supports_function_calling: true
    - name: cohere-command-r-plus
      max_input_tokens: 128000
      supports_function_calling: true
    - name: cohere-command-r
      max_input_tokens: 128000
      supports_function_calling: true
    - name: cohere-embed-v3-english
      type: embedding
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: cohere-embed-v3-multilingual
      type: embedding
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: ai21-jamba-1.5-large
      max_input_tokens: 256000
      supports_function_calling: true
    - name: ai21-jamba-1.5-mini
      max_input_tokens: 256000
      supports_function_calling: true
    - name: phi-3.5-moe-instruct
      max_input_tokens: 128000
    - name: phi-3.5-mini-instruct
      max_input_tokens: 128000
    - name: phi-3.5-vision-instruct
      max_input_tokens: 128000
      supports_vision: true

# Links:
# - https://deepinfra.com/models
# - https://deepinfra.com/pricing
- platform: deepinfra
  models:
    - name: meta-llama/Meta-Llama-3.1-405B-Instruct
      max_input_tokens: 32000
      input_price: 1.79
      output_price: 1.79
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
      max_input_tokens: 128000
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
      max_input_tokens: 128000
      input_price: 0.055
      output_price: 0.055
      supports_function_calling: true
    - name: meta-llama/Llama-3.2-90B-Vision-Instruct
      max_input_tokens: 128000
      input_price: 0.35
      output_price: 0.4
    - name: meta-llama/Llama-3.2-11B-Vision-Instruct
      max_input_tokens: 128000
      input_price: 0.055
      output_price: 0.055
    - name: meta-llama/Llama-3.2-3B-Instruct
      max_input_tokens: 128000
      input_price: 0.03
      output_price: 0.05
    - name: meta-llama/Llama-3.2-1B-Instruct
      max_input_tokens: 128000
      input_price: 0.01
      output_price: 0.02
    - name: mistralai/Mistral-Nemo-Instruct-2407
      max_input_tokens: 128000
      input_price: 0.13
      output_price: 0.13
    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
      input_price: 0.27
      output_price: 0.27
    - name: google/gemma-2-9b-it
      max_input_tokens: 8192
      input_price: 0.06
      output_price: 0.06
    - name: Qwen/Qwen2.5-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.35
      output_price: 0.40
      supports_function_calling: true
    - name: nvidia/Llama-3.1-Nemotron-70B-Instruct
      max_input_tokens: 128000
      input_price: 0.35
      output_price: 0.40
      supports_function_calling: true
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-m3
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 100
    - name: intfloat/e5-large-v2
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: intfloat/multilingual-e5-large
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-large
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
# - https://fireworks.ai/models
# - https://fireworks.ai/pricing
- platform: fireworks
  models:
    - name: accounts/fireworks/models/llama-v3p1-405b-instruct
      max_input_tokens: 131072
      input_price: 3
      output_price: 3
    - name: accounts/fireworks/models/llama-v3p1-70b-instruct
      max_input_tokens: 131072
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/llama-v3p1-8b-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.9
      output_price: 0.9
      supports_vision: true
    - name: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/llama-v3p2-3b-instruct
      max_input_tokens: 131072
      input_price: 0.1
      output_price: 0.1
    - name: accounts/fireworks/models/llama-v3p2-1b-instruct
      max_input_tokens: 131072
      input_price: 0.1
      output_price: 0.1
    - name: accounts/fireworks/models/gemma2-9b-it
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/qwen2p5-72b-instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/phi-3-vision-128k-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/firellava-13b
      max_input_tokens: 4096
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/firefunction-v2
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
      supports_function_calling: true
    - name: nomic-ai/nomic-embed-text-v1.5
      type: embedding
      input_price: 0.008
      max_tokens_per_chunk: 8192
      default_chunk_size: 1500
      max_batch_size: 100
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-large
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
# - https://openrouter.ai/docs#models
- platform: openrouter
  models:
    - name: openai/gpt-4o
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4o-2024-05-13
      max_input_tokens: 128000
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: openai/chatgpt-4o-latest
      max_input_tokens: 128000
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4o-mini
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4-turbo
      max_input_tokens: 128000
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    - name: openai/o1-preview
      max_input_tokens: 128000
      input_price: 15
      output_price: 60
      no_stream: true
      no_system_message: true
    - name: openai/o1-mini
      max_input_tokens: 128000
      input_price: 3
      output_price: 12
      no_stream: true
      no_system_message: true
    - name: openai/gpt-3.5-turbo
      max_input_tokens: 16385
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: google/gemini-pro-1.5
      max_input_tokens: 4000000
      input_price: 2.5
      output_price: 7.5
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-pro-1.5-exp
      max_input_tokens: 4000000
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-flash-1.5
      max_input_tokens: 4000000
      input_price: 0.0375
      output_price: 0.15
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-flash-1.5-exp
      max_input_tokens: 4000000
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-flash-8b-1.5-exp
      max_input_tokens: 4000000
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-pro
      max_input_tokens: 131040
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
    - name: google/gemma-2-27b-it
      max_input_tokens: 2800000
      input_price: 0.27
      output_price: 0.27
    - name: google/gemma-2-9b-it
      max_input_tokens: 8192
      input_price: 0.06
      output_price: 0.06
    - name: anthropic/claude-3.5-sonnet
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-opus
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-sonnet
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-haiku
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: meta-llama/llama-3.1-405b-instruct
      max_input_tokens: 131072
      input_price: 3
      output_price: 3
      supports_function_calling: true
    - name: meta-llama/llama-3.1-70b-instruct
      max_input_tokens: 131072
      input_price: 0.75
      output_price: 0.75
      supports_function_calling: true
    - name: meta-llama/llama-3.1-8b-instruct
      max_input_tokens: 131072
      input_price: 0.09
      output_price: 0.09
    - name: meta-llama/llama-3.2-90b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.35
      output_price: 0.4
      supports_vision: true
    - name: meta-llama/llama-3.2-11b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.055
      output_price: 0.055
      supports_vision: true
    - name: meta-llama/llama-3.2-3b-instruct
      max_input_tokens: 131072
      input_price: 0.03
      output_price: 0.05
    - name: meta-llama/llama-3.2-1b-instruct
      max_input_tokens: 131072
      input_price: 0.01
      output_price: 0.02
    - name: mistralai/mistral-large
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistralai/mistral-small
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
      supports_function_calling: true
    - name: mistralai/ministral-8b
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
      supports_function_calling: true
    - name: mistralai/ministral-3b
      max_input_tokens: 128000
      input_price: 0.04
      output_price: 0.04
      supports_function_calling: true
    - name: mistralai/mistral-nemo
      max_input_tokens: 128000
      input_price: 0.13
      output_price: 0.13
      supports_function_calling: true
    - name: mistralai/codestral-mamba
      max_input_tokens: 256000
      input_price: 0.25
      output_price: 0.25
    - name: mistralai/pixtral-12b
      max_input_tokens: 4096
      input_price: 0.1
      output_price: 0.1
      supports_vision: true
    - name: ai21/jamba-1-5-large
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: ai21/jamba-1-5-mini
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true
    - name: cohere/command-r-plus-08-2024
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: cohere/command-r-plus
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: cohere/command-r-08-2024
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    - name: cohere/command-r
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    - name: deepseek/deepseek-chat
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28
      supports_function_calling: true
    - name: perplexity/llama-3.1-sonar-huge-128k-online
      max_input_tokens: 127072
      input_price: 5
      output_price: 5
    - name: perplexity/llama-3.1-sonar-large-128k-online
      max_input_tokens: 127072
      input_price: 1
      output_price: 1
    - name: perplexity/llama-3.1-sonar-large-128k-chat
      max_input_tokens: 131072
      input_price: 1
      output_price: 1
    - name: perplexity/llama-3.1-sonar-small-128k-online
      max_input_tokens: 127072
      input_price: 0.2
      output_price: 0.2
    - name: perplexity/llama-3.1-sonar-small-128k-chat
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
    - name: 01-ai/yi-large
      max_input_tokens: 32768
      input_price: 3
      output_price: 3
    - name: 01-ai/yi-large-fc
      max_input_tokens: 16384
      input_price: 3
      output_price: 3
      supports_function_calling: true
    - name: 01-ai/yi-vision
      max_input_tokens: 4096
      input_price: 0.84
      output_price: 0.84
      supports_vision: true
    - name: microsoft/phi-3.5-mini-128k-instruct
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
    - name: microsoft/phi-3-medium-128k-instruct
      max_input_tokens: 128000
      input_price: 1
      output_price: 1
    - name: microsoft/phi-3-mini-128k-instruct
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
    - name: qwen/qwen-2.5-72b-instruct
      max_input_tokens: 131072
      input_price: 0.35
      output_price: 0.4
    - name: qwen/qwen-2-vl-72b-instruct
      max_input_tokens: 32768
      input_price: 0.4
      output_price: 0.4
    - name: nvidia/llama-3.1-nemotron-70b-instruct
      max_input_tokens: 131072
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    - name: x-ai/grok-beta
      max_input_tokens: 32768
      input_price: 5
      output_price: 10

# Links:
# - https://siliconflow.cn/zh-cn/pricing#siliconcloud-1417
# - https://docs.siliconflow.cn/api-reference/chat-completions/chat-completions
- platform: siliconflow
  models:
    - name: meta-llama/Meta-Llama-3.1-405B-Instruct
      max_input_tokens: 32768
      input_price: 2.94
      output_price: 2.94
    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
      max_input_tokens: 32768
      input_price: 0.578
      output_price: 0.578
    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
      max_input_tokens: 32768
      input_price: 0
      output_price: 0
    - name: Qwen/Qwen2.5-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.578
      output_price: 0.578
      supports_function_calling: true
    - name: Qwen/Qwen2.5-7B-Instruct
      max_input_tokens: 32768
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: Qwen/Qwen2.5-Coder-7B-Instruct
      max_input_tokens: 32768
      input_price: 0
      output_price: 0
    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
      input_price: 0.176
      output_price: 0.176
    - name: google/gemma-2-9b-it
      max_input_tokens: 8192
      input_price: 0
      output_price: 0
    - name: deepseek-ai/DeepSeek-V2.5
      max_input_tokens: 32768
      input_price: 0.186
      output_price: 0.186
      supports_function_calling: true
    - name: nvidia/Llama-3.1-Nemotron-70B-Instruct
      max_input_tokens: 32768
      input_price: 0.578
      output_price: 0.578
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-large-zh-v1.5
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-m3
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 100
    - name: BAAI/bge-reranker-v2-m3
      type: reranker
      max_input_tokens: 8192
      input_price: 0

# Links:
# - https://docs.together.ai/docs/inference-models
# - https://docs.together.ai/docs/embedding-models
# - https://www.together.ai/pricing
- platform: together
  models:
    - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 3.5
      output_price: 3.5
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 0.88
      output_price: 0.88
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 0.18
      output_price: 0.18
      supports_function_calling: true
    - name: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
      max_input_tokens: 131072
      input_price: 0.88
      output_price: 0.88
      supports_vision: true
    - name: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
      max_input_tokens: 131072
      input_price: 0.18
      output_price: 0.18
      supports_vision: true
    - name: meta-llama/Llama-3.2-3B-Instruct-Turbo
      max_input_tokens: 131072
      input_price: 0.06
      output_price: 0.06
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: Salesforce/Llama-Rank-V1
      type: reranker
      max_input_tokens: 8192
      input_price: 0.1

# Links:
# - https://jina.ai/
# - https://api.jina.ai/redoc
- platform: jina
  models:
    - name: jina-embeddings-v3
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 100
    - name: jina-colbert-v2
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 1500
      max_batch_size: 100
    - name: jina-clip-v1
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 1500
      max_batch_size: 100
    - name: jina-colbert-v2
      type: reranker
      max_input_tokens: 8192
      input_price: 0
    - name: jina-reranker-v2-base-multilingual
      type: reranker
      max_input_tokens: 8192
      input_price: 0

# Links:
# - https://docs.voyageai.com/docs/embeddings
# - https://docs.voyageai.com/docs/pricing
# - https://docs.voyageai.com/reference/
- platform: voyageai
  models:
    - name: voyage-3
      type: embedding
      max_input_tokens: 320000
      input_price: 0.06
      max_tokens_per_chunk: 32000
      default_chunk_size: 2000
      max_batch_size: 128
    - name: voyage-3-lite
      type: embedding
      max_input_tokens: 1000000
      input_price: 0.02
      max_tokens_per_chunk: 32000
      default_chunk_size: 1000
      max_batch_size: 128
    - name: voyage-multilingual-2
      type: embedding
      max_input_tokens: 120000
      input_price: 0.12
      max_tokens_per_chunk: 32000
      default_chunk_size: 2000
      max_batch_size: 128
    - name: voyage-code-2
      type: embedding
      max_input_tokens: 120000
      input_price: 0.12
      max_tokens_per_chunk: 16000
      default_chunk_size: 3000
      max_batch_size: 128
    - name: rerank-2
      type: reranker
      max_input_tokens: 16000
      input_price: 0.05
    - name: rerank-2-lite
      type: reranker
      max_input_tokens: 8000
      input_price: 0.02
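
# The entries above share one schema: chat models carry token limits, per-million-token
# prices, and capability flags (supports_vision, supports_function_calling, no_stream,
# no_system_message, require_max_tokens), while `type: embedding` / `type: reranker`
# entries add chunking and batching fields. As a rough illustration only (not part of
# any tool shipped with this list), a script along these lines could load the file and
# look up a model's limits; the file name `models.yaml`, the PyYAML dependency, and the
# `find_model` helper are all assumptions for the sketch:
#
#   import yaml
#
#   with open("models.yaml") as f:
#       platforms = yaml.safe_load(f)  # -> list of {platform, models} mappings
#
#   def find_model(platform: str, name: str):
#       # Hypothetical helper: return the config dict for platform/name, or None.
#       for entry in platforms:
#           if entry["platform"] == platform:
#               for model in entry["models"]:
#                   if model["name"] == name:
#                       return model
#       return None
#
#   m = find_model("openai", "gpt-4o")
#   if m:
#       print(m["max_input_tokens"], m.get("supports_vision", False))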