## Copyright 2022 The IREE Authors # # Licensed under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """Defines the groups of models.""" from e2e_test_framework.definitions import common_definitions from e2e_test_framework.models import ( matmul, tflite_models, torch_models, tf_models, jax_models, ) # x86 models, single batch. # A list of models with thread configurations. # Note `0` represents sync execution. X86_64_BENCHMARK_CONFIG = [ # Tiny models. common_definitions.CpuBenchmarkConfig( model=tflite_models.PERSON_DETECT_INT8, threads=[0, 1] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILENET_V3SMALL, threads=[0, 1] ), # Small models. common_definitions.CpuBenchmarkConfig( model=tflite_models.DEEPLABV3_FP32, threads=[1, 8] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.EFFICIENTNET_INT8, threads=[1, 8] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILENET_V1, threads=[1, 8] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILENET_V2, threads=[1, 8] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILENET_V2_INT8, threads=[1, 8] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILESSD_FP32, threads=[1, 8] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.POSENET_FP32, threads=[1, 8] ), # Medium models. common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILEBERT_FP16, threads=[1, 15] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILEBERT_FP32, threads=[1, 15] ), common_definitions.CpuBenchmarkConfig( model=tflite_models.MOBILEBERT_INT8, threads=[1, 15] ), common_definitions.CpuBenchmarkConfig( model=tf_models.EFFICIENTNET_V2_S_FP32, threads=[1, 15] ), common_definitions.CpuBenchmarkConfig( model=tf_models.MINILM_L12_H384_UNCASED_INT32_SEQLEN128, threads=[1, 15] ), common_definitions.CpuBenchmarkConfig( model=tf_models.GPT2_117M_1x4_FP32_TF, threads=[1, 15] ), common_definitions.CpuBenchmarkConfig( model=tf_models.GPT2_117M_1x1_FP32_TF, threads=[1, 15] ), # Large models. common_definitions.CpuBenchmarkConfig( model=tf_models.BERT_FOR_MASKED_LM_FP32_SEQLEN512, threads=[30] ), common_definitions.CpuBenchmarkConfig( model=tf_models.BERT_LARGE_TF_FP32_SEQLEN384, threads=[30] ), common_definitions.CpuBenchmarkConfig( model=torch_models.BERT_LARGE_384_FP32_TORCH_BATCHES[1], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=torch_models.FALCON7B_1X100XI64_GPTQ_TORCH, threads=[30] ), common_definitions.CpuBenchmarkConfig( model=torch_models.FALCON7B_INT4_1X100XI64_GPTQ_TORCH, threads=[30] ), ] X86_64_BENCHMARK_CONFIG_LARGE = [ common_definitions.CpuBenchmarkConfig( model=jax_models.BERT_LARGE_FP32_JAX_384XI32_BATCHES[1], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.BERT_LARGE_FP32_JAX_384XI32_BATCHES[32], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.BERT_LARGE_FP32_JAX_384XI32_BATCHES[64], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=torch_models.BERT_LARGE_384_FP32_TORCH_BATCHES[24], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=torch_models.BERT_LARGE_384_FP32_TORCH_BATCHES[48], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.RESNET50_FP32_JAX_3X224X224XF32_BATCHES[1], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.RESNET50_FP32_JAX_3X224X224XF32_BATCHES[64], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.RESNET50_FP32_JAX_3X224X224XF32_BATCHES[128], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.T5_LARGE_FP32_JAX_512XI32_BATCHES[1], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.T5_LARGE_FP32_JAX_512XI32_BATCHES[16], threads=[30] ), common_definitions.CpuBenchmarkConfig( model=jax_models.T5_LARGE_FP32_JAX_512XI32_BATCHES[32], threads=[30] ), ] # Microkernels. MICRO_MATMUL = [ matmul.MATMUL_3456X1024X2048_FP16_MLIR, matmul.MATMUL_3456X1024X2048_FP32_MLIR, matmul.MATMUL_2560X2560X2560_FP16_MLIR, matmul.MATMUL_2560X2560X2560_FP32_MLIR, matmul.MATMUL_2564x2564x2564_FP32_MLIR, matmul.MATMUL_2562x2564x2562_FP32_MLIR, matmul.MATMUL_2562x2561x2561_FP32_MLIR, matmul.MATMUL_123x2561x2561_FP32_MLIR, ] MICRO_MATMUL_SPLITK = [ matmul.MATMUL_128X256X8192_FP16_MLIR, matmul.MATMUL_128X256X8192_FP32_MLIR, ] # GPU model groups. CUDA_MODELS = [ tf_models.EFFICIENTNET_V2_S_FP32, tf_models.MINILM_L12_H384_UNCASED_INT32_SEQLEN128, tf_models.BERT_FOR_MASKED_LM_FP32_SEQLEN512, tf_models.BERT_LARGE_TF_FP32_SEQLEN384, # PyTorch model are disabled due to https://github.com/openxla/iree/issues/14993. # torch_models.MODEL_CLIP_TEXT_SEQLEN64_FP32_TORCH, # torch_models.MODEL_UNET_2D_FP32_TORCH, ] VULKAN_MODELS = [ # PyTorch model are disabled due to https://github.com/openxla/iree/issues/14993. # torch_models.MODEL_CLIP_TEXT_SEQLEN64_FP32_TORCH, # torch_models.MODEL_UNET_2D_FP32_TORCH, ]