// This file defines protos that store the results of autotuning various
// operations.
//
// They are in proto format because we want to log them structured. They offer
// tremendous statistical, testing, and debugging value.
syntax = "proto3";

package xla;

import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "tsl/protobuf/dnn.proto";

// A three-part (major.minor.patch) version number. Recorded in
// AutotuningLog.cudnn_version.
message CudnnVersion {
  int32 major = 1;
  int32 minor = 2;
  int32 patch = 3;
}

// A two-part (major.minor) compute capability. Recorded in
// AutotuningLog.compute_capability.
message ComputeCapability {
  int32 major = 1;
  int32 minor = 2;
}

// A single autotuning result. AutotuningLog.results holds these, one per
// algorithm that was tried.
message AutotuneResult {
  // Why an algorithm was considered to have failed.
  enum FailureKind {
    UNKNOWN = 0;

    // Algorithm wrote memory outside its output buffers.
    REDZONE_MODIFIED = 1;

    // Algorithm gave a different result from a reference algorithm.
    WRONG_RESULT = 2;

    // Algorithm was rejected for failing to run or for known bugs.
    DISQUALIFIED = 3;
  }

  // Details of a failure; stored in AutotuneResult.failure.
  message FailureResult {
    // The category of the failure.
    FailureKind kind = 1;

    // NOTE(review): presumably a human-readable description of the failure —
    // confirm against the code that fills it in.
    string msg = 2;

    // For kind == WRONG_RESULT, this field indicates the reference
    // configuration that we compared against.
    //
    // Note that the reference algorithm isn't always correct. However,
    // empirically it's more correct, as it's "algo 0", less fancy than the
    // compared one.
    oneof key {
      ConvKey reference_conv = 11;
      GemmKey reference_gemm = 12;
      CudaConvPlanKey reference_cuda_conv_plan = 14;
      stream_executor.dnn.AlgorithmProto reference_algorithm = 15;
    }

    // NOTE(review): not documented in the original schema; presumably the
    // address of the buffer involved in the failure — confirm with producers.
    int64 buffer_address = 13;
  }

  // Legacy and unused in new data; superseded by AlgorithmProto.
  message ConvKey {
    int64 algorithm = 1;
    bool tensor_ops_enabled = 2;
  }

  // Key for a GEMM result: just the algorithm number.
  message GemmKey {
    int64 algorithm = 1;
  }

  // Legacy and unused in new data; superseded by AlgorithmProto.
  message CudaConvPlanKey {
    string exec_plan_id = 1;
  }

  // If you don't need a proto in your code, please use TritonGemmConfig instead
  // of using this proto directly.
  message TritonGemmKey {
    int64 block_m = 1;
    int64 block_n = 2;
    int64 block_k = 3;
    int64 split_k = 4;
    int64 num_stages = 5;
    int64 num_warps = 6;
    int64 num_ctas = 7;
  }

  // Scratch memory size, in bytes (unit taken from the field name).
  int64 scratch_bytes = 8;

  // Measured run time.
  google.protobuf.Duration run_time = 9;

  // Failure details. As a message-typed field it has explicit presence.
  // NOTE(review): presumably left unset when the run succeeded — confirm.
  FailureResult failure = 7;

  // The configuration this result belongs to; at most one member is set.
  oneof key {
    ConvKey conv = 5;
    GemmKey gemm = 6;
    TritonGemmKey triton = 17;
    CudaConvPlanKey cuda_conv_plan = 15;
    stream_executor.dnn.AlgorithmProto algorithm = 16;
  }

  // Next ID: 18
}

// A log entry that groups autotuning results with the environment they were
// collected in.
message AutotuningLog {
  // NOTE(review): presumably the instruction that was autotuned, packed as an
  // Any — confirm the concrete message type with producers.
  google.protobuf.Any instr = 1;

  // Records all auto-tuning results per algorithm.
  repeated AutotuneResult results = 2;

  CudnnVersion cudnn_version = 3;
  ComputeCapability compute_capability = 4;

  // stream_executor::DeviceDescription::pci_bus_id.
  string device_pci_bus_id = 5;

  string blas_version = 6;

  string fusion_name = 7;

  int64 fusion_count = 8;

  // Next ID: 9
}