// This file defines protos that store the results of autotuning various
// operations.
//
// They are in proto format because we want to log them in a structured form.
// They offer tremendous statistical, testing, and debugging value.
syntax = "proto3";

package xla;

import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "tsl/protobuf/dnn.proto";

// A three-component version number (major.minor.patch).
//
// Recorded in AutotuningLog.cudnn_version.
message CudnnVersion {
  int32 major = 1;
  int32 minor = 2;
  int32 patch = 3;
}

// A two-component (major.minor) compute capability.
//
// Recorded in AutotuningLog.compute_capability.
// NOTE(review): presumably the compute capability of the GPU the autotuning
// ran on -- confirm against the code that fills it in.
message ComputeCapability {
  int32 major = 1;
  int32 minor = 2;
}

// The outcome of autotuning one candidate configuration.
//
// The `key` oneof identifies the candidate; the remaining fields describe what
// was recorded for it, including failure details if any.
message AutotuneResult {
  // Why a candidate was rejected.
  enum FailureKind {
    // Zero value; this is what `kind` reads as when it was never set.
    UNKNOWN = 0;

    // Algorithm wrote memory outside its output buffers.
    REDZONE_MODIFIED = 1;

    // Algorithm gave a different result from a reference algorithm.
    WRONG_RESULT = 2;

    // Algorithm was rejected for failing to run or for known bugs.
    DISQUALIFIED = 3;
  }

  // Details about a failed candidate.
  message FailureResult {
    // Category of the failure.
    FailureKind kind = 1;

    // NOTE(review): presumably a free-form, human-readable description of the
    // failure -- confirm.
    string msg = 2;

    // For kind == WRONG_RESULT, this field indicates the reference
    // configuration that we compared against.
    //
    // Note that the reference algorithm isn't always correct.  However,
    // empirically it's more correct, as it's "algo 0", less fancy than the
    // compared one.
    oneof key {
      ConvKey reference_conv = 11;
      GemmKey reference_gemm = 12;
      CudaConvPlanKey reference_cuda_conv_plan = 14;
      stream_executor.dnn.AlgorithmProto reference_algorithm = 15;
    }

    // NOTE(review): presumably the address of the buffer involved in the
    // failure (e.g. for REDZONE_MODIFIED) -- confirm against the writer.
    int64 buffer_address = 13;
  }

  // Legacy and unused in new data; superseded by AlgorithmProto.
  message ConvKey {
    int64 algorithm = 1;
    bool tensor_ops_enabled = 2;
  }

  // Identifies a GEMM candidate by its algorithm number.
  message GemmKey {
    int64 algorithm = 1;
  }

  // Legacy and unused in new data; superseded by AlgorithmProto.
  message CudaConvPlanKey {
    string exec_plan_id = 1;
  }

  // If you don't need a proto in your code, please use TritonGemmConfig instead
  // of using this proto directly.
  //
  // NOTE(review): the fields look like Triton GEMM tuning parameters (tile
  // sizes along m/n/k, split-k factor, pipeline stages, warps, CTAs); their
  // exact meaning is defined by the consumer -- confirm there.
  message TritonGemmKey {
    int64 block_m = 1;
    int64 block_n = 2;
    int64 block_k = 3;
    int64 split_k = 4;
    int64 num_stages = 5;
    int64 num_warps = 6;
    int64 num_ctas = 7;
  }

  // NOTE(review): presumably the scratch memory the candidate needed, in
  // bytes -- confirm.
  int64 scratch_bytes = 8;

  // NOTE(review): presumably the measured run time of the candidate --
  // confirm.
  google.protobuf.Duration run_time = 9;

  // Failure details for this candidate.
  // NOTE(review): presumably left unset when the candidate succeeded --
  // confirm.
  FailureResult failure = 7;

  // Identifies the candidate this result belongs to. At most one member is
  // set.
  oneof key {
    ConvKey conv = 5;
    GemmKey gemm = 6;
    TritonGemmKey triton = 17;
    CudaConvPlanKey cuda_conv_plan = 15;
    stream_executor.dnn.AlgorithmProto algorithm = 16;
  }

  // Next ID: 18
}

// One autotuning log entry: the per-algorithm results together with the
// library versions and device information recorded alongside them.
message AutotuningLog {
  // NOTE(review): presumably the instruction that was autotuned, packed as an
  // Any -- confirm which message type writers pack here.
  google.protobuf.Any instr = 1;

  // Records all auto-tuning results per algorithm.
  repeated AutotuneResult results = 2;

  CudnnVersion cudnn_version = 3;
  ComputeCapability compute_capability = 4;

  // stream_executor::DeviceDescription::pci_bus_id.
  string device_pci_bus_id = 5;

  // NOTE(review): presumably the version string of the BLAS library in use --
  // confirm.
  string blas_version = 6;

  // NOTE(review): presumably the name of the fusion being autotuned --
  // confirm.
  string fusion_name = 7;

  // NOTE(review): what is being counted is not visible in this file --
  // confirm against the writer.
  int64 fusion_count = 8;

  // Next ID: 9
}