syntax = "proto3";

package pdpb;

import "metapb.proto";
import "eraftpb.proto";
import "raft_serverpb.proto";
import "replication_modepb.proto";

import "gogoproto/gogo.proto";
import "rustproto.proto";

option (gogoproto.sizer_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (rustproto.lite_runtime_all) = true;
option java_package = "org.tikv.kvproto";

// PD is the placement-driver service: cluster bootstrap, TSO allocation,
// store/region heartbeats, split/scatter scheduling and GC safe points.
service PD {
  // GetMembers get the member list of this cluster. It does not require
  // the cluster_id in request matches the id of this cluster.
  rpc GetMembers(GetMembersRequest) returns (GetMembersResponse) {}

  rpc Tso(stream TsoRequest) returns (stream TsoResponse) {}

  rpc Bootstrap(BootstrapRequest) returns (BootstrapResponse) {}

  rpc IsBootstrapped(IsBootstrappedRequest) returns (IsBootstrappedResponse) {}

  rpc AllocID(AllocIDRequest) returns (AllocIDResponse) {}

  rpc GetStore(GetStoreRequest) returns (GetStoreResponse) {}

  rpc PutStore(PutStoreRequest) returns (PutStoreResponse) {}

  rpc GetAllStores(GetAllStoresRequest) returns (GetAllStoresResponse) {}

  rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {}

  rpc RegionHeartbeat(stream RegionHeartbeatRequest) returns (stream RegionHeartbeatResponse) {}

  rpc GetRegion(GetRegionRequest) returns (GetRegionResponse) {}

  rpc GetPrevRegion(GetRegionRequest) returns (GetRegionResponse) {}

  rpc GetRegionByID(GetRegionByIDRequest) returns (GetRegionResponse) {}

  rpc ScanRegions(ScanRegionsRequest) returns (ScanRegionsResponse) {}

  rpc AskSplit(AskSplitRequest) returns (AskSplitResponse) {
    // Use AskBatchSplit instead.
    option deprecated = true;
  }

  rpc ReportSplit(ReportSplitRequest) returns (ReportSplitResponse) {
    // Use ReportBatchSplit instead.
    option deprecated = true;
  }

  rpc AskBatchSplit(AskBatchSplitRequest) returns (AskBatchSplitResponse) {}

  rpc ReportBatchSplit(ReportBatchSplitRequest) returns (ReportBatchSplitResponse) {}

  rpc GetClusterConfig(GetClusterConfigRequest) returns (GetClusterConfigResponse) {}

  rpc PutClusterConfig(PutClusterConfigRequest) returns (PutClusterConfigResponse) {}

  rpc ScatterRegion(ScatterRegionRequest) returns (ScatterRegionResponse) {}

  rpc GetGCSafePoint(GetGCSafePointRequest) returns (GetGCSafePointResponse) {}

  rpc UpdateGCSafePoint(UpdateGCSafePointRequest) returns (UpdateGCSafePointResponse) {}

  rpc UpdateServiceGCSafePoint(UpdateServiceGCSafePointRequest) returns (UpdateServiceGCSafePointResponse) {}

  rpc SyncRegions(stream SyncRegionRequest) returns (stream SyncRegionResponse) {}

  rpc GetOperator(GetOperatorRequest) returns (GetOperatorResponse) {}

  rpc SyncMaxTS(SyncMaxTSRequest) returns (SyncMaxTSResponse) {}

  rpc SplitRegions(SplitRegionsRequest) returns (SplitRegionsResponse) {}

  rpc SplitAndScatterRegions(SplitAndScatterRegionsRequest) returns (SplitAndScatterRegionsResponse) {}

  rpc GetDCLocationInfo(GetDCLocationInfoRequest) returns (GetDCLocationInfoResponse) {}
}

message RequestHeader {
  // cluster_id is the ID of the cluster which be sent to.
  uint64 cluster_id = 1;
  // sender_id is the ID of the sender server, also member ID or etcd ID.
  uint64 sender_id = 2;
}

message ResponseHeader {
  // cluster_id is the ID of the cluster which sent the response.
  uint64 cluster_id = 1;
  Error error = 2;
}

enum ErrorType {
  OK = 0;
  UNKNOWN = 1;
  NOT_BOOTSTRAPPED = 2;
  STORE_TOMBSTONE = 3;
  ALREADY_BOOTSTRAPPED = 4;
  INCOMPATIBLE_VERSION = 5;
  REGION_NOT_FOUND = 6;
}

message Error {
  ErrorType type = 1;
  string message = 2;
}

message TsoRequest {
  RequestHeader header = 1;

  uint32 count = 2;
  string dc_location = 3;
}

message Timestamp {
  int64 physical = 1;
  int64 logical = 2;
  // Number of suffix bits used for global distinction,
  // PD client will use this to compute a TSO's logical part.
  uint32 suffix_bits = 3;
}

message TsoResponse {
  ResponseHeader header = 1;

  uint32 count = 2;
  Timestamp timestamp = 3;
}

message BootstrapRequest {
  RequestHeader header = 1;

  metapb.Store store = 2;
  metapb.Region region = 3;
}

message BootstrapResponse {
  ResponseHeader header = 1;

  replication_modepb.ReplicationStatus replication_status = 2;
}

message IsBootstrappedRequest {
  RequestHeader header = 1;
}

message IsBootstrappedResponse {
  ResponseHeader header = 1;

  bool bootstrapped = 2;
}

message AllocIDRequest {
  RequestHeader header = 1;
}

message AllocIDResponse {
  ResponseHeader header = 1;

  uint64 id = 2;
}

message GetStoreRequest {
  RequestHeader header = 1;

  uint64 store_id = 2;
}

message GetStoreResponse {
  ResponseHeader header = 1;

  metapb.Store store = 2;
  StoreStats stats = 3;
}

message PutStoreRequest {
  RequestHeader header = 1;

  metapb.Store store = 2;
}

message PutStoreResponse {
  ResponseHeader header = 1;

  replication_modepb.ReplicationStatus replication_status = 2;
}

message GetAllStoresRequest {
  RequestHeader header = 1;

  // Do NOT return tombstone stores if set to true.
  bool exclude_tombstone_stores = 2;
}

message GetAllStoresResponse {
  ResponseHeader header = 1;

  repeated metapb.Store stores = 2;
}

message GetRegionRequest {
  RequestHeader header = 1;

  bytes region_key = 2;
}

message GetRegionResponse {
  reserved 4;

  ResponseHeader header = 1;

  metapb.Region region = 2;
  metapb.Peer leader = 3;
  // Leader considers that these peers are down.
  repeated PeerStats down_peers = 5;
  // Pending peers are the peers that the leader can't consider as
  // working followers.
  repeated metapb.Peer pending_peers = 6;
}

message GetRegionByIDRequest {
  RequestHeader header = 1;

  uint64 region_id = 2;
}

// Use GetRegionResponse as the response of GetRegionByIDRequest.

message ScanRegionsRequest {
  RequestHeader header = 1;

  bytes start_key = 2;
  int32 limit = 3;  // no limit when limit <= 0.
  bytes end_key = 4;  // end_key is +inf when it is empty.
}

message Region {
  metapb.Region region = 1;
  metapb.Peer leader = 2;
  // Leader considers that these peers are down.
  repeated PeerStats down_peers = 3;
  // Pending peers are the peers that the leader can't consider as
  // working followers.
  repeated metapb.Peer pending_peers = 4;
}

message ScanRegionsResponse {
  ResponseHeader header = 1;

  // Keep for backward compatibility.
  repeated metapb.Region region_metas = 2;
  repeated metapb.Peer leaders = 3;

  // Extended region info with down/pending peers.
  repeated Region regions = 4;
}

message GetClusterConfigRequest {
  RequestHeader header = 1;
}

message GetClusterConfigResponse {
  ResponseHeader header = 1;

  metapb.Cluster cluster = 2;
}

message PutClusterConfigRequest {
  RequestHeader header = 1;

  metapb.Cluster cluster = 2;
}

message PutClusterConfigResponse {
  ResponseHeader header = 1;
}

message Member {
  // name is the name of the PD member.
  string name = 1;
  // member_id is the unique id of the PD member.
  uint64 member_id = 2;
  repeated string peer_urls = 3;
  repeated string client_urls = 4;
  int32 leader_priority = 5;
  string deploy_path = 6;
  string binary_version = 7;
  string git_hash = 8;
  string dc_location = 9;
}

message GetMembersRequest {
  RequestHeader header = 1;
}

message GetMembersResponse {
  ResponseHeader header = 1;

  repeated Member members = 2;
  Member leader = 3;
  Member etcd_leader = 4;
  // NOTE(review): the original declaration was a bare `map` with no type
  // parameters, which is not valid proto3. string -> Member matches the
  // per-dc-location TSO allocator model visible in this file
  // (Member.dc_location, GetDCLocationInfo) — confirm against the
  // canonical upstream schema before regenerating code.
  map<string, Member> tso_allocator_leaders = 5;
}

message PeerStats {
  metapb.Peer peer = 1;
  uint64 down_seconds = 2;
}

message RegionHeartbeatRequest {
  RequestHeader header = 1;

  metapb.Region region = 2;
  // Leader Peer sending the heartbeat.
  metapb.Peer leader = 3;
  // Leader considers that these peers are down.
  repeated PeerStats down_peers = 4;
  // Pending peers are the peers that the leader can't consider as
  // working followers.
  repeated metapb.Peer pending_peers = 5;
  // Bytes read/written during this period.
  uint64 bytes_written = 6;
  uint64 bytes_read = 7;
  // Keys read/written during this period.
  uint64 keys_written = 8;
  uint64 keys_read = 9;
  // Approximate region size.
  uint64 approximate_size = 10;
  reserved 11;
  // Actually reported time interval
  TimeInterval interval = 12;
  // Approximate number of keys.
  uint64 approximate_keys = 13;
  // Term is the term of raft group.
  uint64 term = 14;
  replication_modepb.RegionReplicationStatus replication_status = 15;
  // QueryStats reported write query stats, and there are read query stats in store heartbeat
  QueryStats query_stats = 16;
  // cpu_usage is the CPU time usage of the leader region since the last heartbeat,
  // which is calculated by cpu_time_delta/heartbeat_reported_interval.
  uint64 cpu_usage = 17;
}

message ChangePeer {
  metapb.Peer peer = 1;
  eraftpb.ConfChangeType change_type = 2;
}

message ChangePeerV2 {
  // If changes is empty, it means to exit the joint state.
  repeated ChangePeer changes = 1;
}

message TransferLeader {
  metapb.Peer peer = 1;
  repeated metapb.Peer peers = 2;
}

message Merge {
  metapb.Region target = 1;
}

message SplitRegion {
  CheckPolicy policy = 1;
  repeated bytes keys = 2;
}

enum CheckPolicy {
  SCAN = 0;
  APPROXIMATE = 1;
  USEKEY = 2;
}

message RegionHeartbeatResponse {
  ResponseHeader header = 1;

  // Notice, Pd only allows handling reported epoch >= current pd's.
  // Leader peer reports region status with RegionHeartbeatRequest
  // to pd regularly, pd will determine whether this region
  // should do ChangePeer or not.
  // E.g., max peer number is 3, region A, first only peer 1 in A.
  // 1. Pd region state -> Peers (1), ConfVer (1).
  // 2. Leader peer 1 reports region state to pd, pd finds the
  // peer number is < 3, so first changes its current region
  // state -> Peers (1, 2), ConfVer (1), and returns ChangePeer Adding 2.
  // 3. Leader does ChangePeer, then reports Peers (1, 2), ConfVer (2),
  // pd updates its state -> Peers (1, 2), ConfVer (2).
  // 4. Leader may report old Peers (1), ConfVer (1) to pd before ConfChange
  // finished, pd still responds ChangePeer Adding 2, of course, we must
  // guarantee the second ChangePeer can't be applied in TiKV.
  ChangePeer change_peer = 2;
  // Pd can return transfer_leader to let TiKV does leader transfer itself.
  TransferLeader transfer_leader = 3;
  // ID of the region
  uint64 region_id = 4;
  metapb.RegionEpoch region_epoch = 5;
  // Leader of the region at the moment of the corresponding request was made.
  metapb.Peer target_peer = 6;
  Merge merge = 7;
  // PD sends split_region to let TiKV split a region into two regions.
  SplitRegion split_region = 8;
  // Multiple change peer operations atomically.
  // Note: PD can use both ChangePeer and ChangePeerV2 at the same time
  // (not in the same RegionHeartbeatResponse).
  // Now, PD use ChangePeerV2 only for replacing peers.
  ChangePeerV2 change_peer_v2 = 9;
}

message AskSplitRequest {
  RequestHeader header = 1;

  metapb.Region region = 2;
}

message AskSplitResponse {
  ResponseHeader header = 1;

  // We split the region into two, first uses the origin
  // parent region id, and the second uses the new_region_id.
  // We must guarantee that the new_region_id is global unique.
  uint64 new_region_id = 2;
  // The peer ids for the new split region.
  repeated uint64 new_peer_ids = 3;
}

message ReportSplitRequest {
  RequestHeader header = 1;

  metapb.Region left = 2;
  metapb.Region right = 3;
}

message ReportSplitResponse {
  ResponseHeader header = 1;
}

message AskBatchSplitRequest {
  RequestHeader header = 1;

  metapb.Region region = 2;
  uint32 split_count = 3;
}

message SplitID {
  uint64 new_region_id = 1;
  repeated uint64 new_peer_ids = 2;
}

message AskBatchSplitResponse {
  ResponseHeader header = 1;

  repeated SplitID ids = 2;
}

message ReportBatchSplitRequest {
  RequestHeader header = 1;

  repeated metapb.Region regions = 2;
}

message ReportBatchSplitResponse {
  ResponseHeader header = 1;
}

message TimeInterval {
  // The unix timestamp in seconds of the start of this period.
  uint64 start_timestamp = 1;
  // The unix timestamp in seconds of the end of this period.
  uint64 end_timestamp = 2;
}

message RecordPair {
  string key = 1;
  uint64 value = 2;
}

message PeerStat {
  uint64 region_id = 1;
  uint64 read_keys = 2;
  uint64 read_bytes = 3;
  QueryStats query_stats = 4;
}

message StoreStats {
  uint64 store_id = 1;
  // Capacity for the store.
  uint64 capacity = 2;
  // Available size for the store.
  uint64 available = 3;
  // Total region count in this store.
  uint32 region_count = 4;
  // Current sending snapshot count.
  uint32 sending_snap_count = 5;
  // Current receiving snapshot count.
  uint32 receiving_snap_count = 6;
  // When the store is started (unix timestamp in seconds).
  uint32 start_time = 7;
  // How many regions are applying snapshots.
  uint32 applying_snap_count = 8;
  // If the store is busy
  bool is_busy = 9;
  // Actually used space by db
  uint64 used_size = 10;
  // Bytes written for the store during this period.
  uint64 bytes_written = 11;
  // Keys written for the store during this period.
  uint64 keys_written = 12;
  // Bytes read for the store during this period.
  uint64 bytes_read = 13;
  // Keys read for the store during this period.
  uint64 keys_read = 14;
  // Actually reported time interval
  TimeInterval interval = 15;
  // Threads' CPU usages in the store
  repeated RecordPair cpu_usages = 16;
  // Threads' read disk I/O rates in the store
  repeated RecordPair read_io_rates = 17;
  // Threads' write disk I/O rates in the store
  repeated RecordPair write_io_rates = 18;
  // Operations' latencies in the store
  repeated RecordPair op_latencies = 19;
  // Hot peer stat in the store
  repeated PeerStat peer_stats = 20;
  // Store query stats
  QueryStats query_stats = 21;
  // Score that represents the speed of the store, ranges in [1, 100], lower is better.
  uint64 slow_score = 22;
  // Damaged regions on the store that need to be removed by PD.
  repeated uint64 damaged_regions_id = 23;
}

message PeerReport {
  raft_serverpb.RaftLocalState raft_state = 1;
  raft_serverpb.RegionLocalState region_state = 2;
}

message StoreReport {
  repeated PeerReport peer_reports = 1;
}

message StoreHeartbeatRequest {
  RequestHeader header = 1;

  StoreStats stats = 2;
  // Detailed store report that is only filled up on PD's demand for online unsafe recover.
  StoreReport store_report = 3;
}

message RecoveryPlan {
  repeated metapb.Region creates = 1;
  repeated metapb.Region updates = 2;
  repeated uint64 deletes = 3;
}

message StoreHeartbeatResponse {
  ResponseHeader header = 1;

  replication_modepb.ReplicationStatus replication_status = 2;
  string cluster_version = 3;
  bool require_detailed_report = 4;
  RecoveryPlan plan = 5;
}

message ScatterRegionRequest {
  RequestHeader header = 1;

  uint64 region_id = 2 [deprecated = true];

  // PD will use these region information if it can't find the region.
  // For example, the region is just split and hasn't report to PD yet.
  metapb.Region region = 3;
  metapb.Peer leader = 4;

  // If group is defined, the regions with the same group would be scattered as a whole group.
  // If not defined, the regions would be scattered in a cluster level.
  string group = 5;

  // If regions_id is defined, the region_id would be ignored.
  repeated uint64 regions_id = 6;
  uint64 retry_limit = 7;
}

message ScatterRegionResponse {
  ResponseHeader header = 1;

  uint64 finished_percentage = 2;
}

message GetGCSafePointRequest {
  RequestHeader header = 1;
}

message GetGCSafePointResponse {
  ResponseHeader header = 1;

  uint64 safe_point = 2;
}

message UpdateGCSafePointRequest {
  RequestHeader header = 1;

  uint64 safe_point = 2;
}

message UpdateGCSafePointResponse {
  ResponseHeader header = 1;

  uint64 new_safe_point = 2;
}

message UpdateServiceGCSafePointRequest {
  RequestHeader header = 1;

  bytes service_id = 2;
  int64 TTL = 3;
  uint64 safe_point = 4;
}

message UpdateServiceGCSafePointResponse {
  ResponseHeader header = 1;

  bytes service_id = 2;
  int64 TTL = 3;
  uint64 min_safe_point = 4;
}

message RegionStat {
  // Bytes read/written during this period.
  uint64 bytes_written = 1;
  uint64 bytes_read = 2;
  // Keys read/written during this period.
  uint64 keys_written = 3;
  uint64 keys_read = 4;
}

message SyncRegionRequest {
  RequestHeader header = 1;

  Member member = 2;
  // the follower PD will use the start index to locate historical changes
  // that require synchronization.
  uint64 start_index = 3;
}

message SyncRegionResponse {
  ResponseHeader header = 1;

  // the leader PD will send the responses including
  // changed region records and the index of the first record.
  repeated metapb.Region regions = 2;
  uint64 start_index = 3;
  repeated RegionStat region_stats = 4;
  repeated metapb.Peer region_leaders = 5;
}

message GetOperatorRequest {
  RequestHeader header = 1;

  uint64 region_id = 2;
}

enum OperatorStatus {
  SUCCESS = 0;
  TIMEOUT = 1;
  CANCEL = 2;
  REPLACE = 3;
  RUNNING = 4;
}

message GetOperatorResponse {
  ResponseHeader header = 1;

  uint64 region_id = 2;
  bytes desc = 3;
  OperatorStatus status = 4;
  bytes kind = 5;
}

message SyncMaxTSRequest {
  RequestHeader header = 1;

  Timestamp max_ts = 2;
  // If skip_check is true, the sync will try to write the max_ts without checking whether it's bigger.
  bool skip_check = 3;
}

message SyncMaxTSResponse {
  ResponseHeader header = 1;

  Timestamp max_local_ts = 2;
  repeated string synced_dcs = 3;
}

message SplitRegionsRequest {
  RequestHeader header = 1;

  repeated bytes split_keys = 2;
  uint64 retry_limit = 3;
}

message SplitRegionsResponse {
  ResponseHeader header = 1;

  uint64 finished_percentage = 2;
  repeated uint64 regions_id = 3;
}

message SplitAndScatterRegionsRequest {
  RequestHeader header = 1;

  repeated bytes split_keys = 2;
  string group = 3;
  uint64 retry_limit = 4;
}

message SplitAndScatterRegionsResponse {
  ResponseHeader header = 1;

  uint64 split_finished_percentage = 2;
  uint64 scatter_finished_percentage = 3;
  repeated uint64 regions_id = 4;
}

message GetDCLocationInfoRequest {
  RequestHeader header = 1;

  string dc_location = 2;
}

message GetDCLocationInfoResponse {
  ResponseHeader header = 1;

  // suffix sign
  int32 suffix = 2;
  // max_ts will be included into this response if PD leader think the receiver needs,
  // which it's set when the number of the max suffix bits changes.
  Timestamp max_ts = 3;
}

message QueryStats {
  uint64 GC = 1;
  uint64 Get = 2;
  uint64 Scan = 3;
  uint64 Coprocessor = 4;
  uint64 Delete = 5;
  uint64 DeleteRange = 6;
  uint64 Put = 7;
  uint64 Prewrite = 8;
  uint64 AcquirePessimisticLock = 9;
  uint64 Commit = 10;
  uint64 Rollback = 11;
}

enum QueryKind {
  Others = 0;
  GC = 1;
  Get = 2;
  Scan = 3;
  Coprocessor = 4;
  Delete = 5;
  DeleteRange = 6;
  Put = 7;
  Prewrite = 8;
  AcquirePessimisticLock = 9;
  Commit = 10;
  Rollback = 11;
}