syntax = "proto3";

package pdpb;

import "metapb.proto";
import "eraftpb.proto";
import "raft_serverpb.proto";
import "replication_modepb.proto";

import "gogoproto/gogo.proto";
import "rustproto.proto";

option (gogoproto.sizer_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (rustproto.lite_runtime_all) = true;
option java_package = "org.tikv.kvproto";

// PD is the placement-driver service: cluster bootstrap, TSO allocation,
// store/region heartbeats, split/scatter scheduling and GC safe points.
service PD {
  // GetMembers get the member list of this cluster. It does not require
  // the cluster_id in request matches the id of this cluster.
  rpc GetMembers(GetMembersRequest) returns (GetMembersResponse) {}

  rpc Tso(stream TsoRequest) returns (stream TsoResponse) {}

  rpc Bootstrap(BootstrapRequest) returns (BootstrapResponse) {}

  rpc IsBootstrapped(IsBootstrappedRequest) returns (IsBootstrappedResponse) {}

  rpc AllocID(AllocIDRequest) returns (AllocIDResponse) {}

  rpc GetStore(GetStoreRequest) returns (GetStoreResponse) {}

  rpc PutStore(PutStoreRequest) returns (PutStoreResponse) {}

  rpc GetAllStores(GetAllStoresRequest) returns (GetAllStoresResponse) {}

  rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {}

  rpc RegionHeartbeat(stream RegionHeartbeatRequest) returns (stream RegionHeartbeatResponse) {}

  rpc GetRegion(GetRegionRequest) returns (GetRegionResponse) {}

  rpc GetPrevRegion(GetRegionRequest) returns (GetRegionResponse) {}

  rpc GetRegionByID(GetRegionByIDRequest) returns (GetRegionResponse) {}

  rpc ScanRegions(ScanRegionsRequest) returns (ScanRegionsResponse) {}

  rpc AskSplit(AskSplitRequest) returns (AskSplitResponse) {
    // Use AskBatchSplit instead.
    option deprecated = true;
  }

  rpc ReportSplit(ReportSplitRequest) returns (ReportSplitResponse) {
    // Use ReportBatchSplit instead.
    option deprecated = true;
  }

  rpc AskBatchSplit(AskBatchSplitRequest) returns (AskBatchSplitResponse) {}

  rpc ReportBatchSplit(ReportBatchSplitRequest) returns (ReportBatchSplitResponse) {}

  rpc GetClusterConfig(GetClusterConfigRequest) returns (GetClusterConfigResponse) {}

  rpc PutClusterConfig(PutClusterConfigRequest) returns (PutClusterConfigResponse) {}

  rpc ScatterRegion(ScatterRegionRequest) returns (ScatterRegionResponse) {}

  rpc GetGCSafePoint(GetGCSafePointRequest) returns (GetGCSafePointResponse) {}

  rpc UpdateGCSafePoint(UpdateGCSafePointRequest) returns (UpdateGCSafePointResponse) {}

  rpc UpdateServiceGCSafePoint(UpdateServiceGCSafePointRequest) returns (UpdateServiceGCSafePointResponse) {}

  rpc SyncRegions(stream SyncRegionRequest) returns (stream SyncRegionResponse) {}

  rpc GetOperator(GetOperatorRequest) returns (GetOperatorResponse) {}

  rpc SyncMaxTS(SyncMaxTSRequest) returns (SyncMaxTSResponse) {}

  rpc SplitRegions(SplitRegionsRequest) returns (SplitRegionsResponse) {}

  rpc SplitAndScatterRegions(SplitAndScatterRegionsRequest) returns (SplitAndScatterRegionsResponse) {}

  rpc GetDCLocationInfo(GetDCLocationInfoRequest) returns (GetDCLocationInfoResponse) {}
}

message RequestHeader {
  // cluster_id is the ID of the cluster which be sent to.
  uint64 cluster_id = 1;
  // sender_id is the ID of the sender server, also member ID or etcd ID.
  uint64 sender_id = 2;
}

message ResponseHeader {
  // cluster_id is the ID of the cluster which sent the response.
  uint64 cluster_id = 1;
  Error error = 2;
}

enum ErrorType {
  OK = 0;
  UNKNOWN = 1;
  NOT_BOOTSTRAPPED = 2;
  STORE_TOMBSTONE = 3;
  ALREADY_BOOTSTRAPPED = 4;
  INCOMPATIBLE_VERSION = 5;
  REGION_NOT_FOUND = 6;
}

message Error {
  ErrorType type = 1;
  string message = 2;
}

message TsoRequest {
  RequestHeader header = 1;

  uint32 count = 2;
  string dc_location = 3;
}

message Timestamp {
  int64 physical = 1;
  int64 logical = 2;
  // Number of suffix bits used for global distinction,
  // PD client will use this to compute a TSO's logical part.
  uint32 suffix_bits = 3;
}

message TsoResponse {
  ResponseHeader header = 1;

  uint32 count = 2;
  Timestamp timestamp = 3;
}

message BootstrapRequest {
  RequestHeader header = 1;

  metapb.Store store = 2;
  metapb.Region region = 3;
}

message BootstrapResponse {
  ResponseHeader header = 1;

  replication_modepb.ReplicationStatus replication_status = 2;
}

message IsBootstrappedRequest {
  RequestHeader header = 1;
}

message IsBootstrappedResponse {
  ResponseHeader header = 1;

  bool bootstrapped = 2;
}

message AllocIDRequest {
  RequestHeader header = 1;
}

message AllocIDResponse {
  ResponseHeader header = 1;

  uint64 id = 2;
}

message GetStoreRequest {
  RequestHeader header = 1;

  uint64 store_id = 2;
}

message GetStoreResponse {
  ResponseHeader header = 1;

  metapb.Store store = 2;
  StoreStats stats = 3;
}

message PutStoreRequest {
  RequestHeader header = 1;

  metapb.Store store = 2;
}

message PutStoreResponse {
  ResponseHeader header = 1;

  replication_modepb.ReplicationStatus replication_status = 2;
}

message GetAllStoresRequest {
  RequestHeader header = 1;

  // Do NOT return tombstone stores if set to true.
  bool exclude_tombstone_stores = 2;
}

message GetAllStoresResponse {
  ResponseHeader header = 1;

  repeated metapb.Store stores = 2;
}

message GetRegionRequest {
  RequestHeader header = 1;

  bytes region_key = 2;
}

message GetRegionResponse {
  reserved 4;

  ResponseHeader header = 1;

  metapb.Region region = 2;
  metapb.Peer leader = 3;
  // Leader considers that these peers are down.
  repeated PeerStats down_peers = 5;
  // Pending peers are the peers that the leader can't consider as
  // working followers.
  repeated metapb.Peer pending_peers = 6;
}

message GetRegionByIDRequest {
  RequestHeader header = 1;

  uint64 region_id = 2;
}

// Use GetRegionResponse as the response of GetRegionByIDRequest.

message ScanRegionsRequest {
  RequestHeader header = 1;

  bytes start_key = 2;
  int32 limit = 3;  // no limit when limit <= 0.
  bytes end_key = 4;  // end_key is +inf when it is empty.
}

message Region {
  metapb.Region region = 1;
  metapb.Peer leader = 2;
  // Leader considers that these peers are down.
  repeated PeerStats down_peers = 3;
  // Pending peers are the peers that the leader can't consider as
  // working followers.
  repeated metapb.Peer pending_peers = 4;
}

message ScanRegionsResponse {
  ResponseHeader header = 1;

  // Keep for backward compatibility.
  repeated metapb.Region region_metas = 2;
  repeated metapb.Peer leaders = 3;

  // Extended region info with down/pending peers.
  repeated Region regions = 4;
}

message GetClusterConfigRequest {
  RequestHeader header = 1;
}

message GetClusterConfigResponse {
  ResponseHeader header = 1;

  metapb.Cluster cluster = 2;
}

message PutClusterConfigRequest {
  RequestHeader header = 1;

  metapb.Cluster cluster = 2;
}

message PutClusterConfigResponse {
  ResponseHeader header = 1;
}

message Member {
  // name is the name of the PD member.
  string name = 1;
  // member_id is the unique id of the PD member.
  uint64 member_id = 2;
  repeated string peer_urls = 3;
  repeated string client_urls = 4;
  int32 leader_priority = 5;
  string deploy_path = 6;
  string binary_version = 7;
  string git_hash = 8;
  string dc_location = 9;
}

message GetMembersRequest {
  RequestHeader header = 1;
}

message GetMembersResponse {
  ResponseHeader header = 1;

  repeated Member members = 2;
  Member leader = 3;
  Member etcd_leader = 4;
  // NOTE(review): the original declaration was a bare `map` with no type
  // parameters, which is not valid proto3. string -> Member matches the
  // per-dc-location TSO allocator model visible in this file
  // (Member.dc_location, GetDCLocationInfo) — confirm against the
  // canonical upstream schema before regenerating code.
  map<string, Member> tso_allocator_leaders = 5;
}

message PeerStats {
  metapb.Peer peer = 1;
  uint64 down_seconds = 2;
}

message RegionHeartbeatRequest {
  RequestHeader header = 1;

  metapb.Region region = 2;
  // Leader Peer sending the heartbeat.
  metapb.Peer leader = 3;
  // Leader considers that these peers are down.
  repeated PeerStats down_peers = 4;
  // Pending peers are the peers that the leader can't consider as
  // working followers.
  repeated metapb.Peer pending_peers = 5;
  // Bytes read/written during this period.
  uint64 bytes_written = 6;
  uint64 bytes_read = 7;
  // Keys read/written during this period.
  uint64 keys_written = 8;
  uint64 keys_read = 9;
  // Approximate region size.
  uint64 approximate_size = 10;
  reserved 11;
  // Actually reported time interval
  TimeInterval interval = 12;
  // Approximate number of keys.
  uint64 approximate_keys = 13;
  // Term is the term of raft group.
  uint64 term = 14;
  replication_modepb.RegionReplicationStatus replication_status = 15;
  // QueryStats reported write query stats, and there are read query stats in store heartbeat
  QueryStats query_stats = 16;
  // cpu_usage is the CPU time usage of the leader region since the last heartbeat,
  // which is calculated by cpu_time_delta/heartbeat_reported_interval.
  uint64 cpu_usage = 17;
}

message ChangePeer {
  metapb.Peer peer = 1;
  eraftpb.ConfChangeType change_type = 2;
}

message ChangePeerV2 {
  // If changes is empty, it means to exit the joint state.
  repeated ChangePeer changes = 1;
}

message TransferLeader {
  metapb.Peer peer = 1;
  repeated metapb.Peer peers = 2;
}

message Merge {
  metapb.Region target = 1;
}

message SplitRegion {
  CheckPolicy policy = 1;
  repeated bytes keys = 2;
}

enum CheckPolicy {
  SCAN = 0;
  APPROXIMATE = 1;
  USEKEY = 2;
}

message RegionHeartbeatResponse {
  ResponseHeader header = 1;

  // Notice, Pd only allows handling reported epoch >= current pd's.
  // Leader peer reports region status with RegionHeartbeatRequest
  // to pd regularly, pd will determine whether this region
  // should do ChangePeer or not.
  // E.g., max peer number is 3, region A, first only peer 1 in A.
  // 1. Pd region state -> Peers (1), ConfVer (1).
  // 2. Leader peer 1 reports region state to pd, pd finds the
  // peer number is < 3, so first changes its current region
  // state -> Peers (1, 2), ConfVer (1), and returns ChangePeer Adding 2.
  // 3. Leader does ChangePeer, then reports Peers (1, 2), ConfVer (2),
  // pd updates its state -> Peers (1, 2), ConfVer (2).
  // 4. Leader may report old Peers (1), ConfVer (1) to pd before ConfChange
  // finished, pd still responds ChangePeer Adding 2, of course, we must
  // guarantee the second ChangePeer can't be applied in TiKV.
  ChangePeer change_peer = 2;
  // Pd can return transfer_leader to let TiKV does leader transfer itself.
  TransferLeader transfer_leader = 3;
  // ID of the region
  uint64 region_id = 4;
  metapb.RegionEpoch region_epoch = 5;
  // Leader of the region at the moment of the corresponding request was made.
  metapb.Peer target_peer = 6;
  Merge merge = 7;
  // PD sends split_region to let TiKV split a region into two regions.
  SplitRegion split_region = 8;
  // Multiple change peer operations atomically.
  // Note: PD can use both ChangePeer and ChangePeerV2 at the same time
  // (not in the same RegionHeartbeatResponse).
  // Now, PD use ChangePeerV2 only for replacing peers.
  ChangePeerV2 change_peer_v2 = 9;
}

message AskSplitRequest {
  RequestHeader header = 1;

  metapb.Region region = 2;
}

message AskSplitResponse {
  ResponseHeader header = 1;

  // We split the region into two, first uses the origin
  // parent region id, and the second uses the new_region_id.
  // We must guarantee that the new_region_id is global unique.
  uint64 new_region_id = 2;
  // The peer ids for the new split region.
  repeated uint64 new_peer_ids = 3;
}

message ReportSplitRequest {
  RequestHeader header = 1;

  metapb.Region left = 2;
  metapb.Region right = 3;
}

message ReportSplitResponse {
  ResponseHeader header = 1;
}

message AskBatchSplitRequest {
  RequestHeader header = 1;

  metapb.Region region = 2;
  uint32 split_count = 3;
}

message SplitID {
  uint64 new_region_id = 1;
  repeated uint64 new_peer_ids = 2;
}

message AskBatchSplitResponse {
  ResponseHeader header = 1;

  repeated SplitID ids = 2;
}

message ReportBatchSplitRequest {
  RequestHeader header = 1;

  repeated metapb.Region regions = 2;
}

message ReportBatchSplitResponse {
  ResponseHeader header = 1;
}

message TimeInterval {
  // The unix timestamp in seconds of the start of this period.
  uint64 start_timestamp = 1;
  // The unix timestamp in seconds of the end of this period.
  uint64 end_timestamp = 2;
}

message RecordPair {
  string key = 1;
  uint64 value = 2;
}

message PeerStat {
  uint64 region_id = 1;
  uint64 read_keys = 2;
  uint64 read_bytes = 3;
  QueryStats query_stats = 4;
}

message StoreStats {
  uint64 store_id = 1;
  // Capacity for the store.
  uint64 capacity = 2;
  // Available size for the store.
  uint64 available = 3;
  // Total region count in this store.
  uint32 region_count = 4;
  // Current sending snapshot count.
  uint32 sending_snap_count = 5;
  // Current receiving snapshot count.
  uint32 receiving_snap_count = 6;
  // When the store is started (unix timestamp in seconds).
  uint32 start_time = 7;
  // How many regions are applying snapshots.
  uint32 applying_snap_count = 8;
  // If the store is busy
  bool is_busy = 9;
  // Actually used space by db
  uint64 used_size = 10;
  // Bytes written for the store during this period.
  uint64 bytes_written = 11;
  // Keys written for the store during this period.
  uint64 keys_written = 12;
  // Bytes read for the store during this period.
  uint64 bytes_read = 13;
  // Keys read for the store during this period.
  uint64 keys_read = 14;
  // Actually reported time interval
  TimeInterval interval = 15;
  // Threads' CPU usages in the store
  repeated RecordPair cpu_usages = 16;
  // Threads' read disk I/O rates in the store
  repeated RecordPair read_io_rates = 17;
  // Threads' write disk I/O rates in the store
  repeated RecordPair write_io_rates = 18;
  // Operations' latencies in the store
  repeated RecordPair op_latencies = 19;
  // Hot peer stat in the store
  repeated PeerStat peer_stats = 20;
  // Store query stats
  QueryStats query_stats = 21;
  // Score that represents the speed of the store, ranges in [1, 100], lower is better.
  uint64 slow_score = 22;
  // Damaged regions on the store that need to be removed by PD.
  repeated uint64 damaged_regions_id = 23;
}

message PeerReport {
  raft_serverpb.RaftLocalState raft_state = 1;
  raft_serverpb.RegionLocalState region_state = 2;
}

message StoreReport {
  repeated PeerReport peer_reports = 1;
}

message StoreHeartbeatRequest {
  RequestHeader header = 1;

  StoreStats stats = 2;
  // Detailed store report that is only filled up on PD's demand for online unsafe recover.
  StoreReport store_report = 3;
}

message RecoveryPlan {
  repeated metapb.Region creates = 1;
  repeated metapb.Region updates = 2;
  repeated uint64 deletes = 3;
}

message StoreHeartbeatResponse {
  ResponseHeader header = 1;

  replication_modepb.ReplicationStatus replication_status = 2;
  string cluster_version = 3;
  bool require_detailed_report = 4;
  RecoveryPlan plan = 5;
}

message ScatterRegionRequest {
  RequestHeader header = 1;

  uint64 region_id = 2 [deprecated = true];

  // PD will use these region information if it can't find the region.
  // For example, the region is just split and hasn't report to PD yet.
  metapb.Region region = 3;
  metapb.Peer leader = 4;

  // If group is defined, the regions with the same group would be scattered as a whole group.
  // If not defined, the regions would be scattered in a cluster level.
  string group = 5;

  // If regions_id is defined, the region_id would be ignored.
  repeated uint64 regions_id = 6;
  uint64 retry_limit = 7;
}

message ScatterRegionResponse {
  ResponseHeader header = 1;

  uint64 finished_percentage = 2;
}

message GetGCSafePointRequest {
  RequestHeader header = 1;
}

message GetGCSafePointResponse {
  ResponseHeader header = 1;

  uint64 safe_point = 2;
}

message UpdateGCSafePointRequest {
  RequestHeader header = 1;

  uint64 safe_point = 2;
}

message UpdateGCSafePointResponse {
  ResponseHeader header = 1;

  uint64 new_safe_point = 2;
}

message UpdateServiceGCSafePointRequest {
  RequestHeader header = 1;

  bytes service_id = 2;
  int64 TTL = 3;
  uint64 safe_point = 4;
}

message UpdateServiceGCSafePointResponse {
  ResponseHeader header = 1;

  bytes service_id = 2;
  int64 TTL = 3;
  uint64 min_safe_point = 4;
}

message RegionStat {
  // Bytes read/written during this period.
  uint64 bytes_written = 1;
  uint64 bytes_read = 2;
  // Keys read/written during this period.
  uint64 keys_written = 3;
  uint64 keys_read = 4;
}

message SyncRegionRequest {
  RequestHeader header = 1;

  Member member = 2;
  // the follower PD will use the start index to locate historical changes
  // that require synchronization.
  uint64 start_index = 3;
}

message SyncRegionResponse {
  ResponseHeader header = 1;

  // the leader PD will send the responses including
  // changed region records and the index of the first record.
  repeated metapb.Region regions = 2;
  uint64 start_index = 3;
  repeated RegionStat region_stats = 4;
  repeated metapb.Peer region_leaders = 5;
}

message GetOperatorRequest {
  RequestHeader header = 1;

  uint64 region_id = 2;
}

enum OperatorStatus {
  SUCCESS = 0;
  TIMEOUT = 1;
  CANCEL = 2;
  REPLACE = 3;
  RUNNING = 4;
}

message GetOperatorResponse {
  ResponseHeader header = 1;

  uint64 region_id = 2;
  bytes desc = 3;
  OperatorStatus status = 4;
  bytes kind = 5;
}

message SyncMaxTSRequest {
  RequestHeader header = 1;

  Timestamp max_ts = 2;
  // If skip_check is true, the sync will try to write the max_ts without checking whether it's bigger.
  bool skip_check = 3;
}

message SyncMaxTSResponse {
  ResponseHeader header = 1;

  Timestamp max_local_ts = 2;
  repeated string synced_dcs = 3;
}

message SplitRegionsRequest {
  RequestHeader header = 1;

  repeated bytes split_keys = 2;
  uint64 retry_limit = 3;
}

message SplitRegionsResponse {
  ResponseHeader header = 1;

  uint64 finished_percentage = 2;
  repeated uint64 regions_id = 3;
}

message SplitAndScatterRegionsRequest {
  RequestHeader header = 1;

  repeated bytes split_keys = 2;
  string group = 3;
  uint64 retry_limit = 4;
}

message SplitAndScatterRegionsResponse {
  ResponseHeader header = 1;

  uint64 split_finished_percentage = 2;
  uint64 scatter_finished_percentage = 3;
  repeated uint64 regions_id = 4;
}

message GetDCLocationInfoRequest {
  RequestHeader header = 1;

  string dc_location = 2;
}

message GetDCLocationInfoResponse {
  ResponseHeader header = 1;

  // suffix sign
  int32 suffix = 2;
  // max_ts will be included into this response if PD leader think the receiver needs,
  // which it's set when the number of the max suffix bits changes.
  Timestamp max_ts = 3;
}

message QueryStats {
  uint64 GC = 1;
  uint64 Get = 2;
  uint64 Scan = 3;
  uint64 Coprocessor = 4;
  uint64 Delete = 5;
  uint64 DeleteRange = 6;
  uint64 Put = 7;
  uint64 Prewrite = 8;
  uint64 AcquirePessimisticLock = 9;
  uint64 Commit = 10;
  uint64 Rollback = 11;
}

enum QueryKind {
  Others = 0;
  GC = 1;
  Get = 2;
  Scan = 3;
  Coprocessor = 4;
  Delete = 5;
  DeleteRange = 6;
  Put = 7;
  Prewrite = 8;
  AcquirePessimisticLock = 9;
  Commit = 10;
  Rollback = 11;
}