syntax = "proto3";
package eraftpb;

import "rustproto.proto";

option (rustproto.carllerche_bytes_for_bytes_all) = true;

// Selects how the `data` and `context` fields of an `Entry` are used.
enum EntryType {
  EntryNormal = 0;
  EntryConfChange = 1;
  EntryConfChangeV2 = 2;
}

// The entry is a type of change that needs to be applied. It contains two
// data fields. While the fields are built into the model, their usage is
// determined by the entry_type.
//
// For normal entries, the data field should contain the data change that
// should be applied. The context field can be used for any contextual data
// that might be relevant to the application of the data.
//
// For configuration changes, the data will contain the ConfChange message and
// the context will provide anything needed to assist the configuration change.
// The context is for the user to set and use in this case.
//
// Field numbers are part of the wire format and do not follow declaration
// order here (context = 6 precedes sync_log = 5); never renumber them.
message Entry {
  EntryType entry_type = 1;
  uint64 term = 2;
  uint64 index = 3;
  bytes data = 4;
  bytes context = 6;

  // Deprecated! It is kept for backward compatibility.
  // TODO: remove it in the next major release.
  bool sync_log = 5;
}

// Metadata describing the state captured by a `Snapshot`.
message SnapshotMetadata {
  // The current `ConfState`.
  ConfState conf_state = 1;
  // The applied index.
  uint64 index = 2;
  // The term of the applied index.
  uint64 term = 3;
}

// Snapshot payload bytes together with the metadata describing them.
message Snapshot {
  bytes data = 1;
  SnapshotMetadata metadata = 2;
}

// The kinds of `Message`, carried in `Message.msg_type`.
enum MessageType {
  MsgHup = 0;
  MsgBeat = 1;
  MsgPropose = 2;
  MsgAppend = 3;
  MsgAppendResponse = 4;
  MsgRequestVote = 5;
  MsgRequestVoteResponse = 6;
  MsgSnapshot = 7;
  MsgHeartbeat = 8;
  MsgHeartbeatResponse = 9;
  MsgUnreachable = 10;
  MsgSnapStatus = 11;
  MsgCheckQuorum = 12;
  MsgTransferLeader = 13;
  MsgTimeoutNow = 14;
  MsgReadIndex = 15;
  MsgReadIndexResp = 16;
  MsgRequestPreVote = 17;
  MsgRequestPreVoteResponse = 18;
}

// A message of kind `msg_type`. Which of the remaining fields are meaningful
// depends on the message type.
//
// Field numbers are part of the wire format and do not follow declaration
// order here; never renumber them.
message Message {
  MessageType msg_type = 1;
  uint64 to = 2;
  uint64 from = 3;
  uint64 term = 4;

  // log_term is generally used for appending Raft logs to followers.
  // For example,
  // (type=MsgAppend,index=100,log_term=5) means leader appends entries
  // starting at index=101, and the term of entry at index 100 is 5.
  // (type=MsgAppendResponse,reject=true,index=100,log_term=5) means follower
  // rejects some entries from its leader as it already has an entry with
  // term 5 at index 100.
  uint64 log_term = 5;
  uint64 index = 6;
  repeated Entry entries = 7;
  uint64 commit = 8;
  uint64 commit_term = 15;
  Snapshot snapshot = 9;
  uint64 request_snapshot = 13;
  bool reject = 10;
  uint64 reject_hint = 11;
  bytes context = 12;

  // Old priority field; superseded by `priority` below.
  uint64 deprecated_priority = 14;

  // If this new field is not set, then use the above old field; otherwise
  // use the new field. When broadcasting request vote, both fields are
  // set if the priority is larger than 0. This change is not a fully
  // compatible change, but it makes minimal impact that only new priority
  // is not recognized by the old nodes during rolling update.
  int64 priority = 16;
}

// NOTE(review): presumably the Raft state a node must persist (current term,
// vote and commit index) -- confirm against the code that stores it.
message HardState {
  uint64 term = 1;
  uint64 vote = 2;
  uint64 commit = 3;
}

// Specifies how (and whether) Joint Consensus is used by a `ConfChangeV2`.
enum ConfChangeTransition {
  // Automatically use the simple protocol if possible, otherwise fall back
  // to ConfChangeTransition::Implicit. Most applications will want to use
  // this.
  Auto = 0;

  // Use joint consensus unconditionally, and transition out of it
  // automatically (by proposing a zero configuration change).
  //
  // This option is suitable for applications that want to minimize the time
  // spent in the joint configuration and do not store the joint configuration
  // in the state machine (outside of InitialState).
  Implicit = 1;

  // Use joint consensus and remain in the joint configuration until the
  // application proposes a no-op configuration change. This is suitable for
  // applications that want to explicitly control the transitions, for example
  // to use a custom payload (via the Context field).
  Explicit = 2;
}

// The membership configuration: voters, learners and, while in joint
// consensus, the outgoing configuration.
message ConfState {
  repeated uint64 voters = 1;
  repeated uint64 learners = 2;

  // The voters in the outgoing config. If not empty the node is in joint
  // consensus.
  repeated uint64 voters_outgoing = 3;

  // The nodes that will become learners when the outgoing config is removed.
  // These nodes are necessarily currently in nodes_joint (or they would have
  // been added to the incoming config right away).
  // NOTE(review): no field named `nodes_joint` exists in this file;
  // presumably this refers to `voters_outgoing` -- confirm.
  repeated uint64 learners_next = 4;

  // If set, the config is joint and Raft will automatically transition into
  // the final config (i.e. remove the outgoing config) when this is safe.
  bool auto_leave = 5;
}

// The kind of membership change requested by a `ConfChange` or a
// `ConfChangeSingle`.
enum ConfChangeType {
  AddNode = 0;
  RemoveNode = 1;
  AddLearnerNode = 2;
}

// A single configuration change applied to the node `node_id`.
//
// Field numbers are part of the wire format and do not follow declaration
// order here (`id` is field 1 but declared last); never renumber them.
message ConfChange {
  ConfChangeType change_type = 2;
  uint64 node_id = 3;
  bytes context = 4;

  uint64 id = 1;
}

// ConfChangeSingle is an individual configuration change operation. Multiple
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
  ConfChangeType change_type = 1;
  uint64 node_id = 2;
}

// ConfChangeV2 messages initiate configuration changes. They support both the
// simple "one at a time" membership change protocol and full Joint Consensus
// allowing for arbitrary changes in membership.
//
// The supplied context is treated as an opaque payload and can be used to
// attach an action on the state machine to the application of the config
// change proposal. Note that contrary to Joint Consensus as outlined in the
// Raft paper[1], configuration changes become active when they are *applied*
// to the state machine (not when they are appended to the log).
//
// The simple protocol can be used whenever only a single change is made.
//
// Non-simple changes require the use of Joint Consensus, for which two
// configuration changes are run. The first configuration change specifies the
// desired changes and transitions the Raft group into the joint configuration,
// in which quorum requires a majority of both the pre-changes and post-changes
// configuration. Joint Consensus avoids entering fragile intermediate
// configurations that could compromise survivability. For example, without
// the use of Joint Consensus and running across three availability zones with
// a replication factor of three, it is not possible to replace a voter without
// entering an intermediate configuration that does not survive the outage of
// one availability zone.
//
// The provided ConfChangeTransition specifies how (and whether) Joint
// Consensus is used, and assigns the task of leaving the joint configuration
// either to Raft or the application. Leaving the joint configuration is
// accomplished by proposing a ConfChangeV2 in which nothing is populated
// except, optionally, the context field.
//
// For details on Raft membership changes, see:
//
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
  ConfChangeTransition transition = 1;
  repeated ConfChangeSingle changes = 2;
  bytes context = 3;
}