use of com.hazelcast.cp.internal.raft.impl.state.RaftState in project hazelcast by hazelcast.
the class QueryTask method handleLeaderLocalRead.
/**
 * Serves a query that has to be answered by the leader.
 * <p>
 * When the local node holds the LEADER role the query continues as a plain local read.
 * Otherwise the result future is completed exceptionally with a {@code NotLeaderException}
 * built from the group id, the local member and the leader recorded in the local state.
 */
private void handleLeaderLocalRead() {
    RaftState raftState = raftNode.state();
    if (raftState.role() == LEADER) {
        // TODO: We can reject the query, if leader is not able to reach majority of the followers
        handleAnyLocalRead();
        return;
    }

    // Not the leader: fail the caller's future instead of running the query.
    NotLeaderException notLeader
            = new NotLeaderException(raftNode.getGroupId(), raftNode.getLocalMember(), raftState.leader());
    resultFuture.completeExceptionally(notLeader);
}
use of com.hazelcast.cp.internal.raft.impl.state.RaftState in project hazelcast by hazelcast.
the class QueryTask method handleAnyLocalRead.
/**
 * Hands the query operation to the local Raft node together with the result future.
 * <p>
 * If fine logging is enabled, the operation, the query policy and the term read from the
 * node's state are logged before the query is run.
 */
private void handleAnyLocalRead() {
    RaftState raftState = raftNode.state();
    if (logger.isFineEnabled()) {
        String message = "Querying: " + operation + " with policy: " + queryPolicy + " in term: " + raftState.term();
        logger.fine(message);
    }

    // TODO: We can reject the query, if follower have not received any heartbeat recently
    raftNode.runQuery(operation, resultFuture);
}
use of com.hazelcast.cp.internal.raft.impl.state.RaftState in project hazelcast by hazelcast.
the class AppendSuccessResponseHandlerTask method handleResponse.
/**
 * Processes a successful append response.
 * <p>
 * The response is ignored (with a warning) when the local node is no longer the LEADER.
 * Otherwise the follower indices are updated from the response:
 * <ul>
 * <li>if {@code updateFollowerIndices} returns {@code true} and the commit index could not be
 * advanced, another append request is attempted;</li>
 * <li>if it returns {@code false}, the node is asked to try running its queries.</li>
 * </ul>
 * In both cases {@code checkIfQueryAckNeeded} is invoked last.
 */
@Override
protected void handleResponse() {
    RaftState raftState = raftNode.state();
    if (raftState.role() != LEADER) {
        logger.warning("Ignored " + resp + ". We are not LEADER anymore.");
        return;
    }

    assert resp.term() <= raftState.term() : "Invalid " + resp + " for current term: " + raftState.term();

    if (logger.isFineEnabled()) {
        logger.fine("Received " + resp);
    }

    boolean followerIndicesUpdated = updateFollowerIndices(raftState);
    if (!followerIndicesUpdated) {
        raftNode.tryRunQueries();
    } else if (!raftNode.tryAdvanceCommitIndex()) {
        trySendAppendRequest(raftState);
    }

    checkIfQueryAckNeeded(raftState);
}
use of com.hazelcast.cp.internal.raft.impl.state.RaftState in project hazelcast by hazelcast.
the class VoteRequestHandlerTask method innerRun.
/**
 * Handles a vote request received from a candidate and answers it with a {@code VoteResponse}.
 * <p>
 * The vote is rejected when any of the following holds, checked in this order:
 * <ol>
 * <li>the request is not disruptive, an append entries call was received within the leader
 * election timeout, and the candidate is not the currently known leader (leader stickiness);</li>
 * <li>the local term is greater than the request term;</li>
 * <li>a leader other than the candidate is known;</li>
 * <li>a vote was already cast for a different endpoint (a repeated request from the endpoint
 * that already holds the vote is granted again);</li>
 * <li>the local log is ahead of the candidate's log, compared by last term and then by last index.</li>
 * </ol>
 * If the request carries a greater term than the local one, the node moves to that term as a
 * follower before checks 3-5 are evaluated. When no check rejects the request, the vote is
 * persisted and a granting response is sent.
 */
@Override
// Justification: It is easier to follow the RequestVoteRPC logic in a single method
@SuppressWarnings({ "checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity" })
protected void innerRun() {
    RaftState state = raftNode.state();
    RaftEndpoint localMember = localMember();

    // Reply false if last AppendEntries call was received less than election timeout ago (leader stickiness)
    // (Raft thesis - Section 4.2.3) This check conflicts with the leadership transfer mechanism,
    // in which a server legitimately starts an election without waiting an election timeout.
    // Those VoteRequest objects are marked with a special flag ("disruptive") to bypass leader stickiness.
    // Also if request comes from the current leader, then stickiness check is skipped.
    // Since current leader may have restarted by recovering its persistent state.
    long leaderElectionTimeoutDeadline = Clock.currentTimeMillis() - raftNode.getLeaderElectionTimeoutInMillis();
    if (!req.isDisruptive() && raftNode.lastAppendEntriesTimestamp() > leaderElectionTimeoutDeadline
            && !req.candidate().equals(state.leader())) {
        logger.info("Rejecting " + req + " since received append entries recently.");
        raftNode.send(new VoteResponse(localMember, state.term(), false), req.candidate());
        return;
    }

    // Reply false if term < currentTerm (§5.1)
    if (state.term() > req.term()) {
        logger.info("Rejecting " + req + " since current term: " + state.term() + " is bigger");
        raftNode.send(new VoteResponse(localMember, state.term(), false), req.candidate());
        return;
    }

    if (state.term() < req.term()) {
        // If RPC request or response contains term T > currentTerm: set currentTerm = T, convert to follower (§5.1)
        if (state.role() != FOLLOWER) {
            logger.info("Demoting to FOLLOWER after " + req + " since current term: " + state.term() + " is smaller");
        } else {
            logger.info("Moving to new term: " + req.term() + " from current term: " + state.term() + " after " + req);
        }
        raftNode.toFollower(req.term());
    }

    // From here on the responses carry the request term: the local term is not lower than it
    // (checked above) and has been raised to it when it was lower.
    if (state.leader() != null && !req.candidate().equals(state.leader())) {
        logger.warning("Rejecting " + req + " since we have a leader: " + state.leader());
        raftNode.send(new VoteResponse(localMember, req.term(), false), req.candidate());
        return;
    }

    if (state.votedFor() != null) {
        // A vote already exists: repeat the grant only for the endpoint that holds it.
        boolean granted = req.candidate().equals(state.votedFor());
        if (granted) {
            logger.info("Vote granted for duplicate " + req);
        } else {
            logger.info("Duplicate " + req + ". currently voted-for: " + state.votedFor());
        }
        raftNode.send(new VoteResponse(localMember, req.term(), granted), req.candidate());
        return;
    }

    // Reject candidates whose log is behind ours: compare last term first, then last index.
    RaftLog raftLog = state.log();
    if (raftLog.lastLogOrSnapshotTerm() > req.lastLogTerm()) {
        logger.info("Rejecting " + req + " since our last log term: " + raftLog.lastLogOrSnapshotTerm() + " is greater");
        raftNode.send(new VoteResponse(localMember, req.term(), false), req.candidate());
        return;
    }

    if (raftLog.lastLogOrSnapshotTerm() == req.lastLogTerm() && raftLog.lastLogOrSnapshotIndex() > req.lastLogIndex()) {
        logger.info("Rejecting " + req + " since our last log index: " + raftLog.lastLogOrSnapshotIndex() + " is greater");
        raftNode.send(new VoteResponse(localMember, req.term(), false), req.candidate());
        return;
    }

    logger.info("Granted vote for " + req);
    // Persist the vote before replying.
    state.persistVote(req.term(), req.candidate());
    raftNode.send(new VoteResponse(localMember, req.term(), true), req.candidate());
}
use of com.hazelcast.cp.internal.raft.impl.state.RaftState in project hazelcast by hazelcast.
the class AppendRequestHandlerTask method innerRun.
/**
 * Handles an append request received from a leader.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>reply with a failure response if the request term is lower than the local term;</li>
 * <li>switch to follower if the request term is greater or the local role is not FOLLOWER,
 * and record the sender as leader if it differs from the known leader;</li>
 * <li>when the request has a positive previous log index, verify that the local log has a
 * matching term at that index, replying with a failure response otherwise;</li>
 * <li>truncate local entries from the first request entry whose term differs from the local
 * entry at the same index, then append the remaining request entries, limited to the
 * available capacity of the log;</li>
 * <li>raise the commit index to min(leader commit index, index of the last entry accepted
 * from this request) when the leader's commit index is ahead;</li>
 * <li>send a success response, then apply newly committed entries and pre-apply the
 * appended entries.</li>
 * </ol>
 */
@Override
@SuppressWarnings({ "checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity", "checkstyle:methodlength", "checkstyle:nestedifdepth" })
protected // Justification: It is easier to follow the AppendEntriesRPC logic in a single method
void innerRun() {
if (logger.isFineEnabled()) {
logger.fine("Received " + req);
}
RaftState state = raftNode.state();
// Reply false if term < currentTerm (§5.1)
if (req.term() < state.term()) {
// NOTE(review): this warning is only emitted when fine logging is enabled - presumably to limit log noise; confirm intent
if (logger.isFineEnabled()) {
logger.warning("Stale " + req + " received in current term: " + state.term());
}
// The failure response carries the local (newer) term.
raftNode.send(createFailureResponse(state.term()), req.leader());
return;
}
// Transform into follower if a newer term is seen or another node wins the election of the current term
if (req.term() > state.term() || state.role() != FOLLOWER) {
// If RPC request or response contains term T > currentTerm: set currentTerm = T, convert to follower (§5.1)
logger.info("Demoting to FOLLOWER from current role: " + state.role() + ", term: " + state.term() + " to new term: " + req.term() + " and leader: " + req.leader());
raftNode.toFollower(req.term());
}
// Record the sender as leader when it is not the one already known.
if (!req.leader().equals(state.leader())) {
logger.info("Setting leader: " + req.leader());
raftNode.leader(req.leader());
}
RaftLog raftLog = state.log();
// Verify the last log entry
if (req.prevLogIndex() > 0) {
long lastLogIndex = raftLog.lastLogOrSnapshotIndex();
int lastLogTerm = raftLog.lastLogOrSnapshotTerm();
int prevLogTerm;
if (req.prevLogIndex() == lastLogIndex) {
// The request continues right after our last index: compare against our last term without a log lookup.
prevLogTerm = lastLogTerm;
} else {
// Reply false if log does not contain an entry at prevLogIndex whose term matches prevLogTerm (§5.3)
LogEntry prevLog = raftLog.getLogEntry(req.prevLogIndex());
if (prevLog == null) {
// NOTE(review): warning gated by isFineEnabled, same pattern as above - confirm intent
if (logger.isFineEnabled()) {
logger.warning("Failed to get previous log index for " + req + ", last log index: " + lastLogIndex);
}
raftNode.send(createFailureResponse(req.term()), req.leader());
return;
}
prevLogTerm = prevLog.term();
}
if (req.prevLogTerm() != prevLogTerm) {
if (logger.isFineEnabled()) {
logger.warning("Previous log term of " + req + " is different than ours: " + prevLogTerm);
}
raftNode.send(createFailureResponse(req.term()), req.leader());
return;
}
}
// Number of request entries dropped because the log had no room for them; used when computing the reported index.
int truncatedAppendRequestEntryCount = 0;
// Request entries that still have to be appended; stays null when every request entry is already in the log.
LogEntry[] newEntries = null;
// Process any new entries
if (req.entryCount() > 0) {
// Delete any conflicting entries, skip any duplicates
long lastLogIndex = raftLog.lastLogOrSnapshotIndex();
for (int i = 0; i < req.entryCount(); i++) {
LogEntry reqEntry = req.entries()[i];
if (reqEntry.index() > lastLogIndex) {
// This entry is past our last index: it and all following request entries are new.
newEntries = Arrays.copyOfRange(req.entries(), i, req.entryCount());
break;
}
LogEntry localEntry = raftLog.getLogEntry(reqEntry.index());
assert localEntry != null : "Entry not found on log index: " + reqEntry.index() + " for " + req;
// delete the existing entry and all that follow it (§5.3)
if (reqEntry.term() != localEntry.term()) {
List<LogEntry> truncatedEntries = raftLog.deleteEntriesFrom(reqEntry.index());
// The truncated entries themselves are included in the message only when fine logging is enabled.
if (logger.isFineEnabled()) {
logger.warning("Truncated " + truncatedEntries.size() + " entries from entry index: " + reqEntry.index() + " => " + truncatedEntries);
} else {
logger.warning("Truncated " + truncatedEntries.size() + " entries from entry index: " + reqEntry.index());
}
raftNode.invalidateFuturesFrom(reqEntry.index());
revertPreAppliedRaftGroupCmd(truncatedEntries);
newEntries = Arrays.copyOfRange(req.entries(), i, req.entryCount());
// Flush the truncation before any new entries are appended.
raftLog.flush();
break;
}
}
if (newEntries != null && newEntries.length > 0) {
if (raftLog.availableCapacity() < newEntries.length) {
if (logger.isFineEnabled()) {
logger.warning("Truncating " + newEntries.length + " entries to " + raftLog.availableCapacity() + " to fit into the available capacity of the Raft log");
}
// Keep only as many entries as fit and remember how many were dropped.
truncatedAppendRequestEntryCount = newEntries.length - raftLog.availableCapacity();
newEntries = Arrays.copyOf(newEntries, raftLog.availableCapacity());
}
// Append any new entries not already in the log
if (logger.isFineEnabled()) {
logger.fine("Appending " + newEntries.length + " entries: " + Arrays.toString(newEntries));
}
raftLog.appendEntries(newEntries);
raftLog.flush();
}
}
// I cannot use raftLog.lastLogOrSnapshotIndex() for lastLogIndex because my log may contain
// some uncommitted entries from the previous leader and those entries will be truncated soon
// I can only send a response based on how many entries I have appended from this append request
long lastLogIndex = req.prevLogIndex() + req.entryCount() - truncatedAppendRequestEntryCount;
long oldCommitIndex = state.commitIndex();
// Update the commit index
if (req.leaderCommitIndex() > oldCommitIndex) {
// If leaderCommit > commitIndex, set commitIndex = min(leaderCommit, index of last new entry)
long newCommitIndex = min(req.leaderCommitIndex(), lastLogIndex);
if (logger.isFineEnabled()) {
logger.fine("Setting commit index: " + newCommitIndex);
}
state.commitIndex(newCommitIndex);
}
raftNode.updateLastAppendEntriesTimestamp();
// The response is sent first; the finally block runs afterwards even if sending throws.
try {
AppendSuccessResponse resp = new AppendSuccessResponse(localMember(), state.term(), lastLogIndex, req.queryRound());
raftNode.send(resp, req.leader());
} finally {
// Apply entries only when the commit index actually moved forward in this call.
if (state.commitIndex() > oldCommitIndex) {
raftNode.applyLogEntries();
}
if (newEntries != null) {
preApplyRaftGroupCmd(newEntries, state.commitIndex());
}
}
}
Aggregations