Use of com.hazelcast.cp.internal.raft.impl.dto.InstallSnapshot in project hazelcast by hazelcast.
The class RaftNodeImpl, method sendAppendRequest.
/**
 * Sends the given follower whatever it needs next from the local Raft log:
 * either an {@link InstallSnapshot} or an append-entries request.
 * <p>
 * Nothing is sent when the follower is reported unreachable, or when an append
 * request back-off is currently set for it.
 * <p>
 * An {@link InstallSnapshot} is sent when the follower's nextIndex is not beyond
 * the snapshot index and the log does not contain the entry at nextIndex or,
 * for {@code nextIndex > 1}, the entry right before it.
 * <p>
 * Otherwise an append-entries request is sent. It carries a batch of entries
 * starting at nextIndex, whose upper bound is derived from
 * {@link RaftAlgorithmConfig#getAppendRequestMaxEntryCount()}. The request carries
 * no entries when there is nothing to send, or when {@code nextIndex > 1} and the
 * follower's matchIndex is not known yet ({@code matchIndex == 0}), which saves
 * bandwidth until the leader learns where the logs match.
 *
 * @param follower the endpoint the request is sent to
 */
@SuppressWarnings({ "checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity", "checkstyle:methodlength" })
public void sendAppendRequest(RaftEndpoint follower) {
    if (!raftIntegration.isReachable(follower)) {
        return;
    }

    RaftLog localLog = state.log();
    LeaderState leader = state.leaderState();
    FollowerState target = leader.getFollowerState(follower);
    if (target.isAppendRequestBackoffSet()) {
        // A back-off is active for this follower, so no new request goes out for now.
        return;
    }

    long nextIndex = target.nextIndex();

    // Snapshot path: nextIndex is covered by the snapshot and the log is missing
    // the entry at nextIndex or (when nextIndex > 1) the one preceding it.
    if (nextIndex <= localLog.snapshotIndex()
            && !(localLog.containsLogEntry(nextIndex)
                    && (nextIndex <= 1 || localLog.containsLogEntry(nextIndex - 1)))) {
        InstallSnapshot snapshotRequest = new InstallSnapshot(state.localEndpoint(), state.term(),
                localLog.snapshot(), leader.queryRound());
        if (logger.isFineEnabled()) {
            logger.fine("Sending " + snapshotRequest + " to " + follower + " since next index: " + nextIndex
                    + " <= snapshot index: " + localLog.snapshotIndex());
        }
        // The snapshot is committed state, so no flush task is submitted on this path.
        raftIntegration.send(snapshotRequest, follower);
        target.setMaxAppendRequestBackoff();
        scheduleAppendAckResetTask();
        return;
    }

    int precedingTerm = 0;
    long precedingIndex = 0;
    // fetchBatch: whether real entries are read from the log for this request.
    // backoffAfterSend: whether the append request back-off is set once the request is sent.
    boolean fetchBatch;
    boolean backoffAfterSend;

    if (nextIndex > 1) {
        precedingIndex = nextIndex - 1;
        LogEntry preceding;
        if (localLog.snapshotIndex() == precedingIndex) {
            // The entry right before nextIndex is the snapshot itself.
            preceding = localLog.snapshot();
        } else {
            preceding = localLog.getLogEntry(precedingIndex);
        }
        assert preceding != null : "Prev entry index: " + precedingIndex + ", snapshot: " + localLog.snapshotIndex();
        precedingTerm = preceding.term();

        if (target.matchIndex() == 0) {
            // The leader has not yet discovered where its log and the follower's log match,
            // so it sends a request without entries (like a heartbeat) to save bandwidth.
            // Back-off stays enabled so the follower is not bombarded in the meantime.
            fetchBatch = false;
            backoffAfterSend = true;
        } else {
            // Entries are sent if the log has something at or after nextIndex;
            // otherwise the follower has caught up and an empty heartbeat is sent without back-off.
            fetchBatch = nextIndex <= localLog.lastLogOrSnapshotIndex();
            backoffAfterSend = fetchBatch;
        }
    } else {
        // Either entries go to this follower for the first time (nextIndex == 1 and the log
        // is non-empty), or there is nothing to send and an empty heartbeat is sent without back-off.
        fetchBatch = nextIndex == 1 && localLog.lastLogOrSnapshotIndex() > 0;
        backoffAfterSend = fetchBatch;
    }

    LogEntry[] batch;
    if (fetchBatch) {
        // NOTE(review): if getEntriesBetween treats both bounds as inclusive, this range can hold
        // appendRequestMaxEntryCount + 1 entries - confirm against RaftLog before changing.
        long lastIndexToSend = min(nextIndex + appendRequestMaxEntryCount, localLog.lastLogOrSnapshotIndex());
        batch = localLog.getEntriesBetween(nextIndex, lastIndexToSend);
    } else {
        batch = new LogEntry[0];
    }

    AppendRequest appendRequest = new AppendRequest(getLocalMember(), state.term(), precedingTerm, precedingIndex,
            state.commitIndex(), batch, leader.queryRound());
    if (logger.isFineEnabled()) {
        logger.fine("Sending " + appendRequest + " to " + follower + " with next index: " + nextIndex);
    }
    raftIntegration.send(appendRequest, follower);

    int sentCount = batch.length;
    if (sentCount > 0 && batch[sentCount - 1].index() > leader.flushedLogIndex()) {
        // The request contains an entry beyond the leader's flushed index, so the flush task
        // is triggered now. This lets the leader and the followers flush in parallel.
        submitFlushTask();
    }

    if (backoffAfterSend) {
        target.setAppendRequestBackoff();
        scheduleAppendAckResetTask();
    }
}
Aggregations