Search in sources :

Example 1 with FollowerState

use of com.hazelcast.cp.internal.raft.impl.state.FollowerState in project hazelcast by hazelcast.

In class AppendFailureResponseHandlerTask, method updateNextIndex:

/**
 * Steps the tracked next index of the responding follower back by one after a failed append.
 * <p>
 * The response is only acted upon when its expected next index equals the next index
 * currently tracked for the follower; any other response is ignored.
 *
 * @param state the Raft state from which the leader state (and the follower's bookkeeping) is read
 * @return {@code true} if the follower's next index was decremented and stored,
 *         {@code false} if the response did not match the tracked next index or if the
 *         decremented value would not stay above the follower's match index
 */
private boolean updateNextIndex(RaftState state) {
    FollowerState follower = state.leaderState().getFollowerState(resp.follower());
    long trackedNextIndex = follower.nextIndex();
    long trackedMatchIndex = follower.matchIndex();

    // Only a response that answers the request sent for the tracked next index is relevant.
    if (resp.expectedNextIndex() != trackedNextIndex) {
        return false;
    }

    // The outstanding append request has been answered, so mark the ack as received.
    follower.appendRequestAckReceived();

    long decrementedNextIndex = trackedNextIndex - 1;
    if (decrementedNextIndex <= trackedMatchIndex) {
        // The next index must stay strictly above the match index; refuse to go lower.
        logger.severe("Cannot decrement next index: " + decrementedNextIndex + " below match index: " + trackedMatchIndex + " for follower: " + resp.follower());
        return false;
    }

    if (logger.isFineEnabled()) {
        logger.fine("Updating next index: " + decrementedNextIndex + " for follower: " + resp.follower());
    }
    follower.nextIndex(decrementedNextIndex);
    return true;
}
Also used : FollowerState(com.hazelcast.cp.internal.raft.impl.state.FollowerState) LeaderState(com.hazelcast.cp.internal.raft.impl.state.LeaderState)

Example 2 with FollowerState

use of com.hazelcast.cp.internal.raft.impl.state.FollowerState in project hazelcast by hazelcast.

In class AppendSuccessResponseHandlerTask, method updateFollowerIndices:

/**
 * Applies a successful append response to the leader's bookkeeping for the responding follower
 * (Raft §5.3: on success, update nextIndex and matchIndex for the follower).
 * <p>
 * The response is first offered as an ack for its query round. After that, the follower's
 * reported last log index is compared with the tracked match index:
 * <ul>
 *     <li>greater: the append ack is recorded, and match index / next index are advanced;</li>
 *     <li>equal: only the append ack is recorded;</li>
 *     <li>smaller: nothing is changed.</li>
 * </ul>
 *
 * @param state the Raft state from which the leader state is read
 * @return {@code true} only if the follower's match index and next index were advanced
 */
private boolean updateFollowerIndices(RaftState state) {
    RaftEndpoint follower = resp.follower();
    LeaderState leader = state.leaderState();
    FollowerState tracked = leader.getFollowerState(follower);

    // Offer the response as an ack for its query round, regardless of the log indices.
    boolean queryAcked = leader.queryState().tryAck(resp.queryRound(), follower);
    if (queryAcked && logger.isFineEnabled()) {
        logger.fine("Ack from " + follower + " for query round: " + resp.queryRound());
    }

    long trackedMatchIndex = tracked.matchIndex();
    long reportedLastLogIndex = resp.lastLogIndex();

    if (reportedLastLogIndex < trackedMatchIndex) {
        // The follower reports less than what is already tracked as matched; leave state untouched.
        if (logger.isFineEnabled()) {
            logger.fine("Will not update match index for follower: " + follower + ". follower last log index: " + reportedLastLogIndex + ", match index: " + trackedMatchIndex);
        }
        return false;
    }

    // Received a response for the last append request, so mark the ack as received.
    tracked.appendRequestAckReceived();
    if (reportedLastLogIndex == trackedMatchIndex) {
        // Nothing new has been replicated; the indices stay as they are.
        return false;
    }

    long advancedNextIndex = reportedLastLogIndex + 1;
    tracked.matchIndex(reportedLastLogIndex);
    tracked.nextIndex(advancedNextIndex);
    if (logger.isFineEnabled()) {
        logger.fine("Updated match index: " + reportedLastLogIndex + " and next index: " + advancedNextIndex + " for follower: " + follower);
    }
    return true;
}
Also used : RaftEndpoint(com.hazelcast.cp.internal.raft.impl.RaftEndpoint) QueryState(com.hazelcast.cp.internal.raft.impl.state.QueryState) FollowerState(com.hazelcast.cp.internal.raft.impl.state.FollowerState) LeaderState(com.hazelcast.cp.internal.raft.impl.state.LeaderState)

Example 3 with FollowerState

use of com.hazelcast.cp.internal.raft.impl.state.FollowerState in project hazelcast by hazelcast.

In class RaftNodeImpl, method sendAppendRequest:

/**
 * Sends an append-entries request to the follower member.
 * <p>
 * Nothing is sent if the follower is not reachable or if an append request back-off
 * is currently set for the follower.
 * <p>
 * Log entries between follower's known nextIndex and latest appended entry index are sent in a batch.
 * Batch size can be {@link RaftAlgorithmConfig#getAppendRequestMaxEntryCount()} at most.
 * <p>
 * If follower's nextIndex is behind the latest snapshot index, then {@link InstallSnapshot} request is sent.
 * <p>
 * If leader doesn't know follower's matchIndex (if {@code matchIndex == 0}), then an empty append-entries is sent
 * to save bandwidth until leader learns the matchIndex of the follower.
 * <p>
 * After sending, a flush task is submitted if the batch contains an entry beyond the leader's
 * flushed log index, and the append request back-off is enabled for every request that is not
 * a plain heartbeat.
 *
 * @param follower the follower endpoint to send the request to
 */
@SuppressWarnings({ "checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity", "checkstyle:methodlength" })
public void sendAppendRequest(RaftEndpoint follower) {
    // Do not attempt to send anything to a follower the integration layer reports as unreachable.
    if (!raftIntegration.isReachable(follower)) {
        return;
    }
    RaftLog raftLog = state.log();
    LeaderState leaderState = state.leaderState();
    FollowerState followerState = leaderState.getFollowerState(follower);
    if (followerState.isAppendRequestBackoffSet()) {
        // A back-off is still set for this follower, so no new request is sent now.
        // Presumably a new append request is sent either when the follower sends a response
        // or a back-off timeout occurs.
        return;
    }
    long nextIndex = followerState.nextIndex();
    // Fall back to a snapshot when nextIndex is covered by the snapshot and the log no longer
    // contains either the entry at nextIndex or (for nextIndex > 1) the entry preceding it.
    if (nextIndex <= raftLog.snapshotIndex() && (!raftLog.containsLogEntry(nextIndex) || (nextIndex > 1 && !raftLog.containsLogEntry(nextIndex - 1)))) {
        InstallSnapshot installSnapshot = new InstallSnapshot(state.localEndpoint(), state.term(), raftLog.snapshot(), leaderState.queryRound());
        if (logger.isFineEnabled()) {
            logger.fine("Sending " + installSnapshot + " to " + follower + " since next index: " + nextIndex + " <= snapshot index: " + raftLog.snapshotIndex());
        }
        // no need to submit the flush task here because we send committed state...
        raftIntegration.send(installSnapshot, follower);
        // Snapshot installs use the maximum back-off rather than the regular one.
        followerState.setMaxAppendRequestBackoff();
        scheduleAppendAckResetTask();
        return;
    }
    // Defaults describe "no previous entry"; they are overwritten below when nextIndex > 1.
    int prevEntryTerm = 0;
    long prevEntryIndex = 0;
    LogEntry[] entries;
    // Back-off is enabled unless the request turns out to be a plain heartbeat.
    boolean shouldBackoff = true;
    if (nextIndex > 1) {
        prevEntryIndex = nextIndex - 1;
        // If the previous entry is exactly the snapshot index, the snapshot itself acts as the previous entry.
        LogEntry prevEntry = (raftLog.snapshotIndex() == prevEntryIndex) ? raftLog.snapshot() : raftLog.getLogEntry(prevEntryIndex);
        assert prevEntry != null : "Prev entry index: " + prevEntryIndex + ", snapshot: " + raftLog.snapshotIndex();
        prevEntryTerm = prevEntry.term();
        long matchIndex = followerState.matchIndex();
        if (matchIndex == 0) {
            // Until the leader has discovered where it and the follower's logs match,
            // the leader can send AppendEntries with no entries (like heartbeats) to save bandwidth.
            // We still need to enable append request backoff here because we do not want to bombard
            // the follower before we learn its match index
            entries = new LogEntry[0];
        } else if (nextIndex <= raftLog.lastLogOrSnapshotIndex()) {
            // Then, once the matchIndex immediately precedes the nextIndex,
            // the leader should begin to send the actual entries
            // NOTE(review): if getEntriesBetween treats both bounds as inclusive, this range can hold
            // appendRequestMaxEntryCount + 1 entries, one more than the Javadoc's stated maximum - confirm.
            long end = min(nextIndex + appendRequestMaxEntryCount, raftLog.lastLogOrSnapshotIndex());
            entries = raftLog.getEntriesBetween(nextIndex, end);
        } else {
            // The follower has caught up with the leader. Sending an empty append request as a heartbeat...
            entries = new LogEntry[0];
            shouldBackoff = false;
        }
    } else if (nextIndex == 1 && raftLog.lastLogOrSnapshotIndex() > 0) {
        // Entries will be sent to the follower for the first time...
        // There is no previous entry in this case, so prevEntryTerm and prevEntryIndex stay 0.
        long end = min(nextIndex + appendRequestMaxEntryCount, raftLog.lastLogOrSnapshotIndex());
        entries = raftLog.getEntriesBetween(nextIndex, end);
    } else {
        // There is no entry in the Raft log. Sending an empty append request as a heartbeat...
        entries = new LogEntry[0];
        shouldBackoff = false;
    }
    // NOTE(review): the sender here is getLocalMember(), while InstallSnapshot above uses
    // state.localEndpoint() - presumably equivalent; confirm.
    AppendRequest request = new AppendRequest(getLocalMember(), state.term(), prevEntryTerm, prevEntryIndex, state.commitIndex(), entries, leaderState.queryRound());
    if (logger.isFineEnabled()) {
        logger.fine("Sending " + request + " to " + follower + " with next index: " + nextIndex);
    }
    raftIntegration.send(request, follower);
    // Only the last entry of the batch is compared against the leader's flushed log index.
    if (entries.length > 0 && entries[entries.length - 1].index() > leaderState.flushedLogIndex()) {
        // if I am sending any non-flushed entry to the follower, I should trigger the flush task.
        // I hope that I will flush before receiving append responses from half of the followers...
        // This is a very critical optimization because
        // it makes the leader and followers flush in parallel...
        submitFlushTask();
    }
    if (shouldBackoff) {
        // Enable the back-off only after the request has been handed to the integration layer,
        // and schedule the task that goes with it.
        followerState.setAppendRequestBackoff();
        scheduleAppendAckResetTask();
    }
}
Also used : FollowerState(com.hazelcast.cp.internal.raft.impl.state.FollowerState) AppendRequest(com.hazelcast.cp.internal.raft.impl.dto.AppendRequest) LogEntry(com.hazelcast.cp.internal.raft.impl.log.LogEntry) RaftLog(com.hazelcast.cp.internal.raft.impl.log.RaftLog) LeaderState(com.hazelcast.cp.internal.raft.impl.state.LeaderState) InstallSnapshot(com.hazelcast.cp.internal.raft.impl.dto.InstallSnapshot)

Aggregations

FollowerState (com.hazelcast.cp.internal.raft.impl.state.FollowerState): 3, LeaderState (com.hazelcast.cp.internal.raft.impl.state.LeaderState): 3, RaftEndpoint (com.hazelcast.cp.internal.raft.impl.RaftEndpoint): 1, AppendRequest (com.hazelcast.cp.internal.raft.impl.dto.AppendRequest): 1, InstallSnapshot (com.hazelcast.cp.internal.raft.impl.dto.InstallSnapshot): 1, LogEntry (com.hazelcast.cp.internal.raft.impl.log.LogEntry): 1, RaftLog (com.hazelcast.cp.internal.raft.impl.log.RaftLog): 1, QueryState (com.hazelcast.cp.internal.raft.impl.state.QueryState): 1