use of com.hazelcast.cp.internal.raft.impl.command.UpdateRaftGroupMembersCmd in project hazelcast by hazelcast.
the class RaftNodeImpl method applyRestoredRaftGroupCommands.
/**
 * Scans the log entries that follow the given snapshot and re-establishes
 * the effect of the Raft group commands found there.
 * <p>
 * A new Raft group command cannot be appended before the previous one is
 * committed. Consequently:
 * <ul>
 * <li>if only one group command follows the snapshot, it is unknown whether
 * it is committed, so it is only "pre-applied" without being committed;</li>
 * <li>if several group commands follow the snapshot, every command except
 * the last one is known to be committed, while the last one may still be
 * uncommitted.</li>
 * </ul>
 *
 * @param snapshot the restored snapshot, or {@code null} in which case
 *                 the scan starts from log index 1
 * @throws IllegalStateException if the last group command is neither an
 *         {@link UpdateRaftGroupMembersCmd} nor a {@link DestroyRaftGroupCmd}
 */
private void applyRestoredRaftGroupCommands(SnapshotEntry snapshot) {
    RaftLog raftLog = state.log();
    LogEntry knownCommittedCmdEntry = null;
    LogEntry latestCmdEntry = null;

    // Track the last two group-command entries seen after the snapshot.
    long index = (snapshot == null) ? 1 : snapshot.index() + 1;
    while (index <= raftLog.lastLogOrSnapshotIndex()) {
        LogEntry current = raftLog.getLogEntry(index);
        assert current != null : "index: " + index;
        if (current.operation() instanceof RaftGroupCmd) {
            knownCommittedCmdEntry = latestCmdEntry;
            latestCmdEntry = current;
        }
        index++;
    }

    // The second-to-last group command must have been committed already.
    if (knownCommittedCmdEntry != null) {
        state.commitIndex(knownCommittedCmdEntry.index());
        applyLogEntries();
    }

    if (latestCmdEntry == null) {
        return;
    }

    // The last group command may be uncommitted: only reflect it in the node status.
    Object latestCmd = latestCmdEntry.operation();
    if (latestCmd instanceof UpdateRaftGroupMembersCmd) {
        setStatus(UPDATING_GROUP_MEMBER_LIST);
        Collection<RaftEndpoint> members = ((UpdateRaftGroupMembersCmd) latestCmd).getMembers();
        updateGroupMembers(latestCmdEntry.index(), members);
    } else if (latestCmd instanceof DestroyRaftGroupCmd) {
        setStatus(TERMINATING);
    } else {
        throw new IllegalStateException("Invalid group command for restore: " + latestCmdEntry);
    }
}
use of com.hazelcast.cp.internal.raft.impl.command.UpdateRaftGroupMembersCmd in project hazelcast by hazelcast.
the class RaftNodeImpl method canReplicateNewEntry.
/**
 * Tells whether a new entry carrying the given operation may currently be
 * replicated. This method can be invoked only when the local Raft node
 * is the leader.
 * <p>
 * The checks are evaluated in this order and the first one that applies
 * decides the result:
 * <ol>
 * <li>The node is terminated or stepped down: not allowed.
 * See {@link RaftNodeStatus}.</li>
 * <li>The Raft log already holds the max allowed number of uncommitted entries:
 * not allowed.
 * See {@link RaftAlgorithmConfig#getUncommittedEntryCountToRejectNewAppends()}.</li>
 * <li>The node is terminating: not allowed.</li>
 * <li>A membership change is ongoing: allowed only if the local member is
 * a known member of the last group member list and the operation is not
 * a {@link RaftGroupCmd}.</li>
 * <li>The operation is a membership change: allowed only if the last
 * committed entry belongs to the current term, i.e. the leader has committed
 * an entry in its own term.
 * See {@link RaftIntegration#getAppendedEntryOnLeaderElection()}.</li>
 * <li>Any other operation: allowed only if there is no ongoing leadership
 * transfer.</li>
 * </ol>
 *
 * @param operation the operation that would be appended to the log
 * @return {@code true} if the operation may be replicated now
 */
public boolean canReplicateNewEntry(Object operation) {
    if (isTerminatedOrSteppedDown()) {
        return false;
    }

    RaftLog raftLog = state.log();
    long lastIndex = raftLog.lastLogOrSnapshotIndex();
    long committedIndex = state.commitIndex();
    long uncommittedCount = lastIndex - committedIndex;
    if (uncommittedCount >= maxUncommittedEntryCount) {
        return false;
    }

    if (status == TERMINATING) {
        return false;
    }
    if (status == UPDATING_GROUP_MEMBER_LIST) {
        boolean localIsKnownMember = state.lastGroupMembers().isKnownMember(getLocalMember());
        return localIsKnownMember && !(operation instanceof RaftGroupCmd);
    }

    if (!(operation instanceof UpdateRaftGroupMembersCmd)) {
        return state.leadershipTransferState() == null;
    }

    // the leader must have committed an entry in its term to make a membership change
    // https://groups.google.com/forum/#!msg/raft-dev/t4xj6dJTP6E/d2D9LrWRza8J
    // the last committed entry lives either in the last snapshot or in the log itself
    LogEntry lastCommitted;
    if (committedIndex == raftLog.snapshotIndex()) {
        lastCommitted = raftLog.snapshot();
    } else {
        lastCommitted = raftLog.getLogEntry(committedIndex);
    }
    assert lastCommitted != null;
    return lastCommitted.term() == state.term();
}
use of com.hazelcast.cp.internal.raft.impl.command.UpdateRaftGroupMembersCmd in project hazelcast by hazelcast.
the class SnapshotTest method testMembershipChangeBlocksSnapshotBug.
/**
 * Scenario test around a membership change that is appended while the leader's log is
 * close to its uncommitted-entry limit.
 * <p>
 * Outline: start a 3-node group, stop AppendRequests to one follower, replicate until the
 * leader takes a snapshot, commit further entries, then stop AppendRequests to the second
 * follower as well and append entries that cannot be committed. A membership change (ADD)
 * is then replicated while AppendRequests to the second follower and to the new node are
 * altered so that they never carry the membership change entry. Finally, two more entries
 * are replicated; the test asserts that the leader's last log index advances for both and
 * that the future of the last one is not completed.
 * <p>
 * NOTE(review): the "LOG: [...]" comments carry concrete indices that follow from the
 * configured counts (50 and 10); the bug referred to by the test name is not identified
 * in this method - confirm against the issue tracker.
 */
@Test
public void testMembershipChangeBlocksSnapshotBug() throws ExecutionException, InterruptedException {
// The comments below show how the code behaves before the mentioned bug is fixed.
int commitIndexAdvanceCount = 50;
int uncommittedEntryCount = 10;
RaftAlgorithmConfig config = new RaftAlgorithmConfig().setCommitIndexAdvanceCountToSnapshot(commitIndexAdvanceCount).setUncommittedEntryCountToRejectNewAppends(uncommittedEntryCount);
group = newGroup(3, config);
group.start();
RaftNodeImpl leader = group.waitUntilLeaderElected();
RaftNodeImpl[] followers = group.getNodesExcept(leader.getLocalMember());
// From here on followers[0] receives no AppendRequest from the leader.
group.dropMessagesToMember(leader.getLocalMember(), followers[0].getLocalMember(), AppendRequest.class);
// Keep replicating (and waiting for each result) until the leader's snapshot index is no longer 0.
while (getSnapshotEntry(leader).index() == 0) {
leader.replicate(new ApplyRaftRunnable("into_snapshot")).get();
}
// Now the leader has taken a snapshot.
// It also keeps some already committed entries in the log because followers[0] hasn't appended them.
// LOG: [ <46 - 49>, <50>], SNAPSHOT INDEX: 50, COMMIT INDEX: 50
long leaderCommitIndex = getCommitIndex(leader);
do {
leader.replicate(new ApplyRaftRunnable("committed_after_snapshot")).get();
} while (getCommitIndex(leader) < leaderCommitIndex + commitIndexAdvanceCount - 1);
// New entries have been committed, stopping one commit short of the next snapshot:
// one more committed entry is needed to take the next snapshot.
// LOG: [ <46 - 49>, <50>, <51 - 99 (committed)> ], SNAPSHOT INDEX: 50, COMMIT INDEX: 99
// From here on followers[1] receives no AppendRequest either.
group.dropMessagesToMember(leader.getLocalMember(), followers[1].getLocalMember(), AppendRequest.class);
// Fire-and-forget: the returned futures are intentionally not awaited.
for (int i = 0; i < uncommittedEntryCount - 1; i++) {
leader.replicate(new ApplyRaftRunnable("uncommitted_after_snapshot"));
}
// Appended some more entries which will not be committed because the leader has no majority.
// The last uncommitted index is reserved for the membership change.
// LOG: [ <46 - 49>, <50>, <51 - 99 (committed)>, <100 - 108 (uncommitted)> ], SNAPSHOT INDEX: 50, COMMIT INDEX: 99
// There are only 2 empty indices in the log.
RaftNodeImpl newRaftNode = group.createNewRaftNode();
// Message alteration: for an AppendRequest with at least one entry,
// - if its last entry is a membership change, resend the request without that last entry;
// - otherwise, if its first entry is a membership change, resend the request with no entries.
// Everything else yields null (presumably meaning "leave the message as is" - TODO confirm
// against alterMessagesToMember).
Function<Object, Object> alterFunc = o -> {
if (o instanceof AppendRequest) {
AppendRequest request = (AppendRequest) o;
LogEntry[] entries = request.entries();
if (entries.length > 0) {
if (entries[entries.length - 1].operation() instanceof UpdateRaftGroupMembersCmd) {
entries = Arrays.copyOf(entries, entries.length - 1);
return new AppendRequest(request.leader(), request.term(), request.prevLogTerm(), request.prevLogIndex(), request.leaderCommitIndex(), entries, request.queryRound());
} else if (entries[0].operation() instanceof UpdateRaftGroupMembersCmd) {
entries = new LogEntry[0];
return new AppendRequest(request.leader(), request.term(), request.prevLogTerm(), request.prevLogIndex(), request.leaderCommitIndex(), entries, request.queryRound());
}
}
}
return null;
};
// Neither followers[1] nor the new node will ever be sent the membership change entry.
group.alterMessagesToMember(leader.getLocalMember(), followers[1].getLocalMember(), alterFunc);
group.alterMessagesToMember(leader.getLocalMember(), newRaftNode.getLocalMember(), alterFunc);
long lastLogIndex1 = getLastLogOrSnapshotEntry(leader).index();
leader.replicateMembershipChange(newRaftNode.getLocalMember(), MembershipChangeMode.ADD);
// When the membership change entry is appended, the leader's log will be as follows:
// LOG: [ <46 - 49>, <50>, <51 - 99 (committed)>, <100 - 108 (uncommitted)>, <109 (membership change)> ], SNAPSHOT INDEX: 50, COMMIT INDEX: 99
assertTrueEventually(() -> assertTrue(getLastLogOrSnapshotEntry(leader).index() > lastLogIndex1));
// Let AppendRequests reach followers[1] again (still subject to alterFunc).
group.allowMessagesToMember(leader.getLocalMember(), followers[1].getLocalMember(), AppendRequest.class);
// Then, only the entries before the membership change will be committed because we alter the append request. The log will be:
// LOG: [ <46 - 49>, <50>, <51 - 108 (committed)>, <109 (membership change)> ], SNAPSHOT INDEX: 50, COMMIT INDEX: 108
// There is only 1 empty index in the log.
assertTrueEventually(() -> {
assertEquals(lastLogIndex1, getCommitIndex(leader));
assertEquals(lastLogIndex1, getCommitIndex(followers[1]));
});
// NOTE(review): the disabled assertion below expects the commit index to also cover the
// membership change entry (lastLogIndex1 + 1); it is kept as found - confirm whether it
// should be removed or re-enabled.
// assertTrueEventually(() -> {
// assertEquals(lastLogIndex1 + 1, getCommitIndex(leader));
// assertEquals(lastLogIndex1 + 1, getCommitIndex(followers[1]));
// });
long lastLogIndex2 = getLastLogOrSnapshotEntry(leader).index();
leader.replicate(new ApplyRaftRunnable("after_membership_change_append"));
assertTrueEventually(() -> assertTrue(getLastLogOrSnapshotEntry(leader).index() > lastLogIndex2));
// Now the log is full. There is no empty space left.
// LOG: [ <46 - 49>, <50>, <51 - 108 (committed)>, <109 (membership change)>, <110 (uncommitted)> ], SNAPSHOT INDEX: 50, COMMIT INDEX: 108
long lastLogIndex3 = getLastLogOrSnapshotEntry(leader).index();
Future f = leader.replicate(new ApplyRaftRunnable("after_membership_change_append"));
// The entry must still be appended to the leader's log, but its future must not be completed.
assertTrueEventually(() -> assertTrue(getLastLogOrSnapshotEntry(leader).index() > lastLogIndex3));
assertFalse(f.isDone());
}
use of com.hazelcast.cp.internal.raft.impl.command.UpdateRaftGroupMembersCmd in project hazelcast by hazelcast.
the class MembershipChangeTask method run.
/**
 * Validates the requested membership change on the leader and, when it is
 * valid, replicates an {@link UpdateRaftGroupMembersCmd} carrying the new
 * member list by running a {@link ReplicateTask} inline.
 * <p>
 * The task stops early when {@code verifyRaftNodeStatus()} or
 * {@code isValidGroupMemberCommitIndex()} returns {@code false}. The result
 * future is completed exceptionally when the local node is not the leader,
 * when the member to add already exists, when the member to remove does not
 * exist, or when the change mode is unknown. Any other failure is logged and
 * reported through the result future as a {@link CPSubsystemException}.
 */
@Override
public void run() {
    try {
        if (!verifyRaftNodeStatus()) {
            return;
        }

        RaftState raftState = raftNode.state();
        if (raftState.role() != LEADER) {
            NotLeaderException notLeader =
                    new NotLeaderException(raftNode.getGroupId(), raftNode.getLocalMember(), raftState.leader());
            resultFuture.completeExceptionally(notLeader);
            return;
        }

        if (!isValidGroupMemberCommitIndex()) {
            return;
        }

        // Work on a copy of the current member list; add/remove report whether the set changed.
        Collection<RaftEndpoint> updatedMembers = new LinkedHashSet<>(raftState.members());
        switch (membershipChangeMode) {
            case ADD:
                if (!updatedMembers.add(member)) {
                    resultFuture.completeExceptionally(new MemberAlreadyExistsException(member));
                    return;
                }
                break;
            case REMOVE:
                if (!updatedMembers.remove(member)) {
                    resultFuture.completeExceptionally(new MemberDoesNotExistException(member));
                    return;
                }
                break;
            default:
                resultFuture.completeExceptionally(new IllegalArgumentException("Unknown type: " + membershipChangeMode));
                return;
        }

        logger.info("New members after " + membershipChangeMode + " " + member + " -> " + updatedMembers);

        UpdateRaftGroupMembersCmd updateCmd = new UpdateRaftGroupMembersCmd(updatedMembers, member, membershipChangeMode);
        ReplicateTask replicateTask = new ReplicateTask(raftNode, updateCmd, resultFuture);
        replicateTask.run();
    } catch (Throwable t) {
        logger.severe(this + " failed", t);

        // Report the currently known leader, if any, together with the failure.
        RaftEndpoint currentLeader = raftNode.getLeader();
        UUID leaderUuid = null;
        if (currentLeader != null) {
            leaderUuid = currentLeader.getUuid();
        }
        resultFuture.completeExceptionally(new CPSubsystemException("Internal failure", t, leaderUuid));
    }
}
use of com.hazelcast.cp.internal.raft.impl.command.UpdateRaftGroupMembersCmd in project hazelcast by hazelcast.
the class CPMemberAddRemoveTest method testRemoveMemberFromMajorityLostRaftGroup.
/**
 * Removes a crashed CP member whose "test" group can no longer commit, then
 * force-destroys that group and verifies that the removal completes and that
 * no membership change schedule is left in the metadata group.
 */
@Test
public void testRemoveMemberFromMajorityLostRaftGroup() throws ExecutionException, InterruptedException {
    HazelcastInstance[] instances = newInstances(3, 3, 0);
    waitAllForLeaderElection(instances, getMetadataGroupId(instances[0]));

    CPGroupId testGroupId = getRaftInvocationManager(instances[0]).createRaftGroup("test", 2).get();
    getRaftInvocationManager(instances[0]).invoke(testGroupId, new DummyOp()).get();
    RaftNodeImpl testGroupLeader = getLeaderNode(instances, testGroupId);

    CPGroup cpGroup = instances[0].getCPSubsystem()
                                  .getCPSubsystemManagementService()
                                  .getCPGroup(testGroupId.getName())
                                  .toCompletableFuture()
                                  .get();
    CPMember[] cpMembers = cpGroup.members().toArray(new CPMember[0]);

    // Crash a member of the "test" group that is not the group leader.
    CPMember crashedMember;
    if (cpMembers[0].getUuid().equals(testGroupLeader.getLocalMember().getUuid())) {
        crashedMember = cpMembers[1];
    } else {
        crashedMember = cpMembers[0];
    }

    // Drive the rest of the test through an instance that stays alive.
    HazelcastInstance survivor;
    if ((getAddress(instances[0])).equals(crashedMember.getAddress())) {
        survivor = instances[1];
    } else {
        survivor = instances[0];
    }
    RaftInvocationManager survivorInvocationManager = getRaftInvocationManager(survivor);

    factory.getInstance(crashedMember.getAddress()).getLifecycleService().terminate();

    // From this point the "test" group has lost its majority.
    // The removal of the crashed member is triggered, but it cannot be committed to the "test" group.
    CompletableFuture<Void> removalFuture = survivor.getCPSubsystem()
                                                    .getCPSubsystemManagementService()
                                                    .removeCPMember(crashedMember.getUuid())
                                                    .toCompletableFuture();

    // Wait until RaftCleanupHandler kicks in and an UpdateRaftGroupMembersCmd becomes
    // the last entry in the log of the "test" group leader.
    assertTrueEventually(() -> assertTrue(
            getLastLogOrSnapshotEntry(testGroupLeader).operation() instanceof UpdateRaftGroupMembersCmd));

    // Force-destroy the Raft group.
    // The pending membership change in the "test" group will now fail and is fixed in the metadata group.
    survivor.getCPSubsystem()
            .getCPSubsystemManagementService()
            .forceDestroyCPGroup(testGroupId.getName())
            .toCompletableFuture()
            .get();

    removalFuture.get();

    MembershipChangeSchedule pendingSchedule = survivorInvocationManager.<MembershipChangeSchedule>query(
            getMetadataGroupId(survivor), new GetMembershipChangeScheduleOp(), LINEARIZABLE).get();
    assertNull(pendingSchedule);
}
Aggregations