Use of org.apache.ratis.protocol.RaftPeerId in project incubator-ratis by apache.
The class LeaderElectionTests, method testEnforceLeader.
@Test
public void testEnforceLeader() throws Exception {
  LOG.info("Running testEnforceLeader");
  final int numServer = 5;
  try (final MiniRaftCluster cluster = newCluster(numServer)) {
    cluster.start();
    final RaftPeerId firstLeader = waitForLeader(cluster).getId();
    LOG.info("firstLeader = {}", firstLeader);
    final int first = MiniRaftCluster.getIdIndex(firstLeader.toString());

    // Pick a random server other than the current leader and enforce it as the new leader.
    final int random = ThreadLocalRandom.current().nextInt(numServer - 1);
    final String newLeader = "s" + (random < first ? random : random + 1);
    LOG.info("enforce leader to {}", newLeader);
    enforceLeader(cluster, newLeader, LOG);
  }
}
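The index arithmetic above draws from numServer - 1 values and shifts past the leader's index, which guarantees the chosen peer differs from firstLeader. Below is a minimal standalone sketch of the same idea, assuming the MiniRaftCluster convention of peer ids "s0".."s{n-1}"; PickOtherLeaderSketch and pickOther are hypothetical names, not part of the test.

import java.util.concurrent.ThreadLocalRandom;
import org.apache.ratis.protocol.RaftPeerId;

public class PickOtherLeaderSketch {
  /** Pick a peer id "s0".."s{n-1}" different from the current leader. */
  static RaftPeerId pickOther(RaftPeerId leader, int numServer) {
    final int leaderIndex = Integer.parseInt(leader.toString().substring(1)); // "s3" -> 3
    final int random = ThreadLocalRandom.current().nextInt(numServer - 1);
    // Shift by one when the draw lands on or after the leader's index, so the leader is skipped.
    final int chosen = random < leaderIndex ? random : random + 1;
    return RaftPeerId.valueOf("s" + chosen);
  }

  public static void main(String[] args) {
    RaftPeerId leader = RaftPeerId.valueOf("s2");
    System.out.println("new leader candidate: " + pickOther(leader, 5));
  }
}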
Use of org.apache.ratis.protocol.RaftPeerId in project incubator-ratis by apache.
The class InstallSnapshotNotificationTests, method testInstallSnapshotDuringBootstrap.
private void testInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception {
  leaderSnapshotInfoRef.set(null);
  numSnapshotRequests.set(0);
  int i = 0;
  try {
    RaftTestUtil.waitForLeader(cluster);
    final RaftPeerId leaderId = cluster.getLeader().getId();

    try (final RaftClient client = cluster.createClient(leaderId)) {
      for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
        RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i));
        Assert.assertTrue(reply.isSuccess());
      }
    }

    // wait for the snapshot to be done
    final RaftServer.Division leader = cluster.getLeader();
    final long nextIndex = leader.getRaftLog().getNextIndex();
    LOG.info("nextIndex = {}", nextIndex);
    final List<File> snapshotFiles = RaftSnapshotBaseTest.getSnapshotFiles(cluster,
        nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex);
    JavaUtils.attemptRepeatedly(() -> {
      Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists));
      return null;
    }, 10, ONE_SECOND, "snapshotFile.exist", LOG);
    RaftSnapshotBaseTest.assertLeaderContent(cluster);

    final SnapshotInfo leaderSnapshotInfo = cluster.getLeader().getStateMachine().getLatestSnapshot();
    final boolean set = leaderSnapshotInfoRef.compareAndSet(null, leaderSnapshotInfo);
    Assert.assertTrue(set);

    // add two more peers
    final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, true);
    // trigger setConfiguration
    cluster.setConfiguration(change.allPeersInNewConf);
    RaftServerTestUtil.waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null);

    // Check that the installed snapshot index on each follower matches the leader snapshot.
    for (RaftServer.Division follower : cluster.getFollowers()) {
      Assert.assertEquals(leaderSnapshotInfo.getIndex(),
          RaftServerTestUtil.getLatestInstalledSnapshotIndex(follower));
    }
    // Make sure each new peer got one snapshot notification.
    Assert.assertEquals(2, numSnapshotRequests.get());
  } finally {
    cluster.shutdown();
  }
}
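If the new peers install the snapshot asynchronously, the per-follower assertion can be wrapped in the same retry helper the test already uses for the snapshot files. A sketch, reusing MiniRaftCluster, RaftServerTestUtil, JavaUtils, ONE_SECOND and LOG from the snippet above; waitForFollowersToInstall is a hypothetical name.

// Hypothetical helper: retry until every follower reports the leader's snapshot index.
static void waitForFollowersToInstall(MiniRaftCluster cluster, long expectedIndex)
    throws Exception {
  JavaUtils.attemptRepeatedly(() -> {
    for (RaftServer.Division follower : cluster.getFollowers()) {
      Assert.assertEquals(expectedIndex,
          RaftServerTestUtil.getLatestInstalledSnapshotIndex(follower));
    }
    return null;
  }, 10, ONE_SECOND, "followers installed leader snapshot", LOG);
}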
Use of org.apache.ratis.protocol.RaftPeerId in project alluxio by Alluxio.
The class RaftJournalSystem, method catchUp.
/**
* Attempts to catch up. If the master loses leadership during this method, it will return early.
*
* The caller is responsible for detecting and responding to leadership changes.
*/
private void catchUp(JournalStateMachine stateMachine, RaftJournalAppender client)
    throws TimeoutException, InterruptedException {
  long startTime = System.currentTimeMillis();
  long waitBeforeRetry = ServerConfiguration.global()
      .getMs(PropertyKey.MASTER_EMBEDDED_JOURNAL_CATCHUP_RETRY_WAIT);
  // Wait for any outstanding snapshot to complete.
  CommonUtils.waitFor("snapshotting to finish", () -> !stateMachine.isSnapshotting(),
      WaitForOptions.defaults().setTimeoutMs(10 * Constants.MINUTE_MS));
  OptionalLong endCommitIndex = OptionalLong.empty();
  try {
    // Look up this server's latest commit index. Failing here is not fatal; the index only
    // affects the completion time estimate in the logs.
    synchronized (this) {
      // synchronized to appease findbugs; shouldn't make any difference
      RaftPeerId serverId = mServer.getId();
      Optional<RaftProtos.CommitInfoProto> commitInfo = getGroupInfo().getCommitInfos().stream()
          .filter(commit -> serverId.equals(RaftPeerId.valueOf(commit.getServer().getId())))
          .findFirst();
      if (commitInfo.isPresent()) {
        endCommitIndex = OptionalLong.of(commitInfo.get().getCommitIndex());
      } else {
        throw new IOException("Commit info was not present. Couldn't find the current server's "
            + "latest commit");
      }
    }
  } catch (IOException e) {
    LogUtils.warnWithException(LOG, "Failed to get raft log information before replay."
        + " Replay statistics will not be available", e);
  }
  RaftJournalProgressLogger progressLogger =
      new RaftJournalProgressLogger(mStateMachine, endCommitIndex);

  // Submit a marker entry for the new term and wait until the state machine has applied it.
  // On any failure, wait and check that this master is still the
  // leader before trying again.
  while (true) {
    if (mPrimarySelector.getState() != PrimarySelector.State.PRIMARY) {
      return;
    }
    long lastAppliedSN = stateMachine.getLastAppliedSequenceNumber();
    long gainPrimacySN = ThreadLocalRandom.current().nextLong(Long.MIN_VALUE, 0);
    LOG.info("Performing catchup. Last applied SN: {}. Catchup ID: {}",
        lastAppliedSN, gainPrimacySN);
    Exception ex;
    try {
      CompletableFuture<RaftClientReply> future = client.sendAsync(
          toRaftMessage(JournalEntry.newBuilder().setSequenceNumber(gainPrimacySN).build()),
          TimeDuration.valueOf(5, TimeUnit.SECONDS));
      RaftClientReply reply = future.get(5, TimeUnit.SECONDS);
      ex = reply.getException();
    } catch (TimeoutException | ExecutionException | IOException e) {
      ex = e;
    }
    if (ex != null) {
      // LeaderNotReadyException typically indicates Ratis is still replaying the journal.
      if (ex instanceof LeaderNotReadyException) {
        progressLogger.logProgress();
      } else {
        LOG.info("Exception submitting term start entry: {}", ex.toString());
      }
      // avoid excessive retries when server is not ready
      Thread.sleep(waitBeforeRetry);
      continue;
    }
    try {
      CommonUtils.waitFor("term start entry " + gainPrimacySN
          + " to be applied to state machine",
          () -> stateMachine.getLastPrimaryStartSequenceNumber() == gainPrimacySN,
          WaitForOptions.defaults()
              .setInterval(Constants.SECOND_MS)
              .setTimeoutMs(5 * Constants.SECOND_MS));
    } catch (TimeoutException e) {
      LOG.info(e.toString());
      continue;
    }
    // Wait up to an election timeout to verify that no further entries are applied after the
    // marker entry; if entries keep arriving, another master may still be writing and we
    // are not leader.
    try {
      CommonUtils.waitFor("check primacySN " + gainPrimacySN + " and lastAppliedSN "
          + lastAppliedSN + " to be applied to leader",
          () -> stateMachine.getLastAppliedSequenceNumber() == lastAppliedSN
              && stateMachine.getLastPrimaryStartSequenceNumber() == gainPrimacySN,
          WaitForOptions.defaults()
              .setInterval(Constants.SECOND_MS)
              .setTimeoutMs((int) mConf.getMaxElectionTimeoutMs()));
    } catch (TimeoutException e) {
      // Restart the catchup process.
      continue;
    }
    LOG.info("Caught up in {}ms. Last sequence number from previous term: {}.",
        System.currentTimeMillis() - startTime, stateMachine.getLastAppliedSequenceNumber());
    return;
  }
}
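The synchronized block near the top is essentially a lookup of this server's own commit index keyed by RaftPeerId. A minimal sketch of that lookup in isolation, assuming getGroupInfo() returns a Ratis GroupInfoReply as it does in this class; findCommitIndex is a hypothetical name.

import java.util.OptionalLong;
import org.apache.ratis.proto.RaftProtos;
import org.apache.ratis.protocol.GroupInfoReply;
import org.apache.ratis.protocol.RaftPeerId;

// Sketch: find the commit index reported for a given server id in a GroupInfoReply.
static OptionalLong findCommitIndex(GroupInfoReply groupInfo, RaftPeerId serverId) {
  return groupInfo.getCommitInfos().stream()
      .filter(commit -> serverId.equals(RaftPeerId.valueOf(commit.getServer().getId())))
      .mapToLong(RaftProtos.CommitInfoProto::getCommitIndex)
      .findFirst();
}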
Use of org.apache.ratis.protocol.RaftPeerId in project alluxio by Alluxio.
The class RaftJournalSystem, method removeQuorumServer.
/**
 * Removes a server with the given address from the Raft quorum.
 * The server to be removed should be in an unavailable state in the quorum.
 *
 * @param serverNetAddress address of the server to remove from the quorum
 * @throws IOException if the configuration change is rejected or cannot be submitted
 */
public synchronized void removeQuorumServer(NetAddress serverNetAddress) throws IOException {
  InetSocketAddress serverAddress = InetSocketAddress
      .createUnresolved(serverNetAddress.getHost(), serverNetAddress.getRpcPort());
  RaftPeerId peerId = RaftJournalUtils.getPeerId(serverAddress);
  try (RaftClient client = createClient()) {
    Collection<RaftPeer> peers = mServer.getGroups().iterator().next().getPeers();
    RaftClientReply reply = client.admin().setConfiguration(peers.stream()
        .filter(peer -> !peer.getId().equals(peerId))
        .collect(Collectors.toList()));
    if (reply.getException() != null) {
      throw reply.getException();
    }
  }
}
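A hedged usage sketch, assuming NetAddress is the alluxio.grpc.NetAddress protobuf used elsewhere in Alluxio and that mJournalSystem is a reference to this RaftJournalSystem; the host name and port are illustrative only (19200 is Alluxio's default embedded journal port).

// Hypothetical usage: evict an unavailable standby master at host "master2".
NetAddress downedMaster = NetAddress.newBuilder()
    .setHost("master2")
    .setRpcPort(19200)
    .build();
mJournalSystem.removeQuorumServer(downedMaster); // throws IOException if the change is rejected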
Use of org.apache.ratis.protocol.RaftPeerId in project alluxio by Alluxio.
The class RaftJournalSystem, method transferLeadership.
/**
 * Transfers the leadership of the quorum to another server.
 *
 * @param newLeaderNetAddress the address of the server that should become the new leader
 * @return the guid of the transfer leader command
 */
public synchronized String transferLeadership(NetAddress newLeaderNetAddress) {
  final boolean allowed = mTransferLeaderAllowed.getAndSet(false);
  String transferId = UUID.randomUUID().toString();
  if (!allowed) {
    String msg = "transfer is not allowed at the moment because the master is "
        + (mRaftJournalWriter == null ? "still gaining primacy" : "already transferring the ")
        + "leadership";
    mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(msg).build());
    return transferId;
  }
  try {
    InetSocketAddress serverAddress = InetSocketAddress
        .createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort());
    List<RaftPeer> oldPeers = new ArrayList<>(mRaftGroup.getPeers());
    // The NetUtil function is used by Ratis to convert InetSocketAddress to string
    String strAddr = NetUtils.address2String(serverAddress);
    // if the address cannot be found in the quorum, throw an exception
    if (oldPeers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) {
      throw new IOException(String.format("<%s> is not part of the quorum <%s>.",
          strAddr, oldPeers.stream().map(RaftPeer::getAddress).collect(Collectors.toList())));
    }
    if (strAddr.equals(mRaftGroup.getPeer(mPeerId).getAddress())) {
      throw new IOException(String.format("%s is already the leader", strAddr));
    }

    RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress);
    /* update priorities to enable transfer */
    List<RaftPeer> peersWithNewPriorities = new ArrayList<>();
    for (RaftPeer peer : oldPeers) {
      peersWithNewPriorities.add(RaftPeer.newBuilder(peer)
          .setPriority(peer.getId().equals(newLeaderPeerId) ? 2 : 1)
          .build());
    }
    try (RaftClient client = createClient()) {
      String stringPeers = "[" + peersWithNewPriorities.stream()
          .map(RaftPeer::toString).collect(Collectors.joining(", ")) + "]";
      LOG.info("Applying new peer state before transferring leadership: {}", stringPeers);
      RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriorities);
      processReply(reply, "failed to set master priorities before initiating election");
      /* transfer leadership */
      LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>",
          serverAddress, newLeaderPeerId);
      // fire and forget: need to immediately return as the master will shut down its RPC servers
      // once the TransferLeadershipRequest is initiated.
      final int SLEEP_TIME_MS = 3_000;
      final int TRANSFER_LEADER_WAIT_MS = 30_000;
      new Thread(() -> {
        try {
          Thread.sleep(SLEEP_TIME_MS);
          RaftClientReply reply1 =
              client.admin().transferLeadership(newLeaderPeerId, TRANSFER_LEADER_WAIT_MS);
          processReply(reply1, "election failed");
        } catch (Throwable t) {
          LOG.error("caught an error when executing transfer: {}", t.getMessage());
          // we only allow transfers again if the transfer is unsuccessful: a success means this
          // master will soon lose primacy
          mTransferLeaderAllowed.set(true);
          mErrorMessages.put(transferId,
              TransferLeaderMessage.newBuilder().setMsg(t.getMessage()).build());
          /* checking the transfer happens in {@link QuorumElectCommand} */
        }
      }).start();
      LOG.info("Transferring leadership initiated");
    }
  } catch (Throwable t) {
    mTransferLeaderAllowed.set(true);
    LOG.warn(t.getMessage());
    mErrorMessages.put(transferId,
        TransferLeaderMessage.newBuilder().setMsg(t.getMessage()).build());
  }
  return transferId;
}