use of org.apache.ratis.protocol.RaftClientReply in project alluxio by Alluxio.
the class RaftJournalSystem method resetPriorities.
/**
 * Puts every peer of the raft group back on the same, neutral priority.
 *
 * <p>A copy of each current peer is built with priority 1 and the resulting list is submitted
 * as the new quorum configuration.
 *
 * @throws IOException if the client cannot be used or the configuration change is rejected
 */
public synchronized void resetPriorities() throws IOException {
  final int NEUTRAL_PRIORITY = 1;
  // Rebuild each peer from the existing one so only its priority changes.
  List<RaftPeer> neutralPeers = mRaftGroup.getPeers().stream()
      .map(current -> RaftPeer.newBuilder(current).setPriority(NEUTRAL_PRIORITY).build())
      .collect(Collectors.toList());
  LOG.info("Resetting RaftPeer priorities");
  try (RaftClient raftClient = createClient()) {
    RaftClientReply configReply = raftClient.admin().setConfiguration(neutralPeers);
    processReply(configReply, "failed to reset master priorities to 1");
  }
}
use of org.apache.ratis.protocol.RaftClientReply in project alluxio by Alluxio.
the class RaftJournalSystem method catchUp.
/**
* Attempts to catch up. If the master loses leadership during this method, it will return early.
*
* The caller is responsible for detecting and responding to leadership changes.
*
* Outline of the steps performed below:
* 1. Wait (up to 10 minutes) until the state machine reports it is no longer snapshotting.
* 2. Look up this server's commit index for progress logging; a failure here is only logged.
* 3. Repeatedly submit a journal entry carrying a random negative sequence number (the
*    "catchup ID") and wait until the state machine reports that ID as its last primary start
*    sequence number, with the last applied sequence number unchanged since the attempt began.
*
* @param stateMachine the state machine whose snapshotting state and sequence numbers are polled
* @param client the appender used to submit the catchup entry
* @throws TimeoutException if waiting for snapshotting to finish times out (timeouts inside the
*         retry loop are caught and cause a retry instead)
* @throws InterruptedException if the thread is interrupted while waiting or sleeping
*/
private void catchUp(JournalStateMachine stateMachine, RaftJournalAppender client) throws TimeoutException, InterruptedException {
long startTime = System.currentTimeMillis();
long waitBeforeRetry = ServerConfiguration.global().getMs(PropertyKey.MASTER_EMBEDDED_JOURNAL_CATCHUP_RETRY_WAIT);
// Wait for any outstanding snapshot to complete.
CommonUtils.waitFor("snapshotting to finish", () -> !stateMachine.isSnapshotting(), WaitForOptions.defaults().setTimeoutMs(10 * Constants.MINUTE_MS));
OptionalLong endCommitIndex = OptionalLong.empty();
try {
// Find the commit info entry that belongs to this server. Only the first match is used;
// the value is handed to the progress logger below, so it only
// affects the completion time estimate in the logs.
synchronized (this) {
// synchronized to appease findbugs; shouldn't make any difference
RaftPeerId serverId = mServer.getId();
Optional<RaftProtos.CommitInfoProto> commitInfo = getGroupInfo().getCommitInfos().stream().filter(commit -> serverId.equals(RaftPeerId.valueOf(commit.getServer().getId()))).findFirst();
if (commitInfo.isPresent()) {
endCommitIndex = OptionalLong.of(commitInfo.get().getCommitIndex());
} else {
throw new IOException("Commit info was not present. Couldn't find the current server's " + "latest commit");
}
}
} catch (IOException e) {
// Best effort: catch-up continues with an empty endCommitIndex.
LogUtils.warnWithException(LOG, "Failed to get raft log information before replay." + " Replay statistics will not be available", e);
}
// NOTE(review): this uses the field mStateMachine rather than the stateMachine parameter;
// presumably they refer to the same instance - confirm against the caller.
RaftJournalProgressLogger progressLogger = new RaftJournalProgressLogger(mStateMachine, endCommitIndex);
// Loop until the primary selector no longer reports PRIMARY, or until all checks below pass.
// Every failed attempt restarts from the top of the loop with a freshly generated catchup ID.
while (true) {
if (mPrimarySelector.getState() != PrimarySelector.State.PRIMARY) {
return;
}
long lastAppliedSN = stateMachine.getLastAppliedSequenceNumber();
// The catchup ID is drawn from [Long.MIN_VALUE, 0), i.e. it is always negative.
long gainPrimacySN = ThreadLocalRandom.current().nextLong(Long.MIN_VALUE, 0);
LOG.info("Performing catchup. Last applied SN: {}. Catchup ID: {}", lastAppliedSN, gainPrimacySN);
Exception ex;
try {
// Submit an entry whose sequence number is the catchup ID and wait at most 5 seconds for the reply.
CompletableFuture<RaftClientReply> future = client.sendAsync(toRaftMessage(JournalEntry.newBuilder().setSequenceNumber(gainPrimacySN).build()), TimeDuration.valueOf(5, TimeUnit.SECONDS));
RaftClientReply reply = future.get(5, TimeUnit.SECONDS);
ex = reply.getException();
} catch (TimeoutException | ExecutionException | IOException e) {
ex = e;
}
if (ex != null) {
// LeaderNotReadyException typically indicates Ratis is still replaying the journal.
if (ex instanceof LeaderNotReadyException) {
progressLogger.logProgress();
} else {
LOG.info("Exception submitting term start entry: {}", ex.toString());
}
// avoid excessive retries when server is not ready
Thread.sleep(waitBeforeRetry);
continue;
}
try {
// Wait up to 5 seconds for the state machine to report the catchup ID as its last primary start SN.
CommonUtils.waitFor("term start entry " + gainPrimacySN + " to be applied to state machine", () -> stateMachine.getLastPrimaryStartSequenceNumber() == gainPrimacySN, WaitForOptions.defaults().setInterval(Constants.SECOND_MS).setTimeoutMs(5 * Constants.SECOND_MS));
} catch (TimeoutException e) {
// Not applied in time: log and retry immediately with a new catchup ID (no sleep on this path).
LOG.info(e.toString());
continue;
}
// Require, within the max election timeout, that the last applied SN is still the value read at
// the start of this attempt and that the catchup ID is still the last primary start SN.
// NOTE(review): the original comment here was truncated ("... are not leader."); it appears to
// be about giving masters time to realize they are not leader - confirm.
try {
CommonUtils.waitFor("check primacySN " + gainPrimacySN + " and lastAppliedSN " + lastAppliedSN + " to be applied to leader", () -> stateMachine.getLastAppliedSequenceNumber() == lastAppliedSN && stateMachine.getLastPrimaryStartSequenceNumber() == gainPrimacySN, WaitForOptions.defaults().setInterval(Constants.SECOND_MS).setTimeoutMs((int) mConf.getMaxElectionTimeoutMs()));
} catch (TimeoutException e) {
// Restart the catchup process.
continue;
}
LOG.info("Caught up in {}ms. Last sequence number from previous term: {}.", System.currentTimeMillis() - startTime, stateMachine.getLastAppliedSequenceNumber());
return;
}
}
use of org.apache.ratis.protocol.RaftClientReply in project alluxio by Alluxio.
the class RaftJournalSystem method removeQuorumServer.
/**
 * Drops the server with the given address from the RAFT quorum.
 * The server is expected to be in unavailable state in the quorum before it is removed.
 *
 * @param serverNetAddress address of the server to remove from the quorum
 * @throws IOException if the reply to the configuration change carries an exception, or an
 *         I/O error occurs while performing the change
 */
public synchronized void removeQuorumServer(NetAddress serverNetAddress) throws IOException {
  // Derive the peer id to remove from an unresolved host/port pair.
  final RaftPeerId idToRemove = RaftJournalUtils.getPeerId(
      InetSocketAddress.createUnresolved(
          serverNetAddress.getHost(), serverNetAddress.getRpcPort()));
  try (RaftClient raftClient = createClient()) {
    // The new configuration is the first group's current peers minus the target.
    Collection<RaftPeer> currentPeers = mServer.getGroups().iterator().next().getPeers();
    List<RaftPeer> remainingPeers = currentPeers.stream()
        .filter(candidate -> !candidate.getId().equals(idToRemove))
        .collect(Collectors.toList());
    RaftClientReply configReply = raftClient.admin().setConfiguration(remainingPeers);
    if (configReply.getException() != null) {
      throw configReply.getException();
    }
  }
}
use of org.apache.ratis.protocol.RaftClientReply in project alluxio by Alluxio.
the class RaftJournalSystem method transferLeadership.
/**
 * Transfers the leadership of the quorum to another server.
 *
 * <p>The call first raises the priority of the target peer (2 for the target, 1 for all others)
 * and then starts a background thread that requests the actual leadership transfer after a
 * short delay. Failures are not thrown to the caller: they are caught, recorded in
 * {@code mErrorMessages} under the returned id, and transfers are allowed again.
 *
 * @param newLeaderNetAddress the address of the server
 * @return the guid of transfer leader command
 */
public synchronized String transferLeadership(NetAddress newLeaderNetAddress) {
  final boolean allowed = mTransferLeaderAllowed.getAndSet(false);
  String transferId = UUID.randomUUID().toString();
  if (!allowed) {
    String msg = "transfer is not allowed at the moment because the master is " + (mRaftJournalWriter == null ? "still gaining primacy" : "already transferring the ") + "leadership";
    mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(msg).build());
    return transferId;
  }
  try {
    InetSocketAddress serverAddress = InetSocketAddress.createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort());
    List<RaftPeer> oldPeers = new ArrayList<>(mRaftGroup.getPeers());
    // The NetUtil function is used by Ratis to convert InetSocketAddress to string
    String strAddr = NetUtils.address2String(serverAddress);
    // if you cannot find the address in the quorum, throw exception.
    if (oldPeers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) {
      throw new IOException(String.format("<%s> is not part of the quorum <%s>.", strAddr, oldPeers.stream().map(RaftPeer::getAddress).collect(Collectors.toList())));
    }
    if (strAddr.equals(mRaftGroup.getPeer(mPeerId).getAddress())) {
      throw new IOException(String.format("%s is already the leader", strAddr));
    }
    RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress);
    /* update priorities to enable transfer */
    List<RaftPeer> peersWithNewPriorities = new ArrayList<>();
    for (RaftPeer peer : oldPeers) {
      peersWithNewPriorities.add(RaftPeer.newBuilder(peer).setPriority(peer.getId().equals(newLeaderPeerId) ? 2 : 1).build());
    }
    try (RaftClient client = createClient()) {
      String stringPeers = "[" + peersWithNewPriorities.stream().map(RaftPeer::toString).collect(Collectors.joining(", ")) + "]";
      LOG.info("Applying new peer state before transferring leadership: {}", stringPeers);
      RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriorities);
      processReply(reply, "failed to set master priorities before initiating election");
      /* transfer leadership */
      LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>", serverAddress, newLeaderPeerId);
      // fire and forget: need to immediately return as the master will shut down its RPC servers
      // once the TransferLeadershipRequest is initiated.
      final int SLEEP_TIME_MS = 3_000;
      final int TRANSFER_LEADER_WAIT_MS = 30_000;
      new Thread(() -> {
        // The background thread must own its client: the enclosing try-with-resources closes
        // `client` as soon as this method leaves the block, which happens long before the
        // delayed transfer request below is issued.
        try (RaftClient transferClient = createClient()) {
          Thread.sleep(SLEEP_TIME_MS);
          RaftClientReply reply1 = transferClient.admin().transferLeadership(newLeaderPeerId, TRANSFER_LEADER_WAIT_MS);
          processReply(reply1, "election failed");
        } catch (Throwable t) {
          LOG.error("caught an error when executing transfer: {}", t.getMessage());
          // we only allow transfers again if the transfer is unsuccessful: a success means it
          // will soon lose primacy
          mTransferLeaderAllowed.set(true);
          mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(t.getMessage()).build());
          /* checking the transfer happens in {@link QuorumElectCommand} */
        }
      }).start();
      LOG.info("Transferring leadership initiated");
    }
  } catch (Throwable t) {
    // Any failure before the background thread takes over re-enables transfers and is
    // reported through the error message map rather than thrown.
    mTransferLeaderAllowed.set(true);
    LOG.warn(t.getMessage());
    mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(t.getMessage()).build());
  }
  return transferId;
}
use of org.apache.ratis.protocol.RaftClientReply in project alluxio by Alluxio.
the class RaftJournalWriter method flush.
/**
 * Submits the pending journal entry, if any, and waits for the reply.
 *
 * <p>The last submitted sequence number is recorded before sending; the last committed sequence
 * number is advanced only after a reply without an exception has been received, so a rejected
 * write is never recorded as committed. The pending entry builder is cleared only on success.
 *
 * @throws IOException if the reply carries an exception, the send fails, the wait times out,
 *         or the thread is interrupted while waiting
 * @throws JournalClosedException if the writer has already been closed
 */
@Override
public void flush() throws IOException, JournalClosedException {
  if (mClosed) {
    throw new JournalClosedException("Cannot flush. Journal writer has been closed");
  }
  if (mJournalEntryBuilder != null) {
    long flushSN = mNextSequenceNumberToWrite.get() - 1;
    try {
      // It is ok to submit the same entries multiple times because we de-duplicate by sequence
      // number when applying them. This could happen if submit fails and we re-submit the same
      // entry on retry.
      JournalEntry entry = mJournalEntryBuilder.build();
      Message message = RaftJournalSystem.toRaftMessage(entry);
      mLastSubmittedSequenceNumber.set(flushSN);
      LOG.trace("Flushing entry {} ({})", entry, message);
      RaftClientReply reply = mClient.sendAsync(message, TimeDuration.valueOf(mWriteTimeoutMs, TimeUnit.MILLISECONDS)).get(mWriteTimeoutMs, TimeUnit.MILLISECONDS);
      // Check the reply before touching the committed marker: a reply that carries an
      // exception means the entry was not accepted.
      if (reply.getException() != null) {
        throw reply.getException();
      }
      mLastCommittedSequenceNumber.set(flushSN);
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up can observe it.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    } catch (ExecutionException e) {
      throw new IOException(e.getCause());
    } catch (TimeoutException e) {
      throw new IOException(String.format("Timed out after waiting %s milliseconds for journal entries to be processed", mWriteTimeoutMs), e);
    }
    mJournalEntryBuilder = null;
  }
}
Aggregations