Search in sources :

Example 1 with LeaderNotReadyException

use of org.apache.ratis.protocol.exceptions.LeaderNotReadyException in project incubator-ratis by apache.

the class ClientProtoUtils method toRaftClientReply.

/**
 * Converts a {@link RaftClientReplyProto} into a {@link RaftClientReply}.
 *
 * The exception attached to the reply is chosen by the proto's exception-details case.
 * When the case matches none of the handled constants, the reply is built with a
 * {@code null} exception.
 *
 * @param replyProto the proto to convert
 * @return a reply populated from the fields of {@code replyProto}
 */
static RaftClientReply toRaftClientReply(RaftClientReplyProto replyProto) {
    final RaftRpcReplyProto rpcReply = replyProto.getRpcReply();
    final RaftGroupMemberId memberId = ProtoUtils.toRaftGroupMemberId(rpcReply.getReplyId(), rpcReply.getRaftGroupId());

    final RaftException exception;
    switch (replyProto.getExceptionDetailsCase()) {
        case NOTLEADEREXCEPTION: {
            final NotLeaderExceptionProto notLeader = replyProto.getNotLeaderException();
            // The suggested leader is optional in the proto; absent means null.
            final RaftPeer suggested;
            if (notLeader.hasSuggestedLeader()) {
                suggested = ProtoUtils.toRaftPeer(notLeader.getSuggestedLeader());
            } else {
                suggested = null;
            }
            final List<RaftPeer> confPeers = ProtoUtils.toRaftPeers(notLeader.getPeersInConfList());
            exception = new NotLeaderException(memberId, suggested, confPeers);
            break;
        }
        case NOTREPLICATEDEXCEPTION: {
            final NotReplicatedExceptionProto notReplicated = replyProto.getNotReplicatedException();
            exception = new NotReplicatedException(notReplicated.getCallId(), notReplicated.getReplication(), notReplicated.getLogIndex());
            break;
        }
        case STATEMACHINEEXCEPTION:
            exception = toStateMachineException(memberId, replyProto.getStateMachineException());
            break;
        case DATASTREAMEXCEPTION:
            exception = ProtoUtils.toThrowable(replyProto.getDataStreamException(), DataStreamException.class);
            break;
        case LEADERNOTREADYEXCEPTION: {
            final LeaderNotReadyExceptionProto notReady = replyProto.getLeaderNotReadyException();
            exception = new LeaderNotReadyException(ProtoUtils.toRaftGroupMemberId(notReady.getServerId()));
            break;
        }
        case ALREADYCLOSEDEXCEPTION:
            exception = toAlreadyClosedException(replyProto.getAlreadyClosedException());
            break;
        case LEADERSTEPPINGDOWNEXCEPTION:
            exception = ProtoUtils.toThrowable(replyProto.getLeaderSteppingDownException(), LeaderSteppingDownException.class);
            break;
        case TRANSFERLEADERSHIPEXCEPTION:
            exception = ProtoUtils.toThrowable(replyProto.getTransferLeadershipException(), TransferLeadershipException.class);
            break;
        default:
            // No recognised exception details: the reply carries no exception.
            exception = null;
            break;
    }

    return RaftClientReply.newBuilder()
        .setClientId(ClientId.valueOf(rpcReply.getRequestorId()))
        .setServerId(memberId)
        .setCallId(rpcReply.getCallId())
        .setSuccess(rpcReply.getSuccess())
        .setMessage(toMessage(replyProto.getMessage()))
        .setException(exception)
        .setLogIndex(replyProto.getLogIndex())
        .setCommitInfos(replyProto.getCommitInfosList())
        .build();
}
Also used : RaftException(org.apache.ratis.protocol.exceptions.RaftException) NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) NotReplicatedException(org.apache.ratis.protocol.exceptions.NotReplicatedException) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) LeaderSteppingDownException(org.apache.ratis.protocol.exceptions.LeaderSteppingDownException)

Example 2 with LeaderNotReadyException

use of org.apache.ratis.protocol.exceptions.LeaderNotReadyException in project incubator-ratis by apache.

the class RaftClientImpl method handleIOException.

/**
 * Reacts to an {@link IOException} raised while sending {@code request}.
 *
 * The optional {@code handler} is always invoked with the request first. For
 * {@link LeaderNotReadyException} and {@link ResourceUnavailableException} nothing else
 * happens. Otherwise the method may update {@code leaderId} and asks {@code clientRpc}
 * to handle the exception.
 *
 * @param request the request that failed
 * @param ioe the failure
 * @param newLeader a suggested new leader, or {@code null} if none was suggested
 * @param handler optional callback given the failed request; may be {@code null}
 */
void handleIOException(RaftClientRequest request, IOException ioe, RaftPeerId newLeader, Consumer<RaftClientRequest> handler) {
    LOG.debug("{}: suggested new leader: {}. Failed {} with {}", clientId, newLeader, request, ioe);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Stack trace", new Throwable("TRACE"));
    }

    if (handler != null) {
        handler.accept(request);
    }

    // These two failure types leave the leader and connection state untouched.
    if (ioe instanceof LeaderNotReadyException) {
        return;
    }
    if (ioe instanceof ResourceUnavailableException) {
        return;
    }

    final RaftPeerId previousLeader = request.getServerId();
    final RaftPeerId currentLeader = leaderId;
    final boolean leaderUnchanged = previousLeader.equals(currentLeader);

    // Without a suggestion, and with the leader unchanged since the request was sent,
    // pick a random peer other than the previous leader.
    final RaftPeerId candidate;
    if (newLeader != null || !leaderUnchanged) {
        candidate = newLeader;
    } else {
        candidate = CollectionUtils.random(previousLeader, CollectionUtils.as(peers, RaftPeer::getId));
    }
    LOG.debug("{}: oldLeader={},  curLeader={}, newLeader={}", clientId, previousLeader, currentLeader, candidate);

    final boolean changeLeader = candidate != null && leaderUnchanged;
    if (!changeLeader && !clientRpc.shouldReconnect(ioe)) {
        return;
    }

    // leaderId is read again here, so the switch only happens if it still equals the previous leader.
    if (changeLeader && previousLeader.equals(leaderId)) {
        LOG.debug("{} {}: client change Leader from {} to {} ex={}", groupId, clientId, previousLeader, candidate, ioe.getClass().getName());
        this.leaderId = candidate;
    }
    clientRpc.handleException(previousLeader, ioe, true);
}
Also used : ResourceUnavailableException(org.apache.ratis.protocol.exceptions.ResourceUnavailableException) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftPeer(org.apache.ratis.protocol.RaftPeer)

Example 3 with LeaderNotReadyException

use of org.apache.ratis.protocol.exceptions.LeaderNotReadyException in project incubator-ratis by apache.

the class RaftServerImpl method checkLeaderState.

/**
 * Checks whether this server can serve {@code request} as leader.
 *
 * Checks are made in this order: group match, leadership, leader readiness and,
 * for writes only, whether the leader is stepping down.
 *
 * @param request the client request being checked
 * @param entry the cache entry passed on to {@code RetryCacheImpl} when a check fails
 * @param isWrite whether the stepping-down check applies
 * @return null if the server is in leader state (every check passed); otherwise a future
 *         for the failure, or the cached reply future when the leader is not ready but
 *         the retry cache holds a normally completed entry for the request.
 */
private CompletableFuture<RaftClientReply> checkLeaderState(RaftClientRequest request, CacheEntry entry, boolean isWrite) {
    try {
        assertGroup(request.getRequestorId(), request.getRaftGroupId());
    } catch (GroupMismatchException mismatch) {
        return RetryCacheImpl.failWithException(mismatch, entry);
    }

    if (!getInfo().isLeader()) {
        return RetryCacheImpl.failWithReply(newExceptionReply(request, generateNotLeaderException()), entry);
    }

    if (!getInfo().isLeaderReady()) {
        // A normally completed entry in the retry cache is answered from the cache.
        final CacheEntry cached = retryCache.getIfPresent(ClientInvocationId.valueOf(request));
        final boolean completed = cached != null && cached.isCompletedNormally();
        if (completed) {
            return cached.getReplyFuture();
        }
        final LeaderNotReadyException notReady = new LeaderNotReadyException(getMemberId());
        return RetryCacheImpl.failWithReply(newExceptionReply(request, notReady), entry);
    }

    if (isWrite && isSteppingDown()) {
        final LeaderSteppingDownException steppingDown = new LeaderSteppingDownException(getMemberId() + " is stepping down");
        return RetryCacheImpl.failWithReply(newExceptionReply(request, steppingDown), entry);
    }

    return null;
}
Also used : NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) GroupMismatchException(org.apache.ratis.protocol.exceptions.GroupMismatchException) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) CacheEntry(org.apache.ratis.server.impl.RetryCacheImpl.CacheEntry) LeaderSteppingDownException(org.apache.ratis.protocol.exceptions.LeaderSteppingDownException)

Example 4 with LeaderNotReadyException

use of org.apache.ratis.protocol.exceptions.LeaderNotReadyException in project alluxio by Alluxio.

the class RaftJournalSystem method catchUp.

/**
 * Attempts to catch up. If the master loses leadership during this method, it will return early.
 *
 * The caller is responsible for detecting and responding to leadership changes.
 *
 * @param stateMachine the state machine whose snapshotting status and sequence numbers are polled
 * @param client the appender used to submit the catch-up entry
 * @throws TimeoutException presumably when the initial wait for snapshotting (10 minutes) expires;
 *         timeouts inside the retry loop are caught and retried
 * @throws InterruptedException if the thread is interrupted while sleeping or waiting
 */
private void catchUp(JournalStateMachine stateMachine, RaftJournalAppender client) throws TimeoutException, InterruptedException {
    long startTime = System.currentTimeMillis();
    long waitBeforeRetry = ServerConfiguration.global().getMs(PropertyKey.MASTER_EMBEDDED_JOURNAL_CATCHUP_RETRY_WAIT);
    // Wait for any outstanding snapshot to complete.
    CommonUtils.waitFor("snapshotting to finish", () -> !stateMachine.isSnapshotting(), WaitForOptions.defaults().setTimeoutMs(10 * Constants.MINUTE_MS));
    OptionalLong endCommitIndex = OptionalLong.empty();
    try {
        // Look up this server's commit index in the group's commit infos; only the first match
        // is used. The value is only handed to the progress logger below, so (per the original
        // comment) it only affects the completion time estimate in the logs.
        synchronized (this) {
            // synchronized to appease findbugs; shouldn't make any difference
            RaftPeerId serverId = mServer.getId();
            Optional<RaftProtos.CommitInfoProto> commitInfo = getGroupInfo().getCommitInfos().stream().filter(commit -> serverId.equals(RaftPeerId.valueOf(commit.getServer().getId()))).findFirst();
            if (commitInfo.isPresent()) {
                endCommitIndex = OptionalLong.of(commitInfo.get().getCommitIndex());
            } else {
                throw new IOException("Commit info was not present. Couldn't find the current server's " + "latest commit");
            }
        }
    } catch (IOException e) {
        // Best effort: on failure endCommitIndex stays empty and catch-up proceeds anyway.
        LogUtils.warnWithException(LOG, "Failed to get raft log information before replay." + " Replay statistics will not be available", e);
    }
    RaftJournalProgressLogger progressLogger = new RaftJournalProgressLogger(mStateMachine, endCommitIndex);
    // Loop until this master is no longer PRIMARY or catch-up is confirmed. Each iteration:
    //   1. submits an entry carrying a fresh random negative sequence number (the catch-up ID),
    //   2. waits for the state machine to report that ID as its last primary start sequence number,
    //   3. waits for the last applied sequence number to equal the value read at the start of
    //      the iteration while the catch-up ID is still the last primary start sequence number.
    // Any failure or timeout restarts the loop.
    while (true) {
        if (mPrimarySelector.getState() != PrimarySelector.State.PRIMARY) {
            return;
        }
        long lastAppliedSN = stateMachine.getLastAppliedSequenceNumber();
        // nextLong(Long.MIN_VALUE, 0) excludes the upper bound, so the catch-up ID is always negative.
        long gainPrimacySN = ThreadLocalRandom.current().nextLong(Long.MIN_VALUE, 0);
        LOG.info("Performing catchup. Last applied SN: {}. Catchup ID: {}", lastAppliedSN, gainPrimacySN);
        // Holds either the exception reported in the reply or the one thrown while sending/waiting.
        Exception ex;
        try {
            CompletableFuture<RaftClientReply> future = client.sendAsync(toRaftMessage(JournalEntry.newBuilder().setSequenceNumber(gainPrimacySN).build()), TimeDuration.valueOf(5, TimeUnit.SECONDS));
            RaftClientReply reply = future.get(5, TimeUnit.SECONDS);
            ex = reply.getException();
        } catch (TimeoutException | ExecutionException | IOException e) {
            ex = e;
        }
        if (ex != null) {
            // LeaderNotReadyException typically indicates Ratis is still replaying the journal.
            if (ex instanceof LeaderNotReadyException) {
                progressLogger.logProgress();
            } else {
                LOG.info("Exception submitting term start entry: {}", ex.toString());
            }
            // avoid excessive retries when server is not ready
            Thread.sleep(waitBeforeRetry);
            continue;
        }
        try {
            CommonUtils.waitFor("term start entry " + gainPrimacySN + " to be applied to state machine", () -> stateMachine.getLastPrimaryStartSequenceNumber() == gainPrimacySN, WaitForOptions.defaults().setInterval(Constants.SECOND_MS).setTimeoutMs(5 * Constants.SECOND_MS));
        } catch (TimeoutException e) {
            // The entry was not observed within 5 seconds: log and restart without sleeping.
            LOG.info(e.toString());
            continue;
        }
        // Wait up to the max election timeout for the state machine to show no newly applied
        // entries since the start of this iteration and the catch-up ID still in place.
        // NOTE(review): the original comment is truncated ("... are not leader."); presumably the
        // wait gives other masters time to realize they are not leader -- confirm upstream.
        try {
            CommonUtils.waitFor("check primacySN " + gainPrimacySN + " and lastAppliedSN " + lastAppliedSN + " to be applied to leader", () -> stateMachine.getLastAppliedSequenceNumber() == lastAppliedSN && stateMachine.getLastPrimaryStartSequenceNumber() == gainPrimacySN, WaitForOptions.defaults().setInterval(Constants.SECOND_MS).setTimeoutMs((int) mConf.getMaxElectionTimeoutMs()));
        } catch (TimeoutException e) {
            // Restart the catchup process.
            continue;
        }
        LOG.info("Caught up in {}ms. Last sequence number from previous term: {}.", System.currentTimeMillis() - startTime, stateMachine.getLastAppliedSequenceNumber());
        return;
    }
}
Also used : Arrays(java.util.Arrays) GroupInfoReply(org.apache.ratis.protocol.GroupInfoReply) RaftGroup(org.apache.ratis.protocol.RaftGroup) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PropertyKey(alluxio.conf.PropertyKey) GrpcService(alluxio.grpc.GrpcService) LogUtils(alluxio.util.LogUtils) NetUtils(org.apache.ratis.util.NetUtils) JournalQueryRequest(alluxio.grpc.JournalQueryRequest) MetricKey(alluxio.metrics.MetricKey) Map(java.util.Map) RaftConfigKeys(org.apache.ratis.RaftConfigKeys) CancelledException(alluxio.exception.status.CancelledException) UnsafeByteOperations(org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations) QuorumServerInfo(alluxio.grpc.QuorumServerInfo) ServerConfiguration(alluxio.conf.ServerConfiguration) RaftPeer(org.apache.ratis.protocol.RaftPeer) RetryPolicy(org.apache.ratis.retry.RetryPolicy) Master(alluxio.master.Master) Collection(java.util.Collection) AbstractJournalSystem(alluxio.master.journal.AbstractJournalSystem) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) SupportedRpcType(org.apache.ratis.rpc.SupportedRpcType) CompletionException(java.util.concurrent.CompletionException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) UUID(java.util.UUID) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) List(java.util.List) ClientId(org.apache.ratis.protocol.ClientId) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) RaftProperties(org.apache.ratis.conf.RaftProperties) ServiceType(alluxio.grpc.ServiceType) ExponentialBackoffRetry(org.apache.ratis.retry.ExponentialBackoffRetry) Optional(java.util.Optional) PrimarySelector(alluxio.master.PrimarySelector) RatisDropwizardExports(alluxio.metrics.sink.RatisDropwizardExports) AccessDeniedException(java.nio.file.AccessDeniedException) 
RaftClientConfigKeys(org.apache.ratis.client.RaftClientConfigKeys) UnavailableException(alluxio.exception.status.UnavailableException) TimeDuration(org.apache.ratis.util.TimeDuration) AsyncJournalWriter(alluxio.master.journal.AsyncJournalWriter) GroupInfoRequest(org.apache.ratis.protocol.GroupInfoRequest) CatchupFuture(alluxio.master.journal.CatchupFuture) SetConfigurationRequest(org.apache.ratis.protocol.SetConfigurationRequest) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) JournalEntry(alluxio.proto.journal.Journal.JournalEntry) WaitForOptions(alluxio.util.WaitForOptions) RaftGroupId(org.apache.ratis.protocol.RaftGroupId) AddQuorumServerRequest(alluxio.grpc.AddQuorumServerRequest) Message(org.apache.ratis.protocol.Message) OptionalLong(java.util.OptionalLong) Constants(alluxio.Constants) QuorumServerState(alluxio.grpc.QuorumServerState) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) MetricsSystem(alluxio.metrics.MetricsSystem) LinkedList(java.util.LinkedList) SizeInBytes(org.apache.ratis.util.SizeInBytes) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) NetAddress(alluxio.grpc.NetAddress) Iterator(java.util.Iterator) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftServerConfigKeys(org.apache.ratis.server.RaftServerConfigKeys) ExceptionMessage(alluxio.exception.ExceptionMessage) RaftProtos(org.apache.ratis.proto.RaftProtos) FileUtils(org.apache.commons.io.FileUtils) GrpcConfigKeys(org.apache.ratis.grpc.GrpcConfigKeys) RaftClientRequest(org.apache.ratis.protocol.RaftClientRequest) IOException(java.io.IOException) TransferLeaderMessage(alluxio.grpc.TransferLeaderMessage) HostAndPort(com.google.common.net.HostAndPort) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) 
Parameters(org.apache.ratis.conf.Parameters) AtomicLong(java.util.concurrent.atomic.AtomicLong) LifeCycle(org.apache.ratis.util.LifeCycle) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) RaftServer(org.apache.ratis.server.RaftServer) RaftClient(org.apache.ratis.client.RaftClient) Comparator(java.util.Comparator) Journal(alluxio.master.journal.Journal) Collections(java.util.Collections) CommonUtils(alluxio.util.CommonUtils) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) IOException(java.io.IOException) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) TimeoutException(java.util.concurrent.TimeoutException) CancelledException(alluxio.exception.status.CancelledException) CompletionException(java.util.concurrent.CompletionException) AccessDeniedException(java.nio.file.AccessDeniedException) UnavailableException(alluxio.exception.status.UnavailableException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) OptionalLong(java.util.OptionalLong) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException)

Example 5 with LeaderNotReadyException

use of org.apache.ratis.protocol.exceptions.LeaderNotReadyException in project incubator-ratis by apache.

the class ClientProtoUtils method toRaftClientReplyProto.

/**
 * Converts a {@link RaftClientReply} into a {@link RaftClientReplyProto}.
 *
 * A {@code null} reply produces an empty proto. When the reply carries an exception,
 * the proto is converted back with {@code toRaftClientReply} and the class of the
 * round-tripped exception is compared with the original.
 *
 * @param reply the reply to convert; may be {@code null}
 * @return the proto form of {@code reply}
 * @throws AssertionError if the round-tripped exception is missing or of a different class
 */
static RaftClientReplyProto toRaftClientReplyProto(RaftClientReply reply) {
    final RaftClientReplyProto.Builder builder = RaftClientReplyProto.newBuilder();
    if (reply == null) {
        return builder.build();
    }

    builder.setRpcReply(toRaftRpcReplyProtoBuilder(reply.getClientId().toByteString(), reply.getServerId().toByteString(), reply.getRaftGroupId(), reply.getCallId(), reply.isSuccess()));
    builder.setLogIndex(reply.getLogIndex());
    if (reply.getMessage() != null) {
        builder.setMessage(toClientMessageEntryProtoBuilder(reply.getMessage()));
    }
    builder.addAllCommitInfos(reply.getCommitInfos());

    final NotLeaderException notLeader = reply.getNotLeaderException();
    if (notLeader != null) {
        final NotLeaderExceptionProto.Builder notLeaderProto = NotLeaderExceptionProto.newBuilder();
        // The suggested leader is optional and is only set when present.
        final RaftPeer suggested = notLeader.getSuggestedLeader();
        if (suggested != null) {
            notLeaderProto.setSuggestedLeader(suggested.getRaftPeerProto());
        }
        notLeaderProto.addAllPeersInConf(ProtoUtils.toRaftPeerProtos(notLeader.getPeers()));
        builder.setNotLeaderException(notLeaderProto.build());
    }

    final NotReplicatedException notReplicated = reply.getNotReplicatedException();
    if (notReplicated != null) {
        builder.setNotReplicatedException(NotReplicatedExceptionProto.newBuilder()
            .setCallId(notReplicated.getCallId())
            .setReplication(notReplicated.getRequiredReplication())
            .setLogIndex(notReplicated.getLogIndex()));
    }

    final LeaderNotReadyException notReady = reply.getLeaderNotReadyException();
    if (notReady != null) {
        builder.setLeaderNotReadyException(LeaderNotReadyExceptionProto.newBuilder()
            .setServerId(ProtoUtils.toRaftGroupMemberIdProtoBuilder(notReady.getServerId())));
    }

    Optional.ofNullable(reply.getStateMachineException())
        .map(ClientProtoUtils::toStateMachineExceptionProtoBuilder)
        .ifPresent(builder::setStateMachineException);
    Optional.ofNullable(reply.getDataStreamException())
        .map(ProtoUtils::toThrowableProto)
        .ifPresent(builder::setDataStreamException);
    Optional.ofNullable(reply.getAlreadyClosedException())
        .map(ClientProtoUtils::toAlreadyClosedExceptionProtoBuilder)
        .ifPresent(builder::setAlreadyClosedException);
    Optional.ofNullable(reply.getLeaderSteppingDownException())
        .map(ProtoUtils::toThrowableProto)
        .ifPresent(builder::setLeaderSteppingDownException);
    Optional.ofNullable(reply.getTransferLeadershipException())
        .map(ProtoUtils::toThrowableProto)
        .ifPresent(builder::setTransferLeadershipException);

    final RaftClientReplyProto serialized = builder.build();
    final RaftException original = reply.getException();
    if (original == null) {
        return serialized;
    }

    // Round-trip check: the deserialized reply must carry an exception of exactly the same class.
    final RaftClientReply deserialized = toRaftClientReply(serialized);
    final RaftException roundTripped = deserialized.getException();
    if (roundTripped == null || !original.getClass().equals(roundTripped.getClass())) {
        throw new AssertionError("Corruption while serializing reply= " + reply + " but serialized=" + serialized + " and deserialized=" + deserialized, original);
    }
    return serialized;
}
Also used : NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) RaftException(org.apache.ratis.protocol.exceptions.RaftException) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) NotReplicatedException(org.apache.ratis.protocol.exceptions.NotReplicatedException)

Aggregations

LeaderNotReadyException (org.apache.ratis.protocol.exceptions.LeaderNotReadyException)6 RaftPeerId (org.apache.ratis.protocol.RaftPeerId)3 NotLeaderException (org.apache.ratis.protocol.exceptions.NotLeaderException)3 IOException (java.io.IOException)2 RaftPeer (org.apache.ratis.protocol.RaftPeer)2 LeaderSteppingDownException (org.apache.ratis.protocol.exceptions.LeaderSteppingDownException)2 Constants (alluxio.Constants)1 PropertyKey (alluxio.conf.PropertyKey)1 ServerConfiguration (alluxio.conf.ServerConfiguration)1 ExceptionMessage (alluxio.exception.ExceptionMessage)1 CancelledException (alluxio.exception.status.CancelledException)1 UnavailableException (alluxio.exception.status.UnavailableException)1 AddQuorumServerRequest (alluxio.grpc.AddQuorumServerRequest)1 GrpcService (alluxio.grpc.GrpcService)1 JournalQueryRequest (alluxio.grpc.JournalQueryRequest)1 NetAddress (alluxio.grpc.NetAddress)1 QuorumServerInfo (alluxio.grpc.QuorumServerInfo)1 QuorumServerState (alluxio.grpc.QuorumServerState)1 ServiceType (alluxio.grpc.ServiceType)1 TransferLeaderMessage (alluxio.grpc.TransferLeaderMessage)1