Search in sources :

Example 1 with RaftPeer

use of org.apache.ratis.protocol.RaftPeer in project incubator-ratis by apache.

the class RetryCacheTests method testRetryOnNewLeader.

/**
 * Test retry while the leader changes to another peer.
 *
 * <p>A request is first sent to the current leader. The cluster is then reconfigured and the
 * test asserts that a different peer has become leader. A request carrying the same client id,
 * call id and sequence number is re-sent to the new leader several times, and finally the
 * last-applied index of the leader is compared against the value recorded before the
 * reconfiguration.
 */
@Test
public void testRetryOnNewLeader() throws Exception {
    final MiniRaftCluster cluster = getCluster();
    RaftTestUtil.waitForLeader(cluster);
    final RaftPeerId leaderId = cluster.getLeaderAndSendFirstMessage().getId();
    // try-with-resources: the client must be closed even when an assertion below fails
    try (RaftClient client = cluster.createClient(leaderId)) {
        RaftClientRpc rpc = client.getClientRpc();
        final long callId = 999;
        final long seqNum = 111;
        RaftClientRequest r = cluster.newRaftClientRequest(client.getId(), leaderId, callId, seqNum, new SimpleMessage("message"));
        RaftClientReply reply = rpc.sendRequest(r);
        Assert.assertEquals(callId, reply.getCallId());
        Assert.assertTrue(reply.isSuccess());
        long oldLastApplied = cluster.getLeader().getState().getLastAppliedIndex();
        // trigger the reconfiguration, make sure the original leader is kicked out
        PeerChanges change = cluster.addNewPeers(2, true);
        RaftPeer[] allPeers = cluster.removePeers(2, true, asList(change.newPeers)).allPeersInNewConf;
        // trigger setConfiguration
        cluster.setConfiguration(allPeers);
        RaftTestUtil.waitForLeader(cluster);
        final RaftPeerId newLeaderId = cluster.getLeader().getId();
        Assert.assertNotEquals(leaderId, newLeaderId);
        // same clientId and callId in the request
        r = cluster.newRaftClientRequest(client.getId(), newLeaderId, callId, seqNum, new SimpleMessage("message"));
        for (int i = 0; i < 10; i++) {
            try {
                reply = rpc.sendRequest(r);
                LOG.info("successfully sent out the retry request_" + i);
                Assert.assertEquals(client.getId(), reply.getClientId());
                Assert.assertEquals(callId, reply.getCallId());
                Assert.assertTrue(reply.isSuccess());
            } catch (Exception e) {
                // best effort: a failed send is only logged; assertion failures are Errors and
                // therefore still propagate out of this catch
                LOG.info("hit exception while retrying the same request: " + e);
            }
            Thread.sleep(100);
        }
        // check the new leader and make sure the retry did not get committed
        // NOTE(review): the expected offset of 3 presumably covers the entries appended by the
        // reconfiguration and leader change - confirm against the cluster implementation
        Assert.assertEquals(oldLastApplied + 3, cluster.getLeader().getState().getLastAppliedIndex());
    }
}
Also used : SimpleMessage(org.apache.ratis.RaftTestUtil.SimpleMessage) RaftPeer(org.apache.ratis.protocol.RaftPeer) IOException(java.io.IOException) RaftClientRequest(org.apache.ratis.protocol.RaftClientRequest) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) PeerChanges(org.apache.ratis.MiniRaftCluster.PeerChanges) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftClient(org.apache.ratis.client.RaftClient) RaftClientRpc(org.apache.ratis.client.RaftClientRpc) Test(org.junit.Test)

Example 2 with RaftPeer

use of org.apache.ratis.protocol.RaftPeer in project alluxio by Alluxio.

the class RaftJournalSystem method resetPriorities.

/**
 * Rebuilds every peer of the current Raft group with the same neutral priority and submits the
 * resulting peer list as the new configuration.
 *
 * @throws IOException if the configuration change cannot be carried out
 */
public synchronized void resetPriorities() throws IOException {
    final int neutralPriority = 1;
    // copy each peer, overriding only its priority
    List<RaftPeer> neutralPeers = mRaftGroup.getPeers().stream()
        .map(peer -> RaftPeer.newBuilder(peer).setPriority(neutralPriority).build())
        .collect(Collectors.toList());
    LOG.info("Resetting RaftPeer priorities");
    try (RaftClient raftClient = createClient()) {
        processReply(raftClient.admin().setConfiguration(neutralPeers), "failed to reset master priorities to 1");
    }
}
Also used : RaftClientReply(org.apache.ratis.protocol.RaftClientReply) ArrayList(java.util.ArrayList) RaftPeer(org.apache.ratis.protocol.RaftPeer) RaftClient(org.apache.ratis.client.RaftClient)

Example 3 with RaftPeer

use of org.apache.ratis.protocol.RaftPeer in project alluxio by Alluxio.

the class RaftJournalSystem method removeQuorumServer.

/**
 * Removes the server with the given address from the RAFT quorum by submitting a new
 * configuration that contains every current peer except that server.
 * For server to be removed, it should be in unavailable state in quorum.
 *
 * @param serverNetAddress address of the server to remove from the quorum
 * @throws IOException if the reply to the configuration change carries an exception
 */
public synchronized void removeQuorumServer(NetAddress serverNetAddress) throws IOException {
    RaftPeerId targetId = RaftJournalUtils.getPeerId(
        InetSocketAddress.createUnresolved(serverNetAddress.getHost(), serverNetAddress.getRpcPort()));
    try (RaftClient raftClient = createClient()) {
        // keep every peer of the first group whose id differs from the one being removed
        List<RaftPeer> remainingPeers = new ArrayList<>();
        for (RaftPeer candidate : mServer.getGroups().iterator().next().getPeers()) {
            if (!candidate.getId().equals(targetId)) {
                remainingPeers.add(candidate);
            }
        }
        RaftClientReply reply = raftClient.admin().setConfiguration(remainingPeers);
        if (reply.getException() != null) {
            throw reply.getException();
        }
    }
}
Also used : RaftClientReply(org.apache.ratis.protocol.RaftClientReply) InetSocketAddress(java.net.InetSocketAddress) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftPeer(org.apache.ratis.protocol.RaftPeer) RaftClient(org.apache.ratis.client.RaftClient)

Example 4 with RaftPeer

use of org.apache.ratis.protocol.RaftPeer in project alluxio by Alluxio.

the class RaftJournalSystem method transferLeadership.

/**
 * Transfers the leadership of the quorum to another server.
 *
 * <p>The method first raises the priority of the target peer through a configuration change and
 * then starts a background thread that sends the actual transfer request after a short delay.
 * It returns without waiting for that thread. Any failure, either in this method or in the
 * background thread, is recorded in {@code mErrorMessages} under the returned id and re-enables
 * further transfer attempts.
 *
 * @param newLeaderNetAddress the address of the server
 * @return the guid of transfer leader command
 */
public synchronized String transferLeadership(NetAddress newLeaderNetAddress) {
    final boolean allowed = mTransferLeaderAllowed.getAndSet(false);
    String transferId = UUID.randomUUID().toString();
    if (!allowed) {
        // each branch carries its complete wording so that no words get glued together
        String msg = "transfer is not allowed at the moment because the master is " + (mRaftJournalWriter == null ? "still gaining primacy" : "already transferring the leadership");
        mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(msg).build());
        return transferId;
    }
    try {
        InetSocketAddress serverAddress = InetSocketAddress.createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort());
        List<RaftPeer> oldPeers = new ArrayList<>(mRaftGroup.getPeers());
        // The NetUtil function is used by Ratis to convert InetSocketAddress to string
        String strAddr = NetUtils.address2String(serverAddress);
        // if you cannot find the address in the quorum, throw exception.
        if (oldPeers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) {
            throw new IOException(String.format("<%s> is not part of the quorum <%s>.", strAddr, oldPeers.stream().map(RaftPeer::getAddress).collect(Collectors.toList())));
        }
        if (strAddr.equals(mRaftGroup.getPeer(mPeerId).getAddress())) {
            throw new IOException(String.format("%s is already the leader", strAddr));
        }
        RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress);
        /* update priorities to enable transfer */
        List<RaftPeer> peersWithNewPriorities = new ArrayList<>();
        for (RaftPeer peer : oldPeers) {
            peersWithNewPriorities.add(RaftPeer.newBuilder(peer).setPriority(peer.getId().equals(newLeaderPeerId) ? 2 : 1).build());
        }
        try (RaftClient client = createClient()) {
            String stringPeers = "[" + peersWithNewPriorities.stream().map(RaftPeer::toString).collect(Collectors.joining(", ")) + "]";
            LOG.info("Applying new peer state before transferring leadership: {}", stringPeers);
            RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriorities);
            processReply(reply, "failed to set master priorities before initiating election");
        }
        /* transfer leadership */
        LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>", serverAddress, newLeaderPeerId);
        // fire and forget: need to immediately return as the master will shut down its RPC servers
        // once the TransferLeadershipRequest is initiated.
        final int SLEEP_TIME_MS = 3_000;
        final int TRANSFER_LEADER_WAIT_MS = 30_000;
        new Thread(() -> {
            try {
                Thread.sleep(SLEEP_TIME_MS);
                // The background thread opens its own client: a client owned by the calling
                // thread's try-with-resources would already be closed by the time the delayed
                // request is sent.
                try (RaftClient transferClient = createClient()) {
                    RaftClientReply reply1 = transferClient.admin().transferLeadership(newLeaderPeerId, TRANSFER_LEADER_WAIT_MS);
                    processReply(reply1, "election failed");
                }
            } catch (Throwable t) {
                // getMessage() may be null for some throwables; fall back to toString()
                final String errMsg = t.getMessage() != null ? t.getMessage() : t.toString();
                LOG.error("caught an error when executing transfer: {}", errMsg);
                // we only allow transfers again if the transfer is unsuccessful: a success means it
                // will soon lose primacy
                mTransferLeaderAllowed.set(true);
                mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(errMsg).build());
            /* checking the transfer happens in {@link QuorumElectCommand} */
            }
        }).start();
        LOG.info("Transferring leadership initiated");
    } catch (Throwable t) {
        // getMessage() may be null for some throwables; fall back to toString()
        final String errMsg = t.getMessage() != null ? t.getMessage() : t.toString();
        mTransferLeaderAllowed.set(true);
        LOG.warn(errMsg);
        mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(errMsg).build());
    }
    return transferId;
}
Also used : InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) IOException(java.io.IOException) RaftPeer(org.apache.ratis.protocol.RaftPeer) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftClient(org.apache.ratis.client.RaftClient)

Example 5 with RaftPeer

use of org.apache.ratis.protocol.RaftPeer in project alluxio by Alluxio.

the class RaftJournalSystem method startInternal.

/**
 * Initializes and starts the Raft journal system: derives the local peer id, builds the Raft
 * group from the configured cluster addresses, initializes and starts the server, and finally
 * calls {@code joinQuorum()}.
 *
 * @throws InterruptedException declared by the overridden method
 * @throws IOException if the Raft server fails to start; the message lists the cluster
 *         addresses and the exception chain of the original failure is preserved
 */
@Override
public synchronized void startInternal() throws InterruptedException, IOException {
    LOG.info("Initializing Raft Journal System");
    InetSocketAddress localAddress = mConf.getLocalAddress();
    mPeerId = RaftJournalUtils.getPeerId(localAddress);
    List<InetSocketAddress> addresses = mConf.getClusterAddresses();
    // one RaftPeer per configured cluster address, identified by the id derived from it
    Set<RaftPeer> peers = addresses.stream().map(addr -> RaftPeer.newBuilder().setId(RaftJournalUtils.getPeerId(addr)).setAddress(addr).build()).collect(Collectors.toSet());
    mRaftGroup = RaftGroup.valueOf(RAFT_GROUP_ID, peers);
    initServer();
    super.registerMetrics();
    List<InetSocketAddress> clusterAddresses = mConf.getClusterAddresses();
    LOG.info("Starting Raft journal system. Cluster addresses: {}. Local address: {}", clusterAddresses, mConf.getLocalAddress());
    long startTime = System.currentTimeMillis();
    try {
        mServer.start();
    } catch (IOException e) {
        // Prefer the underlying cause when there is one, but fall back to the exception itself
        // so that the rethrown IOException never loses the original stack trace.
        final Throwable rootCause = e.getCause() == null ? e : e.getCause();
        String errorMessage = ExceptionMessage.FAILED_RAFT_BOOTSTRAP.getMessage(Arrays.toString(clusterAddresses.toArray()), rootCause.toString());
        throw new IOException(errorMessage, rootCause);
    }
    LOG.info("Started Raft Journal System in {}ms", System.currentTimeMillis() - startTime);
    joinQuorum();
}
Also used : Arrays(java.util.Arrays) GroupInfoReply(org.apache.ratis.protocol.GroupInfoReply) RaftGroup(org.apache.ratis.protocol.RaftGroup) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PropertyKey(alluxio.conf.PropertyKey) GrpcService(alluxio.grpc.GrpcService) LogUtils(alluxio.util.LogUtils) NetUtils(org.apache.ratis.util.NetUtils) JournalQueryRequest(alluxio.grpc.JournalQueryRequest) MetricKey(alluxio.metrics.MetricKey) Map(java.util.Map) RaftConfigKeys(org.apache.ratis.RaftConfigKeys) CancelledException(alluxio.exception.status.CancelledException) UnsafeByteOperations(org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations) QuorumServerInfo(alluxio.grpc.QuorumServerInfo) ServerConfiguration(alluxio.conf.ServerConfiguration) RaftPeer(org.apache.ratis.protocol.RaftPeer) RetryPolicy(org.apache.ratis.retry.RetryPolicy) Master(alluxio.master.Master) Collection(java.util.Collection) AbstractJournalSystem(alluxio.master.journal.AbstractJournalSystem) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) SupportedRpcType(org.apache.ratis.rpc.SupportedRpcType) CompletionException(java.util.concurrent.CompletionException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) UUID(java.util.UUID) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) List(java.util.List) ClientId(org.apache.ratis.protocol.ClientId) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) RaftProperties(org.apache.ratis.conf.RaftProperties) ServiceType(alluxio.grpc.ServiceType) ExponentialBackoffRetry(org.apache.ratis.retry.ExponentialBackoffRetry) Optional(java.util.Optional) PrimarySelector(alluxio.master.PrimarySelector) RatisDropwizardExports(alluxio.metrics.sink.RatisDropwizardExports) AccessDeniedException(java.nio.file.AccessDeniedException) 
RaftClientConfigKeys(org.apache.ratis.client.RaftClientConfigKeys) UnavailableException(alluxio.exception.status.UnavailableException) TimeDuration(org.apache.ratis.util.TimeDuration) AsyncJournalWriter(alluxio.master.journal.AsyncJournalWriter) GroupInfoRequest(org.apache.ratis.protocol.GroupInfoRequest) CatchupFuture(alluxio.master.journal.CatchupFuture) SetConfigurationRequest(org.apache.ratis.protocol.SetConfigurationRequest) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) JournalEntry(alluxio.proto.journal.Journal.JournalEntry) WaitForOptions(alluxio.util.WaitForOptions) RaftGroupId(org.apache.ratis.protocol.RaftGroupId) AddQuorumServerRequest(alluxio.grpc.AddQuorumServerRequest) Message(org.apache.ratis.protocol.Message) OptionalLong(java.util.OptionalLong) Constants(alluxio.Constants) QuorumServerState(alluxio.grpc.QuorumServerState) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) MetricsSystem(alluxio.metrics.MetricsSystem) LinkedList(java.util.LinkedList) SizeInBytes(org.apache.ratis.util.SizeInBytes) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) NetAddress(alluxio.grpc.NetAddress) Iterator(java.util.Iterator) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftServerConfigKeys(org.apache.ratis.server.RaftServerConfigKeys) ExceptionMessage(alluxio.exception.ExceptionMessage) RaftProtos(org.apache.ratis.proto.RaftProtos) FileUtils(org.apache.commons.io.FileUtils) GrpcConfigKeys(org.apache.ratis.grpc.GrpcConfigKeys) RaftClientRequest(org.apache.ratis.protocol.RaftClientRequest) IOException(java.io.IOException) TransferLeaderMessage(alluxio.grpc.TransferLeaderMessage) HostAndPort(com.google.common.net.HostAndPort) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) 
Parameters(org.apache.ratis.conf.Parameters) AtomicLong(java.util.concurrent.atomic.AtomicLong) LifeCycle(org.apache.ratis.util.LifeCycle) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) RaftServer(org.apache.ratis.server.RaftServer) RaftClient(org.apache.ratis.client.RaftClient) Comparator(java.util.Comparator) Journal(alluxio.master.journal.Journal) Collections(java.util.Collections) CommonUtils(alluxio.util.CommonUtils) InetSocketAddress(java.net.InetSocketAddress) IOException(java.io.IOException) RaftPeer(org.apache.ratis.protocol.RaftPeer)

Aggregations

RaftPeer (org.apache.ratis.protocol.RaftPeer)11 RaftPeerId (org.apache.ratis.protocol.RaftPeerId)8 RaftClient (org.apache.ratis.client.RaftClient)7 RaftClientReply (org.apache.ratis.protocol.RaftClientReply)6 IOException (java.io.IOException)4 InetSocketAddress (java.net.InetSocketAddress)4 ArrayList (java.util.ArrayList)4 RaftGroup (org.apache.ratis.protocol.RaftGroup)3 BaseTest (org.apache.ratis.BaseTest)2 MiniRaftCluster (org.apache.ratis.MiniRaftCluster)2 RaftProperties (org.apache.ratis.conf.RaftProperties)2 RaftClientRequest (org.apache.ratis.protocol.RaftClientRequest)2 RaftGroupId (org.apache.ratis.protocol.RaftGroupId)2 Test (org.junit.Test)2 Constants (alluxio.Constants)1 PropertyKey (alluxio.conf.PropertyKey)1 ServerConfiguration (alluxio.conf.ServerConfiguration)1 ExceptionMessage (alluxio.exception.ExceptionMessage)1 CancelledException (alluxio.exception.status.CancelledException)1 UnavailableException (alluxio.exception.status.UnavailableException)1