Search in sources:

Example 1 with FailedToCommitClusterStateException

use of org.opensearch.cluster.coordination.FailedToCommitClusterStateException in project OpenSearch by opensearch-project.

the class MasterServiceTests method testAcking.

/**
 * Exercises how {@code MasterService} drives the callbacks of an {@code AckedClusterStateUpdateTask}
 * in two scenarios, each run against a three-node cluster state with {@code node1} as local node and master:
 * <ol>
 *   <li>the publisher fails with a {@link FailedToCommitClusterStateException} while the task's ack timeout
 *       is {@code TimeValue.ZERO}: only {@code onFailure} is expected to be invoked;</li>
 *   <li>the publisher succeeds but reports a commit time at least as long as the task's ack timeout:
 *       {@code clusterStateProcessed} and {@code onAckTimeout} are both expected, while {@code newResponse}
 *       and {@code onFailure} must not be invoked.</li>
 * </ol>
 *
 * @throws InterruptedException if the test thread is interrupted while waiting on a latch
 */
public void testAcking() throws InterruptedException {
    final DiscoveryNode node1 = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
    final DiscoveryNode node2 = new DiscoveryNode("node2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
    final DiscoveryNode node3 = new DiscoveryNode("node3", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
    // The service is created in try-with-resources so it is closed when the test body exits, even on failure.
    try (MasterService masterService = new MasterService(Settings.builder().put(ClusterName.CLUSTER_NAME_SETTING.getKey(), MasterServiceTests.class.getSimpleName()).put(Node.NODE_NAME_SETTING.getKey(), "test_node").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool)) {
        final ClusterState initialClusterState = ClusterState.builder(new ClusterName(MasterServiceTests.class.getSimpleName())).nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3).localNodeId(node1.getId()).masterNodeId(node1.getId())).blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK).build();
        // The registered publisher only delegates to whatever publisherRef currently holds, which lets each
        // scenario below install its own publishing behaviour after the service has been started.
        final AtomicReference<ClusterStatePublisher> publisherRef = new AtomicReference<>();
        masterService.setClusterStatePublisher((e, pl, al) -> publisherRef.get().publish(e, pl, al));
        // The supplier always hands back the same initial state.
        masterService.setClusterStateSupplier(() -> initialClusterState);
        masterService.start();
        // Scenario 1: check that we don't time out before even committing the cluster state
        {
            // A single count: only onFailure is allowed to fire in this scenario.
            final CountDownLatch latch = new CountDownLatch(1);
            // Publication fails immediately with a commit failure; the ack listener is never touched.
            publisherRef.set((clusterChangedEvent, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("mock exception")));
            masterService.submitStateUpdateTask("test2", new AckedClusterStateUpdateTask<Void>(null, null) {

                @Override
                public ClusterState execute(ClusterState currentState) {
                    // Build a new instance so the task does not return the very same state object.
                    return ClusterState.builder(currentState).build();
                }

                @Override
                public TimeValue ackTimeout() {
                    // Zero ack timeout: the scenario asserts that onAckTimeout is nevertheless not called.
                    return TimeValue.ZERO;
                }

                @Override
                public TimeValue timeout() {
                    return null;
                }

                @Override
                public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                    // Must not happen: the publication failed.
                    fail();
                }

                @Override
                protected Void newResponse(boolean acknowledged) {
                    // Must not happen: no acknowledgement response is expected for a failed publication.
                    fail();
                    return null;
                }

                @Override
                public void onFailure(String source, Exception e) {
                    // The only expected callback; releases the waiting test thread.
                    latch.countDown();
                }

                @Override
                public void onAckTimeout() {
                    // Must not happen even though the ack timeout is zero.
                    fail();
                }
            });
            // Blocks without a timeout until onFailure has been invoked.
            latch.await();
        }
        // Scenario 2: check that we timeout if commit took too long
        {
            // Two counts: one for clusterStateProcessed and one for onAckTimeout.
            final CountDownLatch latch = new CountDownLatch(2);
            final TimeValue ackTimeout = TimeValue.timeValueMillis(randomInt(100));
            publisherRef.set((clusterChangedEvent, publishListener, ackListener) -> {
                publishListener.onResponse(null);
                // Report a commit time of the ack timeout plus a random non-negative amount,
                // i.e. never shorter than the ack timeout itself.
                ackListener.onCommit(TimeValue.timeValueMillis(ackTimeout.millis() + randomInt(100)));
                // All three nodes acknowledge without error after the commit has been reported.
                ackListener.onNodeAck(node1, null);
                ackListener.onNodeAck(node2, null);
                ackListener.onNodeAck(node3, null);
            });
            masterService.submitStateUpdateTask("test2", new AckedClusterStateUpdateTask<Void>(null, null) {

                @Override
                public ClusterState execute(ClusterState currentState) {
                    // Build a new instance so the task does not return the very same state object.
                    return ClusterState.builder(currentState).build();
                }

                @Override
                public TimeValue ackTimeout() {
                    return ackTimeout;
                }

                @Override
                public TimeValue timeout() {
                    return null;
                }

                @Override
                public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                    // Expected: the publication itself succeeded.
                    latch.countDown();
                }

                @Override
                protected Void newResponse(boolean acknowledged) {
                    // Must not happen: the task is expected to end in an ack timeout, not an ack response.
                    fail();
                    return null;
                }

                @Override
                public void onFailure(String source, Exception e) {
                    // Must not happen: the publisher reported success.
                    fail();
                }

                @Override
                public void onAckTimeout() {
                    // Expected: the reported commit time is not shorter than the ack timeout.
                    latch.countDown();
                }
            });
            // Blocks without a timeout until both expected callbacks have fired.
            latch.await();
        }
    }
}
Also used : TestThreadPool(org.opensearch.threadpool.TestThreadPool) Level(org.apache.logging.log4j.Level) Version(org.opensearch.Version) OpenSearchException(org.opensearch.OpenSearchException) ThreadContext(org.opensearch.common.util.concurrent.ThreadContext) Matchers.hasKey(org.hamcrest.Matchers.hasKey) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) ClusterStatePublisher(org.opensearch.cluster.coordination.ClusterStatePublisher) AfterClass(org.junit.AfterClass) CyclicBarrier(java.util.concurrent.CyclicBarrier) TimeValue(org.opensearch.common.unit.TimeValue) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ClusterStateTaskExecutor(org.opensearch.cluster.ClusterStateTaskExecutor) Settings(org.opensearch.common.settings.Settings) Nullable(org.opensearch.common.Nullable) Tuple(org.opensearch.common.collect.Tuple) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.anyOf(org.hamcrest.Matchers.anyOf) Matchers.containsString(org.hamcrest.Matchers.containsString) ClusterStateTaskListener(org.opensearch.cluster.ClusterStateTaskListener) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) MockLogAppender(org.opensearch.test.MockLogAppender) BeforeClass(org.junit.BeforeClass) ThreadPool(org.opensearch.threadpool.ThreadPool) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Priority(org.opensearch.common.Priority) HashMap(java.util.HashMap) Node(org.opensearch.node.Node) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) 
ClusterState(org.opensearch.cluster.ClusterState) AckedClusterStateUpdateTask(org.opensearch.cluster.AckedClusterStateUpdateTask) ClusterStateTaskConfig(org.opensearch.cluster.ClusterStateTaskConfig) ClusterSettings(org.opensearch.common.settings.ClusterSettings) ClusterBlocks(org.opensearch.cluster.block.ClusterBlocks) Before(org.junit.Before) Collections.emptyMap(java.util.Collections.emptyMap) Collections.emptySet(java.util.Collections.emptySet) Semaphore(java.util.concurrent.Semaphore) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) BaseFuture(org.opensearch.common.util.concurrent.BaseFuture) LocalClusterUpdateTask(org.opensearch.cluster.LocalClusterUpdateTask) TestLogging(org.opensearch.test.junit.annotations.TestLogging) TimeUnit(java.util.concurrent.TimeUnit) ClusterName(org.opensearch.cluster.ClusterName) LogManager(org.apache.logging.log4j.LogManager) Collections(java.util.Collections) ClusterChangedEvent(org.opensearch.cluster.ClusterChangedEvent) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ClusterState(org.opensearch.cluster.ClusterState) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) ClusterSettings(org.opensearch.common.settings.ClusterSettings) AtomicReference(java.util.concurrent.atomic.AtomicReference) Matchers.containsString(org.hamcrest.Matchers.containsString) CountDownLatch(java.util.concurrent.CountDownLatch) OpenSearchException(org.opensearch.OpenSearchException) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) AckedClusterStateUpdateTask(org.opensearch.cluster.AckedClusterStateUpdateTask) ClusterName(org.opensearch.cluster.ClusterName) ClusterStatePublisher(org.opensearch.cluster.coordination.ClusterStatePublisher) TimeValue(org.opensearch.common.unit.TimeValue)

Example 2 with FailedToCommitClusterStateException

use of org.opensearch.cluster.coordination.FailedToCommitClusterStateException in project OpenSearch by opensearch-project.

the class BatchedRerouteServiceTests method testNotifiesOnFailure.

/**
 * Submits a random number of reroute requests to a {@code BatchedRerouteService} while randomly injecting
 * failures (a throwing reroute function, throwing listeners, a publisher that fails to commit, and applied
 * cluster states with a randomly cleared master), then asserts that the latch counted down by every
 * listener reaches zero within ten seconds.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting for the listeners
 */
public void testNotifiesOnFailure() throws InterruptedException {
    final BatchedRerouteService batchedRerouteService = new BatchedRerouteService(clusterService, (state, unused) -> {
        if (rarely()) {
            throw new OpenSearchException("simulated");
        }
        // Randomly hand back either the very same instance or a rebuilt one.
        if (randomBoolean()) {
            return state;
        }
        return ClusterState.builder(state).build();
    });
    final int listenerCount = between(1, 100);
    final CountDownLatch pendingListeners = new CountDownLatch(listenerCount);
    for (int round = 0; round < listenerCount; round++) {
        final Priority priority = randomFrom(EnumSet.allOf(Priority.class));
        // Both callbacks count down first and only then (sometimes) throw, so a throwing listener
        // still registers as notified.
        batchedRerouteService.reroute("iteration " + round, priority, ActionListener.wrap(result -> {
            pendingListeners.countDown();
            if (rarely()) {
                throw new OpenSearchException("failure during notification");
            }
        }, failure -> {
            pendingListeners.countDown();
            if (randomBoolean()) {
                throw new OpenSearchException("failure during failure notification", failure);
            }
        }));
        if (rarely()) {
            // Occasionally swap the publisher: either a regular one or one that always fails to commit.
            if (randomBoolean()) {
                clusterService.getMasterService().setClusterStatePublisher(ClusterServiceUtils.createClusterStatePublisher(clusterService.getClusterApplierService()));
            } else {
                clusterService.getMasterService().setClusterStatePublisher((event, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("simulated")));
            }
        }
        if (rarely()) {
            // Occasionally apply a state whose master is either cleared or set to the local node.
            clusterService.getClusterApplierService().onNewClusterState("simulated", () -> {
                final ClusterState current = clusterService.state();
                return ClusterState.builder(current)
                    .nodes(DiscoveryNodes.builder(current.nodes()).masterNodeId(randomBoolean() ? null : current.nodes().getLocalNodeId()))
                    .build();
            }, (source, e) -> {
            });
        }
    }
    // i.e. it doesn't leak any listeners
    assertTrue(pendingListeners.await(10, TimeUnit.SECONDS));
}
Also used : DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) ThreadPool(org.opensearch.threadpool.ThreadPool) TestThreadPool(org.opensearch.threadpool.TestThreadPool) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Priority(org.opensearch.common.Priority) OpenSearchException(org.opensearch.OpenSearchException) Function(java.util.function.Function) ArrayList(java.util.ArrayList) ClusterState(org.opensearch.cluster.ClusterState) After(org.junit.After) Matchers.lessThan(org.hamcrest.Matchers.lessThan) ActionListener(org.opensearch.action.ActionListener) EnumSet(java.util.EnumSet) Before(org.junit.Before) CyclicBarrier(java.util.concurrent.CyclicBarrier) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) TimeUnit(java.util.concurrent.TimeUnit) CountDownLatch(java.util.concurrent.CountDownLatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) Randomness(org.opensearch.common.Randomness) ClusterService(org.opensearch.cluster.service.ClusterService) ClusterServiceUtils(org.opensearch.test.ClusterServiceUtils) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ClusterState(org.opensearch.cluster.ClusterState) OpenSearchException(org.opensearch.OpenSearchException) CountDownLatch(java.util.concurrent.CountDownLatch)

Example 3 with FailedToCommitClusterStateException

use of org.opensearch.cluster.coordination.FailedToCommitClusterStateException in project OpenSearch by opensearch-project.

the class TransportMasterNodeActionTests method testMasterFailoverAfterStepDown.

/**
 * Runs a master-node action whose master operation switches the cluster state to one built with the remote
 * node in the master position and then fails with either a {@link FailedToCommitClusterStateException} or a
 * {@code NotMasterException}. The test then asserts that exactly one request was captured by the transport,
 * that it targets a master node with the original request and action name, and that answering it completes
 * the caller's future with the supplied response.
 *
 * @throws ExecutionException   if retrieving the result from the future fails
 * @throws InterruptedException if the test thread is interrupted while retrieving the result
 */
public void testMasterFailoverAfterStepDown() throws ExecutionException, InterruptedException {
    final Request originalRequest = new Request().masterNodeTimeout(TimeValue.timeValueHours(1));
    final PlainActionFuture<Response> future = new PlainActionFuture<>();
    final Response expectedResponse = new Response();
    setState(clusterService, ClusterStateCreationUtils.state(localNode, localNode, allNodes));
    final Action action = new Action("internal:testAction", transportService, clusterService, threadPool) {

        @Override
        protected void masterOperation(Request request, ClusterState state, ActionListener<Response> listener) throws Exception {
            // The other node has become master, simulate failures of this node while publishing cluster state through ZenDiscovery
            setState(clusterService, ClusterStateCreationUtils.state(localNode, remoteNode, allNodes));
            final Exception failure;
            if (randomBoolean()) {
                failure = new FailedToCommitClusterStateException("Fake error");
            } else {
                failure = new NotMasterException("Fake error");
            }
            listener.onFailure(failure);
        }
    };
    action.execute(originalRequest, future);
    // Exactly one request must have been sent over the transport after the local failure.
    assertThat(transport.capturedRequests().length, equalTo(1));
    final CapturingTransport.CapturedRequest forwarded = transport.capturedRequests()[0];
    assertTrue(forwarded.node.isMasterNode());
    assertThat(forwarded.request, equalTo(originalRequest));
    assertThat(forwarded.action, equalTo("internal:testAction"));
    // Answer the captured request and check that the answer reaches the original caller.
    transport.handleResponse(forwarded.requestId, expectedResponse);
    assertTrue(future.isDone());
    assertThat(future.get(), equalTo(expectedResponse));
}
Also used : FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ClusterState(org.opensearch.cluster.ClusterState) CapturingTransport(org.opensearch.test.transport.CapturingTransport) OpenSearchException(org.opensearch.OpenSearchException) NotMasterException(org.opensearch.cluster.NotMasterException) NodeClosedException(org.opensearch.node.NodeClosedException) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) MasterNotDiscoveredException(org.opensearch.discovery.MasterNotDiscoveredException) ActionRequestValidationException(org.opensearch.action.ActionRequestValidationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ActionResponse(org.opensearch.action.ActionResponse) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) NotMasterException(org.opensearch.cluster.NotMasterException)

Example 4 with FailedToCommitClusterStateException

use of org.opensearch.cluster.coordination.FailedToCommitClusterStateException in project OpenSearch by opensearch-project.

the class MasterService method onPublicationFailed.

/**
 * Reacts to a failed publication of a cluster state.
 * <p>
 * A {@link FailedToCommitClusterStateException} is logged at warn level together with the event source and
 * the state version, and is then passed to {@code taskOutputs.publishingFailed}. Any other exception is
 * delegated to {@code handleException}.
 *
 * @param clusterChangedEvent the event whose publication failed; supplies the source and the state
 * @param taskOutputs         receives the failure when it is a commit failure
 * @param startTimeMillis     forwarded unchanged to {@code handleException} for non-commit failures
 * @param exception           the failure raised by the publication
 */
void onPublicationFailed(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeMillis, Exception exception) {
    final boolean commitFailed = exception instanceof FailedToCommitClusterStateException;
    if (commitFailed == false) {
        // Anything other than a commit failure goes through the generic handler.
        handleException(clusterChangedEvent.source(), startTimeMillis, clusterChangedEvent.state(), exception);
        return;
    }
    // Read the version eagerly; the log message itself is only built if the supplier is invoked.
    final long failedVersion = clusterChangedEvent.state().version();
    logger.warn(
        () -> new ParameterizedMessage("failing [{}]: failed to commit cluster state version [{}]", clusterChangedEvent.source(), failedVersion),
        exception
    );
    taskOutputs.publishingFailed((FailedToCommitClusterStateException) exception);
}
Also used : FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage)

Example 5 with FailedToCommitClusterStateException

use of org.opensearch.cluster.coordination.FailedToCommitClusterStateException in project OpenSearch by opensearch-project.

the class ShardStateActionTests method testMasterChannelException.

/**
 * Reports a local shard failure and answers the resulting transport request with a randomly chosen error
 * (a remote {@code NotMasterException} or {@link FailedToCommitClusterStateException}, a local
 * {@code NodeNotConnectedException}, or a {@code NodeDisconnectedException}). After the retry verification
 * installed by {@code setUpMasterRetryVerification} has run, the test asserts that no failure reached the
 * listener, that the retry counter equals the configured number of retries, and that the listener reported
 * success.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting for completion
 */
public void testMasterChannelException() throws InterruptedException {
    final String index = "test";
    setState(clusterService, ClusterStateCreationUtils.stateWithActivePrimary(index, true, randomInt(5)));
    final CountDownLatch completed = new CountDownLatch(1);
    final AtomicInteger retryCounter = new AtomicInteger();
    final AtomicBoolean succeeded = new AtomicBoolean();
    final AtomicReference<Throwable> reportedFailure = new AtomicReference<>();
    // Fails the given request with one randomly selected kind of error.
    final LongConsumer failRequest = requestId -> {
        if (randomBoolean()) {
            transport.handleRemoteError(requestId, randomFrom(new NotMasterException("simulated"), new FailedToCommitClusterStateException("simulated")));
        } else if (randomBoolean()) {
            transport.handleLocalError(requestId, new NodeNotConnectedException(null, "simulated"));
        } else {
            transport.handleError(requestId, new NodeDisconnectedException(null, ShardStateAction.SHARD_FAILED_ACTION_NAME));
        }
    };
    final int numberOfRetries = randomIntBetween(1, 256);
    setUpMasterRetryVerification(numberOfRetries, retryCounter, completed, failRequest);
    final ShardRouting failedShard = getRandomShardRouting(index);
    shardStateAction.localShardFailed(failedShard, "test", getSimulatedFailure(), new ActionListener<Void>() {

        @Override
        public void onResponse(Void aVoid) {
            succeeded.set(true);
            completed.countDown();
        }

        @Override
        public void onFailure(Exception e) {
            // Record the failure and release the waiting thread before tripping the assertion.
            succeeded.set(false);
            reportedFailure.set(e);
            completed.countDown();
            assert false;
        }
    });
    final CapturingTransport.CapturedRequest[] sentRequests = transport.getCapturedRequestsAndClear();
    // Before any error is injected: one request sent, nothing retried, nothing succeeded yet.
    assertThat(sentRequests.length, equalTo(1));
    assertFalse(succeeded.get());
    assertThat(retryCounter.get(), equalTo(0));
    // Fail the first request, then wait for the latch to be released.
    failRequest.accept(sentRequests[0].requestId);
    completed.await();
    assertNull(reportedFailure.get());
    assertThat(retryCounter.get(), equalTo(numberOfRetries));
    assertTrue(succeeded.get());
}
Also used : FailedShardEntry(org.opensearch.cluster.action.shard.ShardStateAction.FailedShardEntry) VersionUtils.randomCompatibleVersion(org.opensearch.test.VersionUtils.randomCompatibleVersion) AllocationService(org.opensearch.cluster.routing.allocation.AllocationService) TestThreadPool(org.opensearch.threadpool.TestThreadPool) Version(org.opensearch.Version) ClusterServiceUtils.setState(org.opensearch.test.ClusterServiceUtils.setState) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) ClusterStateCreationUtils(org.opensearch.action.support.replication.ClusterStateCreationUtils) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Matchers.nullValue(org.hamcrest.Matchers.nullValue) NotMasterException(org.opensearch.cluster.NotMasterException) ActionListener(org.opensearch.action.ActionListener) NodeNotConnectedException(org.opensearch.transport.NodeNotConnectedException) AfterClass(org.junit.AfterClass) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Predicate(java.util.function.Predicate) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) TransportResponse(org.opensearch.transport.TransportResponse) UUID(java.util.UUID) TransportService(org.opensearch.transport.TransportService) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) CountDownLatch(java.util.concurrent.CountDownLatch) NodeDisconnectedException(org.opensearch.transport.NodeDisconnectedException) Matchers.is(org.hamcrest.Matchers.is) TransportException(org.opensearch.transport.TransportException) Matchers.arrayWithSize(org.hamcrest.Matchers.arrayWithSize) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) BytesReference(org.opensearch.common.bytes.BytesReference) BeforeClass(org.junit.BeforeClass) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) ThreadPool(org.opensearch.threadpool.ThreadPool) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Writeable(org.opensearch.common.io.stream.Writeable) AtomicReference(java.util.concurrent.atomic.AtomicReference) IndexRoutingTable(org.opensearch.cluster.routing.IndexRoutingTable) ClusterState(org.opensearch.cluster.ClusterState) RerouteService(org.opensearch.cluster.routing.RerouteService) ClusterStateObserver(org.opensearch.cluster.ClusterStateObserver) Before(org.junit.Before) StreamInput(org.opensearch.common.io.stream.StreamInput) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) ClusterServiceUtils.createClusterService(org.opensearch.test.ClusterServiceUtils.createClusterService) SetOnce(org.apache.lucene.util.SetOnce) TransportRequest(org.opensearch.transport.TransportRequest) IOException(java.io.IOException) ShardsIterator(org.opensearch.cluster.routing.ShardsIterator) BytesStreamOutput(org.opensearch.common.io.stream.BytesStreamOutput) LongConsumer(java.util.function.LongConsumer) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) TimeUnit(java.util.concurrent.TimeUnit) Phaser(java.util.concurrent.Phaser) StartedShardEntry(org.opensearch.cluster.action.shard.ShardStateAction.StartedShardEntry) ClusterService(org.opensearch.cluster.service.ClusterService) RoutingTable(org.opensearch.cluster.routing.RoutingTable) CapturingTransport(org.opensearch.test.transport.CapturingTransport) Collections(java.util.Collections) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) NodeDisconnectedException(org.opensearch.transport.NodeDisconnectedException) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NotMasterException(org.opensearch.cluster.NotMasterException) NodeNotConnectedException(org.opensearch.transport.NodeNotConnectedException) 
FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) NodeDisconnectedException(org.opensearch.transport.NodeDisconnectedException) TransportException(org.opensearch.transport.TransportException) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LongConsumer(java.util.function.LongConsumer) NodeNotConnectedException(org.opensearch.transport.NodeNotConnectedException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NotMasterException(org.opensearch.cluster.NotMasterException) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Aggregations

ClusterState (org.opensearch.cluster.ClusterState)4 FailedToCommitClusterStateException (org.opensearch.cluster.coordination.FailedToCommitClusterStateException)4 CountDownLatch (java.util.concurrent.CountDownLatch)3 TimeUnit (java.util.concurrent.TimeUnit)3 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)3 Before (org.junit.Before)3 OpenSearchException (org.opensearch.OpenSearchException)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2 List (java.util.List)2 BrokenBarrierException (java.util.concurrent.BrokenBarrierException)2 CyclicBarrier (java.util.concurrent.CyclicBarrier)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 After (org.junit.After)2 ActionListener (org.opensearch.action.ActionListener)2 NotMasterException (org.opensearch.cluster.NotMasterException)2 DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes)2 ClusterService (org.opensearch.cluster.service.ClusterService)2