Search in sources :

Example 11 with RemoteTransportException

use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.

The method executeBulkRejectionLoad of the class BulkProcessorRetryIT.

/**
 * Pushes a burst of single-action bulk requests through a {@link BulkProcessor} and checks how rejections were handled.
 *
 * <p>The listener records every bulk outcome (a {@link BulkResponse} or a {@link Throwable}) and also hands it to the
 * {@code CorrelatingBackoffPolicy} that wraps {@code backoffPolicy}. Once the processor is closed each outcome is
 * inspected: the only tolerated failure status is {@link RestStatus#TOO_MANY_REQUESTS}; anything else raises an
 * {@link AssertionError}. Finally the number of multi-get responses is compared with the number of submitted operations.
 *
 * @param backoffPolicy             the policy to wrap and install on the bulk processor
 * @param rejectedExecutionExpected when {@code true}, {@code TOO_MANY_REQUESTS} failures are silently accepted and the
 *                                  final count only has to be {@code <=} the number of operations; when {@code false},
 *                                  every such failure is passed to {@code assertRetriedCorrectly} and the final count
 *                                  must be strictly lower if any occurred, or equal if none occurred
 * @throws Exception if waiting on the latch, closing the processor, or one of the client calls fails
 */
private void executeBulkRejectionLoad(BackoffPolicy backoffPolicy, boolean rejectedExecutionExpected) throws Exception {
    final CorrelatingBackoffPolicy internalPolicy = new CorrelatingBackoffPolicy(backoffPolicy);
    final int numberOfAsyncOps = randomIntBetween(600, 700);
    final CountDownLatch latch = new CountDownLatch(numberOfAsyncOps);
    // listener callbacks may arrive concurrently, hence the concurrent set
    final Set<Object> responses = Collections.newSetFromMap(new ConcurrentHashMap<>());
    BulkProcessor bulkProcessor = initBulkProcessorBuilder(new BulkProcessor.Listener() {

        @Override
        public void beforeBulk(long executionId, BulkRequest request) {
            // no op
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
            internalPolicy.logResponse(response);
            responses.add(response);
            latch.countDown();
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            internalPolicy.logResponse(failure);
            responses.add(failure);
            latch.countDown();
        }
    }).setBulkActions(1).setConcurrentRequests(randomIntBetween(0, 100)).setBackoffPolicy(internalPolicy).build();
    MultiGetRequest multiGetRequest = indexDocs(bulkProcessor, numberOfAsyncOps);
    // the result of await is not checked here: a timeout shows up as a size mismatch in the assertion below
    latch.await(10, TimeUnit.SECONDS);
    bulkProcessor.close();
    // actual value first, matcher second, so a mismatch is reported with expected/actual the right way round
    assertThat(responses.size(), equalTo(numberOfAsyncOps));
    // validate all responses
    boolean rejectedAfterAllRetries = false;
    for (Object response : responses) {
        if (response instanceof BulkResponse) {
            BulkResponse bulkResponse = (BulkResponse) response;
            for (BulkItemResponse bulkItemResponse : bulkResponse.getItems()) {
                if (bulkItemResponse.isFailed()) {
                    BulkItemResponse.Failure failure = bulkItemResponse.getFailure();
                    if (failure.getStatus() == RestStatus.TOO_MANY_REQUESTS) {
                        if (rejectedExecutionExpected == false) {
                            assertRetriedCorrectly(internalPolicy, bulkResponse, failure.getCause());
                            rejectedAfterAllRetries = true;
                        }
                    } else {
                        throw new AssertionError("Unexpected failure with status: " + failure.getStatus());
                    }
                }
            }
        } else {
            if (response instanceof RemoteTransportException && ((RemoteTransportException) response).status() == RestStatus.TOO_MANY_REQUESTS) {
                if (rejectedExecutionExpected == false) {
                    assertRetriedCorrectly(internalPolicy, response, ((Throwable) response).getCause());
                    rejectedAfterAllRetries = true;
                }
                // otherwise ignored, we exceeded the write queue size when dispatching the initial bulk request
            } else {
                Throwable t = (Throwable) response;
                // we're not expecting any other errors
                throw new AssertionError("Unexpected failure", t);
            }
        }
    }
    highLevelClient().indices().refresh(new RefreshRequest(), RequestOptions.DEFAULT);
    int multiGetResponsesCount = highLevelClient().mget(multiGetRequest, RequestOptions.DEFAULT).getResponses().length;
    if (rejectedExecutionExpected) {
        assertThat(multiGetResponsesCount, lessThanOrEqualTo(numberOfAsyncOps));
    } else if (rejectedAfterAllRetries) {
        assertThat(multiGetResponsesCount, lessThan(numberOfAsyncOps));
    } else {
        assertThat(multiGetResponsesCount, equalTo(numberOfAsyncOps));
    }
}
Also used : RefreshRequest(org.opensearch.action.admin.indices.refresh.RefreshRequest) RemoteTransportException(org.opensearch.transport.RemoteTransportException) BulkItemResponse(org.opensearch.action.bulk.BulkItemResponse) BulkResponse(org.opensearch.action.bulk.BulkResponse) CountDownLatch(java.util.concurrent.CountDownLatch) MultiGetRequest(org.opensearch.action.get.MultiGetRequest) BulkProcessor(org.opensearch.action.bulk.BulkProcessor) BulkRequest(org.opensearch.action.bulk.BulkRequest)

Example 12 with RemoteTransportException

use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.

The method executeBulkRejectionLoad of the class BulkProcessorRetryIT.

/**
 * Creates {@code INDEX_NAME}, drives a burst of single-action bulk requests through a {@link BulkProcessor} and
 * validates every recorded outcome.
 *
 * <p>Each outcome (a {@link BulkResponse} or a {@link Throwable}) is logged to the {@code CorrelatingBackoffPolicy}
 * wrapping {@code backoffPolicy} and collected. The only accepted failure status is
 * {@link RestStatus#TOO_MANY_REQUESTS}; any other failure raises an {@link AssertionError}. The total hit count of a
 * match-all search is then compared with the number of submitted operations.
 *
 * @param backoffPolicy             the policy to wrap and install on the bulk processor
 * @param rejectedExecutionExpected when {@code true}, rejections are accepted without further checks and the hit count
 *                                  only has to be {@code <=} the number of operations; when {@code false}, each
 *                                  rejection is passed to {@code assertRetriedCorrectly} and the hit count must be
 *                                  strictly lower if any rejection occurred, or equal if none did
 * @throws Throwable if any step of the scenario fails
 */
private void executeBulkRejectionLoad(BackoffPolicy backoffPolicy, boolean rejectedExecutionExpected) throws Throwable {
    final CorrelatingBackoffPolicy correlatingPolicy = new CorrelatingBackoffPolicy(backoffPolicy);
    final int opCount = randomIntBetween(600, 700);
    final CountDownLatch pending = new CountDownLatch(opCount);
    final Set<Object> outcomes = Collections.newSetFromMap(new ConcurrentHashMap<>());
    assertAcked(prepareCreate(INDEX_NAME));
    ensureGreen();

    // records every bulk outcome, successful or not, and releases one latch permit per outcome
    final BulkProcessor.Listener recorder = new BulkProcessor.Listener() {

        @Override
        public void beforeBulk(long executionId, BulkRequest request) {
            // nothing to do before a bulk is sent
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
            correlatingPolicy.logResponse(response);
            outcomes.add(response);
            pending.countDown();
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            correlatingPolicy.logResponse(failure);
            outcomes.add(failure);
            pending.countDown();
        }
    };
    final BulkProcessor processor = BulkProcessor.builder(client()::bulk, recorder)
        .setBulkActions(1)
        .setConcurrentRequests(randomIntBetween(0, 100))
        .setBackoffPolicy(correlatingPolicy)
        .build();
    indexDocs(processor, opCount);
    pending.await(10, TimeUnit.SECONDS);
    processor.close();
    assertThat(outcomes.size(), equalTo(opCount));

    // walk through everything that came back and make sure only rejections were reported
    boolean rejectedAfterAllRetries = false;
    for (Object outcome : outcomes) {
        if (outcome instanceof BulkResponse) {
            final BulkResponse bulkResponse = (BulkResponse) outcome;
            for (BulkItemResponse item : bulkResponse.getItems()) {
                if (item.isFailed() == false) {
                    continue;
                }
                final BulkItemResponse.Failure failure = item.getFailure();
                if (failure.getStatus() != RestStatus.TOO_MANY_REQUESTS) {
                    throw new AssertionError("Unexpected failure status: " + failure.getStatus());
                }
                if (rejectedExecutionExpected == false) {
                    assertRetriedCorrectly(correlatingPolicy, bulkResponse, failure.getCause());
                    rejectedAfterAllRetries = true;
                }
            }
            continue;
        }
        final boolean rejectedOnDispatch = outcome instanceof RemoteTransportException
            && ((RemoteTransportException) outcome).status() == RestStatus.TOO_MANY_REQUESTS;
        if (rejectedOnDispatch == false) {
            // no other kind of error is expected
            throw new AssertionError("Unexpected failure", (Throwable) outcome);
        }
        if (rejectedExecutionExpected == false) {
            assertRetriedCorrectly(correlatingPolicy, outcome, ((Throwable) outcome).getCause());
            rejectedAfterAllRetries = true;
        }
        // when rejections are expected this is ignored: the write queue size was exceeded on the initial dispatch
    }

    client().admin().indices().refresh(new RefreshRequest()).get();
    final SearchResponse results = client().prepareSearch(INDEX_NAME).setQuery(QueryBuilders.matchAllQuery()).setSize(0).get();
    final int indexedDocs = (int) results.getHits().getTotalHits().value;
    if (rejectedExecutionExpected == false && rejectedAfterAllRetries == false) {
        assertThat(indexedDocs, equalTo(opCount));
    } else if (rejectedExecutionExpected) {
        assertThat(indexedDocs, lessThanOrEqualTo(opCount));
    } else {
        assertThat(indexedDocs, lessThan(opCount));
    }
}
Also used : RefreshRequest(org.opensearch.action.admin.indices.refresh.RefreshRequest) RemoteTransportException(org.opensearch.transport.RemoteTransportException) CountDownLatch(java.util.concurrent.CountDownLatch) SearchResponse(org.opensearch.action.search.SearchResponse)

Example 13 with RemoteTransportException

use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.

The method testNoUnwrap of the class StepListenerTests.

/**
 * Verifies that {@code StepListener} passes failures through untouched: a {@link RemoteTransportException} given to
 * {@code onFailure} is exactly what the {@code whenComplete} failure handler receives and what {@code result()}
 * throws, rather than the exception it wraps.
 */
public void testNoUnwrap() {
    final StepListener<String> listener = new StepListener<>();
    final RemoteTransportException wrapped = new RemoteTransportException("test", new RuntimeException("expected"));
    listener.onFailure(wrapped);

    // the failure callback must see the transport exception itself
    final AtomicReference<RuntimeException> observed = new AtomicReference<>();
    listener.whenComplete(null, failure -> observed.set((RuntimeException) failure));
    assertEquals(RemoteTransportException.class, observed.get().getClass());

    // reading the result must rethrow the same exception type
    final RuntimeException thrown = expectThrows(RuntimeException.class, () -> listener.result());
    assertEquals(RemoteTransportException.class, thrown.getClass());
}
Also used : RemoteTransportException(org.opensearch.transport.RemoteTransportException) AtomicReference(java.util.concurrent.atomic.AtomicReference)

Example 14 with RemoteTransportException

use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.

The method testRetryTransientReplicationFailure of the class ReplicationOperationTests.

/**
 * Runs a replication operation in which every expected replica is assigned a simulated failure and asserts that the
 * operation still completes with all replicas processed and none reported as failed.
 *
 * <p>The failure for each replica is picked at random: a {@link RemoteTransportException} wrapping either a
 * {@link CircuitBreakingException} or an {@link OpenSearchRejectedExecutionException}, or a bare
 * {@link ConnectTransportException}.
 *
 * @throws Exception declared by the signature; nothing in this method catches checked exceptions
 */
public void testRetryTransientReplicationFailure() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);
    ClusterState initialState = stateWithActivePrimary(index, true, randomInt(5));
    IndexMetadata indexMetadata = initialState.getMetadata().index(index);
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final IndexShardRoutingTable indexShardRoutingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = indexShardRoutingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
        initialState = ClusterState.builder(initialState).nodes(DiscoveryNodes.builder(initialState.nodes()).localNodeId(primaryShard.relocatingNodeId())).build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }
    // add a few in-sync allocation ids that don't have corresponding routing entries
    final Set<String> staleAllocationIds = Sets.newHashSet(generateRandomStringArray(4, 10, false));
    final Set<String> inSyncAllocationIds = Sets.union(indexMetadata.inSyncAllocationIds(0), staleAllocationIds);
    final Set<String> trackedShards = new HashSet<>();
    final Set<String> untrackedShards = new HashSet<>();
    addTrackingInfo(indexShardRoutingTable, primaryShard, trackedShards, untrackedShards);
    trackedShards.addAll(staleAllocationIds);
    final ReplicationGroup replicationGroup = new ReplicationGroup(indexShardRoutingTable, inSyncAllocationIds, trackedShards, 0);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, initialState, trackedShards);
    // assign every expected replica one randomly chosen failure
    final Map<ShardRouting, Exception> simulatedFailures = new HashMap<>();
    for (ShardRouting replica : expectedReplicas) {
        Exception cause;
        Exception exception;
        if (randomBoolean()) {
            // failure raised on the remote side and delivered wrapped in a RemoteTransportException
            if (randomBoolean()) {
                cause = new CircuitBreakingException("broken", CircuitBreaker.Durability.PERMANENT);
            } else {
                cause = new OpenSearchRejectedExecutionException("rejected");
            }
            exception = new RemoteTransportException("remote", cause);
        } else {
            // connection-level failure, used directly without a wrapper
            TransportAddress address = new TransportAddress(InetAddress.getLoopbackAddress(), 9300);
            DiscoveryNode node = new DiscoveryNode("replica", address, Version.CURRENT);
            cause = new ConnectTransportException(node, "broken");
            exception = cause;
        }
        logger.debug("--> simulating failure on {} with [{}]", replica, exception.getClass().getSimpleName());
        simulatedFailures.put(replica, exception);
    }
    Request request = new Request(shardId);
    PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    // NOTE(review): the `true` flag presumably makes the proxy fail each replica only on the first attempt so that a
    // retry succeeds - confirm against TestReplicaProxy
    final TestReplicaProxy replicasProxy = new TestReplicaProxy(simulatedFailures, true);
    final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup, threadPool);
    // NOTE(review): 20ms / 60s look like the initial retry backoff and the overall retry timeout - confirm against
    // TestReplicationOperation
    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, primaryTerm, TimeValue.timeValueMillis(20), TimeValue.timeValueSeconds(60));
    op.execute();
    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    // despite the simulated failures no replica may end up reported as failed
    assertThat(replicasProxy.failedReplicas.size(), equalTo(0));
    // exactly the allocation ids added above without routing entries are expected to be marked stale
    assertThat(replicasProxy.markedAsStaleCopies, equalTo(staleAllocationIds));
    assertThat("post replication operations not run on primary", request.runPostReplicationActionsOnPrimary.get(), equalTo(true));
    ShardInfo shardInfo = listener.actionGet().getShardInfo();
    // the primary plus every expected replica must be counted as successful
    assertThat(shardInfo.getSuccessful(), equalTo(1 + expectedReplicas.size()));
    final List<ShardRouting> unassignedShards = indexShardRoutingTable.shardsWithState(ShardRoutingState.UNASSIGNED);
    final int totalShards = 1 + expectedReplicas.size() + unassignedShards.size() + untrackedShards.size();
    assertThat(replicationGroup.toString(), shardInfo.getTotal(), equalTo(totalShards));
    // remove(...) checks the primary's own entry and strips it, so the next assertion compares only the remaining
    // entries with the checkpoints generated by the proxy
    assertThat(primary.knownLocalCheckpoints.remove(primaryShard.allocationId().getId()), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(replicasProxy.generatedLocalCheckpoints));
    // same two-step check for the global checkpoints
    assertThat(primary.knownGlobalCheckpoints.remove(primaryShard.allocationId().getId()), equalTo(primary.globalCheckpoint));
    assertThat(primary.knownGlobalCheckpoints, equalTo(replicasProxy.generatedGlobalCheckpoints));
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) TransportAddress(org.opensearch.common.transport.TransportAddress) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) ReplicationGroup(org.opensearch.index.shard.ReplicationGroup) ShardId(org.opensearch.index.shard.ShardId) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashSet(java.util.HashSet) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo) ClusterState(org.opensearch.cluster.ClusterState) RemoteTransportException(org.opensearch.transport.RemoteTransportException) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NodeClosedException(org.opensearch.node.NodeClosedException) UnavailableShardsException(org.opensearch.action.UnavailableShardsException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) SendRequestTransportException(org.opensearch.transport.SendRequestTransportException) ExecutionException(java.util.concurrent.ExecutionException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 15 with RemoteTransportException

use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.

The method handleErrorOnSendFiles of the class RecoverySourceHandler.

/**
 * Decides which exception to propagate after sending or cleaning files failed. This method never returns normally.
 *
 * <p>If {@code e} does not unwrap to a corruption exception it is rethrown unchanged. Otherwise every file in
 * {@code mds} is integrity-checked against {@code store}: each file that fails the check is logged and the engine is
 * failed with the corruption exception, which is then thrown. If all files pass, a {@link RemoteTransportException}
 * carrying {@code e} as a suppressed exception is thrown instead.
 *
 * @param store the store used to verify the integrity of the files
 * @param e     the failure that was raised while handling the files
 * @param mds   metadata of the files to re-check
 * @throws Exception always: {@code e} itself, the unwrapped corruption exception, or a {@link RemoteTransportException}
 */
private void handleErrorOnSendFiles(Store store, Exception e, StoreFileMetadata[] mds) throws Exception {
    final IOException corruption = ExceptionsHelper.unwrapCorruption(e);
    assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[handle error on send/clean files]");
    if (corruption == null) {
        // not corruption related: hand the original failure back to the caller as-is
        throw e;
    }

    boolean corruptedLocally = false;
    for (StoreFileMetadata md : mds) {
        cancellableThreads.checkForCancel();
        logger.debug("checking integrity for file {} after remove corruption exception", md);
        if (store.checkIntegrityNoException(md)) {
            continue;
        }
        // we are corrupted on the primary -- fail!
        logger.warn("{} Corrupted file detected {} checksum mismatch", shardId, md);
        corruptedLocally = true;
        failEngine(corruption);
    }
    if (corruptedLocally) {
        throw corruption;
    }

    // every local checksum is fine, so the corruption has happened on the way to replica
    final RemoteTransportException transportFailure = new RemoteTransportException(
        "File corruption occurred on recovery but checksums are ok",
        null
    );
    transportFailure.addSuppressed(e);
    logger.warn(
        () -> new ParameterizedMessage(
            "{} Remote file corruption on node {}, recovering {}. local checksum OK",
            shardId,
            request.targetNode(),
            mds
        ),
        corruption
    );
    throw transportFailure;
}
Also used : RemoteTransportException(org.opensearch.transport.RemoteTransportException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) IOException(java.io.IOException) StoreFileMetadata(org.opensearch.index.store.StoreFileMetadata) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) RecoveryEngineException(org.opensearch.index.engine.RecoveryEngineException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) IndexShardClosedException(org.opensearch.index.shard.IndexShardClosedException) IOException(java.io.IOException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) IndexShardRelocatedException(org.opensearch.index.shard.IndexShardRelocatedException) RetentionLeaseNotFoundException(org.opensearch.index.seqno.RetentionLeaseNotFoundException)

Aggregations

RemoteTransportException (org.opensearch.transport.RemoteTransportException)19 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)7 IOException (java.io.IOException)5 CountDownLatch (java.util.concurrent.CountDownLatch)5 ClusterState (org.opensearch.cluster.ClusterState)5 AtomicReference (java.util.concurrent.atomic.AtomicReference)4 TransportException (org.opensearch.transport.TransportException)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 ExecutionException (java.util.concurrent.ExecutionException)3 Matchers.containsString (org.hamcrest.Matchers.containsString)3 OpenSearchException (org.opensearch.OpenSearchException)3 TransportAddress (org.opensearch.common.transport.TransportAddress)3 ShardId (org.opensearch.index.shard.ShardId)3 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2