use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.
the class BulkProcessorRetryIT method executeBulkRejectionLoad.
/**
 * Sends a random number (600-700) of index operations through a {@link BulkProcessor} configured with
 * one action per bulk request and a random concurrency level, then validates how
 * {@code TOO_MANY_REQUESTS} rejections were handled.
 *
 * <p>Every listener callback (success or failure) is recorded and counted down on a latch. After the
 * processor is closed, each recorded response is inspected: any failure other than
 * {@code TOO_MANY_REQUESTS} fails the test immediately; a {@code TOO_MANY_REQUESTS} failure is
 * tolerated, and when rejections are <em>not</em> expected it is additionally checked against the
 * backoff policy via {@code assertRetriedCorrectly}.
 *
 * @param backoffPolicy             policy that is wrapped in a {@code CorrelatingBackoffPolicy} and installed
 *                                  on the processor
 * @param rejectedExecutionExpected {@code true} if rejections may legitimately surface to the caller; in
 *                                  that case no retry validation is performed
 * @throws Exception if waiting on the latch, closing the processor, or any client call fails
 */
private void executeBulkRejectionLoad(BackoffPolicy backoffPolicy, boolean rejectedExecutionExpected) throws Exception {
    final CorrelatingBackoffPolicy internalPolicy = new CorrelatingBackoffPolicy(backoffPolicy);
    final int numberOfAsyncOps = randomIntBetween(600, 700);
    final CountDownLatch latch = new CountDownLatch(numberOfAsyncOps);
    // Listener callbacks may arrive on multiple threads, hence the concurrent set.
    final Set<Object> responses = Collections.newSetFromMap(new ConcurrentHashMap<>());

    BulkProcessor bulkProcessor = initBulkProcessorBuilder(new BulkProcessor.Listener() {
        @Override
        public void beforeBulk(long executionId, BulkRequest request) {
            // no op
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
            internalPolicy.logResponse(response);
            responses.add(response);
            latch.countDown();
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            internalPolicy.logResponse(failure);
            responses.add(failure);
            latch.countDown();
        }
    }).setBulkActions(1).setConcurrentRequests(randomIntBetween(0, 100)).setBackoffPolicy(internalPolicy).build();

    MultiGetRequest multiGetRequest = indexDocs(bulkProcessor, numberOfAsyncOps);
    // The await result is intentionally not asserted: the size check below is the actual gate.
    latch.await(10, TimeUnit.SECONDS);
    bulkProcessor.close();

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages truthful.
    assertEquals(numberOfAsyncOps, responses.size());

    // validate all responses
    boolean rejectedAfterAllRetries = false;
    for (Object response : responses) {
        if (response instanceof BulkResponse) {
            BulkResponse bulkResponse = (BulkResponse) response;
            for (BulkItemResponse bulkItemResponse : bulkResponse.getItems()) {
                if (bulkItemResponse.isFailed()) {
                    BulkItemResponse.Failure failure = bulkItemResponse.getFailure();
                    if (failure.getStatus() == RestStatus.TOO_MANY_REQUESTS) {
                        if (rejectedExecutionExpected == false) {
                            assertRetriedCorrectly(internalPolicy, bulkResponse, failure.getCause());
                            rejectedAfterAllRetries = true;
                        }
                    } else {
                        throw new AssertionError("Unexpected failure with status: " + failure.getStatus());
                    }
                }
            }
        } else {
            if (response instanceof RemoteTransportException
                && ((RemoteTransportException) response).status() == RestStatus.TOO_MANY_REQUESTS) {
                // we exceeded the write queue size when dispatching the initial bulk request;
                // tolerated, but retries are verified when rejections were not expected
                if (rejectedExecutionExpected == false) {
                    assertRetriedCorrectly(internalPolicy, response, ((Throwable) response).getCause());
                    rejectedAfterAllRetries = true;
                }
            } else {
                Throwable t = (Throwable) response;
                // we're not expecting any other errors
                throw new AssertionError("Unexpected failure", t);
            }
        }
    }

    highLevelClient().indices().refresh(new RefreshRequest(), RequestOptions.DEFAULT);
    // Number of item responses returned by the multi-get built while indexing.
    int multiGetResponsesCount = highLevelClient().mget(multiGetRequest, RequestOptions.DEFAULT).getResponses().length;

    if (rejectedExecutionExpected) {
        assertThat(multiGetResponsesCount, lessThanOrEqualTo(numberOfAsyncOps));
    } else if (rejectedAfterAllRetries) {
        assertThat(multiGetResponsesCount, lessThan(numberOfAsyncOps));
    } else {
        assertThat(multiGetResponsesCount, equalTo(numberOfAsyncOps));
    }
}
use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.
the class BulkProcessorRetryIT method executeBulkRejectionLoad.
/**
 * Creates the test index, pushes a random number (600-700) of index operations through a
 * {@link BulkProcessor} that sends one action per bulk request, and then checks that the only
 * failures observed are {@code TOO_MANY_REQUESTS} rejections.
 *
 * <p>When rejections are not expected, every rejection that does surface is verified against the
 * backoff policy with {@code assertRetriedCorrectly}. Finally the number of hits of a match-all search
 * is compared to the number of operations issued.
 *
 * @param backoffPolicy             policy wrapped in a {@code CorrelatingBackoffPolicy} and given to the processor
 * @param rejectedExecutionExpected whether rejections are allowed to reach the listener without retry checks
 * @throws Throwable if index creation, the bulk load, or any follow-up request fails
 */
private void executeBulkRejectionLoad(BackoffPolicy backoffPolicy, boolean rejectedExecutionExpected) throws Throwable {
    final CorrelatingBackoffPolicy internalPolicy = new CorrelatingBackoffPolicy(backoffPolicy);
    final int numberOfAsyncOps = randomIntBetween(600, 700);
    final CountDownLatch pending = new CountDownLatch(numberOfAsyncOps);
    final Set<Object> outcomes = Collections.newSetFromMap(new ConcurrentHashMap<>());

    assertAcked(prepareCreate(INDEX_NAME));
    ensureGreen();

    // Records every bulk outcome (response or failure) and releases one latch permit for it.
    final BulkProcessor.Listener recorder = new BulkProcessor.Listener() {
        @Override
        public void beforeBulk(long executionId, BulkRequest request) {
            // nothing to do before a bulk is sent
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
            internalPolicy.logResponse(response);
            outcomes.add(response);
            pending.countDown();
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            internalPolicy.logResponse(failure);
            outcomes.add(failure);
            pending.countDown();
        }
    };

    final BulkProcessor processor = BulkProcessor.builder(client()::bulk, recorder)
        .setBulkActions(1)
        .setConcurrentRequests(randomIntBetween(0, 100))
        .setBackoffPolicy(internalPolicy)
        .build();

    indexDocs(processor, numberOfAsyncOps);
    pending.await(10, TimeUnit.SECONDS);
    processor.close();

    assertThat(outcomes.size(), equalTo(numberOfAsyncOps));

    // Walk over everything the listener saw; anything but a TOO_MANY_REQUESTS rejection is fatal.
    boolean rejectedAfterAllRetries = false;
    for (Object outcome : outcomes) {
        if (outcome instanceof BulkResponse) {
            final BulkResponse bulkResponse = (BulkResponse) outcome;
            for (BulkItemResponse item : bulkResponse.getItems()) {
                if (item.isFailed() == false) {
                    continue;
                }
                final BulkItemResponse.Failure itemFailure = item.getFailure();
                if (itemFailure.getStatus() != RestStatus.TOO_MANY_REQUESTS) {
                    throw new AssertionError("Unexpected failure status: " + itemFailure.getStatus());
                }
                if (rejectedExecutionExpected == false) {
                    assertRetriedCorrectly(internalPolicy, bulkResponse, itemFailure.getCause());
                    rejectedAfterAllRetries = true;
                }
            }
            continue;
        }

        final boolean rejectedOnDispatch = outcome instanceof RemoteTransportException
            && ((RemoteTransportException) outcome).status() == RestStatus.TOO_MANY_REQUESTS;
        if (rejectedOnDispatch == false) {
            // no other kind of error is expected here
            throw new AssertionError("Unexpected failure", (Throwable) outcome);
        }
        // the write queue size was exceeded while dispatching the initial bulk request
        if (rejectedExecutionExpected == false) {
            assertRetriedCorrectly(internalPolicy, outcome, ((Throwable) outcome).getCause());
            rejectedAfterAllRetries = true;
        }
    }

    client().admin().indices().refresh(new RefreshRequest()).get();
    final SearchResponse results = client().prepareSearch(INDEX_NAME)
        .setQuery(QueryBuilders.matchAllQuery())
        .setSize(0)
        .get();
    final int totalHits = (int) results.getHits().getTotalHits().value;

    if (rejectedExecutionExpected) {
        assertThat(totalHits, lessThanOrEqualTo(numberOfAsyncOps));
    } else if (rejectedAfterAllRetries) {
        assertThat(totalHits, lessThan(numberOfAsyncOps));
    } else {
        assertThat(totalHits, equalTo(numberOfAsyncOps));
    }
}
use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.
the class StepListenerTests method testNoUnwrap.
/**
 * Verifies that {@link StepListener} does not unwrap failures: a {@link RemoteTransportException}
 * passed to {@code onFailure} must be delivered unchanged both to the failure callback registered
 * with {@code whenComplete} and to callers of {@code result()}.
 */
public void testNoUnwrap() {
    final StepListener<String> listener = new StepListener<>();
    final RuntimeException rootCause = new RuntimeException("expected");
    listener.onFailure(new RemoteTransportException("test", rootCause));

    // Capture whatever the failure callback receives; no success handler is needed here.
    final AtomicReference<RuntimeException> delivered = new AtomicReference<>();
    listener.whenComplete(null, failure -> delivered.set((RuntimeException) failure));
    assertEquals(RemoteTransportException.class, delivered.get().getClass());

    // Asking for the result of a failed step must rethrow the very same wrapper type.
    final RuntimeException thrown = expectThrows(RuntimeException.class, listener::result);
    assertEquals(RemoteTransportException.class, thrown.getClass());
}
use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.
the class ReplicationOperationTests method testRetryTransientReplicationFailure.
/**
 * Checks that a replication operation still completes successfully on every expected replica when
 * each replica is set up with a simulated failure.
 *
 * <p>For each expected replica one of the following is registered: a {@link RemoteTransportException}
 * wrapping a {@code CircuitBreakingException} or an {@code OpenSearchRejectedExecutionException}, or
 * a plain {@code ConnectTransportException}. After executing the operation the test asserts that no
 * replica was failed, that only the stale allocation ids were marked as stale, and that the shard
 * info and checkpoint bookkeeping on the primary match what the replica proxy generated.
 *
 * @throws Exception if building the cluster state or executing the operation fails
 */
public void testRetryTransientReplicationFailure() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);

    ClusterState clusterState = stateWithActivePrimary(indexName, true, randomInt(5));
    final IndexMetadata indexMetadata = clusterState.getMetadata().index(indexName);
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final IndexShardRoutingTable shardRoutingTable = clusterState.getRoutingTable().shardRoutingTable(shardId);

    ShardRouting primaryRouting = shardRoutingTable.primaryShard();
    if (primaryRouting.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node
        // after relocation source was marked as relocated
        final DiscoveryNodes.Builder nodesOnTarget = DiscoveryNodes.builder(clusterState.nodes())
            .localNodeId(primaryRouting.relocatingNodeId());
        clusterState = ClusterState.builder(clusterState).nodes(nodesOnTarget).build();
        primaryRouting = primaryRouting.getTargetRelocatingShard();
    }

    // add a few in-sync allocation ids that don't have corresponding routing entries
    final Set<String> staleIds = Sets.newHashSet(generateRandomStringArray(4, 10, false));
    final Set<String> inSyncIds = Sets.union(indexMetadata.inSyncAllocationIds(0), staleIds);

    final Set<String> tracked = new HashSet<>();
    final Set<String> untracked = new HashSet<>();
    addTrackingInfo(shardRoutingTable, primaryRouting, tracked, untracked);
    tracked.addAll(staleIds);

    final ReplicationGroup replicationGroup = new ReplicationGroup(shardRoutingTable, inSyncIds, tracked, 0);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, clusterState, tracked);

    // Assign one simulated failure to every replica the operation is expected to reach.
    final Map<ShardRouting, Exception> simulatedFailures = new HashMap<>();
    for (ShardRouting replica : expectedReplicas) {
        final Exception simulated;
        if (randomBoolean()) {
            final Exception rootCause = randomBoolean()
                ? new CircuitBreakingException("broken", CircuitBreaker.Durability.PERMANENT)
                : new OpenSearchRejectedExecutionException("rejected");
            simulated = new RemoteTransportException("remote", rootCause);
        } else {
            final TransportAddress loopback = new TransportAddress(InetAddress.getLoopbackAddress(), 9300);
            final DiscoveryNode replicaNode = new DiscoveryNode("replica", loopback, Version.CURRENT);
            simulated = new ConnectTransportException(replicaNode, "broken");
        }
        logger.debug("--> simulating failure on {} with [{}]", replica, simulated.getClass().getSimpleName());
        simulatedFailures.put(replica, simulated);
    }

    final Request request = new Request(shardId);
    final PlainActionFuture<TestPrimary.Result> future = new PlainActionFuture<>();
    // NOTE(review): the boolean flag and the two TimeValue arguments presumably control retry
    // behaviour (retry enabled, initial delay, timeout) -- confirm against the helper classes.
    final TestReplicaProxy replicaProxy = new TestReplicaProxy(simulatedFailures, true);
    final TestPrimary primary = new TestPrimary(primaryRouting, () -> replicationGroup, threadPool);
    final TestReplicationOperation operation = new TestReplicationOperation(
        request,
        primary,
        future,
        replicaProxy,
        primaryTerm,
        TimeValue.timeValueMillis(20),
        TimeValue.timeValueSeconds(60)
    );
    operation.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    assertThat(replicaProxy.failedReplicas.size(), equalTo(0));
    assertThat(replicaProxy.markedAsStaleCopies, equalTo(staleIds));
    assertThat("post replication operations not run on primary", request.runPostReplicationActionsOnPrimary.get(), equalTo(true));

    final ShardInfo shardInfo = future.actionGet().getShardInfo();
    final int primaryPlusReplicas = 1 + expectedReplicas.size();
    assertThat(shardInfo.getSuccessful(), equalTo(primaryPlusReplicas));

    final List<ShardRouting> unassigned = shardRoutingTable.shardsWithState(ShardRoutingState.UNASSIGNED);
    final int totalShards = primaryPlusReplicas + unassigned.size() + untracked.size();
    assertThat(replicationGroup.toString(), shardInfo.getTotal(), equalTo(totalShards));

    // Remove the primary's own entry first so that what remains can be compared to the replica proxy's view.
    final String primaryAllocationId = primaryRouting.allocationId().getId();
    assertThat(primary.knownLocalCheckpoints.remove(primaryAllocationId), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(replicaProxy.generatedLocalCheckpoints));
    assertThat(primary.knownGlobalCheckpoints.remove(primaryAllocationId), equalTo(primary.globalCheckpoint));
    assertThat(primary.knownGlobalCheckpoints, equalTo(replicaProxy.generatedGlobalCheckpoints));
}
use of org.opensearch.transport.RemoteTransportException in project OpenSearch by opensearch-project.
the class RecoverySourceHandler method handleErrorOnSendFiles.
/**
 * Decides which exception to propagate after sending or cleaning files has failed.
 *
 * <p>If {@code e} carries no corruption exception it is rethrown unchanged. Otherwise every file in
 * {@code mds} is integrity-checked against the local store: if any check fails, the engine is failed
 * with the corruption exception and that exception is thrown once all files have been checked; if all
 * local checksums are fine, a {@link RemoteTransportException} is thrown with {@code e} attached as
 * a suppressed exception.
 *
 * @param store store used to verify the integrity of each file
 * @param e     the failure that was raised while sending or cleaning files
 * @param mds   metadata of the files to verify
 * @throws Exception always; one of {@code e}, the unwrapped corruption exception, or a
 *                   {@link RemoteTransportException}
 */
private void handleErrorOnSendFiles(Store store, Exception e, StoreFileMetadata[] mds) throws Exception {
    final IOException corruption = ExceptionsHelper.unwrapCorruption(e);
    assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[handle error on send/clean files]");

    if (corruption == null) {
        // not a corruption problem at all: hand the original failure back untouched
        throw e;
    }

    boolean corruptedLocally = false;
    for (StoreFileMetadata fileMetadata : mds) {
        cancellableThreads.checkForCancel();
        logger.debug("checking integrity for file {} after remove corruption exception", fileMetadata);
        if (store.checkIntegrityNoException(fileMetadata)) {
            continue;
        }
        // we are corrupted on the primary -- fail!
        logger.warn("{} Corrupted file detected {} checksum mismatch", shardId, fileMetadata);
        corruptedLocally = true;
        failEngine(corruption);
    }

    if (corruptedLocally) {
        throw corruption;
    }

    // corruption has happened on the way to replica
    final RemoteTransportException transportFailure = new RemoteTransportException(
        "File corruption occurred on recovery but checksums are ok",
        null
    );
    transportFailure.addSuppressed(e);
    logger.warn(
        () -> new ParameterizedMessage(
            "{} Remote file corruption on node {}, recovering {}. local checksum OK",
            shardId,
            request.targetNode(),
            mds
        ),
        corruption
    );
    throw transportFailure;
}
Aggregations