use of org.opensearch.node.NodeClosedException in project OpenSearch by opensearch-project.
the class TransportClusterHealthAction method executeHealth.
private void executeHealth(final ClusterHealthRequest request, final ClusterState currentState, final ActionListener<ClusterHealthResponse> listener, final int waitCount, final Consumer<ClusterState> onNewClusterStateAfterDelay) {
if (request.timeout().millis() == 0) {
listener.onResponse(getResponse(request, currentState, waitCount, TimeoutState.ZERO_TIMEOUT));
return;
}
final Predicate<ClusterState> validationPredicate = newState -> validateRequest(request, newState, waitCount);
if (validationPredicate.test(currentState)) {
listener.onResponse(getResponse(request, currentState, waitCount, TimeoutState.OK));
} else {
final ClusterStateObserver observer = new ClusterStateObserver(currentState, clusterService, null, logger, threadPool.getThreadContext());
final ClusterStateObserver.Listener stateListener = new ClusterStateObserver.Listener() {
@Override
public void onNewClusterState(ClusterState newState) {
onNewClusterStateAfterDelay.accept(newState);
}
@Override
public void onClusterServiceClose() {
listener.onFailure(new NodeClosedException(clusterService.localNode()));
}
@Override
public void onTimeout(TimeValue timeout) {
listener.onResponse(getResponse(request, observer.setAndGetObservedState(), waitCount, TimeoutState.TIMED_OUT));
}
};
observer.waitForNextChange(stateListener, validationPredicate, request.timeout());
}
}
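The pattern above is common wherever OpenSearch waits on the cluster state: if the current state does not yet satisfy the predicate, register a ClusterStateObserver.Listener and translate onClusterServiceClose into a NodeClosedException so the caller fails fast when the node shuts down. Below is a minimal, self-contained sketch of that pattern; the helper class, the OpenSearchTimeoutException used in the timeout branch, and the exact package names (they vary across OpenSearch versions) are assumptions for illustration, not part of TransportClusterHealthAction.
import java.util.function.Predicate;

import org.opensearch.OpenSearchTimeoutException;
import org.opensearch.action.ActionListener;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.ClusterStateObserver;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.node.NodeClosedException;

final class ClusterStateWaiter {

    // Wait until the observed cluster state satisfies the predicate, then deliver it to the listener.
    static void waitFor(ClusterStateObserver observer, ClusterService clusterService, Predicate<ClusterState> predicate, TimeValue timeout, ActionListener<ClusterState> listener) {
        observer.waitForNextChange(new ClusterStateObserver.Listener() {

            @Override
            public void onNewClusterState(ClusterState state) {
                listener.onResponse(state);
            }

            @Override
            public void onClusterServiceClose() {
                // The local node is shutting down; fail the caller explicitly rather than leaving it hanging.
                listener.onFailure(new NodeClosedException(clusterService.localNode()));
            }

            @Override
            public void onTimeout(TimeValue elapsed) {
                listener.onFailure(new OpenSearchTimeoutException("timed out waiting for cluster state"));
            }
        }, predicate, timeout);
    }
}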
use of org.opensearch.node.NodeClosedException in project OpenSearch by opensearch-project.
the class TransportWriteActionTests method testReplicaProxy.
public void testReplicaProxy() throws InterruptedException, ExecutionException {
CapturingTransport transport = new CapturingTransport();
TransportService transportService = transport.createTransportService(clusterService.getSettings(), threadPool, TransportService.NOOP_TRANSPORT_INTERCEPTOR, x -> clusterService.localNode(), null, Collections.emptySet());
transportService.start();
transportService.acceptIncomingRequests();
ShardStateAction shardStateAction = new ShardStateAction(clusterService, transportService, null, null, threadPool);
TestAction action = new TestAction(Settings.EMPTY, "internal:testAction", transportService, clusterService, shardStateAction, threadPool);
final String index = "test";
final ShardId shardId = new ShardId(index, "_na_", 0);
ClusterState state = ClusterStateCreationUtils.stateWithActivePrimary(index, true, 1 + randomInt(3), randomInt(2));
logger.info("using state: {}", state);
ClusterServiceUtils.setState(clusterService, state);
final long primaryTerm = state.metadata().index(index).primaryTerm(0);
ReplicationOperation.Replicas<TestRequest> proxy = action.newReplicasProxy();
// check that a request to an unknown node fails
PlainActionFuture<ReplicaResponse> listener = new PlainActionFuture<>();
ShardRoutingState routingState = randomFrom(ShardRoutingState.INITIALIZING, ShardRoutingState.STARTED, ShardRoutingState.RELOCATING);
proxy.performOn(TestShardRouting.newShardRouting(shardId, "NOT THERE", routingState == ShardRoutingState.RELOCATING ? state.nodes().iterator().next().getId() : null, false, routingState), new TestRequest(), primaryTerm, randomNonNegativeLong(), randomNonNegativeLong(), listener);
assertTrue(listener.isDone());
assertListenerThrows("non existent node should throw a NoNodeAvailableException", listener, NoNodeAvailableException.class);
final IndexShardRoutingTable shardRoutings = state.routingTable().shardRoutingTable(shardId);
final ShardRouting replica = randomFrom(shardRoutings.replicaShards().stream().filter(ShardRouting::assignedToNode).collect(Collectors.toList()));
listener = new PlainActionFuture<>();
proxy.performOn(replica, new TestRequest(), primaryTerm, randomNonNegativeLong(), randomNonNegativeLong(), listener);
assertFalse(listener.isDone());
CapturingTransport.CapturedRequest[] captures = transport.getCapturedRequestsAndClear();
assertThat(captures, arrayWithSize(1));
if (randomBoolean()) {
final TransportReplicationAction.ReplicaResponse response = new TransportReplicationAction.ReplicaResponse(randomLong(), randomLong());
transport.handleResponse(captures[0].requestId, response);
assertTrue(listener.isDone());
assertThat(listener.get(), equalTo(response));
} else if (randomBoolean()) {
transport.handleRemoteError(captures[0].requestId, new OpenSearchException("simulated"));
assertTrue(listener.isDone());
assertListenerThrows("listener should reflect remote error", listener, OpenSearchException.class);
} else {
transport.handleError(captures[0].requestId, new TransportException("simulated"));
assertTrue(listener.isDone());
assertListenerThrows("listener should reflect remote error", listener, TransportException.class);
}
AtomicReference<Object> failure = new AtomicReference<>();
AtomicBoolean success = new AtomicBoolean();
proxy.failShardIfNeeded(replica, primaryTerm, "test", new OpenSearchException("simulated"), ActionListener.wrap(r -> success.set(true), failure::set));
CapturingTransport.CapturedRequest[] shardFailedRequests = transport.getCapturedRequestsAndClear();
// A write replication action proxy should fail the shard
assertEquals(1, shardFailedRequests.length);
CapturingTransport.CapturedRequest shardFailedRequest = shardFailedRequests[0];
ShardStateAction.FailedShardEntry shardEntry = (ShardStateAction.FailedShardEntry) shardFailedRequest.request;
// the shard the request was sent to and the shard to be failed should be the same
assertEquals(shardEntry.getShardId(), replica.shardId());
assertEquals(shardEntry.getAllocationId(), replica.allocationId().getId());
if (randomBoolean()) {
// simulate success
transport.handleResponse(shardFailedRequest.requestId, TransportResponse.Empty.INSTANCE);
assertTrue(success.get());
assertNull(failure.get());
} else if (randomBoolean()) {
// simulate the primary has been demoted
transport.handleRemoteError(shardFailedRequest.requestId, new ShardStateAction.NoLongerPrimaryShardException(replica.shardId(), "shard-failed-test"));
assertFalse(success.get());
assertNotNull(failure.get());
} else {
// simulate a node closed exception
transport.handleRemoteError(shardFailedRequest.requestId, new NodeClosedException(state.nodes().getLocalNode()));
assertFalse(success.get());
assertNotNull(failure.get());
}
}
use of org.opensearch.node.NodeClosedException in project OpenSearch by opensearch-project.
the class OpenSearchExceptionTests method testFailureToAndFromXContentWithDetails.
public void testFailureToAndFromXContentWithDetails() throws IOException {
final XContent xContent = randomFrom(XContentType.values()).xContent();
Exception failure;
Throwable failureCause;
OpenSearchException expected;
OpenSearchException expectedCause;
OpenSearchException suppressed;
switch(randomIntBetween(0, 6)) {
case 0: // Simple opensearch exception without cause
failure = new NoNodeAvailableException("A");
expected = new OpenSearchException("OpenSearch exception [type=no_node_available_exception, reason=A]");
expected.addSuppressed(new OpenSearchException("OpenSearch exception [type=no_node_available_exception, reason=A]"));
break;
case 1: // Simple opensearch exception with headers (other metadata of type number are not parsed)
failure = new ParsingException(3, 2, "B", null);
((OpenSearchException) failure).addHeader("header_name", "0", "1");
expected = new OpenSearchException("OpenSearch exception [type=parsing_exception, reason=B]");
expected.addHeader("header_name", "0", "1");
suppressed = new OpenSearchException("OpenSearch exception [type=parsing_exception, reason=B]");
suppressed.addHeader("header_name", "0", "1");
expected.addSuppressed(suppressed);
break;
case 2: // OpenSearch exception with a cause, headers and parsable metadata
failureCause = new NullPointerException("var is null");
failure = new ScriptException("C", failureCause, singletonList("stack"), "test", "painless");
((OpenSearchException) failure).addHeader("script_name", "my_script");
expectedCause = new OpenSearchException("OpenSearch exception [type=null_pointer_exception, reason=var is null]");
expected = new OpenSearchException("OpenSearch exception [type=script_exception, reason=C]", expectedCause);
expected.addHeader("script_name", "my_script");
expected.addMetadata("opensearch.lang", "painless");
expected.addMetadata("opensearch.script", "test");
expected.addMetadata("opensearch.script_stack", "stack");
suppressed = new OpenSearchException("OpenSearch exception [type=script_exception, reason=C]");
suppressed.addHeader("script_name", "my_script");
suppressed.addMetadata("opensearch.lang", "painless");
suppressed.addMetadata("opensearch.script", "test");
suppressed.addMetadata("opensearch.script_stack", "stack");
expected.addSuppressed(suppressed);
break;
case 3: // JDK exception without cause
failure = new IllegalStateException("D");
expected = new OpenSearchException("OpenSearch exception [type=illegal_state_exception, reason=D]");
suppressed = new OpenSearchException("OpenSearch exception [type=illegal_state_exception, reason=D]");
expected.addSuppressed(suppressed);
break;
case 4: // JDK exception with cause
failureCause = new RoutingMissingException("idx", "id");
failure = new RuntimeException("E", failureCause);
expectedCause = new OpenSearchException("OpenSearch exception [type=routing_missing_exception, " + "reason=routing is required for [idx]/[id]]");
expectedCause.addMetadata("opensearch.index", "idx");
expectedCause.addMetadata("opensearch.index_uuid", "_na_");
expected = new OpenSearchException("OpenSearch exception [type=runtime_exception, reason=E]", expectedCause);
suppressed = new OpenSearchException("OpenSearch exception [type=runtime_exception, reason=E]");
expected.addSuppressed(suppressed);
break;
case 5: // Wrapped exception with cause
failureCause = new FileAlreadyExistsException("File exists");
failure = new BroadcastShardOperationFailedException(new ShardId("_index", "_uuid", 5), "F", failureCause);
expected = new OpenSearchException("OpenSearch exception [type=file_already_exists_exception, reason=File exists]");
suppressed = new OpenSearchException("OpenSearch exception [type=file_already_exists_exception, reason=File exists]");
expected.addSuppressed(suppressed);
break;
case 6: // SearchPhaseExecutionException with cause and multiple failures
DiscoveryNode node = new DiscoveryNode("node_g", buildNewFakeTransportAddress(), Version.CURRENT);
failureCause = new NodeClosedException(node);
failureCause = new NoShardAvailableActionException(new ShardId("_index_g", "_uuid_g", 6), "node_g", failureCause);
ShardSearchFailure[] shardFailures = new ShardSearchFailure[] {
    new ShardSearchFailure(new ParsingException(0, 0, "Parsing g", null), new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 61), null, OriginalIndices.NONE)),
    new ShardSearchFailure(new RepositoryException("repository_g", "Repo"), new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 62), null, OriginalIndices.NONE)),
    new ShardSearchFailure(new SearchContextMissingException(new ShardSearchContextId(UUIDs.randomBase64UUID(), 0L)), null) };
failure = new SearchPhaseExecutionException("phase_g", "G", failureCause, shardFailures);
expectedCause = new OpenSearchException("OpenSearch exception [type=node_closed_exception, " + "reason=node closed " + node + "]");
expectedCause = new OpenSearchException("OpenSearch exception [type=no_shard_available_action_exception, " + "reason=node_g]", expectedCause);
expectedCause.addMetadata("opensearch.index", "_index_g");
expectedCause.addMetadata("opensearch.index_uuid", "_uuid_g");
expectedCause.addMetadata("opensearch.shard", "6");
expected = new OpenSearchException("OpenSearch exception [type=search_phase_execution_exception, " + "reason=G]", expectedCause);
expected.addMetadata("opensearch.phase", "phase_g");
expected.addSuppressed(new OpenSearchException("OpenSearch exception [type=parsing_exception, reason=Parsing g]"));
expected.addSuppressed(new OpenSearchException("OpenSearch exception [type=repository_exception, " + "reason=[repository_g] Repo]"));
expected.addSuppressed(new OpenSearchException("OpenSearch exception [type=search_context_missing_exception, " + "reason=No search context found for id [0]]"));
break;
default:
throw new UnsupportedOperationException("Failed to generate randomized failure");
}
Exception finalFailure = failure;
BytesReference failureBytes = toShuffledXContent((builder, params) -> {
OpenSearchException.generateFailureXContent(builder, params, finalFailure, true);
return builder;
}, xContent.type(), ToXContent.EMPTY_PARAMS, randomBoolean());
try (XContentParser parser = createParser(xContent, failureBytes)) {
failureBytes = BytesReference.bytes(shuffleXContent(parser, randomBoolean()));
}
OpenSearchException parsedFailure;
try (XContentParser parser = createParser(xContent, failureBytes)) {
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
parsedFailure = OpenSearchException.failureFromXContent(parser);
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertNull(parser.nextToken());
}
assertDeepEquals(expected, parsedFailure);
}
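For the NodeClosedException branch (case 6), the round-tripped failure's innermost cause is rendered with type=node_closed_exception and a reason that starts with "node closed". A small hypothetical fragment, written as if it lived inside this test class (so test helpers such as buildNewFakeTransportAddress() are in scope; the XContentFactory builder usage is an assumption for illustration), shows that rendering in isolation:
// Hypothetical fragment for this test class: render a bare NodeClosedException
// with the same node-building helper the test uses and inspect the generated failure body.
DiscoveryNode node = new DiscoveryNode("node_g", buildNewFakeTransportAddress(), Version.CURRENT);
Exception closed = new NodeClosedException(node);
XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
OpenSearchException.generateFailureXContent(builder, ToXContent.EMPTY_PARAMS, closed, true);
builder.endObject();
// The resulting JSON carries "type":"node_closed_exception" and a "reason"
// beginning with "node closed", which is what expectedCause in case 6 is built to match.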
use of org.opensearch.node.NodeClosedException in project OpenSearch by opensearch-project.
the class ReplicationOperationTests method testNoLongerPrimary.
public void testNoLongerPrimary() throws Exception {
final String index = "test";
final ShardId shardId = new ShardId(index, "_na_", 0);
ClusterState initialState = stateWithActivePrimary(index, true, 1 + randomInt(2), randomInt(2));
IndexMetadata indexMetadata = initialState.getMetadata().index(index);
final long primaryTerm = indexMetadata.primaryTerm(0);
final IndexShardRoutingTable indexShardRoutingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
ShardRouting primaryShard = indexShardRoutingTable.primaryShard();
if (primaryShard.relocating() && randomBoolean()) {
// simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
initialState = ClusterState.builder(initialState).nodes(DiscoveryNodes.builder(initialState.nodes()).localNodeId(primaryShard.relocatingNodeId())).build();
primaryShard = primaryShard.getTargetRelocatingShard();
}
// add an in-sync allocation id that doesn't have a corresponding routing entry
final Set<String> staleAllocationIds = Sets.newHashSet(randomAlphaOfLength(10));
final Set<String> inSyncAllocationIds = Sets.union(indexMetadata.inSyncAllocationIds(0), staleAllocationIds);
final Set<String> trackedShards = new HashSet<>();
addTrackingInfo(indexShardRoutingTable, primaryShard, trackedShards, new HashSet<>());
trackedShards.addAll(staleAllocationIds);
final ReplicationGroup replicationGroup = new ReplicationGroup(indexShardRoutingTable, inSyncAllocationIds, trackedShards, 0);
final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, initialState, trackedShards);
final Map<ShardRouting, Exception> expectedFailures = new HashMap<>();
if (expectedReplicas.isEmpty()) {
return;
}
final ShardRouting failedReplica = randomFrom(new ArrayList<>(expectedReplicas));
expectedFailures.put(failedReplica, new CorruptIndexException("simulated", (String) null));
Request request = new Request(shardId);
PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
final boolean testPrimaryDemotedOnStaleShardCopies = randomBoolean();
final Exception shardActionFailure;
if (randomBoolean()) {
shardActionFailure = new NodeClosedException(new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT));
} else if (randomBoolean()) {
DiscoveryNode node = new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT);
shardActionFailure = new SendRequestTransportException(node, ShardStateAction.SHARD_FAILED_ACTION_NAME, new NodeClosedException(node));
} else {
shardActionFailure = new ShardStateAction.NoLongerPrimaryShardException(failedReplica.shardId(), "the king is dead");
}
final TestReplicaProxy replicasProxy = new TestReplicaProxy(expectedFailures) {
@Override
public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception, ActionListener<Void> shardActionListener) {
if (testPrimaryDemotedOnStaleShardCopies) {
super.failShardIfNeeded(replica, primaryTerm, message, exception, shardActionListener);
} else {
assertThat(replica, equalTo(failedReplica));
shardActionListener.onFailure(shardActionFailure);
}
}
@Override
public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm, ActionListener<Void> shardActionListener) {
if (testPrimaryDemotedOnStaleShardCopies) {
shardActionListener.onFailure(shardActionFailure);
} else {
super.markShardCopyAsStaleIfNeeded(shardId, allocationId, primaryTerm, shardActionListener);
}
}
};
AtomicBoolean primaryFailed = new AtomicBoolean();
final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup, threadPool) {
@Override
public void failShard(String message, Exception exception) {
assertThat(exception, instanceOf(ShardStateAction.NoLongerPrimaryShardException.class));
assertTrue(primaryFailed.compareAndSet(false, true));
}
};
final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, primaryTerm);
op.execute();
assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
assertTrue("listener is not marked as done", listener.isDone());
if (shardActionFailure instanceof ShardStateAction.NoLongerPrimaryShardException) {
assertTrue(primaryFailed.get());
} else {
assertFalse(primaryFailed.get());
}
assertListenerThrows("should throw exception to trigger retry", listener, ReplicationOperation.RetryOnPrimaryException.class);
}
use of org.opensearch.node.NodeClosedException in project OpenSearch by opensearch-project.
the class PersistentTasksService method waitForPersistentTaskCondition.
/**
* Waits for a given persistent task to satisfy a given predicate, then calls back the listener accordingly.
*
* @param taskId the persistent task id
* @param predicate the persistent task predicate to evaluate
* @param timeout a timeout for waiting
* @param listener the callback listener
*/
public void waitForPersistentTaskCondition(final String taskId, final Predicate<PersistentTask<?>> predicate, @Nullable final TimeValue timeout, final WaitForPersistentTaskListener<?> listener) {
final Predicate<ClusterState> clusterStatePredicate = clusterState -> predicate.test(PersistentTasksCustomMetadata.getTaskWithId(clusterState, taskId));
final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext());
final ClusterState clusterState = observer.setAndGetObservedState();
if (clusterStatePredicate.test(clusterState)) {
listener.onResponse(PersistentTasksCustomMetadata.getTaskWithId(clusterState, taskId));
} else {
observer.waitForNextChange(new ClusterStateObserver.Listener() {
@Override
public void onNewClusterState(ClusterState state) {
listener.onResponse(PersistentTasksCustomMetadata.getTaskWithId(state, taskId));
}
@Override
public void onClusterServiceClose() {
listener.onFailure(new NodeClosedException(clusterService.localNode()));
}
@Override
public void onTimeout(TimeValue timeout) {
listener.onTimeout(timeout);
}
}, clusterStatePredicate);
}
}
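A hedged usage sketch for the method above; the task id, the predicate, and the timeout are made up, and WaitForPersistentTaskListener is the callback interface declared on PersistentTasksService (its onTimeout has a default implementation, so only onResponse and onFailure are shown). A NodeClosedException delivered to onFailure means the local node closed while waiting, not that the task itself failed.
// Hypothetical caller: wait up to 30 seconds for the task to be assigned to a node.
persistentTasksService.waitForPersistentTaskCondition(
    "my-task-id",                               // made-up task id
    task -> task != null && task.isAssigned(),  // condition to wait for
    TimeValue.timeValueSeconds(30),
    new PersistentTasksService.WaitForPersistentTaskListener<PersistentTaskParams>() {

        @Override
        public void onResponse(PersistentTasksCustomMetadata.PersistentTask<PersistentTaskParams> task) {
            // Condition met within the timeout.
        }

        @Override
        public void onFailure(Exception e) {
            if (e instanceof NodeClosedException) {
                // The local node closed while waiting; stop quietly instead of treating this as a task error.
            }
        }
    }
);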