Example use of org.opensearch.action.NoShardAvailableActionException in the OpenSearch project (by opensearch-project).
Source: class AbstractSearchAsyncAction, method performPhaseOnShard.
/**
 * Executes the current phase against a single shard copy, or reports a shard failure when no copy is given.
 *
 * @param shardIndex index of the shard; passed to the response listener and to {@code onShardFailure}
 * @param shardIt    iterator for the shard; passed to the phase and to the result/failure callbacks
 * @param shard      the shard copy to run against, or {@code null} if there is none
 */
private void performPhaseOnShard(final int shardIndex, final SearchShardIterator shardIt, final SearchShardTarget shard) {
    if (shard == null) {
        // Nothing to execute on: hand a NoShardAvailableActionException to the failure path via fork(...).
        fork(() -> onShardFailure(shardIndex, null, shardIt, new NoShardAvailableActionException(shardIt.shardId())));
        return;
    }

    // Per-node execution queue; only looked up (or created) when request throttling is enabled.
    final PendingExecutions pendingExecutions;
    if (throttleConcurrentRequests) {
        pendingExecutions = pendingExecutionsPerNode.computeIfAbsent(
            shard.getNodeId(),
            nodeId -> new PendingExecutions(maxConcurrentRequestsPerNode)
        );
    } else {
        pendingExecutions = null;
    }

    final Runnable phaseTask = () -> {
        /*
         * Remember the thread this phase starts on. The callback may arrive on that same thread (we never went
         * async, or the pool picked the same thread) or on a different one. Continuing on the same thread over
         * and over would recurse deeply and could overflow the stack, so the starting thread is handed to
         * executeNext, which can fork when the callback is on that thread (cf. InitialSearchPhase#maybeFork).
         */
        final Thread startingThread = Thread.currentThread();
        try {
            final SearchActionListener<Result> shardListener = new SearchActionListener<Result>(shard, shardIndex) {
                @Override
                public void innerOnResponse(Result result) {
                    try {
                        onShardResult(result, shardIt);
                    } finally {
                        // Always move on to the next pending execution, even if result handling throws.
                        executeNext(pendingExecutions, startingThread);
                    }
                }

                @Override
                public void onFailure(Exception t) {
                    try {
                        onShardFailure(shardIndex, shard, shardIt, t);
                    } finally {
                        executeNext(pendingExecutions, startingThread);
                    }
                }
            };
            executePhaseOnShard(shardIt, shard, shardListener);
        } catch (final Exception failure) {
            try {
                /*
                 * Connection exceptions are possible here because the connection is obtained early and the node
                 * may not be connected. In that case, on shard failure will move us to the next shard copy.
                 */
                fork(() -> onShardFailure(shardIndex, shard, shardIt, failure));
            } finally {
                executeNext(pendingExecutions, startingThread);
            }
        }
    };

    if (throttleConcurrentRequests) {
        pendingExecutions.tryRun(phaseTask);
    } else {
        phaseTask.run();
    }
}
Example use of org.opensearch.action.NoShardAvailableActionException in the OpenSearch project (by opensearch-project).
Source: class OpenSearchExceptionTests, method testFailureToAndFromXContentWithDetails.
/**
 * Picks one of seven failure shapes at random, renders it with
 * {@code OpenSearchException.generateFailureXContent(..., true)}, shuffles the rendered content, parses it back
 * with {@code OpenSearchException.failureFromXContent} and compares the result with a hand-built expectation.
 *
 * @throws IOException declared for the XContent rendering and parsing calls made by the test
 */
public void testFailureToAndFromXContentWithDetails() throws IOException {
    final XContent xContent = randomFrom(XContentType.values()).xContent();

    Exception thrown;
    Throwable thrownCause;
    OpenSearchException expectedFailure;
    OpenSearchException expectedFailureCause;
    OpenSearchException expectedSuppressed;

    switch (randomIntBetween(0, 6)) {
        case 0:
            // Simple opensearch exception without cause
            thrown = new NoNodeAvailableException("A");
            expectedFailure = new OpenSearchException("OpenSearch exception [type=no_node_available_exception, reason=A]");
            expectedSuppressed = new OpenSearchException("OpenSearch exception [type=no_node_available_exception, reason=A]");
            expectedFailure.addSuppressed(expectedSuppressed);
            break;

        case 1:
            // Simple opensearch exception with headers (other metadata of type number are not parsed)
            final ParsingException parsingException = new ParsingException(3, 2, "B", null);
            parsingException.addHeader("header_name", "0", "1");
            thrown = parsingException;
            expectedFailure = new OpenSearchException("OpenSearch exception [type=parsing_exception, reason=B]");
            expectedFailure.addHeader("header_name", "0", "1");
            expectedSuppressed = new OpenSearchException("OpenSearch exception [type=parsing_exception, reason=B]");
            expectedSuppressed.addHeader("header_name", "0", "1");
            expectedFailure.addSuppressed(expectedSuppressed);
            break;

        case 2:
            // OpenSearch exception with a cause, headers and parsable metadata
            thrownCause = new NullPointerException("var is null");
            final ScriptException scriptException = new ScriptException("C", thrownCause, singletonList("stack"), "test", "painless");
            scriptException.addHeader("script_name", "my_script");
            thrown = scriptException;
            expectedFailureCause = new OpenSearchException("OpenSearch exception [type=null_pointer_exception, reason=var is null]");
            expectedFailure = new OpenSearchException("OpenSearch exception [type=script_exception, reason=C]", expectedFailureCause);
            expectedFailure.addHeader("script_name", "my_script");
            expectedFailure.addMetadata("opensearch.lang", "painless");
            expectedFailure.addMetadata("opensearch.script", "test");
            expectedFailure.addMetadata("opensearch.script_stack", "stack");
            expectedSuppressed = new OpenSearchException("OpenSearch exception [type=script_exception, reason=C]");
            expectedSuppressed.addHeader("script_name", "my_script");
            expectedSuppressed.addMetadata("opensearch.lang", "painless");
            expectedSuppressed.addMetadata("opensearch.script", "test");
            expectedSuppressed.addMetadata("opensearch.script_stack", "stack");
            expectedFailure.addSuppressed(expectedSuppressed);
            break;

        case 3:
            // JDK exception without cause
            thrown = new IllegalStateException("D");
            expectedFailure = new OpenSearchException("OpenSearch exception [type=illegal_state_exception, reason=D]");
            expectedSuppressed = new OpenSearchException("OpenSearch exception [type=illegal_state_exception, reason=D]");
            expectedFailure.addSuppressed(expectedSuppressed);
            break;

        case 4:
            // JDK exception with cause
            thrownCause = new RoutingMissingException("idx", "id");
            thrown = new RuntimeException("E", thrownCause);
            expectedFailureCause = new OpenSearchException(
                "OpenSearch exception [type=routing_missing_exception, " + "reason=routing is required for [idx]/[id]]"
            );
            expectedFailureCause.addMetadata("opensearch.index", "idx");
            expectedFailureCause.addMetadata("opensearch.index_uuid", "_na_");
            expectedFailure = new OpenSearchException("OpenSearch exception [type=runtime_exception, reason=E]", expectedFailureCause);
            expectedSuppressed = new OpenSearchException("OpenSearch exception [type=runtime_exception, reason=E]");
            expectedFailure.addSuppressed(expectedSuppressed);
            break;

        case 5:
            // Wrapped exception with cause
            thrownCause = new FileAlreadyExistsException("File exists");
            thrown = new BroadcastShardOperationFailedException(new ShardId("_index", "_uuid", 5), "F", thrownCause);
            expectedFailure = new OpenSearchException("OpenSearch exception [type=file_already_exists_exception, reason=File exists]");
            expectedSuppressed = new OpenSearchException("OpenSearch exception [type=file_already_exists_exception, reason=File exists]");
            expectedFailure.addSuppressed(expectedSuppressed);
            break;

        case 6:
            // SearchPhaseExecutionException with cause and multiple failures
            final DiscoveryNode node = new DiscoveryNode("node_g", buildNewFakeTransportAddress(), Version.CURRENT);
            thrownCause = new NodeClosedException(node);
            thrownCause = new NoShardAvailableActionException(new ShardId("_index_g", "_uuid_g", 6), "node_g", thrownCause);

            // The three shard-level failures are built in the same order in which they appear in the array.
            final ShardSearchFailure parsingShardFailure = new ShardSearchFailure(
                new ParsingException(0, 0, "Parsing g", null),
                new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 61), null, OriginalIndices.NONE)
            );
            final ShardSearchFailure repositoryShardFailure = new ShardSearchFailure(
                new RepositoryException("repository_g", "Repo"),
                new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 62), null, OriginalIndices.NONE)
            );
            final ShardSearchFailure missingContextShardFailure = new ShardSearchFailure(
                new SearchContextMissingException(new ShardSearchContextId(UUIDs.randomBase64UUID(), 0L)),
                null
            );
            final ShardSearchFailure[] shardFailures = new ShardSearchFailure[] {
                parsingShardFailure,
                repositoryShardFailure,
                missingContextShardFailure };
            thrown = new SearchPhaseExecutionException("phase_g", "G", thrownCause, shardFailures);

            expectedFailureCause = new OpenSearchException(
                "OpenSearch exception [type=node_closed_exception, " + "reason=node closed " + node + "]"
            );
            expectedFailureCause = new OpenSearchException(
                "OpenSearch exception [type=no_shard_available_action_exception, " + "reason=node_g]",
                expectedFailureCause
            );
            expectedFailureCause.addMetadata("opensearch.index", "_index_g");
            expectedFailureCause.addMetadata("opensearch.index_uuid", "_uuid_g");
            expectedFailureCause.addMetadata("opensearch.shard", "6");
            expectedFailure = new OpenSearchException(
                "OpenSearch exception [type=search_phase_execution_exception, " + "reason=G]",
                expectedFailureCause
            );
            expectedFailure.addMetadata("opensearch.phase", "phase_g");
            expectedFailure.addSuppressed(new OpenSearchException("OpenSearch exception [type=parsing_exception, reason=Parsing g]"));
            expectedFailure.addSuppressed(
                new OpenSearchException("OpenSearch exception [type=repository_exception, " + "reason=[repository_g] Repo]")
            );
            expectedFailure.addSuppressed(
                new OpenSearchException(
                    "OpenSearch exception [type=search_context_missing_exception, " + "reason=No search context found for id [0]]"
                )
            );
            break;

        default:
            throw new UnsupportedOperationException("Failed to generate randomized failure");
    }

    // The lambda below needs an effectively final reference to the chosen failure.
    final Exception failureToRender = thrown;
    BytesReference rendered = toShuffledXContent((xBuilder, xParams) -> {
        OpenSearchException.generateFailureXContent(xBuilder, xParams, failureToRender, true);
        return xBuilder;
    }, xContent.type(), ToXContent.EMPTY_PARAMS, randomBoolean());

    // Shuffle the rendered content once more before parsing it back.
    try (XContentParser shufflingParser = createParser(xContent, rendered)) {
        rendered = BytesReference.bytes(shuffleXContent(shufflingParser, randomBoolean()));
    }

    final OpenSearchException parsed;
    try (XContentParser parser = createParser(xContent, rendered)) {
        assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
        parsed = OpenSearchException.failureFromXContent(parser);
        assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
        assertNull(parser.nextToken());
    }

    assertDeepEquals(expectedFailure, parsed);
}
Example use of org.opensearch.action.NoShardAvailableActionException in the OpenSearch project (by opensearch-project).
Source: class ClusterDisruptionIT, method testAckedIndexing.
/**
 * Test that we do not lose documents whose indexing request was successful, under a randomly selected disruption scheme.
 * We also collect and report the type of indexing failures that occur.
 * <p>
 * One indexer thread is started per node. Each thread waits on its own semaphore; every released permit makes it
 * attempt to index exactly one document, and every attempt (successful or not) counts down the current latch.
 * After each disruption round, all acknowledged document ids are read back through every node.
 * <p>
 * This test is a superset of tests run in the Jepsen test suite, with the exception of versioned updates
 *
 * @throws Exception if any step of the test fails or the test thread is interrupted while waiting
 */
@TestIssueLogging(value = "_root:DEBUG,org.opensearch.action.bulk:TRACE,org.opensearch.action.get:TRACE," + "org.opensearch.discovery:TRACE,org.opensearch.action.support.replication:TRACE," + "org.opensearch.cluster.service:TRACE,org.opensearch.indices.recovery:TRACE," + "org.opensearch.indices.cluster:TRACE,org.opensearch.index.shard:TRACE", issueUrl = "https://github.com/elastic/elasticsearch/issues/41068")
public void testAckedIndexing() throws Exception {
// Request timeout in seconds: 5 only when TEST_NIGHTLY is set and rarely() fires, otherwise 1.
final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
final String timeout = seconds + "s";
final List<String> nodes = startCluster(rarely() ? 5 : 3);
assertAcked(prepareCreate("test").setSettings(Settings.builder().put(indexSettings()).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2)).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomInt(2))));
ensureGreen();
ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
logger.info("disruption scheme [{}] added", disruptionScheme);
// id -> node sent.
final ConcurrentHashMap<String, String> ackedDocs = new ConcurrentHashMap<>();
final AtomicBoolean stop = new AtomicBoolean(false);
List<Thread> indexers = new ArrayList<>(nodes.size());
// One semaphore per indexer thread, in the same order as the indexers list (until shuffled below).
List<Semaphore> semaphores = new ArrayList<>(nodes.size());
final AtomicInteger idGenerator = new AtomicInteger(0);
// Holds the latch for the current indexing round; replaced before each batch of permits is released.
final AtomicReference<CountDownLatch> countDownLatchRef = new AtomicReference<>();
// Collects the OpenSearchExceptions thrown by index requests; only logged at the end, never asserted on.
final List<Exception> exceptedExceptions = new CopyOnWriteArrayList<>();
final ConflictMode conflictMode = ConflictMode.randomMode();
final List<String> fieldNames = IntStream.rangeClosed(0, randomInt(10)).mapToObj(n -> "f" + n).collect(Collectors.toList());
logger.info("starting indexers using conflict mode " + conflictMode);
try {
for (final String node : nodes) {
final Semaphore semaphore = new Semaphore(0);
semaphores.add(semaphore);
final Client client = client(node);
final String name = "indexer_" + indexers.size();
final int numPrimaries = getNumShards("test").numPrimaries;
Thread thread = new Thread(() -> {
while (!stop.get()) {
String id = null;
try {
// Wait for a permit; on timeout just re-check the stop flag.
if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
continue;
}
logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
try {
id = Integer.toString(idGenerator.incrementAndGet());
// Computed for the trace message below only; it is not passed to the index request.
int shard = Math.floorMod(Murmur3HashFunction.hash(id), numPrimaries);
logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
IndexRequestBuilder indexRequestBuilder = client.prepareIndex("test").setId(id).setSource(Collections.singletonMap(randomFrom(fieldNames), randomNonNegativeLong()), XContentType.JSON).setTimeout(timeout);
if (conflictMode == ConflictMode.external) {
indexRequestBuilder.setVersion(randomIntBetween(1, 10)).setVersionType(VersionType.EXTERNAL);
} else if (conflictMode == ConflictMode.create) {
indexRequestBuilder.setCreate(true);
}
IndexResponse response = indexRequestBuilder.get(timeout);
assertThat(response.getResult(), is(oneOf(CREATED, UPDATED)));
// Only documents whose request returned normally with CREATED/UPDATED are recorded as acknowledged.
ackedDocs.put(id, node);
logger.trace("[{}] indexed id [{}] through node [{}], response [{}]", name, id, node, response);
} catch (OpenSearchException e) {
exceptedExceptions.add(e);
final String docId = id;
logger.trace(() -> new ParameterizedMessage("[{}] failed id [{}] through node [{}]", name, docId, node), e);
} finally {
// Count down once per consumed permit, whether the index request succeeded or failed.
countDownLatchRef.get().countDown();
logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
}
} catch (InterruptedException e) {
// fine - semaphore interrupt
} catch (AssertionError | Exception e) {
// Logged only; the indexer keeps looping until the stop flag is set.
logger.info(() -> new ParameterizedMessage("unexpected exception in background thread of [{}]", node), e);
}
}
});
thread.setName(name);
thread.start();
indexers.add(thread);
}
// Warm-up round without any disruption (may be zero docs per indexer).
int docsPerIndexer = randomInt(3);
logger.info("indexing {} docs per indexer before partition", docsPerIndexer);
countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
for (Semaphore semaphore : semaphores) {
semaphore.release(docsPerIndexer);
}
assertTrue(countDownLatchRef.get().await(1, TimeUnit.MINUTES));
for (int iter = 1 + randomInt(2); iter > 0; iter--) {
logger.info("starting disruptions & indexing (iteration [{}])", iter);
disruptionScheme.startDisrupting();
docsPerIndexer = 1 + randomInt(5);
logger.info("indexing {} docs per indexer during partition", docsPerIndexer);
countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
Collections.shuffle(semaphores, random());
for (Semaphore semaphore : semaphores) {
// All permits from the previous round must have been consumed before new ones are handed out.
assertThat(semaphore.availablePermits(), equalTo(0));
semaphore.release(docsPerIndexer);
}
logger.info("waiting for indexing requests to complete");
// Wait budget: the per-request timeout for each doc of one indexer, plus 2 seconds.
assertTrue(countDownLatchRef.get().await(docsPerIndexer * seconds * 1000 + 2000, TimeUnit.MILLISECONDS));
logger.info("stopping disruption");
disruptionScheme.stopDisrupting();
for (String node : internalCluster().getNodeNames()) {
ensureStableCluster(nodes.size(), TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()), true, node);
}
// For a Bridge network disruption, explicitly issue a reroute with retry-failed set before waiting for green.
// NOTE(review): the original comment here appears truncated; the surviving text reads:
// "is the super-connected node and recovery source and target are on opposite sides of the bridge"
if (disruptionScheme instanceof NetworkDisruption && ((NetworkDisruption) disruptionScheme).getDisruptedLinks() instanceof Bridge) {
assertBusy(() -> assertAcked(client().admin().cluster().prepareReroute().setRetryFailed(true)));
}
ensureGreen("test");
logger.info("validating successful docs");
// Every acknowledged doc must be retrievable through every node, using the "_local" preference.
assertBusy(() -> {
for (String node : nodes) {
try {
logger.debug("validating through node [{}] ([{}] acked docs)", node, ackedDocs.size());
for (String id : ackedDocs.keySet()) {
assertTrue("doc [" + id + "] indexed via node [" + ackedDocs.get(id) + "] not found", client(node).prepareGet("test", id).setPreference("_local").get().isExists());
}
} catch (AssertionError | NoShardAvailableActionException e) {
// Rethrown as AssertionError with the checking node appended to the message.
// NOTE(review): the appended message opens a '(' that is never closed.
throw new AssertionError(e.getMessage() + " (checked via node [" + node + "]", e);
}
}
}, 30, TimeUnit.SECONDS);
logger.info("done validating (iteration [{}])", iter);
}
} finally {
logger.info("shutting down indexers");
stop.set(true);
for (Thread indexer : indexers) {
// Interrupt wakes an indexer blocked in tryAcquire; then wait up to 60 seconds for it to finish.
indexer.interrupt();
indexer.join(60000);
}
if (exceptedExceptions.size() > 0) {
StringBuilder sb = new StringBuilder();
for (Exception e : exceptedExceptions) {
sb.append("\n").append(e.getMessage());
}
logger.debug("Indexing exceptions during disruption: {}", sb);
}
}
}
Example use of org.opensearch.action.NoShardAvailableActionException in the OpenSearch project (by opensearch-project).
Source: class BroadcastReplicationTests, method testNotStartedPrimary.
/**
 * Executes a broadcast request against a randomly built cluster state, fails every captured shard request with
 * either a {@code NoShardAvailableActionException} or an {@code UnavailableShardsException}, and then checks the
 * resulting broadcast response with {@code assertBroadcastResponse(2, 0, 0, ..., null)}.
 *
 * @throws InterruptedException if interrupted while waiting for the response future
 * @throws ExecutionException   if the response future completed exceptionally
 */
public void testNotStartedPrimary() throws InterruptedException, ExecutionException {
    final String index = "test";
    setState(
        clusterService,
        state(
            index,
            randomBoolean(),
            randomBoolean() ? ShardRoutingState.INITIALIZING : ShardRoutingState.UNASSIGNED,
            ShardRoutingState.UNASSIGNED
        )
    );
    logger.debug("--> using initial state:\n{}", clusterService.state());

    final PlainActionFuture<BroadcastResponse> response = PlainActionFuture.newFuture();
    broadcastReplicationAction.execute(new DummyBroadcastRequest(index), response);

    // Fail each captured per-shard request with one of the two exception types, chosen at random.
    for (Tuple<ShardId, ActionListener<ReplicationResponse>> captured : broadcastReplicationAction.capturedShardRequests) {
        final boolean noShardAvailable = randomBoolean();
        final ActionListener<ReplicationResponse> shardListener = captured.v2();
        final ShardId shardId = captured.v1();
        final Exception shardFailure = noShardAvailable
            ? new NoShardAvailableActionException(shardId)
            : new UnavailableShardsException(shardId, "test exception");
        shardListener.onFailure(shardFailure);
    }

    final BroadcastResponse broadcastResponse = response.get();
    logger.info("total shards: {}, ", broadcastResponse.getTotalShards());
    // we expect no failures here because UnavailableShardsException does not count as failed
    assertBroadcastResponse(2, 0, 0, broadcastResponse, null);
}
Aggregations