Use of org.opensearch.action.FailedNodeException in the OpenSearch project by opensearch-project.
Class AsyncShardFetch, method processAsyncFetch.
/**
 * Called by the response handler of the async action to fetch data. Verifies that it's still working
 * on the same fetching round as the cached node entry, otherwise the result for that node is discarded.
 * It then goes and fills the relevant data for the shard (response + failures), issuing a reroute at the
 * end of it to make sure there will be another round of allocations taking this new data into account.
 * <p>
 * Nothing is processed (and no reroute is issued) once this fetch has been closed.
 *
 * @param responses     per-node responses of the fetch; {@code null} is treated as "no responses"
 * @param failures      per-node failures of the fetch; {@code null} is treated as "no failures"
 * @param fetchingRound the fetching round these results belong to; results are only applied to node
 *                      entries that are still on this round
 */
protected synchronized void processAsyncFetch(List<T> responses, List<FailedNodeException> failures, long fetchingRound) {
    if (closed) {
        // we are closed, no need to process this async fetch at all
        logger.trace("{} ignoring fetched [{}] results, already closed", shardId, type);
        return;
    }
    logger.trace("{} processing fetched [{}] results", shardId, type);
    if (responses != null) {
        for (T response : responses) {
            NodeEntry<T> nodeEntry = cache.get(response.getNode().getId());
            if (nodeEntry != null) {
                if (nodeEntry.getFetchingRound() != fetchingRound) {
                    assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds";
                    // argument order follows the placeholders: shard, fetch type, node id, entry round, received round
                    logger.trace(
                        "{} received response for [{}] from node {} for an older fetching round (expected: {} but was: {})",
                        shardId,
                        type,
                        nodeEntry.getNodeId(),
                        nodeEntry.getFetchingRound(),
                        fetchingRound
                    );
                } else if (nodeEntry.isFailed()) {
                    logger.trace(
                        "{} node {} has failed for [{}] (failure [{}])",
                        shardId,
                        nodeEntry.getNodeId(),
                        type,
                        nodeEntry.getFailure()
                    );
                } else {
                    // if the entry is there, for the right fetching round and not marked as failed already, process it
                    logger.trace("{} marking {} as done for [{}], result is [{}]", shardId, nodeEntry.getNodeId(), type, response);
                    nodeEntry.doneFetching(response);
                }
            }
        }
    }
    if (failures != null) {
        for (FailedNodeException failure : failures) {
            logger.trace("{} processing failure {} for [{}]", shardId, failure, type);
            NodeEntry<T> nodeEntry = cache.get(failure.nodeId());
            if (nodeEntry != null) {
                if (nodeEntry.getFetchingRound() != fetchingRound) {
                    assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds";
                    // argument order follows the placeholders: shard, fetch type, node id, entry round, received round
                    logger.trace(
                        "{} received failure for [{}] from node {} for an older fetching round (expected: {} but was: {})",
                        shardId,
                        type,
                        nodeEntry.getNodeId(),
                        nodeEntry.getFetchingRound(),
                        fetchingRound
                    );
                } else if (nodeEntry.isFailed() == false) {
                    // if the entry is there, for the right fetching round and not marked as failed already, process it
                    Throwable unwrappedCause = ExceptionsHelper.unwrapCause(failure.getCause());
                    // if the request got rejected or timed out, we need to try it again next time...
                    if (unwrappedCause instanceof OpenSearchRejectedExecutionException
                        || unwrappedCause instanceof ReceiveTimeoutTransportException
                        || unwrappedCause instanceof OpenSearchTimeoutException) {
                        nodeEntry.restartFetching();
                    } else {
                        logger.warn(
                            () -> new ParameterizedMessage(
                                "{}: failed to list shard for {} on node [{}]",
                                shardId,
                                type,
                                failure.nodeId()
                            ),
                            failure
                        );
                        nodeEntry.doneFetching(failure.getCause());
                    }
                }
            }
        }
    }
    // always trigger a reroute so the next allocation round sees whatever was recorded above
    reroute(shardId, "post_response");
}
Use of org.opensearch.action.FailedNodeException in the OpenSearch project by opensearch-project.
Class Gateway, method performStateRecovery.
/**
 * Recovers a cluster state from the gateway metadata reported by the master nodes of the current
 * cluster state and hands it to {@code listener}.
 * <p>
 * The global metadata with the highest {@code version()} is elected, then for every index seen on any
 * node the {@code IndexMetadata} with the highest {@code getVersion()} is elected and added to it.
 * The resulting state is passed through
 * {@code ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings} before being reported.
 *
 * @param listener notified via {@code onSuccess} with the recovered state, or via {@code onFailure}
 *                 when fewer than the required number of nodes returned metadata
 * @throws GatewayException declared by the signature; not thrown directly by this body
 */
public void performStateRecovery(final GatewayStateRecoveredListener listener) throws GatewayException {
    final String[] masterNodeIds = clusterService.state().nodes().getMasterNodes().keys().toArray(String.class);
    logger.trace("performing state recovery from {}", Arrays.toString(masterNodeIds));
    final TransportNodesListGatewayMetaState.NodesGatewayMetaState fetchedStates = listGatewayMetaState.list(masterNodeIds, null)
        .actionGet();
    final int requiredAllocation = 1;

    // node-level failures are only logged; recovery continues with whatever was returned
    if (fetchedStates.hasFailures()) {
        for (final FailedNodeException nodeFailure : fetchedStates.failures()) {
            logger.warn("failed to fetch state from node", nodeFailure);
        }
    }

    // phase 1: elect the newest global metadata and collect every index that appears on any node
    final ObjectFloatHashMap<Index> indexOccurrences = new ObjectFloatHashMap<>();
    Metadata newestGlobalState = null;
    int statesFound = 0;
    for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : fetchedStates.getNodes()) {
        final Metadata nodeMetadata = nodeState.metadata();
        if (nodeMetadata == null) {
            continue;
        }
        statesFound++;
        if (newestGlobalState == null || nodeMetadata.version() > newestGlobalState.version()) {
            newestGlobalState = nodeMetadata;
        }
        for (final ObjectCursor<IndexMetadata> cursor : nodeMetadata.indices().values()) {
            indexOccurrences.addTo(cursor.value.getIndex(), 1);
        }
    }
    if (statesFound < requiredAllocation) {
        listener.onFailure("found [" + statesFound + "] metadata states, required [" + requiredAllocation + "]");
        return;
    }

    // update the global state, and clean the indices, we elect them in the next phase
    final Metadata.Builder metadataBuilder = Metadata.builder(newestGlobalState).removeAllIndices();
    assert !indexOccurrences.containsKey(null);

    // phase 2: walk the map's backing key array (empty slots are null) and elect per-index metadata
    for (final Object slot : indexOccurrences.keys) {
        if (slot == null) {
            continue;
        }
        final Index index = (Index) slot;
        IndexMetadata newestIndexMetadata = null;
        int indexMetadataCount = 0;
        for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : fetchedStates.getNodes()) {
            final Metadata nodeMetadata = nodeState.metadata();
            if (nodeMetadata == null) {
                continue;
            }
            final IndexMetadata candidate = nodeMetadata.index(index);
            if (candidate == null) {
                continue;
            }
            if (newestIndexMetadata == null || candidate.getVersion() > newestIndexMetadata.getVersion()) {
                newestIndexMetadata = candidate;
            }
            indexMetadataCount++;
        }
        if (newestIndexMetadata == null) {
            continue;
        }
        if (indexMetadataCount < requiredAllocation) {
            logger.debug("[{}] found [{}], required [{}], not adding", index, indexMetadataCount, requiredAllocation);
        }
        // TODO if this logging statement is correct then we are missing an else here
        metadataBuilder.put(newestIndexMetadata, false);
    }

    // build the raw state first, then upgrade/archive unknown or invalid settings against the cluster settings
    final ClusterState rawState = ClusterState.builder(clusterService.getClusterName()).metadata(metadataBuilder).build();
    final ClusterState recoveredState = ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(
        rawState,
        clusterService.getClusterSettings()
    );
    listener.onSuccess(recoveredState);
}
Use of org.opensearch.action.FailedNodeException in the OpenSearch project by opensearch-project.
Class TransportRethrottleActionTests, method testRethrottleNodeFailure.
/**
 * Runs the rethrottle test case with a task-list response that carries a single node failure and
 * expects that exact exception instance to be reported.
 */
public void testRethrottleNodeFailure() {
    final FailedNodeException nodeFailure = new FailedNodeException("test", "test", new Exception());
    rethrottleTestCase(
        slices,
        // respond with no tasks, no task failures, and exactly one node failure
        listener -> listener.onResponse(new ListTasksResponse(emptyList(), emptyList(), singletonList(nodeFailure))),
        expectException(theInstance(nodeFailure))
    );
}
Use of org.opensearch.action.FailedNodeException in the OpenSearch project by opensearch-project.
Class TransportBroadcastByNodeAction, method newResponse.
/**
 * Folds the per-node results into a single broadcast response.
 * <p>
 * A slot holding a {@link FailedNodeException} counts every shard routed to that node as failed.
 * Any other slot is treated as a {@code NodeResponse}: its results and shard counters are
 * accumulated, and its exceptions are recorded unless they are "shard not available" exceptions.
 * The number of {@code unavailableShardExceptions} is added to the total shard count only.
 *
 * @param request                    the original request, passed through to the delegate
 * @param responses                  one entry per node: either a {@code FailedNodeException} or a {@code NodeResponse}
 * @param unavailableShardExceptions shards for which no copy was available
 * @param nodes                      node id to the shard routings that were sent to that node
 * @param clusterState               cluster state passed through to the delegate
 * @return the response built by the delegating {@code newResponse} overload
 */
private Response newResponse(Request request, AtomicReferenceArray responses, List<NoShardAvailableActionException> unavailableShardExceptions, Map<String, List<ShardRouting>> nodes, ClusterState clusterState) {
    final List<ShardOperationResult> shardResults = new ArrayList<>();
    final List<DefaultShardOperationFailedException> shardFailures = new ArrayList<>();
    int totalShards = 0;
    int successfulShards = 0;

    for (int slot = 0; slot < responses.length(); slot++) {
        final Object nodeResult = responses.get(slot);

        if (nodeResult instanceof FailedNodeException) {
            // the whole node failed: every shard that was routed to it is reported as failed
            final FailedNodeException nodeFailure = (FailedNodeException) nodeResult;
            final List<ShardRouting> shardsOnNode = nodes.get(nodeFailure.nodeId());
            totalShards += shardsOnNode.size();
            for (ShardRouting shard : shardsOnNode) {
                shardFailures.add(new DefaultShardOperationFailedException(shard.getIndexName(), shard.getId(), nodeFailure));
            }
            continue;
        }

        final NodeResponse nodeResponse = (NodeResponse) nodeResult;
        shardResults.addAll(nodeResponse.results);
        totalShards += nodeResponse.getTotalShards();
        successfulShards += nodeResponse.getSuccessfulShards();
        for (BroadcastShardOperationFailedException shardException : nodeResponse.getExceptions()) {
            if (TransportActions.isShardNotAvailableException(shardException)) {
                // "shard not available" exceptions are not reported as failures
                continue;
            }
            shardFailures.add(
                new DefaultShardOperationFailedException(
                    shardException.getShardId().getIndexName(),
                    shardException.getShardId().getId(),
                    shardException
                )
            );
        }
    }

    totalShards += unavailableShardExceptions.size();
    return newResponse(request, totalShards, successfulShards, shardFailures.size(), shardResults, shardFailures, clusterState);
}
Use of org.opensearch.action.FailedNodeException in the OpenSearch project by opensearch-project.
Class TransportNodesActionTests, method testNewResponse.
/**
 * Feeds a shuffled mix of node responses and node failures to {@code newResponse} and checks that
 * the resulting response references the request and only contains the expected nodes and failures.
 */
public void testNewResponse() {
    final TestTransportNodesAction action = getTestTransportNodesAction();
    final TestNodesRequest request = new TestNodesRequest();

    // between one and three successful node responses
    final int extraResponseCount = randomIntBetween(0, 2);
    final List<TestNodeResponse> expectedNodeResponses = mockList(TestNodeResponse::new, extraResponseCount);
    expectedNodeResponses.add(new TestNodeResponse());

    final List<BaseNodeResponse> nodeResponses = new ArrayList<>(expectedNodeResponses);
    // This should be ignored:
    // NOTE(review): nodeResponses is never handed to the action; the array below is built from
    // expectedNodeResponses, so the OtherNodeResponse is not exercised. Confirm this is intended.
    nodeResponses.add(new OtherNodeResponse());

    // between zero and two node failures
    final List<FailedNodeException> failures = mockList(
        () -> new FailedNodeException(randomAlphaOfLength(8), randomAlphaOfLength(8), new IllegalStateException(randomAlphaOfLength(8))),
        randomIntBetween(0, 2)
    );

    final List<Object> allResponses = new ArrayList<>(expectedNodeResponses);
    allResponses.addAll(failures);
    Collections.shuffle(allResponses, random());
    final AtomicReferenceArray<?> atomicArray = new AtomicReferenceArray<>(allResponses.toArray());

    final TestNodesResponse response = action.newResponse(request, atomicArray);

    assertSame(request, response.request);
    // the input was shuffled, so only membership can be checked, not ordering
    assertTrue(expectedNodeResponses.containsAll(response.getNodes()));
    assertTrue(failures.containsAll(response.failures()));
}
Aggregations