Search in sources:

Example 1 with FailedNodeException

use of org.opensearch.action.FailedNodeException in project OpenSearch by opensearch-project.

the class AsyncShardFetch method processAsyncFetch.

/**
 * Called by the response handler of the async action to fetch data. Verifies that it's still working
 * on the same cache generation, otherwise the results are discarded. It then goes and fills the relevant data for
 * the shard (response + failures), issuing a reroute at the end of it to make sure there will be another round
 * of allocations taking this new data into account.
 *
 * @param responses     per-node responses to record; may be {@code null}, in which case no responses are processed
 * @param failures      per-node failures to record; may be {@code null}, in which case no failures are processed
 * @param fetchingRound the fetching round these results belong to; cache entries whose round differs are only logged
 */
protected synchronized void processAsyncFetch(List<T> responses, List<FailedNodeException> failures, long fetchingRound) {
    if (closed) {
        // we are closed, no need to process this async fetch at all
        logger.trace("{} ignoring fetched [{}] results, already closed", shardId, type);
        return;
    }
    logger.trace("{} processing fetched [{}] results", shardId, type);
    if (responses != null) {
        for (T response : responses) {
            NodeEntry<T> nodeEntry = cache.get(response.getNode().getId());
            // responses for nodes that have no cache entry are silently skipped
            if (nodeEntry != null) {
                if (nodeEntry.getFetchingRound() != fetchingRound) {
                    assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds";
                    // argument order follows the placeholders: shard, fetch type, node id, expected round, actual round
                    logger.trace(
                        "{} received response for [{}] from node {} for an older fetching round (expected: {} but was: {})",
                        shardId,
                        type,
                        nodeEntry.getNodeId(),
                        nodeEntry.getFetchingRound(),
                        fetchingRound
                    );
                } else if (nodeEntry.isFailed()) {
                    logger.trace("{} node {} has failed for [{}] (failure [{}])", shardId, nodeEntry.getNodeId(), type, nodeEntry.getFailure());
                } else {
                    // if the entry is there, for the right fetching round and not marked as failed already, process it
                    logger.trace("{} marking {} as done for [{}], result is [{}]", shardId, nodeEntry.getNodeId(), type, response);
                    nodeEntry.doneFetching(response);
                }
            }
        }
    }
    if (failures != null) {
        for (FailedNodeException failure : failures) {
            logger.trace("{} processing failure {} for [{}]", shardId, failure, type);
            NodeEntry<T> nodeEntry = cache.get(failure.nodeId());
            // failures for nodes that have no cache entry are silently skipped
            if (nodeEntry != null) {
                if (nodeEntry.getFetchingRound() != fetchingRound) {
                    assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds";
                    // argument order follows the placeholders: shard, fetch type, node id, expected round, actual round
                    logger.trace(
                        "{} received failure for [{}] from node {} for an older fetching round (expected: {} but was: {})",
                        shardId,
                        type,
                        nodeEntry.getNodeId(),
                        nodeEntry.getFetchingRound(),
                        fetchingRound
                    );
                } else if (nodeEntry.isFailed() == false) {
                    // if the entry is there, for the right fetching round and not marked as failed already, process it
                    Throwable unwrappedCause = ExceptionsHelper.unwrapCause(failure.getCause());
                    // if the request got rejected or timed out, we need to try it again next time...
                    if (unwrappedCause instanceof OpenSearchRejectedExecutionException
                        || unwrappedCause instanceof ReceiveTimeoutTransportException
                        || unwrappedCause instanceof OpenSearchTimeoutException) {
                        nodeEntry.restartFetching();
                    } else {
                        logger.warn(() -> new ParameterizedMessage("{}: failed to list shard for {} on node [{}]", shardId, type, failure.nodeId()), failure);
                        nodeEntry.doneFetching(failure.getCause());
                    }
                }
            }
        }
    }
    // always reroute once the results have been folded into the cache
    reroute(shardId, "post_response");
}
Also used : ReceiveTimeoutTransportException(org.opensearch.transport.ReceiveTimeoutTransportException) OpenSearchTimeoutException(org.opensearch.OpenSearchTimeoutException) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) FailedNodeException(org.opensearch.action.FailedNodeException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage)

Example 2 with FailedNodeException

use of org.opensearch.action.FailedNodeException in project OpenSearch by opensearch-project.

the class Gateway method performStateRecovery.

/**
 * Builds a recovered {@link ClusterState} from the metadata reported by the nodes returned by
 * {@code getMasterNodes()} and hands it to the listener.
 * <p>
 * The global metadata with the highest version is elected first; then, for every index seen on any node,
 * the {@link IndexMetadata} with the highest version is elected and added to the result. The assembled state
 * is passed through {@code ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings} before being reported.
 *
 * @param listener notified via {@code onSuccess} with the recovered state, or via {@code onFailure} when fewer
 *                 than the required number (currently 1) of nodes returned metadata
 * @throws GatewayException declared by the signature; not thrown directly by this method body
 */
public void performStateRecovery(final GatewayStateRecoveredListener listener) throws GatewayException {
    final String[] nodesIds = clusterService.state().nodes().getMasterNodes().keys().toArray(String.class);
    logger.trace("performing state recovery from {}", Arrays.toString(nodesIds));
    final TransportNodesListGatewayMetaState.NodesGatewayMetaState nodesState = listGatewayMetaState.list(nodesIds, null).actionGet();
    final int requiredAllocation = 1;
    if (nodesState.hasFailures()) {
        for (final FailedNodeException failedNodeException : nodesState.failures()) {
            logger.warn("failed to fetch state from node", failedNodeException);
        }
    }
    // only the key set of this map is read below; the per-index counts are accumulated but never consulted
    final ObjectFloatHashMap<Index> indices = new ObjectFloatHashMap<>();
    Metadata electedGlobalState = null;
    int found = 0;
    for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
        final Metadata nodeMetadata = nodeState.metadata();
        if (nodeMetadata == null) {
            continue;
        }
        found++;
        // first metadata seen wins until a strictly newer version shows up
        if (electedGlobalState == null || nodeMetadata.version() > electedGlobalState.version()) {
            electedGlobalState = nodeMetadata;
        }
        for (final ObjectCursor<IndexMetadata> cursor : nodeMetadata.indices().values()) {
            indices.addTo(cursor.value.getIndex(), 1);
        }
    }
    if (found < requiredAllocation) {
        listener.onFailure("found [" + found + "] metadata states, required [" + requiredAllocation + "]");
        return;
    }
    // update the global state, and clean the indices, we elect them in the next phase
    final Metadata.Builder metadataBuilder = Metadata.builder(electedGlobalState).removeAllIndices();
    assert !indices.containsKey(null);
    // walk the map's backing key array directly; unused slots are null and skipped
    final Object[] keys = indices.keys;
    for (int i = 0; i < keys.length; i++) {
        if (keys[i] == null) {
            continue;
        }
        final Index index = (Index) keys[i];
        IndexMetadata electedIndexMetadata = null;
        int indexMetadataCount = 0;
        for (final TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
            final Metadata nodeMetadata = nodeState.metadata();
            if (nodeMetadata == null) {
                continue;
            }
            final IndexMetadata indexMetadata = nodeMetadata.index(index);
            if (indexMetadata == null) {
                continue;
            }
            if (electedIndexMetadata == null || indexMetadata.getVersion() > electedIndexMetadata.getVersion()) {
                electedIndexMetadata = indexMetadata;
            }
            indexMetadataCount++;
        }
        if (electedIndexMetadata != null) {
            // with requiredAllocation == 1 this branch cannot fire: a non-null elected entry implies a count >= 1
            if (indexMetadataCount < requiredAllocation) {
                logger.debug("[{}] found [{}], required [{}], not adding", index, indexMetadataCount, requiredAllocation);
            }
            // TODO if this logging statement is correct then we are missing an else here
            metadataBuilder.put(electedIndexMetadata, false);
        }
    }
    // build the state first, then upgrade/archive its settings with a direct call
    final ClusterState assembledState = ClusterState.builder(clusterService.getClusterName()).metadata(metadataBuilder).build();
    final ClusterState recoveredState = ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(
        assembledState,
        clusterService.getClusterSettings()
    );
    listener.onSuccess(recoveredState);
}
Also used : Arrays(java.util.Arrays) FailedNodeException(org.opensearch.action.FailedNodeException) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) Settings(org.opensearch.common.settings.Settings) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) Function(java.util.function.Function) ClusterState(org.opensearch.cluster.ClusterState) Logger(org.apache.logging.log4j.Logger) ClusterService(org.opensearch.cluster.service.ClusterService) LogManager(org.apache.logging.log4j.LogManager) ObjectFloatHashMap(com.carrotsearch.hppc.ObjectFloatHashMap) ClusterState(org.opensearch.cluster.ClusterState) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) FailedNodeException(org.opensearch.action.FailedNodeException) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ObjectFloatHashMap(com.carrotsearch.hppc.ObjectFloatHashMap)

Example 3 with FailedNodeException

use of org.opensearch.action.FailedNodeException in project OpenSearch by opensearch-project.

the class TransportRethrottleActionTests method testRethrottleNodeFailure.

/**
 * Drives the rethrottle test case with a list-tasks response that carries a single node failure and
 * passes an expectation built from {@code expectException(theInstance(...))} over that same exception object.
 */
public void testRethrottleNodeFailure() {
    final FailedNodeException nodeFailure = new FailedNodeException("test", "test", new Exception());
    rethrottleTestCase(slices, listener -> {
        // no tasks, no task failures, exactly one node failure
        final ListTasksResponse failedResponse = new ListTasksResponse(emptyList(), emptyList(), singletonList(nodeFailure));
        listener.onResponse(failedResponse);
    }, expectException(theInstance(nodeFailure)));
}
Also used : FailedNodeException(org.opensearch.action.FailedNodeException) ListTasksResponse(org.opensearch.action.admin.cluster.node.tasks.list.ListTasksResponse) FailedNodeException(org.opensearch.action.FailedNodeException)

Example 4 with FailedNodeException

use of org.opensearch.action.FailedNodeException in project OpenSearch by opensearch-project.

the class TransportBroadcastByNodeAction method newResponse.

/**
 * Folds the per-node results into a single response.
 * <p>
 * Each slot of {@code responses} holds either a {@link FailedNodeException} or a {@code NodeResponse}.
 * A failed node contributes all of its routed shards (looked up in {@code nodes}) as failures; a node
 * response contributes its results, shard counters, and every exception that is not a
 * "shard not available" exception. Unavailable shards are added to the total only.
 *
 * @param request                    the originating request, passed through unchanged
 * @param responses                  one entry per node: a {@link FailedNodeException} or a {@code NodeResponse}
 * @param unavailableShardExceptions exceptions for unavailable shards; only their number is used here
 * @param nodes                      node id to the shard routings targeted on that node
 * @param clusterState               cluster state, passed through unchanged
 * @return the response built by the overloaded {@code newResponse}
 */
private Response newResponse(Request request, AtomicReferenceArray responses, List<NoShardAvailableActionException> unavailableShardExceptions, Map<String, List<ShardRouting>> nodes, ClusterState clusterState) {
    int shardsTotal = 0;
    int shardsSucceeded = 0;
    final List<ShardOperationResult> collectedResults = new ArrayList<>();
    final List<DefaultShardOperationFailedException> collectedFailures = new ArrayList<>();
    for (int slot = 0; slot < responses.length(); slot++) {
        final Object nodeOutcome = responses.get(slot);
        if (nodeOutcome instanceof FailedNodeException) {
            // the whole node failed: every shard routed to it counts as failed
            final FailedNodeException nodeFailure = (FailedNodeException) nodeOutcome;
            final List<ShardRouting> shardsOnNode = nodes.get(nodeFailure.nodeId());
            shardsTotal += shardsOnNode.size();
            for (ShardRouting routing : shardsOnNode) {
                collectedFailures.add(new DefaultShardOperationFailedException(routing.getIndexName(), routing.getId(), nodeFailure));
            }
            continue;
        }
        final NodeResponse nodeResponse = (NodeResponse) nodeOutcome;
        collectedResults.addAll(nodeResponse.results);
        shardsTotal += nodeResponse.getTotalShards();
        shardsSucceeded += nodeResponse.getSuccessfulShards();
        for (BroadcastShardOperationFailedException shardFailure : nodeResponse.getExceptions()) {
            // "shard not available" exceptions are not reported as failures
            if (TransportActions.isShardNotAvailableException(shardFailure)) {
                continue;
            }
            collectedFailures.add(
                new DefaultShardOperationFailedException(shardFailure.getShardId().getIndexName(), shardFailure.getShardId().getId(), shardFailure)
            );
        }
    }
    shardsTotal += unavailableShardExceptions.size();
    final int shardsFailed = collectedFailures.size();
    return newResponse(request, shardsTotal, shardsSucceeded, shardsFailed, collectedResults, collectedFailures, clusterState);
}
Also used : ArrayList(java.util.ArrayList) FailedNodeException(org.opensearch.action.FailedNodeException) BroadcastShardOperationFailedException(org.opensearch.action.support.broadcast.BroadcastShardOperationFailedException) ShardRouting(org.opensearch.cluster.routing.ShardRouting) DefaultShardOperationFailedException(org.opensearch.action.support.DefaultShardOperationFailedException)

Example 5 with FailedNodeException

use of org.opensearch.action.FailedNodeException in project OpenSearch by opensearch-project.

the class TransportNodesActionTests method testNewResponse.

/**
 * Feeds a shuffled mix of node responses and node failures to {@code newResponse} and checks that the
 * resulting response references the request and only contains nodes and failures that were supplied.
 */
public void testNewResponse() {
    final TestTransportNodesAction nodesAction = getTestTransportNodesAction();
    final TestNodesRequest nodesRequest = new TestNodesRequest();

    // zero to two generated responses plus one that is always present
    final int generatedResponseCount = randomIntBetween(0, 2);
    final List<TestNodeResponse> suppliedResponses = mockList(TestNodeResponse::new, generatedResponseCount);
    suppliedResponses.add(new TestNodeResponse());

    final List<BaseNodeResponse> responsesWithForeignType = new ArrayList<>(suppliedResponses);
    // This should be ignored:
    // NOTE(review): this list is not read again in this method, so the extra entry never reaches newResponse
    responsesWithForeignType.add(new OtherNodeResponse());

    final int failureCount = randomIntBetween(0, 2);
    final List<FailedNodeException> suppliedFailures = mockList(
        () -> new FailedNodeException(randomAlphaOfLength(8), randomAlphaOfLength(8), new IllegalStateException(randomAlphaOfLength(8))),
        failureCount
    );

    final List<Object> mixedOutcomes = new ArrayList<>(suppliedResponses);
    mixedOutcomes.addAll(suppliedFailures);
    Collections.shuffle(mixedOutcomes, random());
    final AtomicReferenceArray<?> outcomeArray = new AtomicReferenceArray<>(mixedOutcomes.toArray());

    final TestNodesResponse built = nodesAction.newResponse(nodesRequest, outcomeArray);

    assertSame(nodesRequest, built.request);
    // the input was shuffled, so only membership can be checked, not ordering
    assertTrue(suppliedResponses.containsAll(built.getNodes()));
    assertTrue(suppliedFailures.containsAll(built.failures()));
}
Also used : ArrayList(java.util.ArrayList) AtomicReferenceArray(java.util.concurrent.atomic.AtomicReferenceArray) FailedNodeException(org.opensearch.action.FailedNodeException)

Aggregations

FailedNodeException (org.opensearch.action.FailedNodeException) 13, ArrayList (java.util.ArrayList) 6, ConnectException (java.net.ConnectException) 2, OpenSearchException (org.opensearch.OpenSearchException) 2, TaskOperationFailure (org.opensearch.action.TaskOperationFailure) 2, ListTasksResponse (org.opensearch.action.admin.cluster.node.tasks.list.ListTasksResponse) 2, IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata) 2, ObjectFloatHashMap (com.carrotsearch.hppc.ObjectFloatHashMap) 1, ObjectCursor (com.carrotsearch.hppc.cursors.ObjectCursor) 1, Arrays (java.util.Arrays) 1, Collections.singletonList (java.util.Collections.singletonList) 1, List (java.util.List) 1, AtomicReferenceArray (java.util.concurrent.atomic.AtomicReferenceArray) 1, Function (java.util.function.Function) 1, LogManager (org.apache.logging.log4j.LogManager) 1, Logger (org.apache.logging.log4j.Logger) 1, ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage) 1, OpenSearchTimeoutException (org.opensearch.OpenSearchTimeoutException) 1, CancelTasksResponse (org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksResponse) 1, FindDanglingIndexRequest (org.opensearch.action.admin.indices.dangling.find.FindDanglingIndexRequest) 1