Search in sources:

Example 6 with FailedNodeException

use of org.elasticsearch.action.FailedNodeException in project elasticsearch by elastic.

Method performStateRecovery of the class Gateway.

/**
 * Rebuilds a cluster state from the gateway metadata reported by the nodes returned from
 * {@code clusterService.state().nodes().getMasterNodes()}.
 * <p>
 * The global metadata with the highest {@code version()} among the responding nodes is elected, then for
 * every index seen on any node the {@link IndexMetaData} with the highest {@code getVersion()} is elected
 * (on a tie the first one encountered wins, because the comparison is a strict {@code >}).
 * <p>
 * If fewer than {@code Math.max(1, minimumMasterNodesProvider.get())} nodes returned metadata,
 * {@code listener.onFailure} is called and nothing else happens. Otherwise {@code listener.onSuccess} is
 * called with a {@link ClusterState} built from the elected metadata.
 *
 * @param listener receives either the recovered cluster state or a failure message
 * @throws GatewayException declared by the signature; this body does not throw it directly
 */
public void performStateRecovery(final GatewayStateRecoveredListener listener) throws GatewayException {
    String[] nodesIds = clusterService.state().nodes().getMasterNodes().keys().toArray(String.class);
    logger.trace("performing state recovery from {}", Arrays.toString(nodesIds));
    // NOTE(review): actionGet() presumably blocks until all nodes answered or failed - confirm
    TransportNodesListGatewayMetaState.NodesGatewayMetaState nodesState = listGatewayMetaState.list(nodesIds, null).actionGet();
    // at least one node must report metadata, even if the provider returns 0 or a negative value
    int requiredAllocation = Math.max(1, minimumMasterNodesProvider.get());
    // per-node failures are only logged; recovery continues with whatever nodes did respond
    if (nodesState.hasFailures()) {
        for (FailedNodeException failedNodeException : nodesState.failures()) {
            logger.warn("failed to fetch state from node", failedNodeException);
        }
    }
    // collects every index seen on any node; the float counters are incremented but never read in this
    // method, only the key set is used further down
    ObjectFloatHashMap<Index> indices = new ObjectFloatHashMap<>();
    MetaData electedGlobalState = null;
    // number of nodes that actually returned metadata
    int found = 0;
    // first pass: elect the global metadata and gather the union of all index keys
    for (TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
        if (nodeState.metaData() == null) {
            continue;
        }
        found++;
        if (electedGlobalState == null) {
            electedGlobalState = nodeState.metaData();
        } else if (nodeState.metaData().version() > electedGlobalState.version()) {
            electedGlobalState = nodeState.metaData();
        }
        for (ObjectCursor<IndexMetaData> cursor : nodeState.metaData().indices().values()) {
            indices.addTo(cursor.value.getIndex(), 1);
        }
    }
    // since requiredAllocation >= 1 this also covers found == 0, so electedGlobalState is non-null below
    if (found < requiredAllocation) {
        listener.onFailure("found [" + found + "] metadata states, required [" + requiredAllocation + "]");
        return;
    }
    // update the global state, and clean the indices, we elect them in the next phase
    MetaData.Builder metaDataBuilder = MetaData.builder(electedGlobalState).removeAllIndices();
    assert !indices.containsKey(null);
    // walks the map's key array directly; null entries are skipped
    // (presumably unused slots of the hash table - verify against the hppc documentation)
    final Object[] keys = indices.keys;
    for (int i = 0; i < keys.length; i++) {
        if (keys[i] != null) {
            Index index = (Index) keys[i];
            IndexMetaData electedIndexMetaData = null;
            // number of nodes that hold metadata for this particular index
            int indexMetaDataCount = 0;
            // second pass (per index): elect the copy with the highest version across all nodes
            for (TransportNodesListGatewayMetaState.NodeGatewayMetaState nodeState : nodesState.getNodes()) {
                if (nodeState.metaData() == null) {
                    continue;
                }
                IndexMetaData indexMetaData = nodeState.metaData().index(index);
                if (indexMetaData == null) {
                    continue;
                }
                if (electedIndexMetaData == null) {
                    electedIndexMetaData = indexMetaData;
                } else if (indexMetaData.getVersion() > electedIndexMetaData.getVersion()) {
                    electedIndexMetaData = indexMetaData;
                }
                indexMetaDataCount++;
            }
            if (electedIndexMetaData != null) {
                // NOTE: despite the "not adding" message, execution falls through and the index IS added below
                if (indexMetaDataCount < requiredAllocation) {
                    logger.debug("[{}] found [{}], required [{}], not adding", index, indexMetaDataCount, requiredAllocation);
                }
                // TODO if this logging statement is correct then we are missing an else here
                try {
                    if (electedIndexMetaData.getState() == IndexMetaData.State.OPEN) {
                        // verify that we can actually create this index - if not we recover it as closed with lots of warn logs
                        // (the same metadata instance is passed for both arguments)
                        indicesService.verifyIndexMetadata(electedIndexMetaData, electedIndexMetaData);
                    }
                } catch (Exception e) {
                    // any verification failure downgrades the index to CLOSE instead of aborting the recovery
                    final Index electedIndex = electedIndexMetaData.getIndex();
                    logger.warn((org.apache.logging.log4j.util.Supplier<?>) () -> new ParameterizedMessage("recovering index {} failed - recovering as closed", electedIndex), e);
                    electedIndexMetaData = IndexMetaData.builder(electedIndexMetaData).state(IndexMetaData.State.CLOSE).build();
                }
                // NOTE(review): the meaning of the boolean flag is not visible here - confirm against MetaData.Builder#put
                metaDataBuilder.put(electedIndexMetaData, false);
            }
        }
    }
    // pass both settings groups through archiveUnknownOrInvalidSettings, logging each unknown or invalid setting
    final ClusterSettings clusterSettings = clusterService.getClusterSettings();
    metaDataBuilder.persistentSettings(clusterSettings.archiveUnknownOrInvalidSettings(metaDataBuilder.persistentSettings(), e -> logUnknownSetting("persistent", e), (e, ex) -> logInvalidSetting("persistent", e, ex)));
    metaDataBuilder.transientSettings(clusterSettings.archiveUnknownOrInvalidSettings(metaDataBuilder.transientSettings(), e -> logUnknownSetting("transient", e), (e, ex) -> logInvalidSetting("transient", e, ex)));
    // the resulting state carries only the cluster name and the recovered metadata
    ClusterState.Builder builder = ClusterState.builder(clusterService.getClusterName());
    builder.metaData(metaDataBuilder);
    listener.onSuccess(builder.build());
}
Also used : MetaData(org.elasticsearch.cluster.metadata.MetaData) Arrays(java.util.Arrays) FailedNodeException(org.elasticsearch.action.FailedNodeException) AbstractComponent(org.elasticsearch.common.component.AbstractComponent) Discovery(org.elasticsearch.discovery.Discovery) ClusterService(org.elasticsearch.cluster.service.ClusterService) Index(org.elasticsearch.index.Index) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ClusterChangedEvent(org.elasticsearch.cluster.ClusterChangedEvent) Supplier(java.util.function.Supplier) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) Settings(org.elasticsearch.common.settings.Settings) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) Map(java.util.Map) IndicesService(org.elasticsearch.indices.IndicesService) ObjectFloatHashMap(com.carrotsearch.hppc.ObjectFloatHashMap) ClusterStateApplier(org.elasticsearch.cluster.ClusterStateApplier) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) Index(org.elasticsearch.index.Index) MetaData(org.elasticsearch.cluster.metadata.MetaData) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) FailedNodeException(org.elasticsearch.action.FailedNodeException) Supplier(java.util.function.Supplier) ClusterState(org.elasticsearch.cluster.ClusterState) FailedNodeException(org.elasticsearch.action.FailedNodeException) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ObjectFloatHashMap(com.carrotsearch.hppc.ObjectFloatHashMap)

Example 7 with FailedNodeException

use of org.elasticsearch.action.FailedNodeException in project elasticsearch by elastic.

Method testFailedNodeException of the class ExceptionSerializationTests.

/**
 * Passes a {@link FailedNodeException} through the test's {@code serialize} helper and checks that the
 * node id and the message of the returned exception match the ones it was created with.
 */
public void testFailedNodeException() throws IOException {
    final FailedNodeException original = new FailedNodeException("the node", "the message", null);
    final FailedNodeException roundTripped = serialize(original);
    assertEquals("the node", roundTripped.nodeId());
    assertEquals("the message", roundTripped.getMessage());
}
Also used : FailedNodeException(org.elasticsearch.action.FailedNodeException)

Example 8 with FailedNodeException

use of org.elasticsearch.action.FailedNodeException in project elasticsearch by elastic.

Method writeTo of the class BaseTasksResponse.

/**
 * Writes this response to {@code out}: first whatever the superclass writes, then the task failures and
 * the node failures. Each list is preceded by its size, written with {@code writeVInt}.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
    super.writeTo(out);

    final int taskFailureCount = taskFailures.size();
    out.writeVInt(taskFailureCount);
    for (TaskOperationFailure taskFailure : taskFailures) {
        taskFailure.writeTo(out);
    }

    final int nodeFailureCount = nodeFailures.size();
    out.writeVInt(nodeFailureCount);
    for (FailedNodeException nodeFailure : nodeFailures) {
        nodeFailure.writeTo(out);
    }
}
Also used : TaskOperationFailure(org.elasticsearch.action.TaskOperationFailure) FailedNodeException(org.elasticsearch.action.FailedNodeException)

Example 9 with FailedNodeException

use of org.elasticsearch.action.FailedNodeException in project elasticsearch by elastic.

Method buildNodesHeader of the class RestActions.

/**
 * Writes the {@code "_nodes"} XContent header used for any {@link BaseNodesResponse}. The output has
 * this shape:
 * <code>
 * "_nodes" : {
 *   "total" : 3,
 *   "successful" : 1,
 *   "failed" : 2,
 *   "failures" : [ { ... }, { ... } ]
 * }
 * </code>
 * The {@code "failures"} array is only written when {@code failures} is not empty.
 * Prefer the overload that properly invokes this method to calling this directly.
 *
 * @param builder XContent builder.
 * @param params XContent parameters.
 * @param total The total number of nodes touched.
 * @param successful The successful number of responses received.
 * @param failed The number of failures (effectively {@code total - successful}).
 * @param failures The failure exceptions related to {@code failed}.
 * @throws IOException if the builder fails to write
 * @see #buildNodesHeader(XContentBuilder, Params, BaseNodesResponse)
 */
public static void buildNodesHeader(final XContentBuilder builder, final Params params, final int total, final int successful, final int failed, final List<FailedNodeException> failures) throws IOException {
    builder.startObject("_nodes");

    // the three counters are always present
    builder.field("total", total);
    builder.field("successful", successful);
    builder.field("failed", failed);

    // the failure details are optional: one object per failed node
    final boolean hasFailures = !failures.isEmpty();
    if (hasFailures) {
        builder.startArray("failures");
        for (final FailedNodeException nodeFailure : failures) {
            builder.startObject();
            nodeFailure.toXContent(builder, params);
            builder.endObject();
        }
        builder.endArray();
    }

    builder.endObject();
}
Also used : FailedNodeException(org.elasticsearch.action.FailedNodeException)

Example 10 with FailedNodeException

use of org.elasticsearch.action.FailedNodeException in project elasticsearch by elastic.

Method processAsyncFetch of the class AsyncShardFetch.

/**
 * Handles the outcome of an async fetch for a shard. Does nothing but log when this instance is already
 * closed. Otherwise every response and every failure is matched against the node entry stored in the
 * cache under the node's id; nodes without an entry are skipped, and entries already marked as failed
 * are left untouched. A reroute is issued at the end so that another round of allocations can take the
 * new data into account.
 *
 * @param shardId   the shard the fetch was issued for
 * @param responses the successful per-node responses, may be {@code null}
 * @param failures  the per-node failures, may be {@code null}
 */
protected synchronized void processAsyncFetch(ShardId shardId, List<T> responses, List<FailedNodeException> failures) {
    if (closed) {
        // we are closed, no need to process this async fetch at all
        logger.trace("{} ignoring fetched [{}] results, already closed", shardId, type);
        return;
    }
    logger.trace("{} processing fetched [{}] results", shardId, type);

    if (responses != null) {
        for (T nodeResponse : responses) {
            final NodeEntry<T> entry = cache.get(nodeResponse.getNode().getId());
            // only nodes that still have a cache entry are of interest
            if (entry != null) {
                if (entry.isFailed()) {
                    // already marked as failed: keep the failure, just trace it
                    logger.trace("{} node {} has failed for [{}] (failure [{}])", shardId, entry.getNodeId(), type, entry.getFailure());
                } else {
                    logger.trace("{} marking {} as done for [{}], result is [{}]", shardId, entry.getNodeId(), type, nodeResponse);
                    entry.doneFetching(nodeResponse);
                }
            }
        }
    }

    if (failures != null) {
        for (FailedNodeException nodeFailure : failures) {
            logger.trace("{} processing failure {} for [{}]", shardId, nodeFailure, type);
            final NodeEntry<T> entry = cache.get(nodeFailure.nodeId());
            // skip nodes without an entry and entries that are already marked as failed
            if (entry == null || entry.isFailed()) {
                continue;
            }
            final Throwable cause = ExceptionsHelper.unwrapCause(nodeFailure.getCause());
            // if the request got rejected or timed out, we need to try it again next time...
            final boolean retryNextTime = cause instanceof EsRejectedExecutionException
                || cause instanceof ReceiveTimeoutTransportException
                || cause instanceof ElasticsearchTimeoutException;
            if (retryNextTime) {
                entry.restartFetching();
            } else {
                logger.warn((Supplier<?>) () -> new ParameterizedMessage("{}: failed to list shard for {} on node [{}]", shardId, type, nodeFailure.nodeId()), nodeFailure);
                entry.doneFetching(nodeFailure.getCause());
            }
        }
    }

    reroute(shardId, "post_response");
}
Also used : ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) FailedNodeException(org.elasticsearch.action.FailedNodeException) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) EsRejectedExecutionException(org.elasticsearch.common.util.concurrent.EsRejectedExecutionException)

Aggregations

FailedNodeException (org.elasticsearch.action.FailedNodeException)10 ArrayList (java.util.ArrayList)3 TaskOperationFailure (org.elasticsearch.action.TaskOperationFailure)3 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)2 ObjectFloatHashMap (com.carrotsearch.hppc.ObjectFloatHashMap)1 ObjectCursor (com.carrotsearch.hppc.cursors.ObjectCursor)1 Arrays (java.util.Arrays)1 Map (java.util.Map)1 AtomicReferenceArray (java.util.concurrent.atomic.AtomicReferenceArray)1 Supplier (java.util.function.Supplier)1 ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException)1 ShardOperationFailedException (org.elasticsearch.action.ShardOperationFailedException)1 ListTasksResponse (org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse)1 DefaultShardOperationFailedException (org.elasticsearch.action.support.DefaultShardOperationFailedException)1 BroadcastShardOperationFailedException (org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException)1 ClusterChangedEvent (org.elasticsearch.cluster.ClusterChangedEvent)1 ClusterState (org.elasticsearch.cluster.ClusterState)1 ClusterStateApplier (org.elasticsearch.cluster.ClusterStateApplier)1 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)1 MetaData (org.elasticsearch.cluster.metadata.MetaData)1