Search in sources :

Example 81 with DiscoveryNode

use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.

the class AsyncShardFetch method fetchData.

/**
 * Returns the fetched data for this shard, kicking off asynchronous fetches for any nodes
 * that still need one.
 * <p>
 * If any fetch is still in flight once this call has (possibly) started new ones, the
 * returned result carries {@code null} data and empty failed/ignored sets. Otherwise the
 * result holds the non-null values of all cached entries whose node is still present in
 * {@code nodes}, the ids of the nodes whose fetch failed, and the accumulated ignore set.
 * <p>
 * Because fetching is asynchronous, the {@code ignoreNodes} passed on every call are
 * accumulated and only handed back (and cleared) once a complete result is returned.
 *
 * @param nodes       the nodes used to resolve node ids to {@link DiscoveryNode}s
 * @param ignoreNodes node ids to ignore for this round
 * @return the fetch result; its data is {@code null} while fetches are still ongoing
 * @throws IllegalStateException if this async fetch has already been closed
 */
public synchronized FetchResult<T> fetchData(DiscoveryNodes nodes, Set<String> ignoreNodes) {
    if (closed) {
        throw new IllegalStateException(shardId + ": can't fetch data on closed async fetch");
    }
    nodesToIgnore.addAll(ignoreNodes);
    fillShardCacheWithDataNodes(cache, nodes);

    final Set<NodeEntry<T>> pending = findNodesToFetch(cache);
    if (!pending.isEmpty()) {
        // flag every pending entry as fetching before the async request is issued
        for (NodeEntry<T> pendingEntry : pending) {
            pendingEntry.markAsFetching();
        }
        final DiscoveryNode[] targets = new DiscoveryNode[pending.size()];
        int slot = 0;
        for (NodeEntry<T> pendingEntry : pending) {
            targets[slot++] = nodes.get(pendingEntry.getNodeId());
        }
        asyncFetch(shardId, targets);
    }

    // something is still being fetched: signal that with null data
    if (hasAnyNodeFetching(cache)) {
        return new FetchResult<>(shardId, null, emptySet(), emptySet());
    }

    // every fetch has completed, assemble the result from the cache
    final Map<DiscoveryNode, T> fetchData = new HashMap<>();
    final Set<String> failedNodes = new HashSet<>();
    final Iterator<Map.Entry<String, NodeEntry<T>>> cacheIterator = cache.entrySet().iterator();
    while (cacheIterator.hasNext()) {
        final Map.Entry<String, NodeEntry<T>> cached = cacheIterator.next();
        final NodeEntry<T> nodeEntry = cached.getValue();
        final DiscoveryNode node = nodes.get(cached.getKey());
        if (node == null) {
            // the node is not part of the given nodes, leave its cache entry untouched
            continue;
        }
        if (nodeEntry.isFailed()) {
            // drop failed entries from the cache so that a later round fetches them again
            cacheIterator.remove();
            failedNodes.add(nodeEntry.getNodeId());
            continue;
        }
        final T value = nodeEntry.getValue();
        if (value != null) {
            fetchData.put(node, value);
        }
    }

    // snapshot the ignored nodes, then reset them: this round is complete and they
    // must be considered again if another full round is needed
    final Set<String> allIgnoreNodes = unmodifiableSet(new HashSet<>(nodesToIgnore));
    nodesToIgnore.clear();

    // just in case this round does not lead anywhere, trigger a reroute so fetching is retried
    if (!(failedNodes.isEmpty() && allIgnoreNodes.isEmpty())) {
        reroute(shardId, "nodes failed [" + failedNodes.size() + "], ignored [" + allIgnoreNodes.size() + "]");
    }
    return new FetchResult<>(shardId, fetchData, failedNodes, allIgnoreNodes);
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Example 82 with DiscoveryNode

use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.

the class ElectMasterService method sortedMasterNodes.

/**
 * Copies the given nodes into a list, drops every node that is not a master node and
 * sorts the remainder with {@code ElectMasterService::compareNodes}.
 *
 * @param nodes the nodes to consider
 * @return the sorted master nodes (possibly empty), or {@code null} if {@code nodes}
 *         contained no elements at all
 */
private List<DiscoveryNode> sortedMasterNodes(Iterable<DiscoveryNode> nodes) {
    final List<DiscoveryNode> candidates = CollectionUtils.iterableAsArrayList(nodes);
    if (candidates.isEmpty()) {
        // no input nodes at all is reported as null, not as an empty list
        return null;
    }
    // keep only the master nodes
    candidates.removeIf(candidate -> candidate.isMasterNode() == false);
    CollectionUtil.introSort(candidates, ElectMasterService::compareNodes);
    return candidates;
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode)

Example 83 with DiscoveryNode

use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.

the class IndicesClusterStateService method removeShards.

/**
 * Removes locally loaded shards (as exposed by indicesService) that are no longer routed
 * to the local node, whose allocation differs from the new routing entry, or that are
 * active locally while the new routing entry is initializing. Shards recovering from a
 * peer whose recovery source node has changed are removed as well, provided their ongoing
 * recovery could be cancelled.
 * This method does not delete the shard data.
 *
 * @param state new cluster state
 */
private void removeShards(final ClusterState state) {
    final RoutingTable routingTable = state.routingTable();
    final DiscoveryNodes nodes = state.nodes();
    final String localNodeId = state.nodes().getLocalNodeId();
    assert localNodeId != null;
    // shards are removed based on the routing nodes (no deletion of data)
    final RoutingNode localRoutingNode = state.getRoutingNodes().node(localNodeId);
    for (AllocatedIndex<? extends Shard> indexService : indicesService) {
        for (Shard shard : indexService) {
            final ShardRouting currentRoutingEntry = shard.routingEntry();
            final ShardId shardId = currentRoutingEntry.shardId();
            final ShardRouting newShardRouting;
            if (localRoutingNode == null) {
                newShardRouting = null;
            } else {
                newShardRouting = localRoutingNode.getByShardId(shardId);
            }

            if (newShardRouting == null) {
                // the shard can be removed without cleaning it locally, since it will be cleaned in IndicesStore
                // once all shards are allocated
                logger.debug("{} removing shard (not allocated)", shardId);
                indexService.removeShard(shardId.id(), "removing shard (not allocated)");
                continue;
            }
            if (newShardRouting.isSameAllocation(currentRoutingEntry) == false) {
                logger.debug("{} removing shard (stale allocation id, stale {}, new {})", shardId, currentRoutingEntry, newShardRouting);
                indexService.removeShard(shardId.id(), "removing shard (stale copy)");
                continue;
            }
            if (newShardRouting.initializing() && currentRoutingEntry.active()) {
                // this can happen if the node was isolated/gc-ed, rejoins the cluster and a new shard with the same allocation id
                // is assigned to it. Batch cluster state processing or if shard fetching completes before the node gets a new cluster
                // state may result in a new shard being initialized while having the same allocation id as the currently started shard.
                logger.debug("{} removing shard (not active, current {}, new {})", shardId, currentRoutingEntry, newShardRouting);
                indexService.removeShard(shardId.id(), "removing shard (stale copy)");
                continue;
            }

            // what remains: remove shards whose recovery source has changed.
            // This re-initializes shards later in createOrUpdateShards
            final boolean recoversFromPeer = newShardRouting.recoverySource() != null && newShardRouting.recoverySource().getType() == Type.PEER;
            if (recoversFromPeer == false) {
                continue;
            }
            final RecoveryState recoveryState = shard.recoveryState();
            final DiscoveryNode sourceNode = findSourceNodeForPeerRecovery(logger, routingTable, nodes, newShardRouting);
            if (recoveryState.getSourceNode().equals(sourceNode)) {
                // recovery source is unchanged, nothing to do
                continue;
            }
            if (recoveryTargetService.cancelRecoveriesForShard(shardId, "recovery source node changed")) {
                // getting here means that the shard was still recovering
                logger.debug("{} removing shard (recovery source changed), current [{}], global [{}], shard [{}])", shardId, recoveryState.getSourceNode(), sourceNode, newShardRouting);
                indexService.removeShard(shardId.id(), "removing shard (recovery source node changed)");
            }
        }
    }
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) RoutingTable(org.elasticsearch.cluster.routing.RoutingTable) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) IndexShard(org.elasticsearch.index.shard.IndexShard) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)

Example 84 with DiscoveryNode

use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.

the class IndicesClusterStateService method updateFailedShardsCache.

/**
 * Reconciles the failed shards cache with the routing of the local node in the given state.
 * <ul>
 * <li>If the local node has no routing node in the state, the cache is cleared.</li>
 * <li>Entries whose shard is no longer routed to this node, or is routed with a different
 * allocation, are dropped from the cache.</li>
 * <li>For the entries that remain, the shard failure is sent again as long as a master node
 * is known.</li>
 * </ul>
 *
 * @param state new cluster state
 */
private void updateFailedShardsCache(final ClusterState state) {
    final RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
    if (localRoutingNode == null) {
        failedShardsCache.clear();
        return;
    }
    final DiscoveryNode masterNode = state.nodes().getMasterNode();
    // drop cache entries that are not in our routing table anymore and resend failures that have not executed on master yet
    final Iterator<Map.Entry<ShardId, ShardRouting>> cacheIterator = failedShardsCache.entrySet().iterator();
    while (cacheIterator.hasNext()) {
        final ShardRouting failedShardRouting = cacheIterator.next().getValue();
        final ShardRouting matchedRouting = localRoutingNode.getByShardId(failedShardRouting.shardId());
        final boolean stillAllocatedHere = matchedRouting != null && matchedRouting.isSameAllocation(failedShardRouting);
        if (stillAllocatedHere == false) {
            cacheIterator.remove();
            continue;
        }
        if (masterNode == null) {
            // no master known, nobody to resend the failure to
            continue;
        }
        // TODO: can we remove this? Is resending shard failures the responsibility of shardStateAction?
        final String message = "master " + masterNode + " has not removed previously failed shard. resending shard failure";
        logger.trace("[{}] re-sending failed shard [{}], reason [{}]", matchedRouting.shardId(), matchedRouting, message);
        shardStateAction.localShardFailed(matchedRouting, message, null, SHARD_STATE_ACTION_LISTENER, state);
    }
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting)

Example 85 with DiscoveryNode

use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.

the class ElasticsearchExceptionTests method testFailureToAndFromXContentWithDetails.

/**
 * Round-trip test for failure rendering and parsing.
 * <p>
 * One of seven failure scenarios is picked at random. The failure is rendered with
 * {@code ElasticsearchException.generateFailureXContent} (last argument {@code true};
 * presumably the "detailed" flag, confirm against that method), its fields are shuffled,
 * and the bytes are parsed back with {@code ElasticsearchException.failureFromXContent}.
 * The parsed exception is then compared against the hand-built {@code expected} exception
 * using {@code assertDeepEquals}.
 * <p>
 * NOTE(review): the random helpers are called in a fixed order (content type, scenario,
 * then three booleans); reordering them would presumably change which scenario a given
 * test seed reproduces.
 */
public void testFailureToAndFromXContentWithDetails() throws IOException {
    final XContent xContent = randomFrom(XContentType.values()).xContent();
    // the failure that gets rendered, and (where applicable) its cause
    Exception failure;
    Throwable failureCause;
    // what the parser is expected to produce for the rendered failure
    ElasticsearchException expected;
    ElasticsearchException expectedCause;
    ElasticsearchException suppressed;
    switch(randomIntBetween(0, 6)) {
        case // Simple elasticsearch exception without cause
        0:
            failure = new NoNodeAvailableException("A");
            expected = new ElasticsearchException("Elasticsearch exception [type=no_node_available_exception, reason=A]");
            expected.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=no_node_available_exception, reason=A]"));
            break;
        case // Simple elasticsearch exception with headers (other metadata of type number are not parsed)
        1:
            failure = new CircuitBreakingException("B", 5_000, 2_000);
            ((ElasticsearchException) failure).addHeader("header_name", "0", "1");
            expected = new ElasticsearchException("Elasticsearch exception [type=circuit_breaking_exception, reason=B]");
            expected.addHeader("header_name", "0", "1");
            suppressed = new ElasticsearchException("Elasticsearch exception [type=circuit_breaking_exception, reason=B]");
            suppressed.addHeader("header_name", "0", "1");
            expected.addSuppressed(suppressed);
            break;
        case // Elasticsearch exception with a cause, headers and parsable metadata
        2:
            failureCause = new NullPointerException("var is null");
            failure = new ScriptException("C", failureCause, singletonList("stack"), "test", "painless");
            ((ElasticsearchException) failure).addHeader("script_name", "my_script");
            expectedCause = new ElasticsearchException("Elasticsearch exception [type=null_pointer_exception, reason=var is null]");
            expected = new ElasticsearchException("Elasticsearch exception [type=script_exception, reason=C]", expectedCause);
            expected.addHeader("script_name", "my_script");
            expected.addMetadata("es.lang", "painless");
            expected.addMetadata("es.script", "test");
            expected.addMetadata("es.script_stack", "stack");
            suppressed = new ElasticsearchException("Elasticsearch exception [type=script_exception, reason=C]");
            suppressed.addHeader("script_name", "my_script");
            suppressed.addMetadata("es.lang", "painless");
            suppressed.addMetadata("es.script", "test");
            suppressed.addMetadata("es.script_stack", "stack");
            expected.addSuppressed(suppressed);
            break;
        case // JDK exception without cause
        3:
            failure = new IllegalStateException("D");
            expected = new ElasticsearchException("Elasticsearch exception [type=illegal_state_exception, reason=D]");
            suppressed = new ElasticsearchException("Elasticsearch exception [type=illegal_state_exception, reason=D]");
            expected.addSuppressed(suppressed);
            break;
        case // JDK exception with cause
        4:
            failureCause = new RoutingMissingException("idx", "type", "id");
            failure = new RuntimeException("E", failureCause);
            expectedCause = new ElasticsearchException("Elasticsearch exception [type=routing_missing_exception, " + "reason=routing is required for [idx]/[type]/[id]]");
            expectedCause.addMetadata("es.index", "idx");
            expectedCause.addMetadata("es.index_uuid", "_na_");
            expected = new ElasticsearchException("Elasticsearch exception [type=runtime_exception, reason=E]", expectedCause);
            suppressed = new ElasticsearchException("Elasticsearch exception [type=runtime_exception, reason=E]");
            expected.addSuppressed(suppressed);
            break;
        case // Wrapped exception with cause
        5:
            failureCause = new FileAlreadyExistsException("File exists");
            failure = new BroadcastShardOperationFailedException(new ShardId("_index", "_uuid", 5), "F", failureCause);
            expected = new ElasticsearchException("Elasticsearch exception [type=file_already_exists_exception, reason=File exists]");
            // strangely, the wrapped exception appears as the root cause...
            suppressed = new ElasticsearchException("Elasticsearch exception [type=broadcast_shard_operation_failed_exception, " + "reason=F]");
            expected.addSuppressed(suppressed);
            break;
        case // SearchPhaseExecutionException with cause and multiple failures
        6:
            DiscoveryNode node = new DiscoveryNode("node_g", buildNewFakeTransportAddress(), Version.CURRENT);
            // two-level cause chain: NoShardAvailableActionException wrapping NodeClosedException
            failureCause = new NodeClosedException(node);
            failureCause = new NoShardAvailableActionException(new ShardId("_index_g", "_uuid_g", 6), "node_g", failureCause);
            // three shard failures; the expectation below lists one suppressed exception for each of them
            ShardSearchFailure[] shardFailures = new ShardSearchFailure[] { new ShardSearchFailure(new ParsingException(0, 0, "Parsing g", null), new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 61))), new ShardSearchFailure(new RepositoryException("repository_g", "Repo"), new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 62))), new ShardSearchFailure(new SearchContextMissingException(0L), null) };
            failure = new SearchPhaseExecutionException("phase_g", "G", failureCause, shardFailures);
            expectedCause = new ElasticsearchException("Elasticsearch exception [type=node_closed_exception, " + "reason=node closed " + node + "]");
            expectedCause = new ElasticsearchException("Elasticsearch exception [type=no_shard_available_action_exception, " + "reason=node_g]", expectedCause);
            expectedCause.addMetadata("es.index", "_index_g");
            expectedCause.addMetadata("es.index_uuid", "_uuid_g");
            expectedCause.addMetadata("es.shard", "6");
            expected = new ElasticsearchException("Elasticsearch exception [type=search_phase_execution_exception, " + "reason=G]", expectedCause);
            expected.addMetadata("es.phase", "phase_g");
            expected.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=parsing_exception, reason=Parsing g]"));
            expected.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=repository_exception, " + "reason=[repository_g] Repo]"));
            expected.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=search_context_missing_exception, " + "reason=No search context found for id [0]]"));
            break;
        default:
            throw new UnsupportedOperationException("Failed to generate randomized failure");
    }
    // copy into a separate local that is assigned exactly once, so the lambda below can capture it
    Exception finalFailure = failure;
    // render the failure in the randomly chosen content type
    BytesReference failureBytes = XContentHelper.toXContent((builder, params) -> {
        ElasticsearchException.generateFailureXContent(builder, params, finalFailure, true);
        return builder;
    }, xContent.type(), randomBoolean());
    // shuffle the rendered content before it is parsed back
    try (XContentParser parser = createParser(xContent, failureBytes)) {
        failureBytes = shuffleXContent(parser, randomBoolean()).bytes();
    }
    ElasticsearchException parsedFailure;
    try (XContentParser parser = createParser(xContent, failureBytes)) {
        // advance to the first field name of the enclosing object before handing over to the failure parser
        assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
        parsedFailure = ElasticsearchException.failureFromXContent(parser);
        // the failure parser must have consumed everything up to the end of the enclosing object
        assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
        assertNull(parser.nextToken());
    }
    assertDeepEquals(expected, parsedFailure);
}
Also used : FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) SearchPhaseExecutionException(org.elasticsearch.action.search.SearchPhaseExecutionException) Index(org.elasticsearch.index.Index) ShardId(org.elasticsearch.index.shard.ShardId) ScriptException(org.elasticsearch.script.ScriptException) XContent(org.elasticsearch.common.xcontent.XContent) ToXContent(org.elasticsearch.common.xcontent.ToXContent) ParsingException(org.elasticsearch.common.ParsingException) NodeClosedException(org.elasticsearch.node.NodeClosedException) BroadcastShardOperationFailedException(org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException) ShardSearchFailure(org.elasticsearch.action.search.ShardSearchFailure) RoutingMissingException(org.elasticsearch.action.RoutingMissingException) BytesReference(org.elasticsearch.common.bytes.BytesReference) SearchContextMissingException(org.elasticsearch.search.SearchContextMissingException) RepositoryException(org.elasticsearch.repositories.RepositoryException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) SearchParseException(org.elasticsearch.search.SearchParseException) NodeClosedException(org.elasticsearch.node.NodeClosedException) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException) RoutingMissingException(org.elasticsearch.action.RoutingMissingException) BroadcastShardOperationFailedException(org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException) RepositoryException(org.elasticsearch.repositories.RepositoryException) QueryShardException(org.elasticsearch.index.query.QueryShardException) SearchContextMissingException(org.elasticsearch.search.SearchContextMissingException) ScriptException(org.elasticsearch.script.ScriptException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) EOFException(java.io.EOFException) 
FileNotFoundException(java.io.FileNotFoundException) SearchPhaseExecutionException(org.elasticsearch.action.search.SearchPhaseExecutionException) RemoteTransportException(org.elasticsearch.transport.RemoteTransportException) ParsingException(org.elasticsearch.common.ParsingException) IndexShardRecoveringException(org.elasticsearch.index.shard.IndexShardRecoveringException) ClusterBlockException(org.elasticsearch.cluster.block.ClusterBlockException) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) IOException(java.io.IOException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) CircuitBreakingException(org.elasticsearch.common.breaker.CircuitBreakingException) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) CircuitBreakingException(org.elasticsearch.common.breaker.CircuitBreakingException) SearchShardTarget(org.elasticsearch.search.SearchShardTarget) XContentParser(org.elasticsearch.common.xcontent.XContentParser)

Aggregations

DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)352 ClusterState (org.elasticsearch.cluster.ClusterState)83 ArrayList (java.util.ArrayList)82 Settings (org.elasticsearch.common.settings.Settings)79 DiscoveryNodes (org.elasticsearch.cluster.node.DiscoveryNodes)74 IOException (java.io.IOException)69 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)52 HashMap (java.util.HashMap)45 ShardId (org.elasticsearch.index.shard.ShardId)45 HashSet (java.util.HashSet)43 List (java.util.List)41 TransportAddress (org.elasticsearch.common.transport.TransportAddress)41 CountDownLatch (java.util.concurrent.CountDownLatch)39 MockTransportService (org.elasticsearch.test.transport.MockTransportService)39 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)37 Map (java.util.Map)35 ExecutionException (java.util.concurrent.ExecutionException)35 Version (org.elasticsearch.Version)35 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)31 ClusterName (org.elasticsearch.cluster.ClusterName)30