Search in sources :

Example 56 with RoutingNode

use of org.elasticsearch.cluster.routing.RoutingNode in project elasticsearch by elastic.

the class ReplicaShardAllocator method makeAllocationDecision.

@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null);
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetaData> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), new ArrayList<>(result.v2().values()));
    }
    TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore = findStore(primaryShard, allocation, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned due to a node leaving, so we delay allocation of this replica to see if the
        // node with the shard copy will rejoin so we can re-use the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            MetaData metadata = allocation.metaData();
            IndexMetaData indexMetaData = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetaData.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetaData.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
Also used : RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) ArrayList(java.util.ArrayList) TransportNodesListShardStoreMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) MetaData(org.elasticsearch.cluster.metadata.MetaData) NodeStoreFilesMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) TransportNodesListShardStoreMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) HashMap(java.util.HashMap) ObjectLongHashMap(com.carrotsearch.hppc.ObjectLongHashMap) Map(java.util.Map) ObjectLongMap(com.carrotsearch.hppc.ObjectLongMap) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult) NodeStoreFilesMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData)

Example 57 with RoutingNode

use of org.elasticsearch.cluster.routing.RoutingNode in project elasticsearch by elastic.

the class PrimaryShardAllocator method buildNodesToAllocate.

/**
     * Split the list of node shard states into groups yes/no/throttle based on allocation deciders
     */
private NodesToAllocate buildNodesToAllocate(RoutingAllocation allocation, List<NodeGatewayStartedShards> nodeShardStates, ShardRouting shardRouting, boolean forceAllocate) {
    List<DecidedNode> yesNodeShards = new ArrayList<>();
    List<DecidedNode> throttledNodeShards = new ArrayList<>();
    List<DecidedNode> noNodeShards = new ArrayList<>();
    for (NodeGatewayStartedShards nodeShardState : nodeShardStates) {
        RoutingNode node = allocation.routingNodes().node(nodeShardState.getNode().getId());
        if (node == null) {
            continue;
        }
        Decision decision = forceAllocate ? allocation.deciders().canForceAllocatePrimary(shardRouting, node, allocation) : allocation.deciders().canAllocate(shardRouting, node, allocation);
        DecidedNode decidedNode = new DecidedNode(nodeShardState, decision);
        if (decision.type() == Type.THROTTLE) {
            throttledNodeShards.add(decidedNode);
        } else if (decision.type() == Type.NO) {
            noNodeShards.add(decidedNode);
        } else {
            yesNodeShards.add(decidedNode);
        }
    }
    return new NodesToAllocate(Collections.unmodifiableList(yesNodeShards), Collections.unmodifiableList(throttledNodeShards), Collections.unmodifiableList(noNodeShards));
}
Also used : RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) ArrayList(java.util.ArrayList) NodeGatewayStartedShards(org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.NodeGatewayStartedShards) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision)

Example 58 with RoutingNode

use of org.elasticsearch.cluster.routing.RoutingNode in project elasticsearch by elastic.

the class ReplicaShardAllocator method processExistingRecoveries.

/**
     * Process existing recoveries of replicas and see if we need to cancel them if we find a better
     * match. Today, a better match is one that has full sync id match compared to not having one in
     * the previous recovery.
     */
public void processExistingRecoveries(RoutingAllocation allocation) {
    MetaData metaData = allocation.metaData();
    RoutingNodes routingNodes = allocation.routingNodes();
    List<Runnable> shardCancellationActions = new ArrayList<>();
    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shard : routingNode) {
            if (shard.primary()) {
                continue;
            }
            if (shard.initializing() == false) {
                continue;
            }
            if (shard.relocatingNodeId() != null) {
                continue;
            }
            // if we are allocating a replica because of index creation, no need to go and find a copy, there isn't one...
            if (shard.unassignedInfo() != null && shard.unassignedInfo().getReason() == UnassignedInfo.Reason.INDEX_CREATED) {
                continue;
            }
            AsyncShardFetch.FetchResult<NodeStoreFilesMetaData> shardStores = fetchData(shard, allocation);
            if (shardStores.hasData() == false) {
                logger.trace("{}: fetching new stores for initializing shard", shard);
                // still fetching
                continue;
            }
            ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard.shardId());
            assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary";
            TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore = findStore(primaryShard, allocation, shardStores);
            if (primaryStore == null) {
                // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
                // just let the recovery find it out, no need to do anything about it for the initializing shard
                logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", shard);
                continue;
            }
            MatchingNodes matchingNodes = findMatchingNodes(shard, allocation, primaryStore, shardStores, false);
            if (matchingNodes.getNodeWithHighestMatch() != null) {
                DiscoveryNode currentNode = allocation.nodes().get(shard.currentNodeId());
                DiscoveryNode nodeWithHighestMatch = matchingNodes.getNodeWithHighestMatch();
                // current node will not be in matchingNodes as it is filtered away by SameShardAllocationDecider
                final String currentSyncId;
                if (shardStores.getData().containsKey(currentNode)) {
                    currentSyncId = shardStores.getData().get(currentNode).storeFilesMetaData().syncId();
                } else {
                    currentSyncId = null;
                }
                if (currentNode.equals(nodeWithHighestMatch) == false && Objects.equals(currentSyncId, primaryStore.syncId()) == false && matchingNodes.isNodeMatchBySyncID(nodeWithHighestMatch)) {
                    // we found a better match that has a full sync id match, the existing allocation is not fully synced
                    // so we found a better one, cancel this one
                    logger.debug("cancelling allocation of replica on [{}], sync id match found on node [{}]", currentNode, nodeWithHighestMatch);
                    UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false, UnassignedInfo.AllocationStatus.NO_ATTEMPT);
                    // don't cancel shard in the loop as it will cause a ConcurrentModificationException
                    shardCancellationActions.add(() -> routingNodes.failShard(logger, shard, unassignedInfo, metaData.getIndexSafe(shard.index()), allocation.changes()));
                }
            }
        }
    }
    for (Runnable action : shardCancellationActions) {
        action.run();
    }
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) ArrayList(java.util.ArrayList) TransportNodesListShardStoreMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) MetaData(org.elasticsearch.cluster.metadata.MetaData) NodeStoreFilesMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) TransportNodesListShardStoreMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) NodeStoreFilesMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData)

Example 59 with RoutingNode

use of org.elasticsearch.cluster.routing.RoutingNode in project elasticsearch by elastic.

the class ReplicaShardAllocator method findMatchingNodes.

private MatchingNodes findMatchingNodes(ShardRouting shard, RoutingAllocation allocation, TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore, AsyncShardFetch.FetchResult<NodeStoreFilesMetaData> data, boolean explain) {
    ObjectLongMap<DiscoveryNode> nodesToSize = new ObjectLongHashMap<>();
    Map<String, NodeAllocationResult> nodeDecisions = explain ? new HashMap<>() : null;
    for (Map.Entry<DiscoveryNode, NodeStoreFilesMetaData> nodeStoreEntry : data.getData().entrySet()) {
        DiscoveryNode discoNode = nodeStoreEntry.getKey();
        TransportNodesListShardStoreMetaData.StoreFilesMetaData storeFilesMetaData = nodeStoreEntry.getValue().storeFilesMetaData();
        // we don't have any files at all, it is an empty index
        if (storeFilesMetaData.isEmpty()) {
            continue;
        }
        RoutingNode node = allocation.routingNodes().node(discoNode.getId());
        if (node == null) {
            continue;
        }
        // check if we can allocate on that node...
        // we only check for NO, since if this node is THROTTLING and it has enough "same data"
        // then we will try and assign it next time
        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        long matchingBytes = -1;
        if (explain) {
            matchingBytes = computeMatchingBytes(primaryStore, storeFilesMetaData);
            ShardStoreInfo shardStoreInfo = new ShardStoreInfo(matchingBytes);
            nodeDecisions.put(node.nodeId(), new NodeAllocationResult(discoNode, shardStoreInfo, decision));
        }
        if (decision.type() == Decision.Type.NO) {
            continue;
        }
        if (matchingBytes < 0) {
            matchingBytes = computeMatchingBytes(primaryStore, storeFilesMetaData);
        }
        nodesToSize.put(discoNode, matchingBytes);
        if (logger.isTraceEnabled()) {
            if (matchingBytes == Long.MAX_VALUE) {
                logger.trace("{}: node [{}] has same sync id {} as primary", shard, discoNode.getName(), storeFilesMetaData.syncId());
            } else {
                logger.trace("{}: node [{}] has [{}/{}] bytes of re-usable data", shard, discoNode.getName(), new ByteSizeValue(matchingBytes), matchingBytes);
            }
        }
    }
    return new MatchingNodes(nodesToSize, nodeDecisions);
}
Also used : ObjectLongHashMap(com.carrotsearch.hppc.ObjectLongHashMap) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) TransportNodesListShardStoreMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) ShardStoreInfo(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult.ShardStoreInfo) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) HashMap(java.util.HashMap) ObjectLongHashMap(com.carrotsearch.hppc.ObjectLongHashMap) Map(java.util.Map) ObjectLongMap(com.carrotsearch.hppc.ObjectLongMap) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult) NodeStoreFilesMetaData(org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData)

Example 60 with RoutingNode

use of org.elasticsearch.cluster.routing.RoutingNode in project elasticsearch by elastic.

the class BaseGatewayShardAllocator method buildDecisionsForAllNodes.

/**
     * Builds decisions for all nodes in the cluster, so that the explain API can provide information on
     * allocation decisions for each node, while still waiting to allocate the shard (e.g. due to fetching shard data).
     */
protected List<NodeAllocationResult> buildDecisionsForAllNodes(ShardRouting shard, RoutingAllocation allocation) {
    List<NodeAllocationResult> results = new ArrayList<>();
    for (RoutingNode node : allocation.routingNodes()) {
        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        results.add(new NodeAllocationResult(node.node(), null, decision));
    }
    return results;
}
Also used : RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) ArrayList(java.util.ArrayList) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) AllocationDecision(org.elasticsearch.cluster.routing.allocation.AllocationDecision) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision)

Aggregations

RoutingNode (org.elasticsearch.cluster.routing.RoutingNode)61 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)45 ClusterState (org.elasticsearch.cluster.ClusterState)28 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)23 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)20 RoutingTable (org.elasticsearch.cluster.routing.RoutingTable)16 RoutingNodes (org.elasticsearch.cluster.routing.RoutingNodes)15 MetaData (org.elasticsearch.cluster.metadata.MetaData)13 Settings (org.elasticsearch.common.settings.Settings)12 Decision (org.elasticsearch.cluster.routing.allocation.decider.Decision)10 ShardId (org.elasticsearch.index.shard.ShardId)10 DiscoveryNodes (org.elasticsearch.cluster.node.DiscoveryNodes)9 IndexShardRoutingTable (org.elasticsearch.cluster.routing.IndexShardRoutingTable)9 Matchers.containsString (org.hamcrest.Matchers.containsString)9 UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)8 RoutingAllocation (org.elasticsearch.cluster.routing.allocation.RoutingAllocation)8 ClusterSettings (org.elasticsearch.common.settings.ClusterSettings)8 IndexShard (org.elasticsearch.index.shard.IndexShard)8 ArrayList (java.util.ArrayList)7 HashSet (java.util.HashSet)7