Search in sources:

Example 31 with Decision

use of org.elasticsearch.cluster.routing.allocation.decider.Decision in project elasticsearch by elastic.

The class ClusterAllocationExplainIT, method testWorseBalance.

/**
 * Starts one node holding all five primaries, raises the balance threshold, adds a second node and then
 * checks that the explain output reports the rebalance to the new node as {@code WORSE_BALANCE}.
 */
public void testWorseBalance() throws Exception {
    // the same explanation text is expected on the decision object and in the JSON output
    final String expectedExplanation = "cannot rebalance as no target node exists that can both allocate this shard and improve the cluster balance";

    logger.info("--> starting a single node");
    internalCluster().startNode();
    ensureStableCluster(1);

    logger.info("--> creating an index with 5 shards, all allocated to the single node");
    createIndexAndIndexData(5, 0);

    logger.info("--> setting balancing threshold really high, so it won't be met");
    client().admin()
        .cluster()
        .prepareUpdateSettings()
        .setTransientSettings(Settings.builder().put("cluster.routing.allocation.balance.threshold", 1000.0f))
        .get();

    logger.info("--> starting another node, with the rebalance threshold so high, it should not get any shards");
    internalCluster().startNode();
    ensureStableCluster(2);

    // keep the order of the two random draws: yes-decisions first, disk info second
    final boolean includeYesDecisions = randomBoolean();
    final boolean includeDiskInfo = randomBoolean();
    final ClusterAllocationExplanation explanation = runExplain(true, includeYesDecisions, includeDiskInfo);
    final MoveDecision move = explanation.getShardAllocationDecision().getMoveDecision();

    // shard info
    assertEquals("idx", explanation.getShard().getIndexName());
    assertEquals(0, explanation.getShard().getId());
    assertTrue(explanation.isPrimary());

    // current node info
    assertEquals(ShardRoutingState.STARTED, explanation.getShardState());
    assertNotNull(explanation.getCurrentNode());

    // unassigned info
    assertNull(explanation.getUnassignedInfo());

    // cluster info
    verifyClusterInfo(explanation.getClusterInfo(), includeDiskInfo, 2);

    // decision object
    assertFalse(explanation.getShardAllocationDecision().getAllocateDecision().isDecisionTaken());
    assertTrue(move.isDecisionTaken());
    assertEquals(AllocationDecision.NO, move.getAllocationDecision());
    assertEquals(expectedExplanation, move.getExplanation());
    assertTrue(move.canRemain());
    assertFalse(move.forceMove());
    assertTrue(move.canRebalanceCluster());
    assertNotNull(move.getCanRemainDecision());
    assertNull(move.getTargetNode());
    assertEquals(1, move.getCurrentNodeRanking());

    // cluster rebalance decision object
    assertNotNull(move.getClusterRebalanceDecision());
    assertEquals(Decision.Type.YES, move.getClusterRebalanceDecision().type());
    for (Decision rebalanceDecision : move.getClusterRebalanceDecision().getDecisions()) {
        assertEquals(Decision.Type.YES, rebalanceDecision.type());
        assertNotNull(rebalanceDecision.getExplanation());
    }

    // node decisions
    assertEquals(1, move.getNodeDecisions().size());
    final NodeAllocationResult nodeResult = move.getNodeDecisions().get(0);
    assertNotNull(nodeResult.getNode());
    assertEquals(1, nodeResult.getWeightRanking());
    assertEquals(AllocationDecision.WORSE_BALANCE, nodeResult.getNodeDecision());
    final int canAllocateCount = nodeResult.getCanAllocateDecision().getDecisions().size();
    if (includeYesDecisions) {
        assertThat(canAllocateCount, greaterThan(0));
    } else {
        assertEquals(0, canAllocateCount);
    }
    for (Decision canAllocate : nodeResult.getCanAllocateDecision().getDecisions()) {
        assertEquals(Decision.Type.YES, canAllocate.type());
        assertNotNull(canAllocate.getExplanation());
    }

    // JSON output: each row is {field name, expected text value}, in the order they are checked
    final String[][] expectedFields = {
        { "can_remain_on_current_node", AllocationDecision.YES.toString() },
        { "can_rebalance_cluster", AllocationDecision.YES.toString() },
        { "can_rebalance_to_other_node", AllocationDecision.NO.toString() },
        { "rebalance_explanation", expectedExplanation }
    };
    try (XContentParser parser = getParser(explanation)) {
        verifyShardInfo(parser, true, includeDiskInfo, ShardRoutingState.STARTED);
        for (String[] field : expectedFields) {
            parser.nextToken();
            assertEquals(field[0], parser.currentName());
            parser.nextToken();
            assertEquals(field[1], parser.text());
        }
        verifyNodeDecisions(parser, allNodeDecisions(AllocationDecision.WORSE_BALANCE, true), includeYesDecisions, false);
        assertEquals(Token.END_OBJECT, parser.nextToken());
    }
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) ClusterInfo(org.elasticsearch.cluster.ClusterInfo) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) MoveDecision(org.elasticsearch.cluster.routing.allocation.MoveDecision) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) ShardRoutingState(org.elasticsearch.cluster.routing.ShardRoutingState) MoveDecision(org.elasticsearch.cluster.routing.allocation.MoveDecision) AllocationDecision(org.elasticsearch.cluster.routing.allocation.AllocationDecision) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult) XContentParser(org.elasticsearch.common.xcontent.XContentParser)

Example 32 with Decision

use of org.elasticsearch.cluster.routing.allocation.decider.Decision in project crate by crate.

The class ReplicaShardAllocator, method makeAllocationDecision.

/**
 * Decides whether and where the given unassigned shard should be allocated, based on the allocation
 * deciders and on the shard store data fetched from the nodes.
 *
 * Possible outcomes, in the order they are checked:
 * <ul>
 *   <li>{@code NOT_TAKEN} when this allocator is not responsible for the shard;</li>
 *   <li>a "no" decision when no node returned a YES decision in the pre-check (outside explain mode, or in
 *       explain mode before any fetch was initiated);</li>
 *   <li>a "no" decision with status {@code FETCHING_SHARD_DATA} while the shard store fetch has no data yet;</li>
 *   <li>{@code NOT_TAKEN} when no store could be found for the primary;</li>
 *   <li>a throttle or yes decision for the node with the highest match, if there is one;</li>
 *   <li>a delayed decision when no node has any data and the shard's unassigned info is delayed;</li>
 *   <li>{@code NOT_TAKEN} otherwise.</li>
 * </ul>
 *
 * @param unassignedShard the shard to make a decision for
 * @param allocation      the current routing allocation; its {@code debugDecision()} flag enables explain mode
 * @param logger          logger used for trace/debug output
 * @return the allocation decision for the shard
 */
@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    // explain mode: per-node decisions are collected and some early exits are skipped
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        // result.v2() is null outside explain mode, hence the null check before copying the node decisions
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null);
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        // record on the allocation that an async fetch is still outstanding
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), new ArrayList<>(result.v2().values()));
    }
    assert primaryShard.currentNodeId() != null;
    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    // combine the pre-check node decisions with the node decisions produced by findMatchingNodes
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned due to a node leaving, so we delay allocation of this replica to see if the
        // node with the shard copy will rejoin so we can re-use the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        // the delay values are only computed in explain mode; otherwise both stay 0
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            Metadata metadata = allocation.metadata();
            IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
Also used : NodeStoreFilesMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) ArrayList(java.util.ArrayList) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) StoreFileMetadata(org.elasticsearch.index.store.StoreFileMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) TransportNodesListShardStoreMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata) NodeStoreFilesMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) TransportNodesListShardStoreMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) HashMap(java.util.HashMap) Map(java.util.Map) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)

Example 33 with Decision

use of org.elasticsearch.cluster.routing.allocation.decider.Decision in project crate by crate.

The class ReplicaShardAllocator, method canBeAllocatedToAtLeastOneNode.

/**
 * Asks the allocation deciders whether any data node could accept the given shard.
 *
 * The first element of the returned tuple is the most favourable decision seen across the nodes: the first
 * YES decision wins, otherwise the first THROTTLE decision, otherwise the initial {@code Decision.NO}.
 * In explain (debug) mode every node is evaluated and the second element maps each evaluated node id to
 * its individual result. Outside explain mode the second element is {@code null} and the method returns
 * as soon as a YES decision is found.
 */
private static Tuple<Decision, Map<String, NodeAllocationResult>> canBeAllocatedToAtLeastOneNode(ShardRouting shard, RoutingAllocation allocation) {
    final boolean explain = allocation.debugDecision();
    final Map<String, NodeAllocationResult> perNodeResults = explain ? new HashMap<>() : null;
    Decision best = Decision.NO;
    for (ObjectCursor<DiscoveryNode> dataNode : allocation.nodes().getDataNodes().values()) {
        final RoutingNode routingNode = allocation.routingNodes().node(dataNode.value.getId());
        if (routingNode != null) {
            // a node that cannot take the shard is simply not a candidate; for example, this covers
            // replicas that may only be allocated after their primary
            final Decision nodeDecision = allocation.deciders().canAllocate(shard, routingNode, allocation);
            final Decision.Type nodeType = nodeDecision.type();
            final Decision.Type bestType = best.type();
            if (nodeType == Decision.Type.YES && bestType != Decision.Type.YES) {
                if (explain == false) {
                    // nothing can beat a YES and no per-node details are wanted, so stop here
                    return Tuple.tuple(nodeDecision, null);
                }
                best = nodeDecision;
            } else if (bestType == Decision.Type.NO && nodeType == Decision.Type.THROTTLE) {
                best = nodeDecision;
            }
            if (explain) {
                perNodeResults.put(routingNode.nodeId(), new NodeAllocationResult(routingNode.node(), null, nodeDecision));
            }
        }
    }
    return Tuple.tuple(best, perNodeResults);
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)

Example 34 with Decision

use of org.elasticsearch.cluster.routing.allocation.decider.Decision in project crate by crate.

The class ReplicaShardAllocator, method findMatchingNodes.

/**
 * Collects, from the fetched shard store data, the nodes the shard could be allocated to together with how
 * well each node's store matches the primary store.
 *
 * A node is skipped when {@code noMatchFailedNodes} is set and the shard's unassigned info lists it as
 * failed, when its store has no files, or when it has no routing node. Nodes whose allocation decision is
 * NO are left out of the matches. In explain mode every evaluated node (including NO ones) also gets an
 * entry in the per-node decisions; otherwise those decisions are {@code null}.
 */
private MatchingNodes findMatchingNodes(ShardRouting shard, RoutingAllocation allocation, boolean noMatchFailedNodes, DiscoveryNode primaryNode, TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore, AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> data, boolean explain) {
    final Map<String, NodeAllocationResult> explanations = explain ? new HashMap<>() : null;
    final Map<DiscoveryNode, MatchingNode> candidates = new HashMap<>();
    for (Map.Entry<DiscoveryNode, NodeStoreFilesMetadata> entry : data.getData().entrySet()) {
        final DiscoveryNode candidate = entry.getKey();
        final boolean previouslyFailedHere = noMatchFailedNodes
            && shard.unassignedInfo() != null
            && shard.unassignedInfo().getFailedNodeIds().contains(candidate.getId());
        if (previouslyFailedHere) {
            continue;
        }
        final TransportNodesListShardStoreMetadata.StoreFilesMetadata candidateStore = entry.getValue().storeFilesMetadata();
        if (candidateStore.isEmpty()) {
            // no files at all on this node: it is an empty index
            continue;
        }
        final RoutingNode routingNode = allocation.routingNodes().node(candidate.getId());
        if (routingNode == null) {
            continue;
        }
        // only a NO rules the node out: a THROTTLING node with enough "same data" is kept so that
        // assignment can be retried next time
        final Decision decision = allocation.deciders().canAllocate(shard, routingNode, allocation);
        final boolean rejected = decision.type() == Decision.Type.NO;
        if (rejected && explain == false) {
            // nothing to explain and nothing to match, so skip computing the match
            continue;
        }
        final MatchingNode match = computeMatchingNode(primaryNode, primaryStore, candidate, candidateStore);
        if (explain) {
            explanations.put(routingNode.nodeId(), new NodeAllocationResult(candidate, new ShardStoreInfo(match.matchingBytes), decision));
        }
        if (rejected) {
            continue;
        }
        candidates.put(candidate, match);
        if (logger.isTraceEnabled() == false) {
            continue;
        }
        if (match.isNoopRecovery) {
            logger.trace("{}: node [{}] can perform a noop recovery", shard, candidate.getName());
        } else if (match.retainingSeqNo >= 0) {
            logger.trace("{}: node [{}] can perform operation-based recovery with retaining sequence number [{}]", shard, candidate.getName(), match.retainingSeqNo);
        } else {
            logger.trace("{}: node [{}] has [{}/{}] bytes of re-usable data", shard, candidate.getName(), new ByteSizeValue(match.matchingBytes), match.matchingBytes);
        }
    }
    return new MatchingNodes(candidates, explanations);
}
Also used : NodeStoreFilesMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) ShardStoreInfo(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult.ShardStoreInfo) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) TransportNodesListShardStoreMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata) HashMap(java.util.HashMap) Map(java.util.Map) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)

Example 35 with Decision

use of org.elasticsearch.cluster.routing.allocation.decider.Decision in project crate by crate.

The class BaseGatewayShardAllocator, method buildDecisionsForAllNodes.

/**
 * Evaluates the allocation deciders for the shard against every routing node and returns one result per
 * node. This lets the explain API report per-node allocation decisions even while allocation of the shard
 * is still on hold (e.g. because shard data is being fetched).
 */
protected static List<NodeAllocationResult> buildDecisionsForAllNodes(ShardRouting shard, RoutingAllocation allocation) {
    final List<NodeAllocationResult> perNodeResults = new ArrayList<>();
    allocation.routingNodes().forEach(routingNode -> {
        final Decision canAllocate = allocation.deciders().canAllocate(shard, routingNode, allocation);
        perNodeResults.add(new NodeAllocationResult(routingNode.node(), null, canAllocate));
    });
    return perNodeResults;
}
Also used : RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) ArrayList(java.util.ArrayList) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) AllocationDecision(org.elasticsearch.cluster.routing.allocation.AllocationDecision)

Aggregations

Decision (org.elasticsearch.cluster.routing.allocation.decider.Decision)41 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)24 RoutingNode (org.elasticsearch.cluster.routing.RoutingNode)18 AllocateUnassignedDecision (org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision)18 NodeAllocationResult (org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)16 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)12 ArrayList (java.util.ArrayList)10 UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)10 AllocationDecision (org.elasticsearch.cluster.routing.allocation.AllocationDecision)10 ShardId (org.elasticsearch.index.shard.ShardId)9 ClusterInfo (org.elasticsearch.cluster.ClusterInfo)8 ShardRoutingState (org.elasticsearch.cluster.routing.ShardRoutingState)8 MoveDecision (org.elasticsearch.cluster.routing.allocation.MoveDecision)8 XContentParser (org.elasticsearch.common.xcontent.XContentParser)8 HashMap (java.util.HashMap)7 RoutingNodes (org.elasticsearch.cluster.routing.RoutingNodes)6 CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest)5 AllocationDeciders (org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders)5 Settings (org.elasticsearch.common.settings.Settings)5 Test (org.junit.Test)5