Search in sources :

Example 1 with UnassignedInfo

use of org.opensearch.cluster.routing.UnassignedInfo in project OpenSearch by opensearch-project.

the class ReplicaShardAllocator method makeAllocationDecision.

@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null);
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), new ArrayList<>(result.v2().values()));
    }
    assert primaryShard.currentNodeId() != null;
    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned due to a node leaving, so we delay allocation of this replica to see if the
        // node with the shard copy will rejoin so we can re-use the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            Metadata metadata = allocation.metadata();
            IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
Also used : NodeStoreFilesMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) RoutingNodes(org.opensearch.cluster.routing.RoutingNodes) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) ArrayList(java.util.ArrayList) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) StoreFileMetadata(org.opensearch.index.store.StoreFileMetadata) TransportNodesListShardStoreMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata) NodeStoreFilesMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) Decision(org.opensearch.cluster.routing.allocation.decider.Decision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) RoutingNode(org.opensearch.cluster.routing.RoutingNode) TransportNodesListShardStoreMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata) ShardRouting(org.opensearch.cluster.routing.ShardRouting) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashMap(java.util.HashMap) Map(java.util.Map) NodeAllocationResult(org.opensearch.cluster.routing.allocation.NodeAllocationResult)

Example 2 with UnassignedInfo

use of org.opensearch.cluster.routing.UnassignedInfo in project OpenSearch by opensearch-project.

the class ClusterAllocationExplainIT method testAllocationFilteringPreventsShardMove.

public void testAllocationFilteringPreventsShardMove() throws Exception {
    logger.info("--> starting 2 nodes");
    internalCluster().startNodes(2);
    prepareIndex(1, 0);
    logger.info("--> setting up allocation filtering to prevent allocation to both nodes");
    client().admin().indices().prepareUpdateSettings("idx").setSettings(Settings.builder().put("index.routing.allocation.include._name", "non_existent_node")).get();
    boolean includeYesDecisions = randomBoolean();
    boolean includeDiskInfo = randomBoolean();
    ClusterAllocationExplanation explanation = runExplain(true, includeYesDecisions, includeDiskInfo);
    ShardId shardId = explanation.getShard();
    boolean isPrimary = explanation.isPrimary();
    ShardRoutingState shardRoutingState = explanation.getShardState();
    DiscoveryNode currentNode = explanation.getCurrentNode();
    UnassignedInfo unassignedInfo = explanation.getUnassignedInfo();
    ClusterInfo clusterInfo = explanation.getClusterInfo();
    AllocateUnassignedDecision allocateDecision = explanation.getShardAllocationDecision().getAllocateDecision();
    MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision();
    // verify shard info
    assertEquals("idx", shardId.getIndexName());
    assertEquals(0, shardId.getId());
    assertTrue(isPrimary);
    // verify current node info
    assertEquals(ShardRoutingState.STARTED, shardRoutingState);
    assertNotNull(currentNode);
    // verify unassigned info
    assertNull(unassignedInfo);
    // verify cluster info
    verifyClusterInfo(clusterInfo, includeDiskInfo, 2);
    // verify decision object
    assertFalse(allocateDecision.isDecisionTaken());
    assertTrue(moveDecision.isDecisionTaken());
    assertEquals(AllocationDecision.NO, moveDecision.getAllocationDecision());
    assertEquals("cannot move shard to another node, even though it is not allowed to remain on its current node", moveDecision.getExplanation());
    assertFalse(moveDecision.canRemain());
    assertFalse(moveDecision.forceMove());
    assertFalse(moveDecision.canRebalanceCluster());
    assertNull(moveDecision.getClusterRebalanceDecision());
    assertNull(moveDecision.getTargetNode());
    assertEquals(0, moveDecision.getCurrentNodeRanking());
    // verifying can remain decision object
    assertNotNull(moveDecision.getCanRemainDecision());
    assertEquals(Decision.Type.NO, moveDecision.getCanRemainDecision().type());
    for (Decision d : moveDecision.getCanRemainDecision().getDecisions()) {
        if (d.label().equals("filter")) {
            assertEquals(Decision.Type.NO, d.type());
            assertEquals("node does not match index setting [index.routing.allocation.include] filters [_name:\"non_existent_node\"]", d.getExplanation());
        } else {
            assertEquals(Decision.Type.YES, d.type());
            assertNotNull(d.getExplanation());
        }
    }
    // verify node decisions
    assertEquals(1, moveDecision.getNodeDecisions().size());
    NodeAllocationResult result = moveDecision.getNodeDecisions().get(0);
    assertNotNull(result.getNode());
    assertEquals(1, result.getWeightRanking());
    assertEquals(AllocationDecision.NO, result.getNodeDecision());
    if (includeYesDecisions) {
        assertThat(result.getCanAllocateDecision().getDecisions().size(), greaterThan(1));
    } else {
        assertEquals(1, result.getCanAllocateDecision().getDecisions().size());
    }
    for (Decision d : result.getCanAllocateDecision().getDecisions()) {
        if (d.label().equals("filter")) {
            assertEquals(Decision.Type.NO, d.type());
            assertEquals("node does not match index setting [index.routing.allocation.include] filters [_name:\"non_existent_node\"]", d.getExplanation());
        } else {
            assertEquals(Decision.Type.YES, d.type());
            assertNotNull(d.getExplanation());
        }
    }
    // verify JSON output
    try (XContentParser parser = getParser(explanation)) {
        verifyShardInfo(parser, true, includeDiskInfo, ShardRoutingState.STARTED);
        parser.nextToken();
        assertEquals("can_remain_on_current_node", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.NO.toString(), parser.text());
        parser.nextToken();
        assertEquals("can_remain_decisions", parser.currentName());
        verifyDeciders(parser, AllocationDecision.NO);
        parser.nextToken();
        assertEquals("can_move_to_other_node", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.NO.toString(), parser.text());
        parser.nextToken();
        assertEquals("move_explanation", parser.currentName());
        parser.nextToken();
        assertEquals("cannot move shard to another node, even though it is not allowed to remain on its current node", parser.text());
        verifyNodeDecisions(parser, allNodeDecisions(AllocationDecision.NO, true), includeYesDecisions, false);
        assertEquals(Token.END_OBJECT, parser.nextToken());
    }
}
Also used : ShardId(org.opensearch.index.shard.ShardId) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) ClusterInfo(org.opensearch.cluster.ClusterInfo) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) AllocationDecision(org.opensearch.cluster.routing.allocation.AllocationDecision) Decision(org.opensearch.cluster.routing.allocation.decider.Decision) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) NodeAllocationResult(org.opensearch.cluster.routing.allocation.NodeAllocationResult) XContentParser(org.opensearch.common.xcontent.XContentParser)

Example 3 with UnassignedInfo

use of org.opensearch.cluster.routing.UnassignedInfo in project OpenSearch by opensearch-project.

the class ClusterAllocationExplainIT method testBetterBalanceButCannotAllocate.

public void testBetterBalanceButCannotAllocate() throws Exception {
    logger.info("--> starting a single node");
    String firstNode = internalCluster().startNode();
    ensureStableCluster(1);
    prepareIndex(5, 0);
    logger.info("--> setting up allocation filtering to only allow allocation to the current node");
    client().admin().indices().prepareUpdateSettings("idx").setSettings(Settings.builder().put("index.routing.allocation.include._name", firstNode)).get();
    logger.info("--> starting another node, with filtering not allowing allocation to the new node, it should not get any shards");
    internalCluster().startNode();
    ensureStableCluster(2);
    boolean includeYesDecisions = randomBoolean();
    boolean includeDiskInfo = randomBoolean();
    ClusterAllocationExplanation explanation = runExplain(true, includeYesDecisions, includeDiskInfo);
    ShardId shardId = explanation.getShard();
    boolean isPrimary = explanation.isPrimary();
    ShardRoutingState shardRoutingState = explanation.getShardState();
    DiscoveryNode currentNode = explanation.getCurrentNode();
    UnassignedInfo unassignedInfo = explanation.getUnassignedInfo();
    ClusterInfo clusterInfo = explanation.getClusterInfo();
    AllocateUnassignedDecision allocateDecision = explanation.getShardAllocationDecision().getAllocateDecision();
    MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision();
    // verify shard info
    assertEquals("idx", shardId.getIndexName());
    assertEquals(0, shardId.getId());
    assertTrue(isPrimary);
    // verify current node info
    assertEquals(ShardRoutingState.STARTED, shardRoutingState);
    assertNotNull(currentNode);
    // verify unassigned info
    assertNull(unassignedInfo);
    // verify cluster info
    verifyClusterInfo(clusterInfo, includeDiskInfo, 2);
    // verify decision object
    assertFalse(allocateDecision.isDecisionTaken());
    assertTrue(moveDecision.isDecisionTaken());
    assertEquals(AllocationDecision.NO, moveDecision.getAllocationDecision());
    assertEquals("cannot rebalance as no target node exists that can both allocate this shard and improve the cluster balance", moveDecision.getExplanation());
    assertTrue(moveDecision.canRemain());
    assertFalse(moveDecision.forceMove());
    assertTrue(moveDecision.canRebalanceCluster());
    assertNotNull(moveDecision.getCanRemainDecision());
    assertNull(moveDecision.getTargetNode());
    assertEquals(2, moveDecision.getCurrentNodeRanking());
    // verifying cluster rebalance decision object
    assertNotNull(moveDecision.getClusterRebalanceDecision());
    assertEquals(Decision.Type.YES, moveDecision.getClusterRebalanceDecision().type());
    for (Decision d : moveDecision.getClusterRebalanceDecision().getDecisions()) {
        assertEquals(Decision.Type.YES, d.type());
        assertNotNull(d.getExplanation());
    }
    // verify node decisions
    assertEquals(1, moveDecision.getNodeDecisions().size());
    NodeAllocationResult result = moveDecision.getNodeDecisions().get(0);
    assertNotNull(result.getNode());
    assertEquals(1, result.getWeightRanking());
    assertEquals(AllocationDecision.NO, result.getNodeDecision());
    if (includeYesDecisions) {
        assertThat(result.getCanAllocateDecision().getDecisions().size(), greaterThan(1));
    } else {
        assertEquals(1, result.getCanAllocateDecision().getDecisions().size());
    }
    String primaryNodeName = primaryNodeName();
    for (Decision d : result.getCanAllocateDecision().getDecisions()) {
        if (d.label().equals("filter")) {
            assertEquals(Decision.Type.NO, d.type());
            assertEquals("node does not match index setting [index.routing.allocation.include] filters [_name:\"" + primaryNodeName + "\"]", d.getExplanation());
        } else {
            assertEquals(Decision.Type.YES, d.type());
            assertNotNull(d.getExplanation());
        }
    }
    // verify JSON output
    try (XContentParser parser = getParser(explanation)) {
        verifyShardInfo(parser, true, includeDiskInfo, ShardRoutingState.STARTED);
        parser.nextToken();
        assertEquals("can_remain_on_current_node", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.YES.toString(), parser.text());
        parser.nextToken();
        assertEquals("can_rebalance_cluster", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.YES.toString(), parser.text());
        parser.nextToken();
        assertEquals("can_rebalance_to_other_node", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.NO.toString(), parser.text());
        parser.nextToken();
        assertEquals("rebalance_explanation", parser.currentName());
        parser.nextToken();
        assertEquals("cannot rebalance as no target node exists that can both allocate this shard and improve the cluster balance", parser.text());
        verifyNodeDecisions(parser, allNodeDecisions(AllocationDecision.NO, true), includeYesDecisions, false);
        assertEquals(Token.END_OBJECT, parser.nextToken());
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) Matchers.containsString(org.hamcrest.Matchers.containsString) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) AllocationDecision(org.opensearch.cluster.routing.allocation.AllocationDecision) Decision(org.opensearch.cluster.routing.allocation.decider.Decision) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) ShardId(org.opensearch.index.shard.ShardId) ClusterInfo(org.opensearch.cluster.ClusterInfo) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) NodeAllocationResult(org.opensearch.cluster.routing.allocation.NodeAllocationResult) XContentParser(org.opensearch.common.xcontent.XContentParser)

Example 4 with UnassignedInfo

use of org.opensearch.cluster.routing.UnassignedInfo in project OpenSearch by opensearch-project.

the class ClusterAllocationExplainIT method testUnassignedReplicaDelayedAllocation.

public void testUnassignedReplicaDelayedAllocation() throws Exception {
    logger.info("--> starting 3 nodes");
    internalCluster().startNodes(3);
    prepareIndex(1, 1);
    logger.info("--> stopping the node with the replica");
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(replicaNode().getName()));
    ensureStableCluster(2);
    assertBusy(() -> assertEquals(AllocationDecision.ALLOCATION_DELAYED, client().admin().cluster().prepareAllocationExplain().setIndex("idx").setShard(0).setPrimary(false).get().getExplanation().getShardAllocationDecision().getAllocateDecision().getAllocationDecision()));
    logger.info("--> observing delayed allocation...");
    boolean includeYesDecisions = randomBoolean();
    boolean includeDiskInfo = randomBoolean();
    ClusterAllocationExplanation explanation = runExplain(false, includeYesDecisions, includeDiskInfo);
    ShardId shardId = explanation.getShard();
    boolean isPrimary = explanation.isPrimary();
    ShardRoutingState shardRoutingState = explanation.getShardState();
    DiscoveryNode currentNode = explanation.getCurrentNode();
    UnassignedInfo unassignedInfo = explanation.getUnassignedInfo();
    ClusterInfo clusterInfo = explanation.getClusterInfo();
    AllocateUnassignedDecision allocateDecision = explanation.getShardAllocationDecision().getAllocateDecision();
    MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision();
    // verify shard info
    assertEquals("idx", shardId.getIndexName());
    assertEquals(0, shardId.getId());
    assertFalse(isPrimary);
    // verify current node info
    assertNotEquals(ShardRoutingState.STARTED, shardRoutingState);
    assertNull(currentNode);
    // verify unassigned info
    assertNotNull(unassignedInfo);
    assertEquals(Reason.NODE_LEFT, unassignedInfo.getReason());
    assertEquals(AllocationStatus.NO_ATTEMPT, unassignedInfo.getLastAllocationStatus());
    // verify cluster info
    verifyClusterInfo(clusterInfo, includeDiskInfo, 2);
    // verify decision objects
    assertTrue(allocateDecision.isDecisionTaken());
    assertFalse(moveDecision.isDecisionTaken());
    assertEquals(AllocationDecision.ALLOCATION_DELAYED, allocateDecision.getAllocationDecision());
    assertThat(allocateDecision.getExplanation(), startsWith("cannot allocate because the cluster is still waiting"));
    assertThat(allocateDecision.getExplanation(), containsString("despite being allowed to allocate the shard to at least one other node"));
    assertNull(allocateDecision.getAllocationId());
    assertNull(allocateDecision.getTargetNode());
    assertEquals(60000L, allocateDecision.getConfiguredDelayInMillis());
    assertThat(allocateDecision.getRemainingDelayInMillis(), greaterThan(0L));
    assertEquals(2, allocateDecision.getNodeDecisions().size());
    String primaryNodeName = primaryNodeName();
    for (NodeAllocationResult result : allocateDecision.getNodeDecisions()) {
        assertNotNull(result.getNode());
        boolean nodeHoldingPrimary = result.getNode().getName().equals(primaryNodeName);
        if (nodeHoldingPrimary) {
            // shouldn't be able to allocate to the same node as the primary, the same shard decider should say no
            assertEquals(AllocationDecision.NO, result.getNodeDecision());
            assertThat(result.getShardStoreInfo().getMatchingBytes(), greaterThan(0L));
        } else {
            assertEquals(AllocationDecision.YES, result.getNodeDecision());
            assertNull(result.getShardStoreInfo());
        }
        if (includeYesDecisions) {
            assertThat(result.getCanAllocateDecision().getDecisions().size(), greaterThan(1));
        } else {
            // if we are not including YES decisions, then the node holding the primary should have 1 NO decision,
            // the other node should have zero NO decisions
            assertEquals(nodeHoldingPrimary ? 1 : 0, result.getCanAllocateDecision().getDecisions().size());
        }
        for (Decision d : result.getCanAllocateDecision().getDecisions()) {
            if (d.label().equals("same_shard") && nodeHoldingPrimary) {
                assertEquals(Decision.Type.NO, d.type());
                assertThat(d.getExplanation(), startsWith("a copy of this shard is already allocated to this node ["));
            } else {
                assertEquals(Decision.Type.YES, d.type());
                assertNotNull(d.getExplanation());
            }
        }
    }
    // verify JSON output
    try (XContentParser parser = getParser(explanation)) {
        verifyShardInfo(parser, false, includeDiskInfo, ShardRoutingState.UNASSIGNED);
        parser.nextToken();
        assertEquals("can_allocate", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.ALLOCATION_DELAYED.toString(), parser.text());
        parser.nextToken();
        assertEquals("allocate_explanation", parser.currentName());
        parser.nextToken();
        assertThat(parser.text(), startsWith("cannot allocate because the cluster is still waiting"));
        parser.nextToken();
        assertEquals("configured_delay_in_millis", parser.currentName());
        parser.nextToken();
        assertEquals(60000L, parser.longValue());
        parser.nextToken();
        assertEquals("remaining_delay_in_millis", parser.currentName());
        parser.nextToken();
        assertThat(parser.longValue(), greaterThan(0L));
        Map<String, AllocationDecision> nodes = new HashMap<>();
        nodes.put(primaryNodeName, AllocationDecision.NO);
        String[] currentNodes = internalCluster().getNodeNames();
        nodes.put(currentNodes[0].equals(primaryNodeName) ? currentNodes[1] : currentNodes[0], AllocationDecision.YES);
        verifyNodeDecisions(parser, nodes, includeYesDecisions, true);
        assertEquals(Token.END_OBJECT, parser.nextToken());
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) HashMap(java.util.HashMap) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) Matchers.containsString(org.hamcrest.Matchers.containsString) AllocationDecision(org.opensearch.cluster.routing.allocation.AllocationDecision) Decision(org.opensearch.cluster.routing.allocation.decider.Decision) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) ShardId(org.opensearch.index.shard.ShardId) ClusterInfo(org.opensearch.cluster.ClusterInfo) AllocationDecision(org.opensearch.cluster.routing.allocation.AllocationDecision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) NodeAllocationResult(org.opensearch.cluster.routing.allocation.NodeAllocationResult) XContentParser(org.opensearch.common.xcontent.XContentParser)

Example 5 with UnassignedInfo

use of org.opensearch.cluster.routing.UnassignedInfo in project OpenSearch by opensearch-project.

the class ClusterAllocationExplainIT method testUnassignedPrimaryWithExistingIndex.

public void testUnassignedPrimaryWithExistingIndex() throws Exception {
    logger.info("--> starting 2 nodes");
    internalCluster().startNodes(2);
    prepareIndex(1, 0);
    logger.info("--> stopping the node with the primary");
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName()));
    ensureStableCluster(1);
    boolean includeYesDecisions = randomBoolean();
    boolean includeDiskInfo = randomBoolean();
    ClusterAllocationExplanation explanation = runExplain(true, includeYesDecisions, includeDiskInfo);
    ShardId shardId = explanation.getShard();
    boolean isPrimary = explanation.isPrimary();
    ShardRoutingState shardState = explanation.getShardState();
    DiscoveryNode currentNode = explanation.getCurrentNode();
    UnassignedInfo unassignedInfo = explanation.getUnassignedInfo();
    ClusterInfo clusterInfo = explanation.getClusterInfo();
    AllocateUnassignedDecision allocateDecision = explanation.getShardAllocationDecision().getAllocateDecision();
    MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision();
    // verify shard info
    assertEquals("idx", shardId.getIndexName());
    assertEquals(0, shardId.getId());
    assertTrue(isPrimary);
    // verify current node info
    assertNotEquals(ShardRoutingState.STARTED, shardState);
    assertNull(currentNode);
    // verify unassigned info
    assertNotNull(unassignedInfo);
    assertEquals(Reason.NODE_LEFT, unassignedInfo.getReason());
    assertTrue(unassignedInfo.getLastAllocationStatus() == AllocationStatus.FETCHING_SHARD_DATA || unassignedInfo.getLastAllocationStatus() == AllocationStatus.NO_VALID_SHARD_COPY);
    // verify cluster info
    verifyClusterInfo(clusterInfo, includeDiskInfo, 1);
    // verify decision objects
    assertTrue(allocateDecision.isDecisionTaken());
    assertFalse(moveDecision.isDecisionTaken());
    assertTrue(allocateDecision.getAllocationDecision() == AllocationDecision.NO_VALID_SHARD_COPY || allocateDecision.getAllocationDecision() == AllocationDecision.AWAITING_INFO);
    if (allocateDecision.getAllocationDecision() == AllocationDecision.NO_VALID_SHARD_COPY) {
        assertEquals("cannot allocate because a previous copy of the primary shard existed but can no longer be " + "found on the nodes in the cluster", allocateDecision.getExplanation());
    } else {
        assertEquals("cannot allocate because information about existing shard data is still being retrieved from some of the nodes", allocateDecision.getExplanation());
    }
    assertNull(allocateDecision.getAllocationId());
    assertNull(allocateDecision.getTargetNode());
    assertEquals(0L, allocateDecision.getConfiguredDelayInMillis());
    assertEquals(0L, allocateDecision.getRemainingDelayInMillis());
    if (allocateDecision.getAllocationDecision() == AllocationDecision.NO_VALID_SHARD_COPY) {
        assertEquals(1, allocateDecision.getNodeDecisions().size());
        // verify JSON output
        try (XContentParser parser = getParser(explanation)) {
            verifyShardInfo(parser, true, includeDiskInfo, ShardRoutingState.UNASSIGNED);
            parser.nextToken();
            assertEquals("can_allocate", parser.currentName());
            parser.nextToken();
            assertEquals(AllocationDecision.NO_VALID_SHARD_COPY.toString(), parser.text());
            parser.nextToken();
            assertEquals("allocate_explanation", parser.currentName());
            parser.nextToken();
            assertEquals("cannot allocate because a previous copy of the primary shard existed but can no longer be found " + "on the nodes in the cluster", parser.text());
            verifyStaleShardCopyNodeDecisions(parser, 1, Collections.emptySet());
        }
    }
}
Also used : ShardId(org.opensearch.index.shard.ShardId) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) ClusterInfo(org.opensearch.cluster.ClusterInfo) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) MoveDecision(org.opensearch.cluster.routing.allocation.MoveDecision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) XContentParser(org.opensearch.common.xcontent.XContentParser)

Aggregations

UnassignedInfo (org.opensearch.cluster.routing.UnassignedInfo)55 ShardRouting (org.opensearch.cluster.routing.ShardRouting)35 ShardId (org.opensearch.index.shard.ShardId)31 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)27 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)25 Index (org.opensearch.index.Index)16 Matchers.containsString (org.hamcrest.Matchers.containsString)15 ClusterState (org.opensearch.cluster.ClusterState)15 ShardRoutingState (org.opensearch.cluster.routing.ShardRoutingState)15 ClusterInfo (org.opensearch.cluster.ClusterInfo)14 Metadata (org.opensearch.cluster.metadata.Metadata)11 RoutingNodes (org.opensearch.cluster.routing.RoutingNodes)11 AllocateUnassignedDecision (org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision)11 Settings (org.opensearch.common.settings.Settings)11 RoutingNode (org.opensearch.cluster.routing.RoutingNode)10 RoutingTable (org.opensearch.cluster.routing.RoutingTable)10 MoveDecision (org.opensearch.cluster.routing.allocation.MoveDecision)10 NodeAllocationResult (org.opensearch.cluster.routing.allocation.NodeAllocationResult)10 RoutingAllocation (org.opensearch.cluster.routing.allocation.RoutingAllocation)10 XContentParser (org.opensearch.common.xcontent.XContentParser)10