Search in sources :

Example 61 with UnassignedInfo

use of org.elasticsearch.cluster.routing.UnassignedInfo in project crate by crate.

the class ClusterShardHealth method getInactivePrimaryHealth.

/**
 * Decides whether an inactive primary shard should turn the cluster health RED or only YELLOW.
 *
 * An inactive primary shard in an index should cause the cluster health to be RED to make it visible that some of the existing data is
 * unavailable. In case of index creation, snapshot restore or index shrinking, which are unexceptional events in the cluster lifecycle,
 * cluster health should not turn RED for the time where primaries are still in the initializing state but go to YELLOW instead.
 * However, in case of exceptional events, for example when the primary shard cannot be assigned to a node or initialization fails at
 * some point, cluster health should still turn RED.
 *
 * NB: this method should *not* be called on active shards nor on non-primary shards.
 *
 * @param shardRouting an inactive primary shard routing; it must carry both an {@code UnassignedInfo} and a recovery source
 *                     (checked by assertions only)
 * @return {@code YELLOW} when the last allocation status is not {@code DECIDERS_NO}, no allocation attempt has failed and the
 *         recovery source type is {@code EMPTY_STORE}, {@code LOCAL_SHARDS} or {@code SNAPSHOT}; {@code RED} otherwise
 */
public static ClusterHealthStatus getInactivePrimaryHealth(final ShardRouting shardRouting) {
    assert shardRouting.primary() : "cannot invoke on a replica shard: " + shardRouting;
    assert shardRouting.active() == false : "cannot invoke on an active shard: " + shardRouting;
    assert shardRouting.unassignedInfo() != null : "cannot invoke on a shard with no UnassignedInfo: " + shardRouting;
    assert shardRouting.recoverySource() != null : "cannot invoke on a shard that has no recovery source: " + shardRouting;
    final UnassignedInfo unassignedInfo = shardRouting.unassignedInfo();
    final RecoverySource.Type recoveryType = shardRouting.recoverySource().getType();
    // The allocation deciders did not reject the shard on the last attempt.
    final boolean notRejectedByDeciders = unassignedInfo.getLastAllocationStatus() != AllocationStatus.DECIDERS_NO;
    // No allocation attempt has failed so far.
    final boolean neverFailed = unassignedInfo.getNumFailedAllocations() == 0;
    // Only these recovery source types are tolerated as YELLOW while the primary is inactive.
    final boolean toleratedRecoveryType = recoveryType == RecoverySource.Type.EMPTY_STORE
        || recoveryType == RecoverySource.Type.LOCAL_SHARDS
        || recoveryType == RecoverySource.Type.SNAPSHOT;
    if (notRejectedByDeciders && neverFailed && toleratedRecoveryType) {
        return ClusterHealthStatus.YELLOW;
    }
    return ClusterHealthStatus.RED;
}
Also used : UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) RecoverySource(org.elasticsearch.cluster.routing.RecoverySource)

Example 62 with UnassignedInfo

use of org.elasticsearch.cluster.routing.UnassignedInfo in project crate by crate.

the class AllocationService method applyFailedShards.

/**
 * Fails the given shards and reroutes. Only assigned ShardRouting instances that exist in the routing table should be
 * passed in {@code failedShards}. {@code staleShards} lists allocation ids to remove from the in-sync set for shard
 * copies that have no routing entry in the routing table.
 *
 * <p>
 * If the same instance of ClusterState is returned, then no change has been made.</p>
 *
 * @param clusterState the cluster state to start from
 * @param failedShards the shards to fail
 * @param staleShards  the stale shard copies whose allocation ids are to be removed
 * @return the resulting cluster state; the input instance itself when both lists are empty
 */
public ClusterState applyFailedShards(final ClusterState clusterState, final List<FailedShard> failedShards, final List<StaleShard> staleShards) {
    if (staleShards.isEmpty() && failedShards.isEmpty()) {
        return clusterState;
    }
    final ClusterState stateWithoutStaleIds = IndexMetadataUpdater.removeStaleIdsWithoutRoutings(clusterState, staleShards, LOGGER);
    final RoutingNodes routingNodes = getMutableRoutingNodes(stateWithoutStaleIds);
    // shuffle the unassigned nodes, just so we won't have things like poison failed shards
    routingNodes.unassigned().shuffle();
    final long nowNanos = currentNanoTime();
    final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, stateWithoutStaleIds, clusterInfoService.getClusterInfo(), nowNanos);
    for (FailedShard entry : failedShards) {
        final ShardRouting requested = entry.getRoutingEntry();
        final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(requested.shardId().getIndex());
        allocation.addIgnoreShardForNode(requested.shardId(), requested.currentNodeId());
        // failing a primary also fails initializing replica shards, re-resolve ShardRouting
        final ShardRouting current = routingNodes.getByAllocationId(requested.shardId(), requested.allocationId().getId());
        if (current == null) {
            LOGGER.trace("{} shard routing failed in an earlier iteration (routing: {})", requested.shardId(), requested);
            continue;
        }
        if (current != requested) {
            LOGGER.trace("{} shard routing modified in an earlier iteration (previous: {}, current: {})", requested.shardId(), requested, current);
        }
        // Carry over the failure history from the shard's previous UnassignedInfo, if it has one.
        final UnassignedInfo previousInfo = current.unassignedInfo();
        final int previousFailures;
        final Set<String> failedNodeIds;
        if (previousInfo == null) {
            previousFailures = 0;
            failedNodeIds = Collections.emptySet();
        } else {
            previousFailures = previousInfo.getNumFailedAllocations();
            failedNodeIds = new HashSet<>(previousInfo.getFailedNodeIds().size() + 1);
            failedNodeIds.addAll(previousInfo.getFailedNodeIds());
            failedNodeIds.add(current.currentNodeId());
        }
        final String message = "failed shard on node [" + requested.currentNodeId() + "]: " + entry.getMessage();
        final UnassignedInfo unassignedInfo = new UnassignedInfo(
            UnassignedInfo.Reason.ALLOCATION_FAILED,
            message,
            entry.getFailure(),
            previousFailures + 1,
            nowNanos,
            System.currentTimeMillis(),
            false,
            AllocationStatus.NO_ATTEMPT,
            failedNodeIds
        );
        if (entry.markAsStale()) {
            allocation.removeAllocationId(current);
        }
        LOGGER.warn(new ParameterizedMessage("failing shard [{}]", entry), entry.getFailure());
        routingNodes.failShard(LOGGER, current, unassignedInfo, indexMetadata, allocation.changes());
    }
    gatewayAllocator.applyFailedShards(allocation, failedShards);
    reroute(allocation);
    final String failedShardsAsString = firstListElementsToCommaDelimitedString(
        failedShards,
        failed -> failed.getRoutingEntry().shardId().toString(),
        LOGGER.isDebugEnabled()
    );
    return buildResultAndLogHealthChange(clusterState, allocation, "shards failed [" + failedShardsAsString + "]");
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata)

Example 63 with UnassignedInfo

use of org.elasticsearch.cluster.routing.UnassignedInfo in project crate by crate.

the class AllocationService method disassociateDeadNodes.

/**
 * Fails every shard that is still routed to a node missing from the current set of data nodes,
 * then removes that node from the routing nodes.
 *
 * @param allocation the routing allocation whose routing nodes are updated in place
 */
private void disassociateDeadNodes(RoutingAllocation allocation) {
    final Iterator<RoutingNode> nodeIterator = allocation.routingNodes().mutableIterator();
    while (nodeIterator.hasNext()) {
        final RoutingNode routingNode = nodeIterator.next();
        final boolean isDead = allocation.nodes().getDataNodes().containsKey(routingNode.nodeId()) == false;
        if (isDead) {
            // go over all the shards routed to the node, and fail them
            for (ShardRouting shard : routingNode.copyShards()) {
                final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shard.index());
                // the unassigned info is flagged as delayed when the index has a positive node-left timeout
                final boolean delayed = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).nanos() > 0;
                final UnassignedInfo unassignedInfo = new UnassignedInfo(
                    UnassignedInfo.Reason.NODE_LEFT,
                    "node_left [" + routingNode.nodeId() + "]",
                    null,
                    0,
                    allocation.getCurrentNanoTime(),
                    System.currentTimeMillis(),
                    delayed,
                    AllocationStatus.NO_ATTEMPT,
                    Collections.emptySet()
                );
                allocation.routingNodes().failShard(LOGGER, shard, unassignedInfo, indexMetadata, allocation.changes());
            }
            // it's a dead node, remove it; note, it's important to remove it *after* we apply failed shard
            // since it relies on the fact that the RoutingNode exists in the list of nodes
            nodeIterator.remove();
        }
        // otherwise it's a live node and nothing needs to be done
    }
}
Also used : RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata)

Example 64 with UnassignedInfo

use of org.elasticsearch.cluster.routing.UnassignedInfo in project crate by crate.

the class SysShardsExpressionsTest method mockIndexShard.

/**
 * Builds a mocked {@link IndexShard} in state STARTED with fixed store, document and recovery
 * statistics, and a primary routing entry that has been initialized on "node1", started and
 * then relocated towards "node_X".
 *
 * Side effect: assigns a fresh random value to the {@code indexUUID} field.
 *
 * @return the stubbed shard mock
 */
private IndexShard mockIndexShard() {
    // Identity of the index and shard
    indexUUID = UUIDs.randomBase64UUID();
    Index index = new Index(indexName, indexUUID);
    ShardId shardId = new ShardId(indexName, indexUUID, 1);

    // NOTE(review): this IndexService mock is stubbed but not referenced again within this method.
    IndexService indexService = mock(IndexService.class);
    IndexShard shard = mock(IndexShard.class);
    when(indexService.index()).thenReturn(index);

    // Basic shard properties
    when(shard.shardId()).thenReturn(shardId);
    when(shard.state()).thenReturn(IndexShardState.STARTED);
    when(shard.storeStats()).thenReturn(new StoreStats(123456L));
    Path shardDataPath = Paths.get("/dummy/" + indexUUID + "/" + shardId.id());
    when(shard.shardPath()).thenReturn(new ShardPath(false, shardDataPath, shardDataPath, shardId));
    // The first docStats() call returns the stats, any later call throws
    when(shard.docStats())
        .thenReturn(new DocsStats(654321L, 0L, 200L))
        .thenThrow(IllegalIndexShardStateException.class);

    // Routing entry: unassigned -> initializing on node1 -> started -> relocating to node_X
    ShardRouting unassigned = ShardRouting.newUnassigned(
        shardId,
        true,
        RecoverySource.PeerRecoverySource.INSTANCE,
        new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")
    );
    ShardRouting initializing = ShardRoutingHelper.initialize(unassigned, "node1");
    ShardRouting started = ShardRoutingHelper.moveToStarted(initializing);
    ShardRouting relocating = ShardRoutingHelper.relocate(started, "node_X");
    when(shard.routingEntry()).thenReturn(relocating);
    when(shard.minimumCompatibleVersion()).thenReturn(Version.LATEST);

    // Recovery state with its index and timer sub-objects
    RecoveryState recovery = mock(RecoveryState.class);
    when(shard.recoveryState()).thenReturn(recovery);
    RecoveryState.Index recoveryIndex = mock(RecoveryState.Index.class);
    RecoveryState.Timer recoveryTimer = mock(RecoveryState.Timer.class);
    when(recovery.getRecoverySource()).thenReturn(RecoverySource.PeerRecoverySource.INSTANCE);
    when(recovery.getIndex()).thenReturn(recoveryIndex);
    when(recovery.getStage()).thenReturn(RecoveryState.Stage.DONE);
    when(recovery.getTimer()).thenReturn(recoveryTimer);

    // Byte counters
    when(recoveryIndex.totalBytes()).thenReturn(2048L);
    when(recoveryIndex.reusedBytes()).thenReturn(1024L);
    when(recoveryIndex.recoveredBytes()).thenReturn(1024L);
    // File counters
    when(recoveryIndex.totalFileCount()).thenReturn(2);
    when(recoveryIndex.reusedFileCount()).thenReturn(1);
    when(recoveryIndex.recoveredFileCount()).thenReturn(1);
    when(recoveryTimer.time()).thenReturn(10000L);
    return shard;
}
Also used : Path(java.nio.file.Path) ShardPath(org.elasticsearch.index.shard.ShardPath) StoreStats(org.elasticsearch.index.store.StoreStats) IndexService(org.elasticsearch.index.IndexService) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) IndexShard(org.elasticsearch.index.shard.IndexShard) Index(org.elasticsearch.index.Index) ShardId(org.elasticsearch.index.shard.ShardId) DocsStats(org.elasticsearch.index.shard.DocsStats) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) RecoveryState(org.elasticsearch.indices.recovery.RecoveryState)

Example 65 with UnassignedInfo

use of org.elasticsearch.cluster.routing.UnassignedInfo in project crate by crate.

the class DiskThresholdDeciderUnitTests method testCanRemainUsesLeastAvailableSpace.

/**
 * Verifies the decisions of {@code DiskThresholdDecider#canRemain} for four started primaries:
 * <ul>
 *   <li>test_0 on node_0, located on node_0's least-available path: expected to remain;</li>
 *   <li>test_1 on node_1, located on node_1's least-available path: expected not to remain;</li>
 *   <li>test_2 on node_1, located on node_1's most-available path: expected to remain;</li>
 *   <li>test_3 on node_1, deliberately absent from the shard routing map (data path unknown): expected to remain.</li>
 * </ul>
 * Asking about a shard on a node it is not allocated to is expected to throw {@link IllegalArgumentException}.
 */
@Test
public void testCanRemainUsesLeastAvailableSpace() {
    ClusterSettings nss = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    DiskThresholdDecider decider = new DiskThresholdDecider(Settings.EMPTY, nss);
    ImmutableOpenMap.Builder<ShardRouting, String> shardRoutingMap = ImmutableOpenMap.builder();
    DiscoveryNode node_0 = new DiscoveryNode("node_0", buildNewFakeTransportAddress(), Collections.emptyMap(), new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), Version.CURRENT);
    DiscoveryNode node_1 = new DiscoveryNode("node_1", buildNewFakeTransportAddress(), Collections.emptyMap(), new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), Version.CURRENT);
    Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)).build();
    final IndexMetadata indexMetadata = metadata.index("test");
    // Each shard is created unassigned, initialized on its node and moved to started.
    ShardRouting test_0 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 0), true, EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_0 = ShardRoutingHelper.initialize(test_0, node_0.getId());
    test_0 = ShardRoutingHelper.moveToStarted(test_0);
    shardRoutingMap.put(test_0, "/node0/least");
    ShardRouting test_1 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 1), true, EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_1 = ShardRoutingHelper.initialize(test_1, node_1.getId());
    test_1 = ShardRoutingHelper.moveToStarted(test_1);
    shardRoutingMap.put(test_1, "/node1/least");
    ShardRouting test_2 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 2), true, EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_2 = ShardRoutingHelper.initialize(test_2, node_1.getId());
    test_2 = ShardRoutingHelper.moveToStarted(test_2);
    shardRoutingMap.put(test_2, "/node1/most");
    ShardRouting test_3 = ShardRouting.newUnassigned(new ShardId(indexMetadata.getIndex(), 3), true, EmptyStoreRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
    test_3 = ShardRoutingHelper.initialize(test_3, node_1.getId());
    test_3 = ShardRoutingHelper.moveToStarted(test_3);
    // Intentionally not in the shardRoutingMap. We want to test what happens when we don't know where it is.
    RoutingTable routingTable = RoutingTable.builder().addAsNew(indexMetadata).build();
    ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(routingTable).build();
    logger.info("--> adding two nodes");
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(node_0).add(node_1)).build();
    // actual test -- after all that bloat :)
    ImmutableOpenMap.Builder<String, DiskUsage> leastAvailableUsages = ImmutableOpenMap.builder();
    // 90% used
    leastAvailableUsages.put("node_0", new DiskUsage("node_0", "node_0", "/node0/least", 100, 10));
    // 91% used
    leastAvailableUsages.put("node_1", new DiskUsage("node_1", "node_1", "/node1/least", 100, 9));
    ImmutableOpenMap.Builder<String, DiskUsage> mostAvailableUsage = ImmutableOpenMap.builder();
    // 10% used
    mostAvailableUsage.put("node_0", new DiskUsage("node_0", "node_0", "/node0/most", 100, 90));
    // 10% used
    mostAvailableUsage.put("node_1", new DiskUsage("node_1", "node_1", "/node1/most", 100, 90));
    ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
    // 10 bytes
    shardSizes.put("[test][0][p]", 10L);
    shardSizes.put("[test][1][p]", 10L);
    shardSizes.put("[test][2][p]", 10L);
    final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build());
    RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.singleton(decider)), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime());
    allocation.debugDecision(true);
    Decision decision = decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation);
    assertEquals(Decision.Type.YES, decision.type());
    assertThat(decision.getExplanation(), containsString("there is enough disk on this node for the shard to remain, free: [10b]"));
    decision = decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation);
    assertEquals(Decision.Type.NO, decision.type());
    assertThat(decision.getExplanation(), containsString("the shard cannot remain on this node because it is " + "above the high watermark cluster setting [cluster.routing.allocation.disk.watermark.high=90%] and there is less than " + "the required [10.0%] free disk on node, actual free: [9.0%]"));
    try {
        decider.canRemain(test_0, new RoutingNode("node_1", node_1), allocation);
        fail("not allocated on this node");
    } catch (IllegalArgumentException ignored) {
        // expected: test_0 is not allocated on node_1
    }
    try {
        decider.canRemain(test_1, new RoutingNode("node_0", node_0), allocation);
        fail("not allocated on this node");
    } catch (IllegalArgumentException ignored) {
        // expected: test_1 is not allocated on node_0
    }
    decision = decider.canRemain(test_2, new RoutingNode("node_1", node_1), allocation);
    assertEquals("can stay since allocated on a different path with enough space", Decision.Type.YES, decision.type());
    assertThat(decision.getExplanation(), containsString("this shard is not allocated on the most utilized disk and can remain"));
    // test_3 has no entry in the shardRoutingMap, so this exercises the "unknown data path" case
    // (previously this check repeated test_2 and test_3 was never used).
    decision = decider.canRemain(test_3, new RoutingNode("node_1", node_1), allocation);
    assertEquals("can stay since we don't have information about this shard", Decision.Type.YES, decision.type());
    assertThat(decision.getExplanation(), containsString("this shard is not allocated on the most utilized disk and can remain"));
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) Matchers.containsString(org.hamcrest.Matchers.containsString) DiskUsage(org.elasticsearch.cluster.DiskUsage) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) ShardId(org.elasticsearch.index.shard.ShardId) DevNullClusterInfo(org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDeciderTests.DevNullClusterInfo) ClusterInfo(org.elasticsearch.cluster.ClusterInfo) RoutingTable(org.elasticsearch.cluster.routing.RoutingTable) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) RoutingAllocation(org.elasticsearch.cluster.routing.allocation.RoutingAllocation) Test(org.junit.Test)

Aggregations

UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)68 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)45 ShardId (org.elasticsearch.index.shard.ShardId)36 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)30 ClusterState (org.elasticsearch.cluster.ClusterState)20 Index (org.elasticsearch.index.Index)19 ClusterInfo (org.elasticsearch.cluster.ClusterInfo)18 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)18 RoutingNode (org.elasticsearch.cluster.routing.RoutingNode)18 RoutingTable (org.elasticsearch.cluster.routing.RoutingTable)18 RoutingNodes (org.elasticsearch.cluster.routing.RoutingNodes)17 IndexMetadata (org.elasticsearch.cluster.metadata.IndexMetadata)16 RoutingAllocation (org.elasticsearch.cluster.routing.allocation.RoutingAllocation)16 Matchers.containsString (org.hamcrest.Matchers.containsString)16 ShardRoutingState (org.elasticsearch.cluster.routing.ShardRoutingState)14 AllocateUnassignedDecision (org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision)12 NodeAllocationResult (org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)11 Decision (org.elasticsearch.cluster.routing.allocation.decider.Decision)11 ImmutableOpenMap (org.elasticsearch.common.collect.ImmutableOpenMap)11 MetaData (org.elasticsearch.cluster.metadata.MetaData)10