Search in sources :

Example 96 with RoutingNodes

use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.

the class AllocationService method applyFailedShards.

/**
 * Applies the failed shards. Note, only assigned ShardRouting instances that exist in the routing table should be
 * provided as parameter. Also applies a list of allocation ids to remove from the in-sync set for shard copies for which there
 * are no routing entries in the routing table.
 *
 * <p>
 * If the same instance of ClusterState is returned, then no change has been made.</p>
 */
public ClusterState applyFailedShards(final ClusterState clusterState, final List<FailedShard> failedShards, final List<StaleShard> staleShards) {
    if (staleShards.isEmpty() && failedShards.isEmpty()) {
        return clusterState;
    }
    ClusterState tmpState = IndexMetadataUpdater.removeStaleIdsWithoutRoutings(clusterState, staleShards, LOGGER);
    RoutingNodes routingNodes = getMutableRoutingNodes(tmpState);
    // shuffle the unassigned nodes, just so we won't have things like poison failed shards
    routingNodes.unassigned().shuffle();
    long currentNanoTime = currentNanoTime();
    RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, tmpState, clusterInfoService.getClusterInfo(), currentNanoTime);
    for (FailedShard failedShardEntry : failedShards) {
        ShardRouting shardToFail = failedShardEntry.getRoutingEntry();
        IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardToFail.shardId().getIndex());
        allocation.addIgnoreShardForNode(shardToFail.shardId(), shardToFail.currentNodeId());
        // failing a primary also fails initializing replica shards, re-resolve ShardRouting
        ShardRouting failedShard = routingNodes.getByAllocationId(shardToFail.shardId(), shardToFail.allocationId().getId());
        if (failedShard != null) {
            if (failedShard != shardToFail) {
                LOGGER.trace("{} shard routing modified in an earlier iteration (previous: {}, current: {})", shardToFail.shardId(), shardToFail, failedShard);
            }
            int failedAllocations = failedShard.unassignedInfo() != null ? failedShard.unassignedInfo().getNumFailedAllocations() : 0;
            final Set<String> failedNodeIds;
            if (failedShard.unassignedInfo() != null) {
                failedNodeIds = new HashSet<>(failedShard.unassignedInfo().getFailedNodeIds().size() + 1);
                failedNodeIds.addAll(failedShard.unassignedInfo().getFailedNodeIds());
                failedNodeIds.add(failedShard.currentNodeId());
            } else {
                failedNodeIds = Collections.emptySet();
            }
            String message = "failed shard on node [" + shardToFail.currentNodeId() + "]: " + failedShardEntry.getMessage();
            UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, message, failedShardEntry.getFailure(), failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false, AllocationStatus.NO_ATTEMPT, failedNodeIds);
            if (failedShardEntry.markAsStale()) {
                allocation.removeAllocationId(failedShard);
            }
            LOGGER.warn(new ParameterizedMessage("failing shard [{}]", failedShardEntry), failedShardEntry.getFailure());
            routingNodes.failShard(LOGGER, failedShard, unassignedInfo, indexMetadata, allocation.changes());
        } else {
            LOGGER.trace("{} shard routing failed in an earlier iteration (routing: {})", shardToFail.shardId(), shardToFail);
        }
    }
    gatewayAllocator.applyFailedShards(allocation, failedShards);
    reroute(allocation);
    String failedShardsAsString = firstListElementsToCommaDelimitedString(failedShards, s -> s.getRoutingEntry().shardId().toString(), LOGGER.isDebugEnabled());
    return buildResultAndLogHealthChange(clusterState, allocation, "shards failed [" + failedShardsAsString + "]");
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata)

Example 97 with RoutingNodes

use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.

the class AllocationService method disassociateDeadNodes.

/**
 * unassigned an shards that are associated with nodes that are no longer part of the cluster, potentially promoting replicas
 * if needed.
 */
public ClusterState disassociateDeadNodes(ClusterState clusterState, boolean reroute, String reason) {
    RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
    // shuffle the unassigned nodes, just so we won't have things like poison failed shards
    routingNodes.unassigned().shuffle();
    RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime());
    // first, clear from the shards any node id they used to belong to that is now dead
    disassociateDeadNodes(allocation);
    if (allocation.routingNodesChanged()) {
        clusterState = buildResult(clusterState, allocation);
    }
    if (reroute) {
        return reroute(clusterState, reason);
    } else {
        return clusterState;
    }
}
Also used : RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes)

Example 98 with RoutingNodes

use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.

the class AllocationService method reroute.

/**
 * Reroutes the routing table based on the live nodes.
 * <p>
 * If the same instance of ClusterState is returned, then no change has been made.
 */
public ClusterState reroute(ClusterState clusterState, String reason) {
    ClusterState fixedClusterState = adaptAutoExpandReplicas(clusterState);
    RoutingNodes routingNodes = getMutableRoutingNodes(fixedClusterState);
    // shuffle the unassigned nodes, just so we won't have things like poison failed shards
    routingNodes.unassigned().shuffle();
    RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, fixedClusterState, clusterInfoService.getClusterInfo(), currentNanoTime());
    reroute(allocation);
    if (fixedClusterState == clusterState && allocation.routingNodesChanged() == false) {
        return clusterState;
    }
    return buildResultAndLogHealthChange(clusterState, allocation, reason);
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes)

Example 99 with RoutingNodes

use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.

the class DiskThresholdDeciderTests method logShardStates.

public void logShardStates(ClusterState state) {
    RoutingNodes rn = state.getRoutingNodes();
    logger.info("--> counts: total: {}, unassigned: {}, initializing: {}, relocating: {}, started: {}", rn.shards(shard -> true).size(), rn.shardsWithState(UNASSIGNED).size(), rn.shardsWithState(INITIALIZING).size(), rn.shardsWithState(RELOCATING).size(), rn.shardsWithState(STARTED).size());
    logger.info("--> unassigned: {}, initializing: {}, relocating: {}, started: {}", rn.shardsWithState(UNASSIGNED), rn.shardsWithState(INITIALIZING), rn.shardsWithState(RELOCATING), rn.shardsWithState(STARTED));
}
Also used : RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes)

Example 100 with RoutingNodes

use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.

the class DiskThresholdDeciderTests method testCanRemainWithShardRelocatingAway.

public void testCanRemainWithShardRelocatingAway() {
    Settings diskSettings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "60%").put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "70%").build();
    // We have an index with 2 primary shards each taking 40 bytes. Each node has 100 bytes available
    ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
    // 80% used
    usagesBuilder.put("node1", new DiskUsage("node1", "n1", "/dev/null", 100, 20));
    // 0% used
    usagesBuilder.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 100));
    ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
    ImmutableOpenMap.Builder<String, Long> shardSizesBuilder = ImmutableOpenMap.builder();
    shardSizesBuilder.put("[test][0][p]", 40L);
    shardSizesBuilder.put("[test][1][p]", 40L);
    shardSizesBuilder.put("[foo][0][p]", 10L);
    ImmutableOpenMap<String, Long> shardSizes = shardSizesBuilder.build();
    final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes);
    DiskThresholdDecider diskThresholdDecider = makeDecider(diskSettings);
    Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test").settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "false"))).put(IndexMetadata.builder("foo").settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "false"))).build();
    RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).addAsNew(metadata.index("foo")).build();
    DiscoveryNode discoveryNode1 = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, Version.CURRENT);
    DiscoveryNode discoveryNode2 = new DiscoveryNode("node2", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, Version.CURRENT);
    DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(discoveryNode1).add(discoveryNode2).build();
    ClusterState baseClusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(initialRoutingTable).nodes(discoveryNodes).build();
    // Two shards consuming each 80% of disk space while 70% is allowed, so shard 0 isn't allowed here
    ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, true, ShardRoutingState.STARTED);
    ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", null, true, ShardRoutingState.STARTED);
    RoutingNode firstRoutingNode = new RoutingNode("node1", discoveryNode1, firstRouting, secondRouting);
    RoutingTable.Builder builder = RoutingTable.builder().add(IndexRoutingTable.builder(firstRouting.index()).addIndexShard(new IndexShardRoutingTable.Builder(firstRouting.shardId()).addShard(firstRouting).build()).addIndexShard(new IndexShardRoutingTable.Builder(secondRouting.shardId()).addShard(secondRouting).build()));
    ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build();
    RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime());
    routingAllocation.debugDecision(true);
    Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation);
    assertThat(decision.type(), equalTo(Decision.Type.NO));
    assertThat(decision.getExplanation(), containsString("the shard cannot remain on this node because it is above the high watermark cluster setting " + "[cluster.routing.allocation.disk.watermark.high=70%] and there is less than the required [30.0%] free disk on node, " + "actual free: [20.0%]"));
    // Two shards consuming each 80% of disk space while 70% is allowed, but one is relocating, so shard 0 can stay
    firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, true, ShardRoutingState.STARTED);
    secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", "node2", true, ShardRoutingState.RELOCATING);
    ShardRouting fooRouting = TestShardRouting.newShardRouting("foo", 0, null, true, ShardRoutingState.UNASSIGNED);
    firstRoutingNode = new RoutingNode("node1", discoveryNode1, firstRouting, secondRouting);
    builder = RoutingTable.builder().add(IndexRoutingTable.builder(firstRouting.index()).addIndexShard(new IndexShardRoutingTable.Builder(firstRouting.shardId()).addShard(firstRouting).build()).addIndexShard(new IndexShardRoutingTable.Builder(secondRouting.shardId()).addShard(secondRouting).build()));
    clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build();
    routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime());
    routingAllocation.debugDecision(true);
    decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation);
    assertThat(decision.type(), equalTo(Decision.Type.YES));
    assertEquals("there is enough disk on this node for the shard to remain, free: [60b]", decision.getExplanation());
    decision = diskThresholdDecider.canAllocate(fooRouting, firstRoutingNode, routingAllocation);
    assertThat(decision.type(), equalTo(Decision.Type.NO));
    if (fooRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE) {
        assertThat(decision.getExplanation(), containsString("the node is above the high watermark cluster setting [cluster.routing.allocation.disk.watermark.high=70%], using " + "more disk space than the maximum allowed [70.0%], actual free: [20.0%]"));
    } else {
        assertThat(decision.getExplanation(), containsString("the node is above the low watermark cluster setting [cluster.routing.allocation.disk.watermark.low=60%], using more " + "disk space than the maximum allowed [60.0%], actual free: [20.0%]"));
    }
    // Creating AllocationService instance and the services it depends on...
    ClusterInfoService cis = () -> {
        logger.info("--> calling fake getClusterInfo");
        return clusterInfo;
    };
    AllocationDeciders deciders = new AllocationDeciders(new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), diskThresholdDecider)));
    AllocationService strategy = new AllocationService(deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), cis);
    // Ensure that the reroute call doesn't alter the routing table, since the first primary is relocating away
    // and therefor we will have sufficient disk space on node1.
    ClusterState result = strategy.reroute(clusterState, "reroute");
    assertThat(result, equalTo(clusterState));
    assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().state(), equalTo(STARTED));
    assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().currentNodeId(), equalTo("node1"));
    assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().relocatingNodeId(), nullValue());
    assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().state(), equalTo(RELOCATING));
    assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().currentNodeId(), equalTo("node1"));
    assertThat(result.routingTable().index("test").getShards().get(1).primaryShard().relocatingNodeId(), equalTo("node2"));
}
Also used : IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) Matchers.containsString(org.hamcrest.Matchers.containsString) DiskUsage(org.elasticsearch.cluster.DiskUsage) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) DiskThresholdSettings(org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings) Settings(org.elasticsearch.common.settings.Settings) ClusterSettings(org.elasticsearch.common.settings.ClusterSettings) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes) AllocationService(org.elasticsearch.cluster.routing.allocation.AllocationService) TestGatewayAllocator(org.elasticsearch.test.gateway.TestGatewayAllocator) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterInfoService(org.elasticsearch.cluster.ClusterInfoService) BalancedShardsAllocator(org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator) ClusterInfo(org.elasticsearch.cluster.ClusterInfo) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) RoutingTable(org.elasticsearch.cluster.routing.RoutingTable) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) TestShardRouting(org.elasticsearch.cluster.routing.TestShardRouting) RoutingAllocation(org.elasticsearch.cluster.routing.allocation.RoutingAllocation)

Aggregations

RoutingNodes (org.elasticsearch.cluster.routing.RoutingNodes)104 ClusterState (org.elasticsearch.cluster.ClusterState)72 RoutingTable (org.elasticsearch.cluster.routing.RoutingTable)64 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)55 MetaData (org.elasticsearch.cluster.metadata.MetaData)53 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)39 RoutingNode (org.elasticsearch.cluster.routing.RoutingNode)24 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)20 IndexMetadata (org.elasticsearch.cluster.metadata.IndexMetadata)15 IndexShardRoutingTable (org.elasticsearch.cluster.routing.IndexShardRoutingTable)15 DiscoveryNodes (org.elasticsearch.cluster.node.DiscoveryNodes)14 UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)14 RoutingAllocation (org.elasticsearch.cluster.routing.allocation.RoutingAllocation)14 Metadata (org.elasticsearch.cluster.metadata.Metadata)13 IndexRoutingTable (org.elasticsearch.cluster.routing.IndexRoutingTable)11 TestShardRouting (org.elasticsearch.cluster.routing.TestShardRouting)11 ArrayList (java.util.ArrayList)9 Settings (org.elasticsearch.common.settings.Settings)9 ClusterInfo (org.elasticsearch.cluster.ClusterInfo)8 RerouteExplanation (org.elasticsearch.cluster.routing.allocation.RerouteExplanation)8