Search in sources :

Example 1 with AllocationCommands

use of org.opensearch.cluster.routing.allocation.command.AllocationCommands in project OpenSearch by opensearch-project.

the class AwarenessAllocationTests method testUnassignedShardsWithUnbalancedZones.

/**
 * Checks zone-aware allocation when the zones are unbalanced.
 *
 * <p>The index has one shard with four replicas (five copies). Six nodes are added: five in
 * zone "a" and one in zone "b". The test asserts that one copy stays unassigned, both after the
 * primary has started and after the initializing replicas are cancelled and the primary is moved
 * to node "A-4".
 */
public void testUnassignedShardsWithUnbalancedZones() {
    AllocationService strategy = createAllocationService(
        Settings.builder()
            .put("cluster.routing.allocation.node_concurrent_recoveries", 10)
            .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
            .put("cluster.routing.allocation.awareness.attributes", "zone")
            .build()
    );

    logger.info("Building initial routing table for 'testUnassignedShardsWithUnbalancedZones'");
    Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(4))
        .build();
    RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(initialRoutingTable)
        .build();

    // Six nodes in total: A-0..A-4 live in zone "a", B-0 is the only node in zone "b".
    logger.info("--> adding 6 nodes (5 in zone a, 1 in zone b) and do rerouting");
    clusterState = ClusterState.builder(clusterState)
        .nodes(
            DiscoveryNodes.builder()
                .add(newNode("A-0", singletonMap("zone", "a")))
                .add(newNode("A-1", singletonMap("zone", "a")))
                .add(newNode("A-2", singletonMap("zone", "a")))
                .add(newNode("A-3", singletonMap("zone", "a")))
                .add(newNode("A-4", singletonMap("zone", "a")))
                .add(newNode("B-0", singletonMap("zone", "b")))
        )
        .build();
    clusterState = strategy.reroute(clusterState, "reroute");
    // After the first reroute exactly one copy is initializing and nothing has started yet.
    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shard (primary)");
    clusterState = startInitializingShardsAndReroute(strategy, clusterState);
    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(3));
    // Unassigned shard is expected.
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));

    // Cancel all initializing shards and move started primary to another node.
    AllocationCommands commands = new AllocationCommands();
    String primaryNode = null;
    for (ShardRouting routing : clusterState.routingTable().allShards()) {
        if (routing.primary()) {
            primaryNode = routing.currentNodeId();
        } else if (routing.initializing()) {
            commands.add(new CancelAllocationCommand(routing.shardId().getIndexName(), routing.id(), routing.currentNodeId(), false));
        }
    }
    // Fail with a clear message instead of handing a null source node to the move command.
    if (primaryNode == null) {
        throw new AssertionError("expected the primary of index [test] to be assigned to a node before moving it");
    }
    commands.add(new MoveAllocationCommand("test", 0, primaryNode, "A-4"));
    clusterState = strategy.reroute(clusterState, commands, false, false).getClusterState();

    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    // +1 for relocating shard.
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(4));
    // Still 1 unassigned.
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RoutingTable(org.opensearch.cluster.routing.RoutingTable) CancelAllocationCommand(org.opensearch.cluster.routing.allocation.command.CancelAllocationCommand) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) ShardRouting(org.opensearch.cluster.routing.ShardRouting) AllocationCommands(org.opensearch.cluster.routing.allocation.command.AllocationCommands)

Example 2 with AllocationCommands

use of org.opensearch.cluster.routing.allocation.command.AllocationCommands in project OpenSearch by opensearch-project.

the class DeadNodesAllocationTests method testDeadNodeWhileRelocatingOnFromNode.

/**
 * Relocates the primary of a one-shard, one-replica index to a third node, then removes the
 * relocation source node from the cluster and checks the resulting shard states.
 */
public void testDeadNodeWhileRelocatingOnFromNode() {
    final Settings allocationSettings = Settings.builder()
        .put("cluster.routing.allocation.node_concurrent_recoveries", 10)
        .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
        .build();
    final AllocationService allocation = createAllocationService(allocationSettings);

    logger.info("--> building initial routing table");
    final Metadata indexMeta = Metadata.builder()
        .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
        .build();
    final RoutingTable initialRouting = RoutingTable.builder().addAsNew(indexMeta.index("test")).build();
    ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(indexMeta)
        .routingTable(initialRouting)
        .build();

    logger.info("--> adding 2 nodes on same rack and do rerouting");
    state = ClusterState.builder(state).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
    state = allocation.reroute(state, "reroute");
    // First round starts the primaries, second round starts the replicas.
    state = startInitializingShardsAndReroute(allocation, state);
    state = startInitializingShardsAndReroute(allocation, state);

    logger.info("--> verifying all is allocated");
    for (String nodeId : new String[] { "node1", "node2" }) {
        assertThat(state.getRoutingNodes().node(nodeId).size(), equalTo(1));
        assertThat(state.getRoutingNodes().node(nodeId).iterator().next().state(), equalTo(STARTED));
    }

    logger.info("--> adding additional node");
    state = ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes()).add(newNode("node3"))).build();
    state = allocation.reroute(state, "reroute");
    // The two original nodes keep their started copy; the new node holds nothing yet.
    for (String nodeId : new String[] { "node1", "node2" }) {
        assertThat(state.getRoutingNodes().node(nodeId).size(), equalTo(1));
        assertThat(state.getRoutingNodes().node(nodeId).iterator().next().state(), equalTo(STARTED));
    }
    assertThat(state.getRoutingNodes().node("node3").size(), equalTo(0));

    String origPrimaryNodeId = state.routingTable().index("test").shard(0).primaryShard().currentNodeId();
    String origReplicaNodeId = state.routingTable().index("test").shard(0).replicaShards().get(0).currentNodeId();

    logger.info("--> moving primary shard to node3");
    final MoveAllocationCommand movePrimary = new MoveAllocationCommand("test", 0, origPrimaryNodeId, "node3");
    AllocationService.CommandsResult moveResult = allocation.reroute(state, new AllocationCommands(movePrimary), false, false);
    assertThat(moveResult.getClusterState(), not(equalTo(state)));
    state = moveResult.getClusterState();
    assertThat(state.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING));
    assertThat(state.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));

    logger.info("--> fail primary shard recovering instance on 'origPrimaryNodeId' being relocated");
    // The new node set contains only node3 and the replica's node, so the relocation source is gone.
    final DiscoveryNodes.Builder survivors = DiscoveryNodes.builder().add(newNode("node3")).add(newNode(origReplicaNodeId));
    state = ClusterState.builder(state).nodes(survivors).build();
    state = allocation.disassociateDeadNodes(state, true, "reroute");
    assertThat(state.getRoutingNodes().node(origReplicaNodeId).iterator().next().state(), equalTo(STARTED));
    assertThat(state.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RoutingTable(org.opensearch.cluster.routing.RoutingTable) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) AllocationCommands(org.opensearch.cluster.routing.allocation.command.AllocationCommands)

Example 3 with AllocationCommands

use of org.opensearch.cluster.routing.allocation.command.AllocationCommands in project OpenSearch by opensearch-project.

the class DeadNodesAllocationTests method testDeadNodeWhileRelocatingOnToNode.

/**
 * Relocates the primary of a one-shard, one-replica index to a third node, then removes that
 * relocation target node from the cluster and checks that both original copies are started.
 */
public void testDeadNodeWhileRelocatingOnToNode() {
    final Settings serviceSettings = Settings.builder()
        .put("cluster.routing.allocation.node_concurrent_recoveries", 10)
        .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
        .build();
    final AllocationService allocation = createAllocationService(serviceSettings);

    logger.info("--> building initial routing table");
    final Metadata meta = Metadata.builder()
        .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
        .build();
    final RoutingTable startingTable = RoutingTable.builder().addAsNew(meta.index("test")).build();
    ClusterState current = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(meta)
        .routingTable(startingTable)
        .build();

    logger.info("--> adding 2 nodes on same rack and do rerouting");
    current = ClusterState.builder(current).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
    current = allocation.reroute(current, "reroute");
    // Two start rounds: primaries first, then replicas.
    current = startInitializingShardsAndReroute(allocation, current);
    current = startInitializingShardsAndReroute(allocation, current);

    logger.info("--> verifying all is allocated");
    for (String id : new String[] { "node1", "node2" }) {
        assertThat(current.getRoutingNodes().node(id).size(), equalTo(1));
        assertThat(current.getRoutingNodes().node(id).iterator().next().state(), equalTo(STARTED));
    }

    logger.info("--> adding additional node");
    current = ClusterState.builder(current).nodes(DiscoveryNodes.builder(current.nodes()).add(newNode("node3"))).build();
    current = allocation.reroute(current, "reroute");
    // Nothing moves on its own: node1 and node2 keep one started copy each, node3 stays empty.
    for (String id : new String[] { "node1", "node2" }) {
        assertThat(current.getRoutingNodes().node(id).size(), equalTo(1));
        assertThat(current.getRoutingNodes().node(id).iterator().next().state(), equalTo(STARTED));
    }
    assertThat(current.getRoutingNodes().node("node3").size(), equalTo(0));

    String origPrimaryNodeId = current.routingTable().index("test").shard(0).primaryShard().currentNodeId();
    String origReplicaNodeId = current.routingTable().index("test").shard(0).replicaShards().get(0).currentNodeId();

    logger.info("--> moving primary shard to node3");
    final AllocationCommands moveToNode3 = new AllocationCommands(new MoveAllocationCommand("test", 0, origPrimaryNodeId, "node3"));
    AllocationService.CommandsResult rerouted = allocation.reroute(current, moveToNode3, false, false);
    assertThat(rerouted.getClusterState(), not(equalTo(current)));
    current = rerouted.getClusterState();
    assertThat(current.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING));
    assertThat(current.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));

    logger.info("--> fail primary shard recovering instance on node3 being initialized by killing node3");
    // The new node set lists only the two original nodes, so node3 is no longer part of the cluster.
    final DiscoveryNodes.Builder remaining = DiscoveryNodes.builder().add(newNode(origPrimaryNodeId)).add(newNode(origReplicaNodeId));
    current = ClusterState.builder(current).nodes(remaining).build();
    current = allocation.disassociateDeadNodes(current, true, "reroute");
    assertThat(current.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(STARTED));
    assertThat(current.getRoutingNodes().node(origReplicaNodeId).iterator().next().state(), equalTo(STARTED));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RoutingTable(org.opensearch.cluster.routing.RoutingTable) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) AllocationCommands(org.opensearch.cluster.routing.allocation.command.AllocationCommands)

Example 4 with AllocationCommands

use of org.opensearch.cluster.routing.allocation.command.AllocationCommands in project OpenSearch by opensearch-project.

the class MaxRetryAllocationDeciderTests method testSingleRetryOnIgnore.

/**
 * Exercises the max-retry limit for a single shard of index "idx".
 *
 * <p>The shard is failed {@code retries - 1} times and must still be INITIALIZING after each
 * failure. One more failure must leave it UNASSIGNED with a failed-allocation count equal to
 * {@code retries}. A reroute with an empty command list is then expected to reset the count to
 * zero and put the shard back into INITIALIZING, after which the same sequence is repeated.
 *
 * <p>All {@code assertEquals} calls use the (expected, actual) argument order so that failure
 * messages report the values the right way round.
 */
public void testSingleRetryOnIgnore() {
    ClusterState clusterState = createInitialClusterState();
    RoutingTable routingTable = clusterState.routingTable();
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // now fail it N-1 times
    for (int i = 0; i < retries - 1; i++) {
        List<FailedShard> failedShards = Collections.singletonList(
            new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException(), randomBoolean())
        );
        ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
        assertThat(newState, not(equalTo(clusterState)));
        clusterState = newState;
        routingTable = newState.routingTable();
        assertEquals(1, routingTable.index("idx").shards().size());
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
        assertEquals(i + 1, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
        assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom" + i));
    }
    // now check that the shard actually sticks to unassigned on the next failure
    List<FailedShard> failedShards = Collections.singletonList(
        new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException(), randomBoolean())
    );
    ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
    assertThat(newState, not(equalTo(clusterState)));
    clusterState = newState;
    routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(retries, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));
    // manual resetting of retry count
    // NOTE(review): the trailing `true` is presumably the retry-failed flag of reroute — confirm against AllocationService.
    newState = strategy.reroute(clusterState, new AllocationCommands(), false, true).getClusterState();
    assertThat(newState, not(equalTo(clusterState)));
    clusterState = newState;
    routingTable = newState.routingTable();
    // Kept from the original test: rebuilds the state around the routing table just taken from it.
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(0, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));
    // again fail it N-1 times
    for (int i = 0; i < retries - 1; i++) {
        failedShards = Collections.singletonList(
            new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException(), randomBoolean())
        );
        newState = strategy.applyFailedShards(clusterState, failedShards);
        assertThat(newState, not(equalTo(clusterState)));
        clusterState = newState;
        routingTable = newState.routingTable();
        assertEquals(1, routingTable.index("idx").shards().size());
        assertEquals(i + 1, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
        assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));
    }
    // now check that the shard actually sticks to unassigned on the next failure
    failedShards = Collections.singletonList(
        new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException(), randomBoolean())
    );
    newState = strategy.applyFailedShards(clusterState, failedShards);
    assertThat(newState, not(equalTo(clusterState)));
    clusterState = newState;
    routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(retries, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RoutingTable(org.opensearch.cluster.routing.RoutingTable) AllocationCommands(org.opensearch.cluster.routing.allocation.command.AllocationCommands)

Example 5 with AllocationCommands

use of org.opensearch.cluster.routing.allocation.command.AllocationCommands in project OpenSearch by opensearch-project.

the class AllocationCommandsTests method testAllocateStalePrimaryCommand.

/**
 * Checks the effect of an {@link AllocateStalePrimaryAllocationCommand} issued with the
 * accept-data-loss argument set to {@code true}.
 *
 * <p>With allocation and rebalancing both set to "none", both copies of the single shard are
 * expected to be unassigned after the initial reroute. The command is then expected to put one
 * initializing shard on node1 and to replace the in-sync allocation ids of shard 0 with the
 * forced-allocation marker. Once the shard has started, the in-sync set must hold exactly one
 * id and must no longer be that marker.
 */
public void testAllocateStalePrimaryCommand() {
    AllocationService allocation = createAllocationService(
        Settings.builder()
            .put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), "none")
            .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")
            .build()
    );
    final String index = "test";
    logger.info("--> building initial routing table");
    Metadata metadata = Metadata.builder()
        .put(
            IndexMetadata.builder(index)
                .settings(settings(Version.CURRENT))
                .numberOfShards(1)
                .numberOfReplicas(1)
                .putInSyncAllocationIds(0, Collections.singleton("asdf"))
                .putInSyncAllocationIds(1, Collections.singleton("qwertz"))
        )
        .build();
    // Shard routing is added as "from recovery" instead of "new index creation".
    // NOTE(review): the earlier comment here mentioned an accept_data_loss=false failure check; this method performs none.
    RoutingTable routingTable = RoutingTable.builder().addAsRecovery(metadata.index(index)).build();
    ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(routingTable)
        .build();
    final String node1 = "node1";
    final String node2 = "node2";
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode(node1)).add(newNode(node2))).build();
    clusterState = allocation.reroute(clusterState, "reroute");
    // Both copies (primary and replica) must still be unassigned after the reroute.
    final List<ShardRouting> shardRoutings = clusterState.getRoutingNodes().shardsWithState(UNASSIGNED);
    assertThat(shardRoutings, hasSize(2));

    logger.info("--> allocating stale primary with acceptDataLoss flag set to true");
    clusterState = allocation.reroute(
        clusterState,
        new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, 0, node1, true)),
        false,
        false
    ).getClusterState();
    RoutingNode routingNode1 = clusterState.getRoutingNodes().node(node1);
    assertThat(routingNode1.size(), equalTo(1));
    assertThat(routingNode1.shardsWithState(INITIALIZING).size(), equalTo(1));
    // While the forced primary is initializing, the in-sync set holds only the forced-allocation marker.
    Set<String> inSyncAllocationIds = clusterState.metadata().index(index).inSyncAllocationIds(0);
    assertThat(inSyncAllocationIds, equalTo(Collections.singleton(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID)));

    clusterState = startInitializingShardsAndReroute(allocation, clusterState);
    routingNode1 = clusterState.getRoutingNodes().node(node1);
    assertThat(routingNode1.size(), equalTo(1));
    assertThat(routingNode1.shardsWithState(STARTED).size(), equalTo(1));
    // After the shard starts, the marker must have been replaced by a single other allocation id.
    inSyncAllocationIds = clusterState.metadata().index(index).inSyncAllocationIds(0);
    assertThat(inSyncAllocationIds, hasSize(1));
    assertThat(inSyncAllocationIds, not(Collections.singleton(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID)));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RoutingTable(org.opensearch.cluster.routing.RoutingTable) RoutingNode(org.opensearch.cluster.routing.RoutingNode) AllocateStalePrimaryAllocationCommand(org.opensearch.cluster.routing.allocation.command.AllocateStalePrimaryAllocationCommand) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Matchers.containsString(org.hamcrest.Matchers.containsString) ShardRouting(org.opensearch.cluster.routing.ShardRouting) AllocationCommands(org.opensearch.cluster.routing.allocation.command.AllocationCommands)

Aggregations

AllocationCommands (org.opensearch.cluster.routing.allocation.command.AllocationCommands) 23, ClusterState (org.opensearch.cluster.ClusterState) 19, IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata) 17, Metadata (org.opensearch.cluster.metadata.Metadata) 17, RoutingTable (org.opensearch.cluster.routing.RoutingTable) 14, MoveAllocationCommand (org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) 12, Matchers.containsString (org.hamcrest.Matchers.containsString) 6, ShardRouting (org.opensearch.cluster.routing.ShardRouting) 6, AllocateEmptyPrimaryAllocationCommand (org.opensearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand) 6, AllocateReplicaAllocationCommand (org.opensearch.cluster.routing.allocation.command.AllocateReplicaAllocationCommand) 5, AllocateStalePrimaryAllocationCommand (org.opensearch.cluster.routing.allocation.command.AllocateStalePrimaryAllocationCommand) 5, ShardId (org.opensearch.index.shard.ShardId) 5, TestGatewayAllocator (org.opensearch.test.gateway.TestGatewayAllocator) 5, DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode) 3, IndexRoutingTable (org.opensearch.cluster.routing.IndexRoutingTable) 3, IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable) 3, BalancedShardsAllocator (org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator) 3, CancelAllocationCommand (org.opensearch.cluster.routing.allocation.command.CancelAllocationCommand) 3, Collections.singletonList (java.util.Collections.singletonList) 2, HashSet (java.util.HashSet) 2