use of org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand in project OpenSearch by opensearch-project.
the class ClusterRerouteIT method testClusterRerouteWithBlocks.
public void testClusterRerouteWithBlocks() {
List<String> nodesIds = internalCluster().startNodes(2);
logger.info("--> create an index with 1 shard and 0 replicas");
createIndex("test-blocks", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build());
if (randomBoolean()) {
assertAcked(client().admin().indices().prepareClose("test-blocks"));
}
ensureGreen("test-blocks");
logger.info("--> check that the index has 1 shard");
ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState();
List<ShardRouting> shards = state.routingTable().allShards("test-blocks");
assertThat(shards, hasSize(1));
logger.info("--> check that the shard is allocated");
ShardRouting shard = shards.get(0);
assertThat(shard.assignedToNode(), equalTo(true));
logger.info("--> retrieve the node where the shard is allocated");
DiscoveryNode node = state.nodes().resolveNode(shard.currentNodeId());
assertNotNull(node);
// toggle is used to mve the shard from one node to another
int toggle = nodesIds.indexOf(node.getName());
// Rerouting shards is not blocked
for (String blockSetting : Arrays.asList(SETTING_BLOCKS_READ, SETTING_BLOCKS_WRITE, SETTING_READ_ONLY, SETTING_BLOCKS_METADATA, SETTING_READ_ONLY_ALLOW_DELETE)) {
try {
enableIndexBlock("test-blocks", blockSetting);
assertAcked(client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test-blocks", 0, nodesIds.get(toggle % 2), nodesIds.get(++toggle % 2))));
ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setIndices("test-blocks").setWaitForYellowStatus().setWaitForNoRelocatingShards(true).execute().actionGet();
assertThat(healthResponse.isTimedOut(), equalTo(false));
} finally {
disableIndexBlock("test-blocks", blockSetting);
}
}
// Rerouting shards is blocked when the cluster is read only
try {
setClusterReadOnly(true);
assertBlocked(client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test-blocks", 1, nodesIds.get(toggle % 2), nodesIds.get(++toggle % 2))));
} finally {
setClusterReadOnly(false);
}
}
use of org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand in project OpenSearch by opensearch-project.
the class ClusterRerouteIT method rerouteWithCommands.
private void rerouteWithCommands(Settings commonSettings) throws Exception {
List<String> nodesIds = internalCluster().startNodes(2, commonSettings);
final String node_1 = nodesIds.get(0);
final String node_2 = nodesIds.get(1);
logger.info("--> create an index with 1 shard, 1 replica, nothing should allocate");
client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).setSettings(Settings.builder().put("index.number_of_shards", 1)).execute().actionGet();
if (randomBoolean()) {
client().admin().indices().prepareClose("test").get();
}
ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState();
assertThat(state.getRoutingNodes().unassigned().size(), equalTo(2));
logger.info("--> explicitly allocate shard 1, *under dry_run*");
state = client().admin().cluster().prepareReroute().setExplain(randomBoolean()).add(new AllocateEmptyPrimaryAllocationCommand("test", 0, node_1, true)).setDryRun(true).execute().actionGet().getState();
assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1));
assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING));
logger.info("--> get the state, verify nothing changed because of the dry run");
state = client().admin().cluster().prepareState().execute().actionGet().getState();
assertThat(state.getRoutingNodes().unassigned().size(), equalTo(2));
logger.info("--> explicitly allocate shard 1, actually allocating, no dry run");
state = client().admin().cluster().prepareReroute().setExplain(randomBoolean()).add(new AllocateEmptyPrimaryAllocationCommand("test", 0, node_1, true)).execute().actionGet().getState();
assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1));
assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING));
ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setIndices("test").setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet();
assertThat(healthResponse.isTimedOut(), equalTo(false));
logger.info("--> get the state, verify shard 1 primary allocated");
state = client().admin().cluster().prepareState().execute().actionGet().getState();
assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1));
assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
logger.info("--> move shard 1 primary from node1 to node2");
state = client().admin().cluster().prepareReroute().setExplain(randomBoolean()).add(new MoveAllocationCommand("test", 0, node_1, node_2)).execute().actionGet().getState();
assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_1).getId()).iterator().next().state(), equalTo(ShardRoutingState.RELOCATING));
assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_2).getId()).iterator().next().state(), equalTo(ShardRoutingState.INITIALIZING));
healthResponse = client().admin().cluster().prepareHealth().setIndices("test").setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().setWaitForNoRelocatingShards(true).execute().actionGet();
assertThat(healthResponse.isTimedOut(), equalTo(false));
logger.info("--> get the state, verify shard 1 primary moved from node1 to node2");
state = client().admin().cluster().prepareState().execute().actionGet().getState();
assertThat(state.getRoutingNodes().unassigned().size(), equalTo(1));
assertThat(state.getRoutingNodes().node(state.nodes().resolveNode(node_2).getId()).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
}
use of org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand in project OpenSearch by opensearch-project.
the class IndexRecoveryIT method testRerouteRecovery.
public void testRerouteRecovery() throws Exception {
logger.info("--> start node A");
final String nodeA = internalCluster().startNode();
logger.info("--> create index on node: {}", nodeA);
ByteSizeValue shardSize = createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT).getShards()[0].getStats().getStore().size();
logger.info("--> start node B");
final String nodeB = internalCluster().startNode();
ensureGreen();
logger.info("--> slowing down recoveries");
slowDownRecovery(shardSize);
logger.info("--> move shard from: {} to: {}", nodeA, nodeB);
client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeB)).execute().actionGet().getState();
logger.info("--> waiting for recovery to start both on source and target");
final Index index = resolveIndex(INDEX_NAME);
assertBusy(() -> {
IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeA);
assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsSource(), equalTo(1));
indicesService = internalCluster().getInstance(IndicesService.class, nodeB);
assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsTarget(), equalTo(1));
});
logger.info("--> request recoveries");
RecoveryResponse response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
List<RecoveryState> recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
List<RecoveryState> nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
assertThat(nodeARecoveryStates.size(), equalTo(1));
List<RecoveryState> nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
assertThat(nodeBRecoveryStates.size(), equalTo(1));
assertRecoveryState(nodeARecoveryStates.get(0), 0, RecoverySource.EmptyStoreRecoverySource.INSTANCE, true, Stage.DONE, null, nodeA);
validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());
assertOnGoingRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, nodeA, nodeB);
validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
logger.info("--> request node recovery stats");
NodesStatsResponse statsResponse = client().admin().cluster().prepareNodesStats().clear().setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)).get();
long nodeAThrottling = Long.MAX_VALUE;
long nodeBThrottling = Long.MAX_VALUE;
for (NodeStats nodeStats : statsResponse.getNodes()) {
final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
if (nodeStats.getNode().getName().equals(nodeA)) {
assertThat("node A should have ongoing recovery as source", recoveryStats.currentAsSource(), equalTo(1));
assertThat("node A should not have ongoing recovery as target", recoveryStats.currentAsTarget(), equalTo(0));
nodeAThrottling = recoveryStats.throttleTime().millis();
}
if (nodeStats.getNode().getName().equals(nodeB)) {
assertThat("node B should not have ongoing recovery as source", recoveryStats.currentAsSource(), equalTo(0));
assertThat("node B should have ongoing recovery as target", recoveryStats.currentAsTarget(), equalTo(1));
nodeBThrottling = recoveryStats.throttleTime().millis();
}
}
logger.info("--> checking throttling increases");
final long finalNodeAThrottling = nodeAThrottling;
final long finalNodeBThrottling = nodeBThrottling;
assertBusy(() -> {
NodesStatsResponse statsResponse1 = client().admin().cluster().prepareNodesStats().clear().setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)).get();
assertThat(statsResponse1.getNodes(), hasSize(2));
for (NodeStats nodeStats : statsResponse1.getNodes()) {
final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
if (nodeStats.getNode().getName().equals(nodeA)) {
assertThat("node A throttling should increase", recoveryStats.throttleTime().millis(), greaterThan(finalNodeAThrottling));
}
if (nodeStats.getNode().getName().equals(nodeB)) {
assertThat("node B throttling should increase", recoveryStats.throttleTime().millis(), greaterThan(finalNodeBThrottling));
}
}
});
logger.info("--> speeding up recoveries");
restoreRecoverySpeed();
// wait for it to be finished
ensureGreen();
response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
assertThat(recoveryStates.size(), equalTo(1));
assertRecoveryState(recoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
validateIndexRecoveryState(recoveryStates.get(0).getIndex());
Consumer<String> assertNodeHasThrottleTimeAndNoRecoveries = nodeName -> {
NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setNodesIds(nodeName).clear().setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)).get();
assertThat(nodesStatsResponse.getNodes(), hasSize(1));
NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);
final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
assertThat(recoveryStats.currentAsSource(), equalTo(0));
assertThat(recoveryStats.currentAsTarget(), equalTo(0));
assertThat(nodeName + " throttling should be >0", recoveryStats.throttleTime().millis(), greaterThan(0L));
};
// we have to use assertBusy as recovery counters are decremented only when the last reference to the RecoveryTarget
// is decremented, which may happen after the recovery was done.
assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeA));
assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeB));
logger.info("--> bump replica count");
client().admin().indices().prepareUpdateSettings(INDEX_NAME).setSettings(Settings.builder().put("number_of_replicas", 1)).execute().actionGet();
ensureGreen();
assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeA));
assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeB));
logger.info("--> start node C");
String nodeC = internalCluster().startNode();
assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("3").get().isTimedOut());
logger.info("--> slowing down recoveries");
slowDownRecovery(shardSize);
logger.info("--> move replica shard from: {} to: {}", nodeA, nodeC);
client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeC)).execute().actionGet().getState();
response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
assertThat(nodeARecoveryStates.size(), equalTo(1));
nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
assertThat(nodeBRecoveryStates.size(), equalTo(1));
List<RecoveryState> nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
assertThat(nodeCRecoveryStates.size(), equalTo(1));
assertRecoveryState(nodeARecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, Stage.DONE, nodeB, nodeA);
validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());
assertRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
// relocations of replicas are marked as REPLICA and the source node is the node holding the primary (B)
assertOnGoingRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
if (randomBoolean()) {
// shutdown node with relocation source of replica shard and check if recovery continues
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeA));
ensureStableCluster(2);
response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
assertThat(nodeARecoveryStates.size(), equalTo(0));
nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
assertThat(nodeBRecoveryStates.size(), equalTo(1));
nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
assertThat(nodeCRecoveryStates.size(), equalTo(1));
assertRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
assertOnGoingRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
}
logger.info("--> speeding up recoveries");
restoreRecoverySpeed();
ensureGreen();
response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
assertThat(nodeARecoveryStates.size(), equalTo(0));
nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
assertThat(nodeBRecoveryStates.size(), equalTo(1));
nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
assertThat(nodeCRecoveryStates.size(), equalTo(1));
assertRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
// relocations of replicas are marked as REPLICA and the source node is the node holding the primary (B)
assertRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, Stage.DONE, nodeB, nodeC);
validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
}
use of org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand in project OpenSearch by opensearch-project.
the class IndicesLifecycleListenerIT method testIndexShardFailedOnRelocation.
/**
* Tests that if an *index* structure creation fails on relocation to a new node, the shard
* is not stuck but properly failed.
*/
public void testIndexShardFailedOnRelocation() throws Throwable {
String node1 = internalCluster().startNode();
client().admin().indices().prepareCreate("index1").setSettings(Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0)).get();
ensureGreen("index1");
String node2 = internalCluster().startNode();
internalCluster().getInstance(MockIndexEventListener.TestEventListener.class, node2).setNewDelegate(new IndexShardStateChangeListener() {
@Override
public void beforeIndexCreated(Index index, Settings indexSettings) {
throw new RuntimeException("FAIL");
}
});
client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("index1", 0, node1, node2)).get();
ensureGreen("index1");
ClusterState state = client().admin().cluster().prepareState().get().getState();
List<ShardRouting> shard = state.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED);
assertThat(shard, hasSize(1));
assertThat(state.nodes().resolveNode(shard.get(0).currentNodeId()).getName(), Matchers.equalTo(node1));
}
use of org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand in project OpenSearch by opensearch-project.
the class RelocationIT method testSimpleRelocationNoIndexing.
public void testSimpleRelocationNoIndexing() {
logger.info("--> starting [node1] ...");
final String node_1 = internalCluster().startNode();
logger.info("--> creating test index ...");
prepareCreate("test", Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0)).get();
logger.info("--> index 10 docs");
for (int i = 0; i < 10; i++) {
client().prepareIndex("test").setId(Integer.toString(i)).setSource("field", "value" + i).execute().actionGet();
}
logger.info("--> flush so we have an actual index");
client().admin().indices().prepareFlush().execute().actionGet();
logger.info("--> index more docs so we have something in the translog");
for (int i = 10; i < 20; i++) {
client().prepareIndex("test").setId(Integer.toString(i)).setSource("field", "value" + i).execute().actionGet();
}
logger.info("--> verifying count");
client().admin().indices().prepareRefresh().execute().actionGet();
assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().getTotalHits().value, equalTo(20L));
logger.info("--> start another node");
final String node_2 = internalCluster().startNode();
ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForNodes("2").execute().actionGet();
assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
logger.info("--> relocate the shard from node1 to node2");
client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, node_1, node_2)).execute().actionGet();
clusterHealthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForNoRelocatingShards(true).setTimeout(ACCEPTABLE_RELOCATION_TIME).execute().actionGet();
assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
logger.info("--> verifying count again...");
client().admin().indices().prepareRefresh().execute().actionGet();
assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().getTotalHits().value, equalTo(20L));
}
Aggregations