Use of org.elasticsearch.cluster.ClusterChangedEvent in the project elasticsearch by elastic.
From the class DelayedAllocationServiceTests, method testDelayedUnassignedScheduleRerouteAfterDelayedReroute.
/**
* This tests that a new delayed reroute is scheduled right after a delayed reroute was run.
* <p>
* Two indices are created, "short_delay" (100ms node-left delay) and "long_delay" (1s node-left delay), each with
* one primary and one replica. After all four shards are started, the nodes holding both replicas are removed so
* that both replicas become delayed-unassigned. The test then drives
* {@code delayedAllocationService.clusterChanged(...)} with overridden nano-time values and checks that:
* <ol>
* <li>a first delayed reroute task is registered whose delay is the remainder of the short delay,</li>
* <li>after executing the captured update task only one shard is still delayed and a second task is registered
* whose delay is the remainder of the long delay,</li>
* <li>after executing the second captured update task no shard is delayed and no further task is scheduled.</li>
* </ol>
*/
public void testDelayedUnassignedScheduleRerouteAfterDelayedReroute() throws Exception {
TimeValue shortDelaySetting = timeValueMillis(100);
TimeValue longDelaySetting = TimeValue.timeValueSeconds(1);
// two single-shard, single-replica indices that differ only in their node-left delay setting
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("short_delay").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shortDelaySetting)).numberOfShards(1).numberOfReplicas(1)).put(IndexMetaData.builder("long_delay").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), longDelaySetting)).numberOfShards(1).numberOfReplicas(1)).build();
// node0 is the local master-role node; node1..node4 are added with newNode's defaults
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(RoutingTable.builder().addAsNew(metaData.index("short_delay")).addAsNew(metaData.index("long_delay")).build()).nodes(DiscoveryNodes.builder().add(newNode("node0", singleton(DiscoveryNode.Role.MASTER))).localNodeId("node0").masterNodeId("node0").add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4"))).build();
// allocate shards
clusterState = allocationService.reroute(clusterState, "reroute");
// start primaries
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// start replicas
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
assertThat("all shards should be started", clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(4));
// find replica of short_delay
ShardRouting shortDelayReplica = null;
for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("short_delay")) {
if (shardRouting.primary() == false) {
shortDelayReplica = shardRouting;
break;
}
}
assertNotNull(shortDelayReplica);
// find replica of long_delay
ShardRouting longDelayReplica = null;
for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("long_delay")) {
if (shardRouting.primary() == false) {
longDelayReplica = shardRouting;
break;
}
}
assertNotNull(longDelayReplica);
// reference point for every simulated timestamp below; all later times are expressed as offsets from it
final long baseTimestampNanos = System.nanoTime();
// remove node of shortDelayReplica and node of longDelayReplica and reroute
ClusterState clusterStateBeforeNodeLeft = clusterState;
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove(shortDelayReplica.currentNodeId()).remove(longDelayReplica.currentNodeId())).build();
// make sure both replicas are marked as delayed (i.e. not reallocated)
allocationService.setNanoTimeOverride(baseTimestampNanos);
clusterState = allocationService.deassociateDeadNodes(clusterState, true, "reroute");
final ClusterState stateWithDelayedShards = clusterState;
assertEquals(2, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShards));
// both unassigned shards must carry the overridden timestamp as their unassigned time
RoutingNodes.UnassignedShards.UnassignedIterator iter = stateWithDelayedShards.getRoutingNodes().unassigned().iterator();
assertEquals(baseTimestampNanos, iter.next().unassignedInfo().getUnassignedTimeInNanos());
assertEquals(baseTimestampNanos, iter.next().unassignedInfo().getUnassignedTimeInNanos());
// mock ClusterService.submitStateUpdateTask() method
// (the stub only captures the submitted task and releases the latch; the task is executed manually further down)
CountDownLatch latch1 = new CountDownLatch(1);
AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask1 = new AtomicReference<>();
doAnswer(invocationOnMock -> {
clusterStateUpdateTask1.set((ClusterStateUpdateTask) invocationOnMock.getArguments()[1]);
latch1.countDown();
return null;
}).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class));
assertNull(delayedAllocationService.delayedRerouteTask.get());
// simulated time of the cluster-changed event, bounded by (short delay - 1ns) so the short delay has not yet expired
// NOTE(review): assumes randomInt(max) returns a value in [0, max] - confirm against the test base class
long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int) shortDelaySetting.nanos() - 1)).nanos();
long clusterChangeEventTimestampNanos = baseTimestampNanos + delayUntilClusterChangeEvent;
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithDelayedShards, clusterStateBeforeNodeLeft));
// check that delayed reroute task was created and registered with the proper settings
DelayedAllocationService.DelayedRerouteTask firstDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(firstDelayedRerouteTask);
assertFalse(firstDelayedRerouteTask.cancelScheduling.get());
assertThat(firstDelayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(firstDelayedRerouteTask.nextDelay.nanos(), equalTo(UnassignedInfo.findNextDelayedAllocation(clusterChangeEventTimestampNanos, stateWithDelayedShards)));
// expected delay = configured short delay minus the time already elapsed since the shards became unassigned
assertThat(firstDelayedRerouteTask.nextDelay.nanos(), equalTo(shortDelaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos)));
// check that submitStateUpdateTask() was invoked on the cluster service mock
assertTrue(latch1.await(30, TimeUnit.SECONDS));
verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask1.get()));
// advance the time on the allocation service to a timestamp that happened after the delayed scheduling
long nanoTimeForReroute = clusterChangeEventTimestampNanos + shortDelaySetting.nanos() + timeValueMillis(randomInt(50)).nanos();
allocationService.setNanoTimeOverride(nanoTimeForReroute);
// apply cluster state
ClusterState stateWithOnlyOneDelayedShard = clusterStateUpdateTask1.get().execute(stateWithDelayedShards);
// check that shard is not delayed anymore
assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithOnlyOneDelayedShard));
// check that task is now removed
assertNull(delayedAllocationService.delayedRerouteTask.get());
// mock ClusterService.submitStateUpdateTask() method again
// (re-stubbed with a fresh latch and reference so the second submission is captured separately from the first)
CountDownLatch latch2 = new CountDownLatch(1);
AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask2 = new AtomicReference<>();
doAnswer(invocationOnMock -> {
clusterStateUpdateTask2.set((ClusterStateUpdateTask) invocationOnMock.getArguments()[1]);
latch2.countDown();
return null;
}).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class));
// simulate calling listener (cluster change event)
delayUntilClusterChangeEvent = timeValueMillis(randomInt(50)).nanos();
clusterChangeEventTimestampNanos = nanoTimeForReroute + delayUntilClusterChangeEvent;
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithOnlyOneDelayedShard, stateWithDelayedShards));
// check that new delayed reroute task was created and registered with the proper settings
DelayedAllocationService.DelayedRerouteTask secondDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(secondDelayedRerouteTask);
assertFalse(secondDelayedRerouteTask.cancelScheduling.get());
assertThat(secondDelayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(secondDelayedRerouteTask.nextDelay.nanos(), equalTo(UnassignedInfo.findNextDelayedAllocation(clusterChangeEventTimestampNanos, stateWithOnlyOneDelayedShard)));
// expected delay = configured long delay minus the time elapsed since the original unassignment (still measured from baseTimestampNanos)
assertThat(secondDelayedRerouteTask.nextDelay.nanos(), equalTo(longDelaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos)));
// check that submitStateUpdateTask() was invoked on the cluster service mock
assertTrue(latch2.await(30, TimeUnit.SECONDS));
verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask2.get()));
// advance the time on the allocation service to a timestamp that happened after the delayed scheduling
nanoTimeForReroute = clusterChangeEventTimestampNanos + longDelaySetting.nanos() + timeValueMillis(randomInt(50)).nanos();
allocationService.setNanoTimeOverride(nanoTimeForReroute);
// apply cluster state
ClusterState stateWithNoDelayedShards = clusterStateUpdateTask2.get().execute(stateWithOnlyOneDelayedShard);
// check that shard is not delayed anymore
assertEquals(0, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithNoDelayedShards));
// check that task is now removed
assertNull(delayedAllocationService.delayedRerouteTask.get());
// simulate calling listener (cluster change event)
delayedAllocationService.setNanoTimeOverride(nanoTimeForReroute + timeValueMillis(randomInt(50)).nanos());
delayedAllocationService.clusterChanged(new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithNoDelayedShards, stateWithOnlyOneDelayedShard));
// check that no new task is scheduled
assertNull(delayedAllocationService.delayedRerouteTask.get());
// check that no further cluster state update was submitted
verifyNoMoreInteractions(clusterService);
}
Use of org.elasticsearch.cluster.ClusterChangedEvent in the project crate by crate.
From the class BulkRetryCoordinatorPoolTest, method testReturnDifferentCoordinatorForRelocatedShardFromRemovedNode.
/**
 * Checks that the pool returns a different coordinator instance for a shard after a cluster-changed event
 * whose new state contains only the node {@code NODE_IDS[1]} and a freshly rerouted routing table.
 */
@Test
public void testReturnDifferentCoordinatorForRelocatedShardFromRemovedNode() throws Exception {
    // coordinator handed out for the shard before the cluster changes
    final ShardId relocatedShard = new ShardId(TEST_INDEX, 1);
    final BulkRetryCoordinator coordinatorBefore = pool.coordinator(relocatedShard);

    // derive a state from the current one whose node set is rebuilt with NODE_IDS[1] only
    final ClusterState reducedState = ClusterState.builder(state)
        .nodes(DiscoveryNodes.builder().put(newNode(NODE_IDS[1])))
        .build();

    // reroute against the reduced node set and fold the resulting routing table back into the state
    final RoutingTable reroutedTable = createAllocationService().reroute(reducedState, "test").routingTable();
    final ClusterState reroutedState = ClusterState.builder(reducedState).routingTable(reroutedTable).build();

    // notify the pool of the transition from the old state to the rerouted one
    pool.clusterChanged(new ClusterChangedEvent("bla", reroutedState, state));

    // the pool must hand out a new coordinator instance for the same shard
    final BulkRetryCoordinator coordinatorAfter = pool.coordinator(relocatedShard);
    assertThat(coordinatorBefore, not(sameInstance(coordinatorAfter)));
}
Aggregations