use of org.elasticsearch.cluster.ClusterChangedEvent in project elasticsearch by elastic.
the class DelayedAllocationServiceTests method testDelayedUnassignedScheduleRerouteAfterDelayedReroute.
/**
* This tests that a new delayed reroute is scheduled right after a delayed reroute was run.
* <p>
* Scenario: two indices ("short_delay" with a 100ms node-left timeout and "long_delay" with a
* 1s node-left timeout), each with one primary and one replica. The nodes holding both replicas
* are removed, which leaves two delayed unassigned shards. The test then checks, in order:
* <ol>
* <li>the first cluster-changed event schedules a delayed reroute for the remainder of the short delay;</li>
* <li>executing that reroute leaves exactly one delayed shard and clears the registered task;</li>
* <li>the following cluster-changed event schedules a second reroute for the remainder of the long delay;</li>
* <li>executing the second reroute leaves no delayed shards, and no further task is scheduled
* or submitted afterwards.</li>
* </ol>
* Time is controlled through {@code setNanoTimeOverride} on both the allocation service and the
* delayed allocation service; {@code clusterService.submitStateUpdateTask} is stubbed so the
* submitted update task can be captured and executed by hand.
*/
public void testDelayedUnassignedScheduleRerouteAfterDelayedReroute() throws Exception {
TimeValue shortDelaySetting = timeValueMillis(100);
TimeValue longDelaySetting = TimeValue.timeValueSeconds(1);
// two single-shard, single-replica indices that differ only in their node-left delay
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("short_delay").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shortDelaySetting)).numberOfShards(1).numberOfReplicas(1)).put(IndexMetaData.builder("long_delay").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), longDelaySetting)).numberOfShards(1).numberOfReplicas(1)).build();
// node0 is created with only the MASTER role and is both the local and the master node; node1..node4 are added via newNode(id)
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(RoutingTable.builder().addAsNew(metaData.index("short_delay")).addAsNew(metaData.index("long_delay")).build()).nodes(DiscoveryNodes.builder().add(newNode("node0", singleton(DiscoveryNode.Role.MASTER))).localNodeId("node0").masterNodeId("node0").add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4"))).build();
// allocate shards
clusterState = allocationService.reroute(clusterState, "reroute");
// start primaries
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// start replicas
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// 2 indices x (1 primary + 1 replica) = 4 started shards
assertThat("all shards should be started", clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(4));
// find replica of short_delay
ShardRouting shortDelayReplica = null;
for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("short_delay")) {
if (shardRouting.primary() == false) {
shortDelayReplica = shardRouting;
break;
}
}
assertNotNull(shortDelayReplica);
// find replica of long_delay
ShardRouting longDelayReplica = null;
for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("long_delay")) {
if (shardRouting.primary() == false) {
longDelayReplica = shardRouting;
break;
}
}
assertNotNull(longDelayReplica);
// reference timestamp: used below as the time at which both replicas become unassigned
final long baseTimestampNanos = System.nanoTime();
// remove node of shortDelayReplica and node of longDelayReplica and reroute
// keep the pre-removal state: it is the "previous state" of the first cluster-changed event
ClusterState clusterStateBeforeNodeLeft = clusterState;
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove(shortDelayReplica.currentNodeId()).remove(longDelayReplica.currentNodeId())).build();
// make sure both replicas are marked as delayed (i.e. not reallocated)
allocationService.setNanoTimeOverride(baseTimestampNanos);
clusterState = allocationService.deassociateDeadNodes(clusterState, true, "reroute");
final ClusterState stateWithDelayedShards = clusterState;
assertEquals(2, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShards));
// both unassigned shards must carry the overridden timestamp as their unassigned time
RoutingNodes.UnassignedShards.UnassignedIterator iter = stateWithDelayedShards.getRoutingNodes().unassigned().iterator();
assertEquals(baseTimestampNanos, iter.next().unassignedInfo().getUnassignedTimeInNanos());
assertEquals(baseTimestampNanos, iter.next().unassignedInfo().getUnassignedTimeInNanos());
// mock ClusterService.submitStateUpdateTask() method
// the stub captures the submitted task (second argument) and releases latch1 so the test can wait for the submission
CountDownLatch latch1 = new CountDownLatch(1);
AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask1 = new AtomicReference<>();
doAnswer(invocationOnMock -> {
clusterStateUpdateTask1.set((ClusterStateUpdateTask) invocationOnMock.getArguments()[1]);
latch1.countDown();
return null;
}).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class));
// no delayed reroute may be registered before the first cluster-changed event
assertNull(delayedAllocationService.delayedRerouteTask.get());
// random offset for the event, bounded by (short delay - 1ns) so the short delay has not yet expired
// (assumes randomInt(max) never returns more than max -- confirm against the test framework)
long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int) shortDelaySetting.nanos() - 1)).nanos();
long clusterChangeEventTimestampNanos = baseTimestampNanos + delayUntilClusterChangeEvent;
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithDelayedShards, clusterStateBeforeNodeLeft));
// check that delayed reroute task was created and registered with the proper settings
DelayedAllocationService.DelayedRerouteTask firstDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(firstDelayedRerouteTask);
assertFalse(firstDelayedRerouteTask.cancelScheduling.get());
assertThat(firstDelayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(firstDelayedRerouteTask.nextDelay.nanos(), equalTo(UnassignedInfo.findNextDelayedAllocation(clusterChangeEventTimestampNanos, stateWithDelayedShards)));
// expected delay = short delay minus the time already elapsed since the shards became unassigned
assertThat(firstDelayedRerouteTask.nextDelay.nanos(), equalTo(shortDelaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos)));
// check that submitStateUpdateTask() was invoked on the cluster service mock
assertTrue(latch1.await(30, TimeUnit.SECONDS));
verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask1.get()));
// advance the time on the allocation service to a timestamp that happened after the delayed scheduling
// (event time + full short delay + 0..50ms; with the bounds above this stays below the 1s long delay measured from the base timestamp)
long nanoTimeForReroute = clusterChangeEventTimestampNanos + shortDelaySetting.nanos() + timeValueMillis(randomInt(50)).nanos();
allocationService.setNanoTimeOverride(nanoTimeForReroute);
// apply cluster state
ClusterState stateWithOnlyOneDelayedShard = clusterStateUpdateTask1.get().execute(stateWithDelayedShards);
// check that shard is not delayed anymore
assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithOnlyOneDelayedShard));
// check that task is now removed
assertNull(delayedAllocationService.delayedRerouteTask.get());
// mock ClusterService.submitStateUpdateTask() method again
// re-stubbed with a fresh latch/reference so the second submission is captured separately from the first
CountDownLatch latch2 = new CountDownLatch(1);
AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask2 = new AtomicReference<>();
doAnswer(invocationOnMock -> {
clusterStateUpdateTask2.set((ClusterStateUpdateTask) invocationOnMock.getArguments()[1]);
latch2.countDown();
return null;
}).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class));
// simulate calling listener (cluster change event)
// the event is placed 0..50ms after the time at which the first reroute was applied
delayUntilClusterChangeEvent = timeValueMillis(randomInt(50)).nanos();
clusterChangeEventTimestampNanos = nanoTimeForReroute + delayUntilClusterChangeEvent;
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithOnlyOneDelayedShard, stateWithDelayedShards));
// check that new delayed reroute task was created and registered with the proper settings
DelayedAllocationService.DelayedRerouteTask secondDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(secondDelayedRerouteTask);
assertFalse(secondDelayedRerouteTask.cancelScheduling.get());
assertThat(secondDelayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(secondDelayedRerouteTask.nextDelay.nanos(), equalTo(UnassignedInfo.findNextDelayedAllocation(clusterChangeEventTimestampNanos, stateWithOnlyOneDelayedShard)));
// expected delay = long delay minus the time elapsed since the original base timestamp
assertThat(secondDelayedRerouteTask.nextDelay.nanos(), equalTo(longDelaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos)));
// check that submitStateUpdateTask() was invoked on the cluster service mock
assertTrue(latch2.await(30, TimeUnit.SECONDS));
verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask2.get()));
// advance the time on the allocation service to a timestamp that happened after the delayed scheduling
nanoTimeForReroute = clusterChangeEventTimestampNanos + longDelaySetting.nanos() + timeValueMillis(randomInt(50)).nanos();
allocationService.setNanoTimeOverride(nanoTimeForReroute);
// apply cluster state
ClusterState stateWithNoDelayedShards = clusterStateUpdateTask2.get().execute(stateWithOnlyOneDelayedShard);
// check that shard is not delayed anymore
assertEquals(0, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithNoDelayedShards));
// check that task is now removed
assertNull(delayedAllocationService.delayedRerouteTask.get());
// simulate calling listener (cluster change event)
delayedAllocationService.setNanoTimeOverride(nanoTimeForReroute + timeValueMillis(randomInt(50)).nanos());
delayedAllocationService.clusterChanged(new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithNoDelayedShards, stateWithOnlyOneDelayedShard));
// check that no new task is scheduled
assertNull(delayedAllocationService.delayedRerouteTask.get());
// check that no further cluster state update was submitted
verifyNoMoreInteractions(clusterService);
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project crate by crate.
the class BulkRetryCoordinatorPoolTest method testReturnDifferentCoordinatorForRelocatedShardFromRemovedNode.
@Test
public void testReturnDifferentCoordinatorForRelocatedShardFromRemovedNode() throws Exception {
    // Coordinator handed out for the shard before the cluster changes.
    final ShardId relocatedShard = new ShardId(TEST_INDEX, 1);
    final BulkRetryCoordinator before = pool.coordinator(relocatedShard);

    // Build a state from a fresh node builder holding only NODE_IDS[1], then reroute it
    // so the routing table reflects the new node set.
    final ClusterState withNewNodes = ClusterState.builder(state)
        .nodes(DiscoveryNodes.builder().put(newNode(NODE_IDS[1])))
        .build();
    final RoutingTable rerouted = createAllocationService().reroute(withNewNodes, "test").routingTable();
    final ClusterState updated = ClusterState.builder(withNewNodes).routingTable(rerouted).build();

    // Notify the pool about the transition from the old state to the updated one.
    pool.clusterChanged(new ClusterChangedEvent("bla", updated, state));

    // The pool must now hand out a different coordinator instance for the same shard.
    final BulkRetryCoordinator after = pool.coordinator(relocatedShard);
    assertThat(before, not(sameInstance(after)));
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project crate by crate.
the class ClusterApplierService method applyChanges.
/**
 * Applies {@code newClusterState} locally: logs node changes, connects to the nodes of the
 * new state, applies cluster settings when permitted, runs the state appliers, drops
 * connections to nodes that are no longer part of the state, publishes the state as the
 * locally applied one and finally notifies the state listeners.
 *
 * @param task                 the update task; its {@code source} is used as the event source and in logging
 * @param previousClusterState the state that was applied before this call
 * @param newClusterState      the state to apply
 * @param stopWatch            used to time the "connecting to new nodes" and "applying settings" phases
 *                             and passed on to the applier/listener callbacks
 */
private void applyChanges(UpdateTask task, ClusterState previousClusterState, ClusterState newClusterState, StopWatch stopWatch) {
    final ClusterChangedEvent event = new ClusterChangedEvent(task.source, newClusterState, previousClusterState);

    // Report node membership changes at info level, but only when there is something to say.
    final DiscoveryNodes.Delta delta = event.nodesDelta();
    if (delta.hasChanges() && LOGGER.isInfoEnabled()) {
        final String deltaSummary = delta.shortSummary();
        if (!deltaSummary.isEmpty()) {
            LOGGER.info(
                "{}, term: {}, version: {}, reason: {}",
                deltaSummary,
                newClusterState.term(),
                newClusterState.version(),
                task.source
            );
        }
    }

    LOGGER.trace("connecting to nodes of cluster state with version {}", newClusterState.version());
    try (Releasable ignored = stopWatch.timing("connecting to new nodes")) {
        connectToNodesAndWait(newClusterState);
    }

    // Settings are only applied once state persistence is not disabled by a block
    // (e.g. before recovery from the gateway) and the metadata actually changed.
    final boolean persistenceAllowed = event.state().blocks().disableStatePersistence() == false;
    if (persistenceAllowed && event.metadataChanged()) {
        LOGGER.debug("applying settings from cluster state with version {}", newClusterState.version());
        final Settings incomingSettings = event.state().metadata().settings();
        try (Releasable ignored = stopWatch.timing("applying settings")) {
            clusterSettings.applySettings(incomingSettings);
        }
    }

    LOGGER.debug("apply cluster state with version {}", newClusterState.version());
    callClusterStateAppliers(event, stopWatch);

    // Appliers have run; connections to nodes outside the new state are no longer needed.
    nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes());

    LOGGER.debug("set locally applied cluster state to version {}", newClusterState.version());
    state.set(newClusterState);

    // Listeners are notified only after the new state has been set as the applied state.
    callClusterStateListeners(event, stopWatch);
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project crate by crate.
the class MasterService method runTasks.
/**
 * Runs one batch of cluster state update tasks: computes the resulting cluster state,
 * notifies failed tasks, and either acknowledges the tasks when the state is unchanged or
 * publishes the new state.
 * <p>
 * The batch is skipped when the service is not started, and handed to
 * {@code taskInputs.onNoLongerMaster()} when it requires a master but the local node is not
 * the elected master. Exceptions thrown while building the change event or publishing are
 * passed to {@code handleException}.
 *
 * @param taskInputs the batch of tasks to run, together with its summary used for logging
 */
private void runTasks(TaskInputs taskInputs) {
    final String summary = taskInputs.summary;
    if (!lifecycle.started()) {
        LOGGER.debug("processing [{}]: ignoring, master service not started", summary);
        return;
    }
    LOGGER.debug("executing cluster state update for [{}]", summary);
    final ClusterState previousClusterState = state();
    if (!previousClusterState.nodes().isLocalNodeElectedMaster() && taskInputs.runOnlyWhenMaster()) {
        LOGGER.debug("failing [{}]: local node is no longer master", summary);
        taskInputs.onNoLongerMaster();
        return;
    }

    // Phase 1: compute the new cluster state and report tasks that failed during computation.
    final long computationStartTime = threadPool.relativeTimeInMillis();
    final TaskOutputs taskOutputs = calculateTaskOutputs(taskInputs, previousClusterState);
    taskOutputs.notifyFailedTasks();
    final TimeValue computationTime = getTimeSince(computationStartTime);
    logExecutionTime(computationTime, "compute cluster state update", summary);

    if (taskOutputs.clusterStateUnchanged()) {
        // Nothing to publish: just tell the successful tasks that they completed.
        final long notificationStartTime = threadPool.relativeTimeInMillis();
        taskOutputs.notifySuccessfulTasksOnUnchangedClusterState();
        final TimeValue executionTime = getTimeSince(notificationStartTime);
        logExecutionTime(executionTime, "notify listeners on unchanged cluster state", summary);
    } else {
        // Phase 2: the state changed, so log it and publish it.
        final ClusterState newClusterState = taskOutputs.newClusterState;
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("cluster state updated, source [{}]\n{}", summary, newClusterState);
        } else {
            LOGGER.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), summary);
        }
        final long publicationStartTime = threadPool.relativeTimeInMillis();
        try {
            ClusterChangedEvent clusterChangedEvent = new ClusterChangedEvent(summary, newClusterState, previousClusterState);
            // new cluster state, notify all listeners
            final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta();
            if (nodesDelta.hasChanges() && LOGGER.isInfoEnabled()) {
                String nodeSummary = nodesDelta.shortSummary();
                if (nodeSummary.length() > 0) {
                    // The line leads with the task summary (the reason for the update); the
                    // trailing field is the node delta, so it is labelled "delta" rather
                    // than "reason" to avoid presenting the node change as the cause.
                    LOGGER.info("{}, term: {}, version: {}, delta: {}", summary, newClusterState.term(), newClusterState.version(), nodeSummary);
                }
            }
            LOGGER.debug("publishing cluster state version [{}]", newClusterState.version());
            publish(clusterChangedEvent, taskOutputs, publicationStartTime);
        } catch (Exception e) {
            handleException(summary, publicationStartTime, newClusterState, e);
        }
    }
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project crate by crate.
the class CrateSettingsTest method testSettingsChanged.
@Test
public void testSettingsChanged() {
    final CrateSettings underTest = new CrateSettings(clusterService, clusterService.getSettings());
    final String statsEnabledKey = JobsLogService.STATS_ENABLED_SETTING.getKey();

    // New state whose metadata carries the stats setting as a transient setting set to true.
    final Settings transientSettings = Settings.builder().put(statsEnabledKey, true).build();
    final ClusterState updated = ClusterState.builder(clusterService.state())
        .metadata(Metadata.builder().transientSettings(transientSettings))
        .build();

    underTest.clusterChanged(new ClusterChangedEvent("settings updated", updated, clusterService.state()));

    // The changed value must be visible through CrateSettings (the fallback default is false).
    final boolean statsEnabled = underTest.settings().getAsBoolean(statsEnabledKey, false);
    assertThat(statsEnabled, is(true));
}
Aggregations