use of org.elasticsearch.cluster.ClusterChangedEvent in project elasticsearch by elastic.
the class IndicesClusterStateServiceRandomUpdatesTests method testJoiningNewClusterOnlyRemovesInMemoryIndexStructures.
/**
* This test ensures that when a node joins a brand new cluster (different cluster UUID),
* different from the cluster it was previously a part of, the in-memory index data structures
* are all removed but the on disk contents of those indices remain so that they can later be
* imported as dangling indices. Normally, the first cluster state update that the node
* receives from the new cluster would contain a cluster block that would cause all in-memory
* structures to be removed (see {@link IndicesClusterStateService#applyClusterState(ClusterChangedEvent)}),
* but in the case where the node joined and was a few cluster state updates behind, it would
* not have received the cluster block, in which case we still need to remove the in-memory
* structures while ensuring the data remains on disk. This test executes this particular
* scenario.
*/
public void testJoiningNewClusterOnlyRemovesInMemoryIndexStructures() {
// a cluster state derived from the initial state that includes a created index
String name = "index_" + randomAsciiOfLength(8).toLowerCase(Locale.ROOT);
ShardRoutingState[] replicaStates = new ShardRoutingState[randomIntBetween(0, 3)];
Arrays.fill(replicaStates, ShardRoutingState.INITIALIZING);
ClusterState stateWithIndex = ClusterStateCreationUtils.state(name, randomBoolean(), ShardRoutingState.INITIALIZING, replicaStates);
// the initial state which is derived from the newly created cluster state but doesn't contain the index
ClusterState initialState = ClusterState.builder(stateWithIndex).metaData(MetaData.builder(stateWithIndex.metaData()).remove(name)).routingTable(RoutingTable.builder().build()).build();
// pick a data node to simulate the adding an index cluster state change event on, that has shards assigned to it
DiscoveryNode node = stateWithIndex.nodes().get(randomFrom(stateWithIndex.routingTable().index(name).shardsWithState(INITIALIZING)).currentNodeId());
// simulate the cluster state change on the node
ClusterState localState = adaptClusterStateToLocalNode(stateWithIndex, node);
ClusterState previousLocalState = adaptClusterStateToLocalNode(initialState, node);
IndicesClusterStateService indicesCSSvc = createIndicesClusterStateService(node, RecordingIndicesService::new);
indicesCSSvc.start();
indicesCSSvc.applyClusterState(new ClusterChangedEvent("cluster state change that adds the index", localState, previousLocalState));
// create a new empty cluster state with a brand new cluster UUID
ClusterState newClusterState = ClusterState.builder(initialState).metaData(MetaData.builder(initialState.metaData()).clusterUUID(UUIDs.randomBase64UUID())).build();
// simulate the cluster state change on the node
localState = adaptClusterStateToLocalNode(newClusterState, node);
previousLocalState = adaptClusterStateToLocalNode(stateWithIndex, node);
indicesCSSvc.applyClusterState(new ClusterChangedEvent("cluster state change with a new cluster UUID (and doesn't contain the index)", localState, previousLocalState));
// check that in memory data structures have been removed once the new cluster state is applied,
// but the persistent data is still there
RecordingIndicesService indicesService = (RecordingIndicesService) indicesCSSvc.indicesService;
for (IndexMetaData indexMetaData : stateWithIndex.metaData()) {
Index index = indexMetaData.getIndex();
assertNull(indicesService.indexService(index));
assertFalse(indicesService.isDeleted(index));
}
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project elasticsearch by elastic.
the class ClusterServiceTests method testClusterStateTaskListenerThrowingExceptionIsOkay.
/*
* test that a listener throwing an exception while handling a
* notification does not prevent publication notification to the
* executor
*/
public void testClusterStateTaskListenerThrowingExceptionIsOkay() throws InterruptedException {
final CountDownLatch latch = new CountDownLatch(1);
AtomicBoolean published = new AtomicBoolean();
clusterService.submitStateUpdateTask("testClusterStateTaskListenerThrowingExceptionIsOkay", new Object(), ClusterStateTaskConfig.build(Priority.NORMAL), new ClusterStateTaskExecutor<Object>() {
@Override
public ClusterTasksResult<Object> execute(ClusterState currentState, List<Object> tasks) throws Exception {
ClusterState newClusterState = ClusterState.builder(currentState).build();
return ClusterTasksResult.builder().successes(tasks).build(newClusterState);
}
@Override
public void clusterStatePublished(ClusterChangedEvent clusterChangedEvent) {
published.set(true);
latch.countDown();
}
}, new ClusterStateTaskListener() {
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
throw new IllegalStateException(source);
}
@Override
public void onFailure(String source, Exception e) {
}
});
latch.await();
assertTrue(published.get());
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project elasticsearch by elastic.
the class ClusterServiceTests method testClusterStateBatchedUpdates.
public void testClusterStateBatchedUpdates() throws BrokenBarrierException, InterruptedException {
AtomicInteger counter = new AtomicInteger();
class Task {
private AtomicBoolean state = new AtomicBoolean();
private final int id;
Task(int id) {
this.id = id;
}
public void execute() {
if (!state.compareAndSet(false, true)) {
throw new IllegalStateException();
} else {
counter.incrementAndGet();
}
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
Task task = (Task) o;
return id == task.id;
}
@Override
public int hashCode() {
return id;
}
@Override
public String toString() {
return Integer.toString(id);
}
}
int numberOfThreads = randomIntBetween(2, 8);
int taskSubmissionsPerThread = randomIntBetween(1, 64);
int numberOfExecutors = Math.max(1, numberOfThreads / 4);
final Semaphore semaphore = new Semaphore(numberOfExecutors);
class TaskExecutor implements ClusterStateTaskExecutor<Task> {
private final List<Set<Task>> taskGroups;
private AtomicInteger counter = new AtomicInteger();
private AtomicInteger batches = new AtomicInteger();
private AtomicInteger published = new AtomicInteger();
TaskExecutor(List<Set<Task>> taskGroups) {
this.taskGroups = taskGroups;
}
@Override
public ClusterTasksResult<Task> execute(ClusterState currentState, List<Task> tasks) throws Exception {
for (Set<Task> expectedSet : taskGroups) {
long count = tasks.stream().filter(expectedSet::contains).count();
assertThat("batched set should be executed together or not at all. Expected " + expectedSet + "s. Executing " + tasks, count, anyOf(equalTo(0L), equalTo((long) expectedSet.size())));
}
tasks.forEach(Task::execute);
counter.addAndGet(tasks.size());
ClusterState maybeUpdatedClusterState = currentState;
if (randomBoolean()) {
maybeUpdatedClusterState = ClusterState.builder(currentState).build();
batches.incrementAndGet();
semaphore.acquire();
}
return ClusterTasksResult.<Task>builder().successes(tasks).build(maybeUpdatedClusterState);
}
@Override
public void clusterStatePublished(ClusterChangedEvent clusterChangedEvent) {
published.incrementAndGet();
semaphore.release();
}
}
ConcurrentMap<String, AtomicInteger> processedStates = new ConcurrentHashMap<>();
List<Set<Task>> taskGroups = new ArrayList<>();
List<TaskExecutor> executors = new ArrayList<>();
for (int i = 0; i < numberOfExecutors; i++) {
executors.add(new TaskExecutor(taskGroups));
}
// randomly assign tasks to executors
List<Tuple<TaskExecutor, Set<Task>>> assignments = new ArrayList<>();
int taskId = 0;
for (int i = 0; i < numberOfThreads; i++) {
for (int j = 0; j < taskSubmissionsPerThread; j++) {
TaskExecutor executor = randomFrom(executors);
Set<Task> tasks = new HashSet<>();
for (int t = randomInt(3); t >= 0; t--) {
tasks.add(new Task(taskId++));
}
taskGroups.add(tasks);
assignments.add(Tuple.tuple(executor, tasks));
}
}
Map<TaskExecutor, Integer> counts = new HashMap<>();
int totalTaskCount = 0;
for (Tuple<TaskExecutor, Set<Task>> assignment : assignments) {
final int taskCount = assignment.v2().size();
counts.merge(assignment.v1(), taskCount, (previous, count) -> previous + count);
totalTaskCount += taskCount;
}
final CountDownLatch updateLatch = new CountDownLatch(totalTaskCount);
final ClusterStateTaskListener listener = new ClusterStateTaskListener() {
@Override
public void onFailure(String source, Exception e) {
fail(ExceptionsHelper.detailedMessage(e));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
processedStates.computeIfAbsent(source, key -> new AtomicInteger()).incrementAndGet();
updateLatch.countDown();
}
};
final ConcurrentMap<String, AtomicInteger> submittedTasksPerThread = new ConcurrentHashMap<>();
CyclicBarrier barrier = new CyclicBarrier(1 + numberOfThreads);
for (int i = 0; i < numberOfThreads; i++) {
final int index = i;
Thread thread = new Thread(() -> {
final String threadName = Thread.currentThread().getName();
try {
barrier.await();
for (int j = 0; j < taskSubmissionsPerThread; j++) {
Tuple<TaskExecutor, Set<Task>> assignment = assignments.get(index * taskSubmissionsPerThread + j);
final Set<Task> tasks = assignment.v2();
submittedTasksPerThread.computeIfAbsent(threadName, key -> new AtomicInteger()).addAndGet(tasks.size());
final TaskExecutor executor = assignment.v1();
if (tasks.size() == 1) {
clusterService.submitStateUpdateTask(threadName, tasks.stream().findFirst().get(), ClusterStateTaskConfig.build(randomFrom(Priority.values())), executor, listener);
} else {
Map<Task, ClusterStateTaskListener> taskListeners = new HashMap<>();
tasks.stream().forEach(t -> taskListeners.put(t, listener));
clusterService.submitStateUpdateTasks(threadName, taskListeners, ClusterStateTaskConfig.build(randomFrom(Priority.values())), executor);
}
}
barrier.await();
} catch (BrokenBarrierException | InterruptedException e) {
throw new AssertionError(e);
}
});
thread.start();
}
// wait for all threads to be ready
barrier.await();
// wait for all threads to finish
barrier.await();
// wait until all the cluster state updates have been processed
updateLatch.await();
// and until all of the publication callbacks have completed
semaphore.acquire(numberOfExecutors);
// assert the number of executed tasks is correct
assertEquals(totalTaskCount, counter.get());
// assert each executor executed the correct number of tasks
for (TaskExecutor executor : executors) {
if (counts.containsKey(executor)) {
assertEquals((int) counts.get(executor), executor.counter.get());
assertEquals(executor.batches.get(), executor.published.get());
}
}
// assert the correct number of clusterStateProcessed events were triggered
for (Map.Entry<String, AtomicInteger> entry : processedStates.entrySet()) {
assertThat(submittedTasksPerThread, hasKey(entry.getKey()));
assertEquals("not all tasks submitted by " + entry.getKey() + " received a processed event", entry.getValue().get(), submittedTasksPerThread.get(entry.getKey()).get());
}
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project elasticsearch by elastic.
the class DelayedAllocationServiceTests method testDelayedUnassignedScheduleRerouteRescheduledOnShorterDelay.
public void testDelayedUnassignedScheduleRerouteRescheduledOnShorterDelay() throws Exception {
TimeValue delaySetting = timeValueSeconds(30);
TimeValue shorterDelaySetting = timeValueMillis(100);
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("foo").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delaySetting)).numberOfShards(1).numberOfReplicas(1)).put(IndexMetaData.builder("bar").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shorterDelaySetting)).numberOfShards(1).numberOfReplicas(1)).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(RoutingTable.builder().addAsNew(metaData.index("foo")).addAsNew(metaData.index("bar")).build()).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).add(newNode("node3")).add(newNode("node4")).localNodeId("node1").masterNodeId("node1")).build();
final long nodeLeftTimestampNanos = System.nanoTime();
allocationService.setNanoTimeOverride(nodeLeftTimestampNanos);
clusterState = allocationService.reroute(clusterState, "reroute");
// starting primaries
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// starting replicas
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
assertFalse("no shards should be unassigned", clusterState.getRoutingNodes().unassigned().size() > 0);
String nodeIdOfFooReplica = null;
for (ShardRouting shardRouting : clusterState.getRoutingTable().allShards("foo")) {
if (shardRouting.primary() == false) {
nodeIdOfFooReplica = shardRouting.currentNodeId();
break;
}
}
assertNotNull(nodeIdOfFooReplica);
// remove node that has replica and reroute
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove(nodeIdOfFooReplica)).build();
clusterState = allocationService.deassociateDeadNodes(clusterState, true, "fake node left");
ClusterState stateWithDelayedShard = clusterState;
// make sure the replica is marked as delayed (i.e. not reallocated)
assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShard));
ShardRouting delayedShard = stateWithDelayedShard.getRoutingNodes().unassigned().iterator().next();
assertEquals(nodeLeftTimestampNanos, delayedShard.unassignedInfo().getUnassignedTimeInNanos());
assertNull(delayedAllocationService.delayedRerouteTask.get());
long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int) shorterDelaySetting.nanos() - 1)).nanos();
long clusterChangeEventTimestampNanos = nodeLeftTimestampNanos + delayUntilClusterChangeEvent;
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithDelayedShard, clusterState));
// check that delayed reroute task was created and registered with the proper settings
DelayedAllocationService.DelayedRerouteTask delayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(delayedRerouteTask);
assertFalse(delayedRerouteTask.cancelScheduling.get());
assertThat(delayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(delayedRerouteTask.nextDelay.nanos(), equalTo(delaySetting.nanos() - (clusterChangeEventTimestampNanos - nodeLeftTimestampNanos)));
if (randomBoolean()) {
// update settings with shorter delay
ClusterState stateWithShorterDelay = ClusterState.builder(stateWithDelayedShard).metaData(MetaData.builder(stateWithDelayedShard.metaData()).updateSettings(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), shorterDelaySetting).build(), "foo")).build();
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent("apply shorter delay", stateWithShorterDelay, stateWithDelayedShard));
} else {
// node leaves with replica shard of index bar that has shorter delay
String nodeIdOfBarReplica = null;
for (ShardRouting shardRouting : stateWithDelayedShard.getRoutingTable().allShards("bar")) {
if (shardRouting.primary() == false) {
nodeIdOfBarReplica = shardRouting.currentNodeId();
break;
}
}
assertNotNull(nodeIdOfBarReplica);
// remove node that has replica and reroute
clusterState = ClusterState.builder(stateWithDelayedShard).nodes(DiscoveryNodes.builder(stateWithDelayedShard.nodes()).remove(nodeIdOfBarReplica)).build();
ClusterState stateWithShorterDelay = allocationService.deassociateDeadNodes(clusterState, true, "fake node left");
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithShorterDelay, stateWithDelayedShard));
}
// check that delayed reroute task was replaced by shorter reroute task
DelayedAllocationService.DelayedRerouteTask shorterDelayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(shorterDelayedRerouteTask);
assertNotEquals(shorterDelayedRerouteTask, delayedRerouteTask);
// existing task was cancelled
assertTrue(delayedRerouteTask.cancelScheduling.get());
assertFalse(shorterDelayedRerouteTask.cancelScheduling.get());
assertThat(delayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(shorterDelayedRerouteTask.nextDelay.nanos(), equalTo(shorterDelaySetting.nanos() - (clusterChangeEventTimestampNanos - nodeLeftTimestampNanos)));
}
use of org.elasticsearch.cluster.ClusterChangedEvent in project elasticsearch by elastic.
the class DelayedAllocationServiceTests method testDelayedUnassignedScheduleReroute.
public void testDelayedUnassignedScheduleReroute() throws Exception {
TimeValue delaySetting = timeValueMillis(100);
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delaySetting)).numberOfShards(1).numberOfReplicas(1)).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(RoutingTable.builder().addAsNew(metaData.index("test")).build()).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).localNodeId("node1").masterNodeId("node1")).build();
final long baseTimestampNanos = System.nanoTime();
allocationService.setNanoTimeOverride(baseTimestampNanos);
clusterState = allocationService.reroute(clusterState, "reroute");
// starting primaries
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// starting replicas
clusterState = allocationService.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
assertFalse("no shards should be unassigned", clusterState.getRoutingNodes().unassigned().size() > 0);
String nodeId = null;
final List<ShardRouting> allShards = clusterState.getRoutingTable().allShards("test");
// we need to find the node with the replica otherwise we will not reroute
for (ShardRouting shardRouting : allShards) {
if (shardRouting.primary() == false) {
nodeId = shardRouting.currentNodeId();
break;
}
}
assertNotNull(nodeId);
// remove node that has replica and reroute
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove(nodeId)).build();
clusterState = allocationService.deassociateDeadNodes(clusterState, true, "reroute");
ClusterState stateWithDelayedShard = clusterState;
// make sure the replica is marked as delayed (i.e. not reallocated)
assertEquals(1, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithDelayedShard));
ShardRouting delayedShard = stateWithDelayedShard.getRoutingNodes().unassigned().iterator().next();
assertEquals(baseTimestampNanos, delayedShard.unassignedInfo().getUnassignedTimeInNanos());
// mock ClusterService.submitStateUpdateTask() method
CountDownLatch latch = new CountDownLatch(1);
AtomicReference<ClusterStateUpdateTask> clusterStateUpdateTask = new AtomicReference<>();
doAnswer(invocationOnMock -> {
clusterStateUpdateTask.set((ClusterStateUpdateTask) invocationOnMock.getArguments()[1]);
latch.countDown();
return null;
}).when(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), any(ClusterStateUpdateTask.class));
assertNull(delayedAllocationService.delayedRerouteTask.get());
long delayUntilClusterChangeEvent = TimeValue.timeValueNanos(randomInt((int) delaySetting.nanos() - 1)).nanos();
long clusterChangeEventTimestampNanos = baseTimestampNanos + delayUntilClusterChangeEvent;
delayedAllocationService.setNanoTimeOverride(clusterChangeEventTimestampNanos);
delayedAllocationService.clusterChanged(new ClusterChangedEvent("fake node left", stateWithDelayedShard, clusterState));
// check that delayed reroute task was created and registered with the proper settings
DelayedAllocationService.DelayedRerouteTask delayedRerouteTask = delayedAllocationService.delayedRerouteTask.get();
assertNotNull(delayedRerouteTask);
assertFalse(delayedRerouteTask.cancelScheduling.get());
assertThat(delayedRerouteTask.baseTimestampNanos, equalTo(clusterChangeEventTimestampNanos));
assertThat(delayedRerouteTask.nextDelay.nanos(), equalTo(delaySetting.nanos() - (clusterChangeEventTimestampNanos - baseTimestampNanos)));
// check that submitStateUpdateTask() was invoked on the cluster service mock
assertTrue(latch.await(30, TimeUnit.SECONDS));
verify(clusterService).submitStateUpdateTask(eq(CLUSTER_UPDATE_TASK_SOURCE), eq(clusterStateUpdateTask.get()));
// advance the time on the allocation service to a timestamp that happened after the delayed scheduling
long nanoTimeForReroute = clusterChangeEventTimestampNanos + delaySetting.nanos() + timeValueMillis(randomInt(200)).nanos();
allocationService.setNanoTimeOverride(nanoTimeForReroute);
// apply cluster state
ClusterState stateWithRemovedDelay = clusterStateUpdateTask.get().execute(stateWithDelayedShard);
// check that shard is not delayed anymore
assertEquals(0, UnassignedInfo.getNumberOfDelayedUnassigned(stateWithRemovedDelay));
// check that task is now removed
assertNull(delayedAllocationService.delayedRerouteTask.get());
// simulate calling listener (cluster change event)
delayedAllocationService.setNanoTimeOverride(nanoTimeForReroute + timeValueMillis(randomInt(200)).nanos());
delayedAllocationService.clusterChanged(new ClusterChangedEvent(CLUSTER_UPDATE_TASK_SOURCE, stateWithRemovedDelay, stateWithDelayedShard));
// check that no new task is scheduled
assertNull(delayedAllocationService.delayedRerouteTask.get());
// check that no further cluster state update was submitted
verifyNoMoreInteractions(clusterService);
}
Aggregations