Use of org.elasticsearch.cluster.ClusterStateUpdateTask in project crate by crate.
From the class MasterServiceTests, method testClusterStateUpdateLogging.
// To ensure that we log cluster state events on TRACE level
@TestLogging("org.elasticsearch.cluster.service:TRACE")
public void testClusterStateUpdateLogging() throws Exception {
MockLogAppender mockAppender = new MockLogAppender();
mockAppender.start();
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test1 start", MasterService.class.getCanonicalName(), Level.DEBUG, "executing cluster state update for [test1]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test1 computation", MasterService.class.getCanonicalName(), Level.DEBUG, "took [1s] to compute cluster state update for [test1]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test1 notification", MasterService.class.getCanonicalName(), Level.DEBUG, "took [0s] to notify listeners on unchanged cluster state for [test1]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test2 start", MasterService.class.getCanonicalName(), Level.DEBUG, "executing cluster state update for [test2]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test2 failure", MasterService.class.getCanonicalName(), Level.TRACE, "failed to execute cluster state update (on version: [*], uuid: [*]) for [test2]*"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test2 computation", MasterService.class.getCanonicalName(), Level.DEBUG, "took [2s] to compute cluster state update for [test2]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test2 notification", MasterService.class.getCanonicalName(), Level.DEBUG, "took [0s] to notify listeners on unchanged cluster state for [test2]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test3 start", MasterService.class.getCanonicalName(), Level.DEBUG, "executing cluster state update for [test3]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test3 computation", MasterService.class.getCanonicalName(), Level.DEBUG, "took [3s] to compute cluster state update for [test3]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test3 notification", MasterService.class.getCanonicalName(), Level.DEBUG, "took [4s] to notify listeners on successful publication of cluster state (version: *, uuid: *) for [test3]"));
mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation("test4", MasterService.class.getCanonicalName(), Level.DEBUG, "executing cluster state update for [test4]"));
Logger clusterLogger = LogManager.getLogger(MasterService.class);
Loggers.addAppender(clusterLogger, mockAppender);
try (MasterService masterService = createMasterService(true)) {
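// test1: returns the state unchanged after advancing the test clock by 1s; only the DEBUG start/compute/notify messages should appear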
masterService.submitStateUpdateTask("test1", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
relativeTimeInMillis += TimeValue.timeValueSeconds(1).millis();
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
}
@Override
public void onFailure(String source, Exception e) {
fail();
}
});
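// test2: advances the test clock by 2s and then throws, so the failure is logged at TRACE and onFailure is invoked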
masterService.submitStateUpdateTask("test2", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
relativeTimeInMillis += TimeValue.timeValueSeconds(2).millis();
throw new IllegalArgumentException("Testing handling of exceptions in the cluster state task");
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
fail();
}
@Override
public void onFailure(String source, Exception e) {
}
});
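// test3: returns a changed state after 3s of computation and spends 4s in clusterStateProcessed, exercising the publication timing message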
masterService.submitStateUpdateTask("test3", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
relativeTimeInMillis += TimeValue.timeValueSeconds(3).millis();
return ClusterState.builder(currentState).incrementVersion().build();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
relativeTimeInMillis += TimeValue.timeValueSeconds(4).millis();
}
@Override
public void onFailure(String source, Exception e) {
fail();
}
});
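// test4: trivial no-op task submitted last; once its start message is logged, the expectations for the earlier tasks can be checked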
masterService.submitStateUpdateTask("test4", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
}
@Override
public void onFailure(String source, Exception e) {
fail();
}
});
assertBusy(mockAppender::assertAllExpectationsMatched);
} finally {
Loggers.removeAppender(clusterLogger, mockAppender);
mockAppender.stop();
}
}
Use of org.elasticsearch.cluster.ClusterStateUpdateTask in project crate by crate.
From the class BlockMasterServiceOnMaster, method startDisrupting.
@Override
public void startDisrupting() {
disruptedNode = cluster.getMasterName();
final String disruptionNodeCopy = disruptedNode;
if (disruptionNodeCopy == null) {
return;
}
ClusterService clusterService = cluster.getInstance(ClusterService.class, disruptionNodeCopy);
if (clusterService == null) {
return;
}
logger.info("blocking master service on node [{}]", disruptionNodeCopy);
boolean success = disruptionLatch.compareAndSet(null, new CountDownLatch(1));
assert success : "startDisrupting called without waiting on stopDisrupting to complete";
final CountDownLatch started = new CountDownLatch(1);
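// Submit an IMMEDIATE-priority task that parks the master service's update thread on disruptionLatch, blocking all further cluster state updates until the latch is released by stopDisrupting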
clusterService.getMasterService().submitStateUpdateTask("service_disruption_block", new ClusterStateUpdateTask() {
@Override
public Priority priority() {
return Priority.IMMEDIATE;
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
started.countDown();
CountDownLatch latch = disruptionLatch.get();
if (latch != null) {
try {
latch.await();
} catch (InterruptedException e) {
Throwables.rethrow(e);
}
}
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
logger.error("unexpected error during disruption", e);
}
});
try {
started.await();
} catch (InterruptedException e) {
// Best effort: if interrupted while waiting for the blocking task to start, simply fall through and return
}
}
Use of org.elasticsearch.cluster.ClusterStateUpdateTask in project crate by crate.
From the class ClusterServiceUtils, method setState.
public static void setState(MasterService executor, ClusterState clusterState) {
CountDownLatch latch = new CountDownLatch(1);
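// Submit the desired state as an update task and block on the latch until listeners have been notified of it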
executor.submitStateUpdateTask("test setting state", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
// make sure we increment versions as listener may depend on it for change
return ClusterState.builder(clusterState).build();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
latch.countDown();
}
@Override
public void onFailure(String source, Exception e) {
fail("unexpected exception: " + e);
}
});
try {
latch.await();
} catch (InterruptedException e) {
throw new ElasticsearchException("unexpected interruption", e);
}
}
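A typical call site in a test simply derives the desired state and hands it to this helper, which blocks until the state has been processed. A minimal sketch, assuming a ClusterService obtained from the test fixture (the variable names are illustrative, not taken from the CrateDB sources):
ClusterState next = ClusterState.builder(clusterService.state()).build();
ClusterServiceUtils.setState(clusterService.getMasterService(), next); // returns only after clusterStateProcessed has fired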
Use of org.elasticsearch.cluster.ClusterStateUpdateTask in project crate by crate.
From the class SnapshotsService, method processSnapshotsOnRemovedNodes.
/**
* Cleans up shard snapshots that were running on removed nodes
*/
private void processSnapshotsOnRemovedNodes() {
clusterService.submitStateUpdateTask("update snapshot state after node removal", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
DiscoveryNodes nodes = currentState.nodes();
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
if (snapshots == null) {
return currentState;
}
boolean changed = false;
ArrayList<SnapshotsInProgress.Entry> entries = new ArrayList<>();
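// Walk all in-progress snapshots: fail incomplete shards whose assigned node has left the cluster, and abort INIT-state snapshots left over from a previous master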
for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) {
SnapshotsInProgress.Entry updatedSnapshot = snapshot;
if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
boolean snapshotChanged = false;
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshot.shards()) {
final ShardSnapshotStatus shardStatus = shardEntry.value;
final ShardId shardId = shardEntry.key;
if (!shardStatus.state().completed() && shardStatus.nodeId() != null) {
if (nodes.nodeExists(shardStatus.nodeId())) {
shards.put(shardId, shardStatus);
} else {
// TODO: Restart snapshot on another node?
snapshotChanged = true;
LOGGER.warn("failing snapshot of shard [{}] on closed node [{}]", shardId, shardStatus.nodeId());
shards.put(shardId, new ShardSnapshotStatus(shardStatus.nodeId(), ShardState.FAILED, "node shutdown", shardStatus.generation()));
}
} else {
shards.put(shardId, shardStatus);
}
}
if (snapshotChanged) {
changed = true;
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shardsMap = shards.build();
if (!snapshot.state().completed() && completed(shardsMap.values())) {
updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shardsMap);
} else {
updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, snapshot.state(), shardsMap);
}
}
entries.add(updatedSnapshot);
} else if (snapshot.state() == State.INIT && initializingSnapshots.contains(snapshot.snapshot()) == false) {
changed = true;
// Mark the snapshot as aborted as it failed to start from the previous master
updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.ABORTED, snapshot.shards());
entries.add(updatedSnapshot);
// Clean up the snapshot that failed to start from the old master
deleteSnapshot(snapshot.snapshot(), new ActionListener<Void>() {
@Override
public void onResponse(Void aVoid) {
LOGGER.debug("cleaned up abandoned snapshot {} in INIT state", snapshot.snapshot());
}
@Override
public void onFailure(Exception e) {
LOGGER.warn("failed to clean up abandoned snapshot {} in INIT state", snapshot.snapshot());
}
}, updatedSnapshot.repositoryStateId(), false);
}
assert updatedSnapshot.shards().size() == snapshot.shards().size() : "Shard count changed during snapshot status update from [" + snapshot + "] to [" + updatedSnapshot + "]";
}
if (changed) {
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build();
}
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
LOGGER.warn("failed to update snapshot state after node removal");
}
});
}
Use of org.elasticsearch.cluster.ClusterStateUpdateTask in project crate by crate.
From the class SnapshotsService, method beginSnapshot.
/**
* Starts the snapshot.
* <p>
* Creates the snapshot in the repository and updates the snapshot metadata record with the list of shards that need to be processed.
*
* @param clusterState cluster state
* @param snapshot snapshot meta data
* @param partial allow partial snapshots
* @param userCreateSnapshotListener listener
*/
private void beginSnapshot(final ClusterState clusterState, final SnapshotsInProgress.Entry snapshot, final boolean partial, final ActionListener<Snapshot> userCreateSnapshotListener) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new AbstractRunnable() {
boolean snapshotCreated;
boolean hadAbortedInitializations;
@Override
protected void doRun() {
assert initializingSnapshots.contains(snapshot.snapshot());
Repository repository = repositoriesService.repository(snapshot.snapshot().getRepository());
if (repository.isReadOnly()) {
throw new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository");
}
final String snapshotName = snapshot.snapshot().getSnapshotId().getName();
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repository.getRepositoryData(repositoryDataListener);
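// Repository data is loaded asynchronously; the rest of the initialization runs in the whenComplete callback below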
repositoryDataListener.whenComplete(repositoryData -> {
// check if the snapshot name already exists in the repository
if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists");
}
snapshotCreated = true;
LOGGER.info("snapshot [{}] started", snapshot.snapshot());
if (snapshot.indices().isEmpty()) {
// No indices in this snapshot - we are done
userCreateSnapshotListener.onResponse(snapshot.snapshot());
endSnapshot(snapshot, clusterState.metadata());
return;
}
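// The snapshot now exists in the repository; promote the in-progress entry from INIT to STARTED with concrete per-shard assignments (or mark it FAILED, or keep it ABORTED)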
clusterService.submitStateUpdateTask("update_snapshot [" + snapshot.snapshot() + "]", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.snapshot().equals(snapshot.snapshot()) == false) {
entries.add(entry);
continue;
}
if (entry.state() == State.ABORTED) {
entries.add(entry);
assert entry.shards().isEmpty();
hadAbortedInitializations = true;
} else {
// Replace the snapshot that was just initialized
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards = shards(currentState, entry, repositoryData);
if (!partial) {
Tuple<Set<String>, Set<String>> indicesWithMissingShards = indicesWithMissingShards(shards, currentState.metadata());
Set<String> missing = indicesWithMissingShards.v1();
Set<String> closed = indicesWithMissingShards.v2();
if (missing.isEmpty() == false || closed.isEmpty() == false) {
final StringBuilder failureMessage = new StringBuilder();
if (missing.isEmpty() == false) {
failureMessage.append("Indices don't have primary shards ");
failureMessage.append(missing);
}
if (closed.isEmpty() == false) {
if (failureMessage.length() > 0) {
failureMessage.append("; ");
}
failureMessage.append("Indices are closed ");
failureMessage.append(closed);
}
entries.add(new SnapshotsInProgress.Entry(entry, State.FAILED, shards, failureMessage.toString()));
continue;
}
}
entries.add(new SnapshotsInProgress.Entry(entry, State.STARTED, shards));
}
}
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build();
}
@Override
public void onFailure(String source, Exception e) {
LOGGER.warn(() -> new ParameterizedMessage("[{}] failed to create snapshot", snapshot.snapshot().getSnapshotId()), e);
removeSnapshotFromClusterState(snapshot.snapshot(), null, e, new CleanupAfterErrorListener(snapshot, true, userCreateSnapshotListener, e));
}
@Override
public void onNoLongerMaster(String source) {
// We are no longer the master - we shouldn't try to do any cleanup
// The new master will take care of it
LOGGER.warn("[{}] failed to create snapshot - no longer a master", snapshot.snapshot().getSnapshotId());
userCreateSnapshotListener.onFailure(new SnapshotException(snapshot.snapshot(), "master changed during snapshot initialization"));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
// The userCreateSnapshotListener.onResponse() notifies caller that the snapshot was accepted
// for processing. If client wants to wait for the snapshot completion, it can register snapshot
// completion listener in this method. For the snapshot completion to work properly, the snapshot
// should still exist when listener is registered.
userCreateSnapshotListener.onResponse(snapshot.snapshot());
if (hadAbortedInitializations) {
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
assert entry != null;
endSnapshot(entry, newState.metadata());
}
}
});
}, this::onFailure);
}
@Override
public void onFailure(Exception e) {
LOGGER.warn(() -> new ParameterizedMessage("failed to create snapshot [{}]", snapshot.snapshot().getSnapshotId()), e);
removeSnapshotFromClusterState(snapshot.snapshot(), null, e, new CleanupAfterErrorListener(snapshot, snapshotCreated, userCreateSnapshotListener, e));
}
});
}
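All of the usages above share the same shape: an anonymous ClusterStateUpdateTask is submitted under a source string, execute(ClusterState) computes and returns the (possibly unchanged) new state, and clusterStateProcessed / onFailure react to the outcome. A minimal sketch of that pattern, assuming a ClusterService reference; the source string and the no-op update are illustrative and not taken from the CrateDB sources:
clusterService.submitStateUpdateTask("illustrative_no_op", new ClusterStateUpdateTask() {
    @Override
    public ClusterState execute(ClusterState currentState) {
        // Return currentState unchanged, or a modified state built via ClusterState.builder(currentState)
        return currentState;
    }

    @Override
    public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
        // Called once the resulting state has been applied (and published, if it changed)
    }

    @Override
    public void onFailure(String source, Exception e) {
        // Called if execute() throws or the update cannot be committed
    }
});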