use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotShardsService method syncShardStatsOnNewMaster.
/**
* Checks if any shards were processed that the new master doesn't know about
*/
private void syncShardStatsOnNewMaster(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
if (snapshotsInProgress == null) {
return;
}
final String localNodeId = event.state().nodes().getLocalNodeId();
final DiscoveryNode masterNode = event.state().nodes().getMasterNode();
for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
Map<ShardId, IndexShardSnapshotStatus> localShards = currentSnapshotShards(snapshot.snapshot());
if (localShards != null) {
ImmutableOpenMap<ShardId, ShardSnapshotStatus> masterShards = snapshot.shards();
for (Map.Entry<ShardId, IndexShardSnapshotStatus> localShard : localShards.entrySet()) {
ShardId shardId = localShard.getKey();
IndexShardSnapshotStatus localShardStatus = localShard.getValue();
ShardSnapshotStatus masterShard = masterShards.get(shardId);
if (masterShard != null && masterShard.state().completed() == false) {
// Master knows about the shard and thinks it has not completed
if (localShardStatus.stage() == Stage.DONE) {
// but we think the shard is done - we need to make new master know that the shard is done
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard is done locally, updating status on the master", snapshot.snapshot(), shardId);
updateIndexShardSnapshotStatus(snapshot.snapshot(), shardId, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
} else if (localShard.getValue().stage() == Stage.FAILURE) {
// but we think the shard failed - we need to make new master know that the shard failed
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard failed locally, updating status on master", snapshot.snapshot(), shardId);
updateIndexShardSnapshotStatus(snapshot.snapshot(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, localShardStatus.failure()), masterNode);
}
}
}
}
}
}
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotsService method processSnapshotsOnRemovedNodes.
/**
* Cleans up shard snapshots that were running on removed nodes
*
* @param event cluster changed event
*/
private void processSnapshotsOnRemovedNodes(ClusterChangedEvent event) {
if (removedNodesCleanupNeeded(event)) {
// Check if we just became the master
final boolean newMaster = !event.previousState().nodes().isLocalNodeElectedMaster();
clusterService.submitStateUpdateTask("update snapshot state after node removal", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
DiscoveryNodes nodes = currentState.nodes();
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
if (snapshots == null) {
return currentState;
}
boolean changed = false;
ArrayList<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) {
SnapshotsInProgress.Entry updatedSnapshot = snapshot;
boolean snapshotChanged = false;
if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshot.shards()) {
ShardSnapshotStatus shardStatus = shardEntry.value;
if (!shardStatus.state().completed() && shardStatus.nodeId() != null) {
if (nodes.nodeExists(shardStatus.nodeId())) {
shards.put(shardEntry.key, shardEntry.value);
} else {
// TODO: Restart snapshot on another node?
snapshotChanged = true;
logger.warn("failing snapshot of shard [{}] on closed node [{}]", shardEntry.key, shardStatus.nodeId());
shards.put(shardEntry.key, new ShardSnapshotStatus(shardStatus.nodeId(), State.FAILED, "node shutdown"));
}
}
}
if (snapshotChanged) {
changed = true;
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shardsMap = shards.build();
if (!snapshot.state().completed() && completed(shardsMap.values())) {
updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shardsMap);
endSnapshot(updatedSnapshot);
} else {
updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, snapshot.state(), shardsMap);
}
}
entries.add(updatedSnapshot);
} else if (snapshot.state() == State.INIT && newMaster) {
// Clean up the snapshot that failed to start from the old master
deleteSnapshot(snapshot.snapshot(), new DeleteSnapshotListener() {
@Override
public void onResponse() {
logger.debug("cleaned up abandoned snapshot {} in INIT state", snapshot.snapshot());
}
@Override
public void onFailure(Exception e) {
logger.warn("failed to clean up abandoned snapshot {} in INIT state", snapshot.snapshot());
}
}, updatedSnapshot.getRepositoryStateId(), false);
} else if (snapshot.state() == State.SUCCESS && newMaster) {
// Finalize the snapshot
endSnapshot(snapshot);
}
}
if (changed) {
snapshots = new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()]));
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, snapshots).build();
}
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
logger.warn("failed to update snapshot state after node removal");
}
});
}
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotsService method deleteSnapshot.
/**
* Deletes snapshot from repository.
* <p>
* If the snapshot is still running cancels the snapshot first and then deletes it from the repository.
*
* @param snapshot snapshot
* @param listener listener
* @param repositoryStateId the unique id for the state of the repository
*/
private void deleteSnapshot(final Snapshot snapshot, final DeleteSnapshotListener listener, final long repositoryStateId, final boolean immediatePriority) {
Priority priority = immediatePriority ? Priority.IMMEDIATE : Priority.NORMAL;
clusterService.submitStateUpdateTask("delete snapshot", new ClusterStateUpdateTask(priority) {
boolean waitForSnapshot = false;
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE);
if (deletionsInProgress != null && deletionsInProgress.hasDeletionsInProgress()) {
throw new ConcurrentSnapshotExecutionException(snapshot, "cannot delete - another snapshot is currently being deleted");
}
RestoreInProgress restoreInProgress = currentState.custom(RestoreInProgress.TYPE);
if (restoreInProgress != null) {
// and the files the restore depends on would all be gone
if (restoreInProgress.entries().isEmpty() == false) {
throw new ConcurrentSnapshotExecutionException(snapshot, "cannot delete snapshot during a restore");
}
}
ClusterState.Builder clusterStateBuilder = ClusterState.builder(currentState);
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
SnapshotsInProgress.Entry snapshotEntry = snapshots != null ? snapshots.snapshot(snapshot) : null;
if (snapshotEntry == null) {
// This snapshot is not running - delete
if (snapshots != null && !snapshots.entries().isEmpty()) {
// However other snapshots are running - cannot continue
throw new ConcurrentSnapshotExecutionException(snapshot, "another snapshot is currently running cannot delete");
}
// add the snapshot deletion to the cluster state
SnapshotDeletionsInProgress.Entry entry = new SnapshotDeletionsInProgress.Entry(snapshot, System.currentTimeMillis(), repositoryStateId);
if (deletionsInProgress != null) {
deletionsInProgress = deletionsInProgress.withAddedEntry(entry);
} else {
deletionsInProgress = SnapshotDeletionsInProgress.newInstance(entry);
}
clusterStateBuilder.putCustom(SnapshotDeletionsInProgress.TYPE, deletionsInProgress);
} else {
// This snapshot is currently running - stopping shards first
waitForSnapshot = true;
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards;
if (snapshotEntry.state() == State.STARTED && snapshotEntry.shards() != null) {
// snapshot is currently running - stop started shards
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shardsBuilder = ImmutableOpenMap.builder();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshotEntry.shards()) {
ShardSnapshotStatus status = shardEntry.value;
if (!status.state().completed()) {
shardsBuilder.put(shardEntry.key, new ShardSnapshotStatus(status.nodeId(), State.ABORTED));
} else {
shardsBuilder.put(shardEntry.key, status);
}
}
shards = shardsBuilder.build();
} else if (snapshotEntry.state() == State.INIT) {
// snapshot hasn't started yet - end it
shards = snapshotEntry.shards();
endSnapshot(snapshotEntry);
} else {
boolean hasUncompletedShards = false;
// Cleanup in case a node gone missing and snapshot wasn't updated for some reason
for (ObjectCursor<ShardSnapshotStatus> shardStatus : snapshotEntry.shards().values()) {
// Check if we still have shard running on existing nodes
if (shardStatus.value.state().completed() == false && shardStatus.value.nodeId() != null && currentState.nodes().get(shardStatus.value.nodeId()) != null) {
hasUncompletedShards = true;
break;
}
}
if (hasUncompletedShards) {
// snapshot is being finalized - wait for shards to complete finalization process
logger.debug("trying to delete completed snapshot - should wait for shards to finalize on all nodes");
return currentState;
} else {
// no shards to wait for - finish the snapshot
logger.debug("trying to delete completed snapshot with no finalizing shards - can delete immediately");
shards = snapshotEntry.shards();
endSnapshot(snapshotEntry);
}
}
SnapshotsInProgress.Entry newSnapshot = new SnapshotsInProgress.Entry(snapshotEntry, State.ABORTED, shards);
snapshots = new SnapshotsInProgress(newSnapshot);
clusterStateBuilder.putCustom(SnapshotsInProgress.TYPE, snapshots);
}
return clusterStateBuilder.build();
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (waitForSnapshot) {
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
addListener(new SnapshotCompletionListener() {
@Override
public void onSnapshotCompletion(Snapshot completedSnapshot, SnapshotInfo snapshotInfo) {
if (completedSnapshot.equals(snapshot)) {
logger.trace("deleted snapshot completed - deleting files");
removeListener(this);
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> deleteSnapshot(completedSnapshot.getRepository(), completedSnapshot.getSnapshotId().getName(), listener, true));
}
}
@Override
public void onSnapshotFailure(Snapshot failedSnapshot, Exception e) {
if (failedSnapshot.equals(snapshot)) {
logger.trace("deleted snapshot failed - deleting files", e);
removeListener(this);
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> deleteSnapshot(failedSnapshot.getRepository(), failedSnapshot.getSnapshotId().getName(), listener, true));
}
}
});
} else {
logger.trace("deleted snapshot is not running - deleting files");
deleteSnapshotFromRepository(snapshot, listener, repositoryStateId);
}
}
});
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotsService method endSnapshot.
/**
* Finalizes the shard in repository and then removes it from cluster state
* <p>
* This is non-blocking method that runs on a thread from SNAPSHOT thread pool
*
* @param entry snapshot
* @param failure failure reason or null if snapshot was successful
*/
private void endSnapshot(final SnapshotsInProgress.Entry entry, final String failure) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new Runnable() {
@Override
public void run() {
final Snapshot snapshot = entry.snapshot();
try {
final Repository repository = repositoriesService.repository(snapshot.getRepository());
logger.trace("[{}] finalizing snapshot in repository, state: [{}], failure[{}]", snapshot, entry.state(), failure);
ArrayList<ShardSearchFailure> failures = new ArrayList<>();
ArrayList<SnapshotShardFailure> shardFailures = new ArrayList<>();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardStatus : entry.shards()) {
ShardId shardId = shardStatus.key;
ShardSnapshotStatus status = shardStatus.value;
if (status.state().failed()) {
failures.add(new ShardSearchFailure(status.reason(), new SearchShardTarget(status.nodeId(), shardId)));
shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, status.reason()));
}
}
SnapshotInfo snapshotInfo = repository.finalizeSnapshot(snapshot.getSnapshotId(), entry.indices(), entry.startTime(), failure, entry.shards().size(), Collections.unmodifiableList(shardFailures), entry.getRepositoryStateId());
removeSnapshotFromClusterState(snapshot, snapshotInfo, null);
} catch (Exception e) {
logger.warn((Supplier<?>) () -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e);
removeSnapshotFromClusterState(snapshot, null, e);
}
}
});
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SharedClusterSnapshotRestoreIT method testDeleteOrphanSnapshot.
public void testDeleteOrphanSnapshot() throws Exception {
Client client = client();
logger.info("--> creating repository");
final String repositoryName = "test-repo";
assertAcked(client.admin().cluster().preparePutRepository(repositoryName).setType("mock").setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", randomBoolean()).put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
logger.info("--> create the index");
final String idxName = "test-idx";
createIndex(idxName);
ensureGreen();
ClusterService clusterService = internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName());
final CountDownLatch countDownLatch = new CountDownLatch(1);
logger.info("--> snapshot");
final String snapshotName = "test-snap";
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot(repositoryName, snapshotName).setWaitForCompletion(true).setIndices(idxName).get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));
logger.info("--> emulate an orphan snapshot");
RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class, internalCluster().getMasterName());
final RepositoryData repositoryData = repositoriesService.repository(repositoryName).getRepositoryData();
final IndexId indexId = repositoryData.resolveIndexId(idxName);
clusterService.submitStateUpdateTask("orphan snapshot test", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
// Simulate orphan snapshot
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
shards.put(new ShardId(idxName, "_na_", 0), new ShardSnapshotStatus("unknown-node", State.ABORTED));
shards.put(new ShardId(idxName, "_na_", 1), new ShardSnapshotStatus("unknown-node", State.ABORTED));
shards.put(new ShardId(idxName, "_na_", 2), new ShardSnapshotStatus("unknown-node", State.ABORTED));
List<Entry> entries = new ArrayList<>();
entries.add(new Entry(new Snapshot(repositoryName, createSnapshotResponse.getSnapshotInfo().snapshotId()), true, false, State.ABORTED, Collections.singletonList(indexId), System.currentTimeMillis(), repositoryData.getGenId(), shards.build()));
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(Collections.unmodifiableList(entries))).build();
}
@Override
public void onFailure(String source, Exception e) {
fail();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) {
countDownLatch.countDown();
}
});
countDownLatch.await();
logger.info("--> try deleting the orphan snapshot");
assertAcked(client.admin().cluster().prepareDeleteSnapshot(repositoryName, snapshotName).get("10s"));
}
Aggregations