use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method waitingShardsStartedOrUnassigned.
/**
 * Reports whether any shard in {@code WAITING} state, belonging to a snapshot in {@code STARTED} state, needs
 * attention after the given cluster change: either its index no longer has a routing table, or its primary
 * shard is now started or unassigned.
 *
 * @param snapshotsInProgress snapshots currently tracked in the cluster state
 * @param event               cluster change event whose routing table is inspected
 * @return {@code true} as soon as one such shard is found, {@code false} otherwise
 */
private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snapshotsInProgress, ClusterChangedEvent event) {
    for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
        if (entry.state() != State.STARTED) {
            continue;
        }
        for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> cursor : entry.shards()) {
            // only shards that are still WAITING are of interest
            if (cursor.value.state() != ShardState.WAITING) {
                continue;
            }
            final ShardId waitingShard = cursor.key;
            // skip shards whose index routing did not change with this event
            if (event.indexRoutingTableChanged(waitingShard.getIndexName()) == false) {
                continue;
            }
            final IndexRoutingTable indexRouting = event.state().getRoutingTable().index(waitingShard.getIndex());
            if (indexRouting == null) {
                // index got removed concurrently and we have to fail WAITING state shards
                return true;
            }
            final ShardRouting primary = indexRouting.shard(waitingShard.id()).primaryShard();
            if (primary == null) {
                continue;
            }
            if (primary.started() || primary.unassigned()) {
                return true;
            }
        }
    }
    return false;
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method deleteSnapshots.
/**
 * Deletes snapshots from the repository. In-progress snapshots matched by the delete will be aborted before deleting them.
 * <p>
 * The work is wrapped in a {@link ClusterStateUpdateTask} that is handed to
 * {@code repository.executeConsistentStateUpdate}, which supplies the {@code repositoryData} the task is built against.
 * Inside the task there are two paths:
 * <ul>
 * <li>if no in-progress snapshot matches, or the minimum node version is on or after
 * {@code FULL_CONCURRENCY_VERSION}, the task delegates entirely to the task returned by
 * {@code createDeleteStateUpdate};</li>
 * <li>otherwise (backwards-compatibility path) exactly one running snapshot is expected; it is marked as
 * {@code ABORTED} (or dropped from the cluster state when it is still in {@code INIT}) and the repository delete is
 * submitted afterwards from {@code clusterStateProcessed}.</li>
 * </ul>
 *
 * @param request delete snapshot request
 * @param listener listener
 */
public void deleteSnapshots(final DeleteSnapshotRequest request, final ActionListener<Void> listener) {
final String[] snapshotNames = request.snapshots();
final String repoName = request.repository();
logger.info(() -> new ParameterizedMessage("deleting snapshots [{}] from repository [{}]", Strings.arrayToCommaDelimitedString(snapshotNames), repoName));
final Repository repository = repositoriesService.repository(repoName);
// NOTE(review): the trailing listener::onFailure argument is presumably invoked when the update cannot be carried out - confirm
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.NORMAL) {
// The snapshot that is being aborted on the BwC path; only set when that path is taken
private Snapshot runningSnapshot;
// Set when the delete is delegated to the task created by createDeleteStateUpdate; null on the BwC path
private ClusterStateUpdateTask deleteFromRepoTask;
// True when the aborted snapshot was still in INIT state and is removed from the cluster state
private boolean abortedDuringInit = false;
// Snapshot ids that still have to be deleted from the repository once the abort has gone through (BwC path only)
private List<SnapshotId> outstandingDeletes;
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
final Version minNodeVersion = currentState.nodes().getMinNodeVersion();
// Reject multi-snapshot deletes while the cluster still contains a node older than MULTI_DELETE_VERSION
if (snapshotNames.length > 1 && minNodeVersion.before(MULTI_DELETE_VERSION)) {
throw new IllegalArgumentException("Deleting multiple snapshots in a single request is only supported in version [ " + MULTI_DELETE_VERSION + "] but cluster contained node of version [" + currentState.nodes().getMinNodeVersion() + "]");
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> snapshotEntries = findInProgressSnapshots(snapshots, snapshotNames, repoName);
// The ids of the matched in-progress snapshots are passed along to matchingSnapshotIds together with the repository data
final List<SnapshotId> snapshotIds = matchingSnapshotIds(snapshotEntries.stream().map(e -> e.snapshot().getSnapshotId()).collect(Collectors.toList()), repositoryData, snapshotNames, repoName);
// Nothing running that matches, or all nodes are on FULL_CONCURRENCY_VERSION or later: delegate to the delete task
if (snapshotEntries.isEmpty() || minNodeVersion.onOrAfter(SnapshotsService.FULL_CONCURRENCY_VERSION)) {
deleteFromRepoTask = createDeleteStateUpdate(snapshotIds, repoName, repositoryData, Priority.NORMAL, listener);
return deleteFromRepoTask.execute(currentState);
}
// BwC path from here on: a single matching running snapshot is expected
assert snapshotEntries.size() == 1 : "Expected just a single running snapshot but saw " + snapshotEntries;
final SnapshotsInProgress.Entry snapshotEntry = snapshotEntries.get(0);
runningSnapshot = snapshotEntry.snapshot();
// shards and failure are assigned by exactly one of the state branches below and used to build the ABORTED entry
final ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards;
final State state = snapshotEntry.state();
final String failure;
outstandingDeletes = new ArrayList<>(snapshotIds);
if (state != State.INIT) {
// INIT state snapshots won't ever be physically written to the repository but all other states will end up in the repo
outstandingDeletes.add(runningSnapshot.getSnapshotId());
}
if (state == State.INIT) {
// snapshot is still initializing, mark it as aborted
shards = snapshotEntry.shards();
assert shards.isEmpty();
failure = "Snapshot was aborted during initialization";
abortedDuringInit = true;
} else if (state == State.STARTED) {
// snapshot is started - mark every non completed shard as aborted
final SnapshotsInProgress.Entry abortedEntry = snapshotEntry.abort();
shards = abortedEntry.shards();
failure = abortedEntry.failure();
} else {
boolean hasUncompletedShards = false;
// Cleanup in case a node gone missing and snapshot wasn't updated for some reason
for (ObjectCursor<ShardSnapshotStatus> shardStatus : snapshotEntry.shards().values()) {
// Check if we still have shard running on existing nodes
if (shardStatus.value.state().completed() == false && shardStatus.value.nodeId() != null && currentState.nodes().get(shardStatus.value.nodeId()) != null) {
hasUncompletedShards = true;
break;
}
}
if (hasUncompletedShards) {
// snapshot is being finalized - wait for shards to complete finalization process
logger.debug("trying to delete completed snapshot - should wait for shards to finalize on all nodes");
// cluster state is returned unchanged in this case
return currentState;
} else {
// no shards to wait for but a node is gone - this is the only case
// where we force to finish the snapshot
logger.debug("trying to delete completed snapshot with no finalizing shards - can delete immediately");
shards = snapshotEntry.shards();
}
failure = snapshotEntry.failure();
}
// Rebuild the in-progress entries: the matched entry is dropped entirely when it was aborted during INIT,
// otherwise it is replaced by a copy in ABORTED state; every other entry is kept as is
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(snapshots.entries().stream().filter(existing -> abortedDuringInit == false || existing.equals(snapshotEntry) == false).map(existing -> {
if (existing.equals(snapshotEntry)) {
return snapshotEntry.fail(shards, State.ABORTED, failure);
}
return existing;
}).collect(Collectors.toList()))).build();
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
// Delegated path: the delete task created in execute() handles completion itself
if (deleteFromRepoTask != null) {
assert outstandingDeletes == null : "Shouldn't have outstanding deletes after already starting delete task";
deleteFromRepoTask.clusterStateProcessed(source, oldState, newState);
return;
}
if (abortedDuringInit) {
// BwC Path where we removed an outdated INIT state snapshot from the cluster state
logger.info("Successfully aborted snapshot [{}]", runningSnapshot);
// The INIT snapshot itself was never added to outstandingDeletes, so there may be nothing left to delete
if (outstandingDeletes.isEmpty()) {
listener.onResponse(null);
} else {
clusterService.submitStateUpdateTask("delete snapshot", createDeleteStateUpdate(outstandingDeletes, repoName, repositoryData, Priority.IMMEDIATE, listener));
}
return;
}
// Remaining BwC case: wait for the running snapshot to finish before deleting from the repository
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
addListener(runningSnapshot, ActionListener.wrap(result -> {
logger.debug("deleted snapshot completed - deleting files");
// result.v1() is passed where the other delete submissions pass repositoryData
clusterService.submitStateUpdateTask("delete snapshot", createDeleteStateUpdate(outstandingDeletes, repoName, result.v1(), Priority.IMMEDIATE, listener));
}, e -> {
if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) {
logger.warn("cluster-manager failover before deleted snapshot could complete", e);
// Just pass the exception to the transport handler as is so it is retried on the new cluster-manager
listener.onFailure(e);
} else {
logger.warn("deleted snapshot failed", e);
// Any other failure is reported to the caller as a SnapshotMissingException wrapping the cause
listener.onFailure(new SnapshotMissingException(runningSnapshot.getRepository(), runningSnapshot.getSnapshotId(), e));
}
}));
}
@Override
public TimeValue timeout() {
return request.masterNodeTimeout();
}
}, "delete snapshot", listener::onFailure);
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotShardsService method startNewSnapshots.
/**
 * Walks over all snapshot entries and reacts to the ones that concern this node:
 * for entries in {@code STARTED} state, shards in {@code INIT} state assigned to the local node that are not
 * tracked yet are registered and started; for entries in {@code ABORTED} state, locally tracked shards are
 * aborted and untracked shards in {@code ABORTED} state assigned to the local node are reported as failed.
 * Clone entries are skipped.
 *
 * @param snapshotsInProgress snapshots currently tracked in the cluster state
 */
private void startNewSnapshots(SnapshotsInProgress snapshotsInProgress) {
    final String localNodeId = clusterService.localNode().getId();
    for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
        final State entryState = entry.state();
        if (entry.isClone()) {
            // clone entries are not handled here
            continue;
        }
        if (entryState == State.STARTED) {
            final Snapshot snapshot = entry.snapshot();
            final Map<ShardId, IndexShardSnapshotStatus> alreadyTracked = shardSnapshots.getOrDefault(snapshot, emptyMap());
            final Map<ShardId, IndexShardSnapshotStatus> toStart = new HashMap<>();
            for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> cursor : entry.shards()) {
                final ShardId shardId = cursor.key;
                final ShardSnapshotStatus status = cursor.value;
                // pick up shards that are INIT, assigned to this node and not yet tracked locally
                final boolean isNewLocalShard = status.state() == ShardState.INIT
                    && localNodeId.equals(status.nodeId())
                    && alreadyTracked.containsKey(shardId) == false;
                if (isNewLocalShard == false) {
                    continue;
                }
                logger.trace("[{}] - Adding shard to the queue", shardId);
                toStart.put(shardId, IndexShardSnapshotStatus.newInitializing(status.generation()));
            }
            if (toStart.isEmpty() == false) {
                shardSnapshots.computeIfAbsent(snapshot, s -> new HashMap<>()).putAll(toStart);
                startNewShards(entry, toStart);
            }
            continue;
        }
        if (entryState == State.ABORTED) {
            // Abort all running shards for this snapshot
            final Snapshot snapshot = entry.snapshot();
            final Map<ShardId, IndexShardSnapshotStatus> alreadyTracked = shardSnapshots.getOrDefault(snapshot, emptyMap());
            for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> cursor : entry.shards()) {
                final IndexShardSnapshotStatus localStatus = alreadyTracked.get(cursor.key);
                if (localStatus != null) {
                    localStatus.abortIfNotCompleted("snapshot has been aborted");
                    continue;
                }
                // nothing tracked locally: notify cluster-manager that abort has completed by moving to FAILED
                final ShardSnapshotStatus status = cursor.value;
                if (status.state() == ShardState.ABORTED && localNodeId.equals(status.nodeId())) {
                    notifyFailedSnapshotShard(snapshot, cursor.key, status.reason());
                }
            }
        }
    }
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotShardsService method syncShardStatsOnNewMaster.
/**
 * Re-sends the local outcome of shard snapshots that the cluster state in the given event still lists as not
 * completed. For every snapshot in {@code STARTED} or {@code ABORTED} state, each locally tracked shard whose
 * local stage is {@code DONE} or {@code FAILURE} is reported again as successful or failed respectively.
 * The failed-request deduplicator is cleared first so that those notifications are not suppressed.
 *
 * @param event cluster change event carrying the state to compare the local shard statuses against
 */
private void syncShardStatsOnNewMaster(ClusterChangedEvent event) {
    final SnapshotsInProgress inProgress = event.state().custom(SnapshotsInProgress.TYPE);
    if (inProgress == null) {
        return;
    }
    // Clear request deduplicator since we need to send all requests that were potentially not handled by the previous
    // cluster-manager again
    remoteFailedRequestDeduplicator.clear();
    for (SnapshotsInProgress.Entry snapshot : inProgress.entries()) {
        final boolean relevantState = snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED;
        if (relevantState == false) {
            continue;
        }
        final Map<ShardId, IndexShardSnapshotStatus> localShards = currentSnapshotShards(snapshot.snapshot());
        if (localShards == null) {
            continue;
        }
        final ImmutableOpenMap<ShardId, ShardSnapshotStatus> masterShards = snapshot.shards();
        for (Map.Entry<ShardId, IndexShardSnapshotStatus> localShard : localShards.entrySet()) {
            final ShardId shardId = localShard.getKey();
            final ShardSnapshotStatus masterShard = masterShards.get(shardId);
            // only shards the cluster-manager knows about and considers not completed need syncing
            if (masterShard == null || masterShard.state().completed()) {
                continue;
            }
            final IndexShardSnapshotStatus.Copy localCopy = localShard.getValue().asCopy();
            final Stage localStage = localCopy.getStage();
            if (localStage == Stage.DONE) {
                // but we think the shard is done - we need to make new cluster-manager know that the shard is done
                logger.debug("[{}] new cluster-manager thinks the shard [{}] is not completed but the shard is done locally, " + "updating status on the master", snapshot.snapshot(), shardId);
                notifySuccessfulSnapshotShard(snapshot.snapshot(), shardId, localShard.getValue().generation());
            } else if (localStage == Stage.FAILURE) {
                // but we think the shard failed - we need to make new cluster-manager know that the shard failed
                logger.debug("[{}] new cluster-manager thinks the shard [{}] is not completed but the shard failed locally, " + "updating status on master", snapshot.snapshot(), shardId);
                notifyFailedSnapshotShard(snapshot.snapshot(), shardId, localCopy.getFailure());
            }
        }
    }
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method startShardSnapshotAfterClone.
/**
 * Creates a {@link ShardSnapshotStatus} entry for a snapshot after the shard has become available for snapshotting as a result
 * of a snapshot clone completing.
 * <p>
 * The resulting status depends on the routing of the shard's primary in {@code currentState}:
 * <ul>
 * <li>no routing table for the index (e.g. the index was removed concurrently): {@code MISSING};</li>
 * <li>no primary, or primary not assigned to a node: {@code MISSING};</li>
 * <li>primary relocating or initializing: {@code WAITING} on the primary's current node;</li>
 * <li>primary otherwise not started: {@code MISSING};</li>
 * <li>primary started: a status created from the primary's current node id and the shard generation.</li>
 * </ul>
 *
 * @param currentState current cluster state
 * @param shardGeneration shard generation of the shard in the repository
 * @param shardId shard id of the shard that just finished cloning
 * @return shard snapshot status
 */
private static ShardSnapshotStatus startShardSnapshotAfterClone(ClusterState currentState, String shardGeneration, ShardId shardId) {
    // The index may have been removed while the clone was running; without this guard the lookup below would
    // dereference a null index routing table. Both lookups read the same cluster state instance.
    if (currentState.routingTable().index(shardId.getIndex()) == null) {
        return new ShardSnapshotStatus(null, ShardState.MISSING, "missing routing table", shardGeneration);
    }
    final ShardRouting primary = currentState.routingTable().index(shardId.getIndex()).shard(shardId.id()).primaryShard();
    final ShardSnapshotStatus shardSnapshotStatus;
    if (primary == null || !primary.assignedToNode()) {
        shardSnapshotStatus = new ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", shardGeneration);
    } else if (primary.relocating() || primary.initializing()) {
        // the shard cannot be snapshotted yet, wait for it on its current node
        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardGeneration);
    } else if (primary.started() == false) {
        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, "primary shard hasn't been started yet", shardGeneration);
    } else {
        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), shardGeneration);
    }
    return shardSnapshotStatus;
}
Aggregations