use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method shards.
/**
* Calculates the assignment of shards to data nodes for a new snapshot based on the given cluster state and the
* indices that should be included in the snapshot.
*
* @param indices Indices to snapshot
* @param useShardGenerations whether to write {@link ShardGenerations} during the snapshot
* @return list of shard to be included into current snapshot
*/
private static ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shards(SnapshotsInProgress snapshotsInProgress, @Nullable SnapshotDeletionsInProgress deletionsInProgress, Metadata metadata, RoutingTable routingTable, List<IndexId> indices, boolean useShardGenerations, RepositoryData repositoryData, String repoName) {
ImmutableOpenMap.Builder<ShardId, SnapshotsInProgress.ShardSnapshotStatus> builder = ImmutableOpenMap.builder();
final ShardGenerations shardGenerations = repositoryData.shardGenerations();
final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(repoName, snapshotsInProgress.entries());
final boolean readyToExecute = deletionsInProgress == null || deletionsInProgress.getEntries().stream().noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED);
for (IndexId index : indices) {
final String indexName = index.getName();
final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false;
IndexMetadata indexMetadata = metadata.index(indexName);
if (indexMetadata == null) {
// The index was deleted before we managed to start the snapshot - mark it as missing.
builder.put(new ShardId(indexName, IndexMetadata.INDEX_UUID_NA_VALUE, 0), ShardSnapshotStatus.MISSING);
} else {
final IndexRoutingTable indexRoutingTable = routingTable.index(indexName);
for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
final ShardId shardId = indexRoutingTable.shard(i).shardId();
final String shardRepoGeneration;
if (useShardGenerations) {
final String inFlightGeneration = inFlightShardStates.generationForShard(index, shardId.id(), shardGenerations);
if (inFlightGeneration == null && isNewIndex) {
assert shardGenerations.getShardGen(index, shardId.getId()) == null : "Found shard generation for new index [" + index + "]";
shardRepoGeneration = ShardGenerations.NEW_SHARD_GEN;
} else {
shardRepoGeneration = inFlightGeneration;
}
} else {
shardRepoGeneration = null;
}
final ShardSnapshotStatus shardSnapshotStatus;
if (indexRoutingTable == null) {
shardSnapshotStatus = new SnapshotsInProgress.ShardSnapshotStatus(null, ShardState.MISSING, "missing routing table", shardRepoGeneration);
} else {
ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
if (readyToExecute == false || inFlightShardStates.isActive(indexName, i)) {
shardSnapshotStatus = ShardSnapshotStatus.UNASSIGNED_QUEUED;
} else if (primary == null || !primary.assignedToNode()) {
shardSnapshotStatus = new ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", shardRepoGeneration);
} else if (primary.relocating() || primary.initializing()) {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardRepoGeneration);
} else if (!primary.started()) {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, "primary shard hasn't been started yet", shardRepoGeneration);
} else {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), shardRepoGeneration);
}
}
builder.put(shardId, shardSnapshotStatus);
}
}
}
return builder.build();
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method beginSnapshot.
/**
* Starts snapshot.
* <p>
* Creates snapshot in repository and updates snapshot metadata record with list of shards that needs to be processed.
* Note: This method is only used in clusters that contain a node older than {@link #NO_REPO_INITIALIZE_VERSION} to ensure a backwards
* compatible path for initializing the snapshot in the repository is executed.
*
* @param clusterState cluster state
* @param snapshot snapshot meta data
* @param partial allow partial snapshots
* @param userCreateSnapshotListener listener
*/
private void beginSnapshot(final ClusterState clusterState, final SnapshotsInProgress.Entry snapshot, final boolean partial, final List<String> indices, final Repository repository, final ActionListener<Snapshot> userCreateSnapshotListener) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new AbstractRunnable() {
boolean hadAbortedInitializations;
@Override
protected void doRun() {
assert initializingSnapshots.contains(snapshot.snapshot());
if (repository.isReadOnly()) {
throw new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository");
}
final String snapshotName = snapshot.snapshot().getSnapshotId().getName();
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repository.getRepositoryData(repositoryDataListener);
repositoryDataListener.whenComplete(repositoryData -> {
// check if the snapshot name already exists in the repository
if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists");
}
if (clusterState.nodes().getMinNodeVersion().onOrAfter(NO_REPO_INITIALIZE_VERSION) == false) {
// In mixed version clusters we initialize the snapshot in the repository so that in case of a master failover to an
// older version master node snapshot finalization (that assumes initializeSnapshot was called) produces a valid
// snapshot.
repository.initializeSnapshot(snapshot.snapshot().getSnapshotId(), snapshot.indices(), metadataForSnapshot(snapshot, clusterState.metadata()));
}
logger.info("snapshot [{}] started", snapshot.snapshot());
final Version version = minCompatibleVersion(clusterState.nodes().getMinNodeVersion(), repositoryData, null);
if (indices.isEmpty()) {
// No indices in this snapshot - we are done
userCreateSnapshotListener.onResponse(snapshot.snapshot());
endSnapshot(SnapshotsInProgress.startedEntry(snapshot.snapshot(), snapshot.includeGlobalState(), snapshot.partial(), Collections.emptyList(), Collections.emptyList(), threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), ImmutableOpenMap.of(), snapshot.userMetadata(), version), clusterState.metadata(), repositoryData);
return;
}
clusterService.submitStateUpdateTask("update_snapshot [" + snapshot.snapshot() + "]", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.snapshot().equals(snapshot.snapshot()) == false) {
entries.add(entry);
continue;
}
if (entry.state() == State.ABORTED) {
entries.add(entry);
assert entry.shards().isEmpty();
hadAbortedInitializations = true;
} else {
final List<IndexId> indexIds = repositoryData.resolveNewIndices(indices, Collections.emptyMap());
// Replace the snapshot that was just initialized
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards = shards(snapshots, currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY), currentState.metadata(), currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, entry.repository());
if (!partial) {
Tuple<Set<String>, Set<String>> indicesWithMissingShards = indicesWithMissingShards(shards, currentState.metadata());
Set<String> missing = indicesWithMissingShards.v1();
Set<String> closed = indicesWithMissingShards.v2();
if (missing.isEmpty() == false || closed.isEmpty() == false) {
final StringBuilder failureMessage = new StringBuilder();
if (missing.isEmpty() == false) {
failureMessage.append("Indices don't have primary shards ");
failureMessage.append(missing);
}
if (closed.isEmpty() == false) {
if (failureMessage.length() > 0) {
failureMessage.append("; ");
}
failureMessage.append("Indices are closed ");
failureMessage.append(closed);
}
entries.add(new SnapshotsInProgress.Entry(entry, State.FAILED, indexIds, repositoryData.getGenId(), shards, version, failureMessage.toString()));
continue;
}
}
entries.add(new SnapshotsInProgress.Entry(entry, State.STARTED, indexIds, repositoryData.getGenId(), shards, version, null));
}
}
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(entries))).build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] failed to create snapshot", snapshot.snapshot().getSnapshotId()), e);
removeFailedSnapshotFromClusterState(snapshot.snapshot(), e, null, new CleanupAfterErrorListener(userCreateSnapshotListener, e));
}
@Override
public void onNoLongerMaster(String source) {
// We are not longer a master - we shouldn't try to do any cleanup
// The new master will take care of it
logger.warn("[{}] failed to create snapshot - no longer a master", snapshot.snapshot().getSnapshotId());
userCreateSnapshotListener.onFailure(new SnapshotException(snapshot.snapshot(), "master changed during snapshot initialization"));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
// The userCreateSnapshotListener.onResponse() notifies caller that the snapshot was accepted
// for processing. If client wants to wait for the snapshot completion, it can register snapshot
// completion listener in this method. For the snapshot completion to work properly, the snapshot
// should still exist when listener is registered.
userCreateSnapshotListener.onResponse(snapshot.snapshot());
if (hadAbortedInitializations) {
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
assert entry != null;
endSnapshot(entry, newState.metadata(), repositoryData);
} else {
endCompletedSnapshots(newState);
}
}
});
}, this::onFailure);
}
@Override
public void onFailure(Exception e) {
logger.warn(() -> new ParameterizedMessage("failed to create snapshot [{}]", snapshot.snapshot().getSnapshotId()), e);
removeFailedSnapshotFromClusterState(snapshot.snapshot(), e, null, new CleanupAfterErrorListener(userCreateSnapshotListener, e));
}
});
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method deleteSnapshots.
/**
* Deletes snapshots from the repository. In-progress snapshots matched by the delete will be aborted before deleting them.
*
* @param request delete snapshot request
* @param listener listener
*/
public void deleteSnapshots(final DeleteSnapshotRequest request, final ActionListener<Void> listener) {
final String[] snapshotNames = request.snapshots();
final String repoName = request.repository();
logger.info(() -> new ParameterizedMessage("deleting snapshots [{}] from repository [{}]", Strings.arrayToCommaDelimitedString(snapshotNames), repoName));
final Repository repository = repositoriesService.repository(repoName);
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.NORMAL) {
private Snapshot runningSnapshot;
private ClusterStateUpdateTask deleteFromRepoTask;
private boolean abortedDuringInit = false;
private List<SnapshotId> outstandingDeletes;
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
final Version minNodeVersion = currentState.nodes().getMinNodeVersion();
if (snapshotNames.length > 1 && minNodeVersion.before(MULTI_DELETE_VERSION)) {
throw new IllegalArgumentException("Deleting multiple snapshots in a single request is only supported in version [ " + MULTI_DELETE_VERSION + "] but cluster contained node of version [" + currentState.nodes().getMinNodeVersion() + "]");
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> snapshotEntries = findInProgressSnapshots(snapshots, snapshotNames, repoName);
final List<SnapshotId> snapshotIds = matchingSnapshotIds(snapshotEntries.stream().map(e -> e.snapshot().getSnapshotId()).collect(Collectors.toList()), repositoryData, snapshotNames, repoName);
if (snapshotEntries.isEmpty() || minNodeVersion.onOrAfter(SnapshotsService.FULL_CONCURRENCY_VERSION)) {
deleteFromRepoTask = createDeleteStateUpdate(snapshotIds, repoName, repositoryData, Priority.NORMAL, listener);
return deleteFromRepoTask.execute(currentState);
}
assert snapshotEntries.size() == 1 : "Expected just a single running snapshot but saw " + snapshotEntries;
final SnapshotsInProgress.Entry snapshotEntry = snapshotEntries.get(0);
runningSnapshot = snapshotEntry.snapshot();
final ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards;
final State state = snapshotEntry.state();
final String failure;
outstandingDeletes = new ArrayList<>(snapshotIds);
if (state != State.INIT) {
// INIT state snapshots won't ever be physically written to the repository but all other states will end up in the repo
outstandingDeletes.add(runningSnapshot.getSnapshotId());
}
if (state == State.INIT) {
// snapshot is still initializing, mark it as aborted
shards = snapshotEntry.shards();
assert shards.isEmpty();
failure = "Snapshot was aborted during initialization";
abortedDuringInit = true;
} else if (state == State.STARTED) {
// snapshot is started - mark every non completed shard as aborted
final SnapshotsInProgress.Entry abortedEntry = snapshotEntry.abort();
shards = abortedEntry.shards();
failure = abortedEntry.failure();
} else {
boolean hasUncompletedShards = false;
// Cleanup in case a node gone missing and snapshot wasn't updated for some reason
for (ObjectCursor<ShardSnapshotStatus> shardStatus : snapshotEntry.shards().values()) {
// Check if we still have shard running on existing nodes
if (shardStatus.value.state().completed() == false && shardStatus.value.nodeId() != null && currentState.nodes().get(shardStatus.value.nodeId()) != null) {
hasUncompletedShards = true;
break;
}
}
if (hasUncompletedShards) {
// snapshot is being finalized - wait for shards to complete finalization process
logger.debug("trying to delete completed snapshot - should wait for shards to finalize on all nodes");
return currentState;
} else {
// no shards to wait for but a node is gone - this is the only case
// where we force to finish the snapshot
logger.debug("trying to delete completed snapshot with no finalizing shards - can delete immediately");
shards = snapshotEntry.shards();
}
failure = snapshotEntry.failure();
}
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(snapshots.entries().stream().filter(existing -> abortedDuringInit == false || existing.equals(snapshotEntry) == false).map(existing -> {
if (existing.equals(snapshotEntry)) {
return snapshotEntry.fail(shards, State.ABORTED, failure);
}
return existing;
}).collect(Collectors.toList()))).build();
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (deleteFromRepoTask != null) {
assert outstandingDeletes == null : "Shouldn't have outstanding deletes after already starting delete task";
deleteFromRepoTask.clusterStateProcessed(source, oldState, newState);
return;
}
if (abortedDuringInit) {
// BwC Path where we removed an outdated INIT state snapshot from the cluster state
logger.info("Successfully aborted snapshot [{}]", runningSnapshot);
if (outstandingDeletes.isEmpty()) {
listener.onResponse(null);
} else {
clusterService.submitStateUpdateTask("delete snapshot", createDeleteStateUpdate(outstandingDeletes, repoName, repositoryData, Priority.IMMEDIATE, listener));
}
return;
}
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
addListener(runningSnapshot, ActionListener.wrap(result -> {
logger.debug("deleted snapshot completed - deleting files");
clusterService.submitStateUpdateTask("delete snapshot", createDeleteStateUpdate(outstandingDeletes, repoName, result.v1(), Priority.IMMEDIATE, listener));
}, e -> {
if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) {
logger.warn("master failover before deleted snapshot could complete", e);
// Just pass the exception to the transport handler as is so it is retried on the new master
listener.onFailure(e);
} else {
logger.warn("deleted snapshot failed", e);
listener.onFailure(new SnapshotMissingException(runningSnapshot.getRepository(), runningSnapshot.getSnapshotId(), e));
}
}));
}
@Override
public TimeValue timeout() {
return request.masterNodeTimeout();
}
}, "delete snapshot", listener::onFailure);
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method waitingShardsStartedOrUnassigned.
private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snapshotsInProgress, ClusterChangedEvent event) {
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.state() == State.STARTED) {
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardStatus : entry.shards()) {
if (shardStatus.value.state() != ShardState.WAITING) {
continue;
}
final ShardId shardId = shardStatus.key;
if (event.indexRoutingTableChanged(shardId.getIndexName())) {
IndexRoutingTable indexShardRoutingTable = event.state().getRoutingTable().index(shardId.getIndex());
if (indexShardRoutingTable == null) {
// index got removed concurrently and we have to fail WAITING state shards
return true;
}
ShardRouting shardRouting = indexShardRoutingTable.shard(shardId.id()).primaryShard();
if (shardRouting != null && (shardRouting.started() || shardRouting.unassigned())) {
return true;
}
}
}
}
}
return false;
}
use of org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project OpenSearch by opensearch-project.
the class SnapshotsService method startCloning.
/**
* Determine the number of shards in each index of a clone operation and update the cluster state accordingly.
*
* @param repository repository to run operation on
* @param cloneEntry clone operation in the cluster state
*/
private void startCloning(Repository repository, SnapshotsInProgress.Entry cloneEntry) {
final List<IndexId> indices = cloneEntry.indices();
final SnapshotId sourceSnapshot = cloneEntry.source();
final Snapshot targetSnapshot = cloneEntry.snapshot();
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
// Exception handler for IO exceptions with loading index and repo metadata
final Consumer<Exception> onFailure = e -> {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
removeFailedSnapshotFromClusterState(targetSnapshot, e, null, null);
};
// 1. step, load SnapshotInfo to make sure that source snapshot was successful for the indices we want to clone
// TODO: we could skip this step for snapshots with state SUCCESS
final StepListener<SnapshotInfo> snapshotInfoListener = new StepListener<>();
executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshot)));
final StepListener<Collection<Tuple<IndexId, Integer>>> allShardCountsListener = new StepListener<>();
final GroupedActionListener<Tuple<IndexId, Integer>> shardCountListener = new GroupedActionListener<>(allShardCountsListener, indices.size());
snapshotInfoListener.whenComplete(snapshotInfo -> {
for (IndexId indexId : indices) {
if (RestoreService.failed(snapshotInfo, indexId.getName())) {
throw new SnapshotException(targetSnapshot, "Can't clone index [" + indexId + "] because its snapshot was not successful.");
}
}
// 2. step, load the number of shards we have in each index to be cloned from the index metadata.
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
for (IndexId index : indices) {
executor.execute(ActionRunnable.supply(shardCountListener, () -> {
final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index);
return Tuple.tuple(index, metadata.getNumberOfShards());
}));
}
}, onFailure));
}, onFailure);
// 3. step, we have all the shard counts, now update the cluster state to have clone jobs in the snap entry
allShardCountsListener.whenComplete(counts -> repository.executeConsistentStateUpdate(repoData -> new ClusterStateUpdateTask() {
private SnapshotsInProgress.Entry updatedEntry;
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> updatedEntries = new ArrayList<>(snapshotsInProgress.entries());
boolean changed = false;
final String localNodeId = currentState.nodes().getLocalNodeId();
final String repoName = cloneEntry.repository();
final ShardGenerations shardGenerations = repoData.shardGenerations();
for (int i = 0; i < updatedEntries.size(); i++) {
if (cloneEntry.snapshot().equals(updatedEntries.get(i).snapshot())) {
final ImmutableOpenMap.Builder<RepositoryShardId, ShardSnapshotStatus> clonesBuilder = ImmutableOpenMap.builder();
final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(repoName, snapshotsInProgress.entries());
for (Tuple<IndexId, Integer> count : counts) {
for (int shardId = 0; shardId < count.v2(); shardId++) {
final RepositoryShardId repoShardId = new RepositoryShardId(count.v1(), shardId);
final String indexName = repoShardId.indexName();
if (inFlightShardStates.isActive(indexName, shardId)) {
clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED);
} else {
clonesBuilder.put(repoShardId, new ShardSnapshotStatus(localNodeId, inFlightShardStates.generationForShard(repoShardId.index(), shardId, shardGenerations)));
}
}
}
updatedEntry = cloneEntry.withClones(clonesBuilder.build());
updatedEntries.set(i, updatedEntry);
changed = true;
break;
}
}
return updateWithSnapshots(currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null);
}
@Override
public void onFailure(String source, Exception e) {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
failAllListenersOnMasterFailOver(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
initializingClones.remove(targetSnapshot);
if (updatedEntry != null) {
final Snapshot target = updatedEntry.snapshot();
final SnapshotId sourceSnapshot = updatedEntry.source();
for (ObjectObjectCursor<RepositoryShardId, ShardSnapshotStatus> indexClone : updatedEntry.clones()) {
final ShardSnapshotStatus shardStatusBefore = indexClone.value;
if (shardStatusBefore.state() != ShardState.INIT) {
continue;
}
final RepositoryShardId repoShardId = indexClone.key;
runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository);
}
} else {
// Extremely unlikely corner case of master failing over between between starting the clone and
// starting shard clones.
logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry);
}
}
}, "start snapshot clone", onFailure), onFailure);
}
Aggregations