use of org.opensearch.cluster.RestoreInProgress in project OpenSearch by opensearch-project.
the class RestoreClusterStateListener method clusterChanged.
@Override
public void clusterChanged(ClusterChangedEvent changedEvent) {
final RestoreInProgress.Entry prevEntry = restoreInProgress(changedEvent.previousState(), uuid);
final RestoreInProgress.Entry newEntry = restoreInProgress(changedEvent.state(), uuid);
if (prevEntry == null) {
// When there is a master failure after a restore has been started, this listener might not be registered
// on the current master and as such it might miss some intermediary cluster states due to batching.
// Clean up listener in that case and acknowledge completion of restore operation to client.
clusterService.removeListener(this);
listener.onResponse(new RestoreSnapshotResponse((RestoreInfo) null));
} else if (newEntry == null) {
clusterService.removeListener(this);
ImmutableOpenMap<ShardId, RestoreInProgress.ShardRestoreStatus> shards = prevEntry.shards();
assert prevEntry.state().completed() : "expected completed snapshot state but was " + prevEntry.state();
assert RestoreService.completed(shards) : "expected all restore entries to be completed";
RestoreInfo ri = new RestoreInfo(prevEntry.snapshot().getSnapshotId().getName(), prevEntry.indices(), shards.size(), shards.size() - RestoreService.failedShards(shards));
RestoreSnapshotResponse response = new RestoreSnapshotResponse(ri);
logger.debug("restore of [{}] completed", prevEntry.snapshot().getSnapshotId());
listener.onResponse(response);
} else {
// restore not completed yet, wait for next cluster state update
}
}
use of org.opensearch.cluster.RestoreInProgress in project OpenSearch by opensearch-project.
the class RestoreInProgressAllocationDecider method canAllocate.
@Override
public Decision canAllocate(final ShardRouting shardRouting, final RoutingAllocation allocation) {
final RecoverySource recoverySource = shardRouting.recoverySource();
if (recoverySource == null || recoverySource.getType() != RecoverySource.Type.SNAPSHOT) {
return allocation.decision(Decision.YES, NAME, "ignored as shard is not being recovered from a snapshot");
}
final RecoverySource.SnapshotRecoverySource source = (RecoverySource.SnapshotRecoverySource) recoverySource;
if (source.restoreUUID().equals(RecoverySource.SnapshotRecoverySource.NO_API_RESTORE_UUID)) {
return allocation.decision(Decision.YES, NAME, "not an API-level restore");
}
final RestoreInProgress restoresInProgress = allocation.custom(RestoreInProgress.TYPE);
if (restoresInProgress != null) {
RestoreInProgress.Entry restoreInProgress = restoresInProgress.get(source.restoreUUID());
if (restoreInProgress != null) {
RestoreInProgress.ShardRestoreStatus shardRestoreStatus = restoreInProgress.shards().get(shardRouting.shardId());
if (shardRestoreStatus != null && shardRestoreStatus.state().completed() == false) {
assert shardRestoreStatus.state() != RestoreInProgress.State.SUCCESS : "expected shard [" + shardRouting + "] to be in initializing state but got [" + shardRestoreStatus.state() + "]";
return allocation.decision(Decision.YES, NAME, "shard is currently being restored");
}
}
}
return allocation.decision(Decision.NO, NAME, "shard has failed to be restored from the snapshot [%s] because of [%s] - " + "manually close or delete the index [%s] in order to retry to restore the snapshot again or use the reroute API to force the " + "allocation of an empty primary shard", source.snapshot(), shardRouting.unassignedInfo().getDetails(), shardRouting.getIndexName());
}
use of org.opensearch.cluster.RestoreInProgress in project OpenSearch by opensearch-project.
the class SnapshotsService method createDeleteStateUpdate.
private ClusterStateUpdateTask createDeleteStateUpdate(List<SnapshotId> snapshotIds, String repoName, RepositoryData repositoryData, Priority priority, ActionListener<Void> listener) {
// Short circuit to noop state update if there isn't anything to delete
if (snapshotIds.isEmpty()) {
return new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
listener.onResponse(null);
}
};
}
return new ClusterStateUpdateTask(priority) {
private SnapshotDeletionsInProgress.Entry newDelete;
private boolean reusedExistingDelete = false;
// Snapshots that had all of their shard snapshots in queued state and thus were removed from the
// cluster state right away
private final Collection<Snapshot> completedNoCleanup = new ArrayList<>();
// Snapshots that were aborted and that already wrote data to the repository and now have to be deleted
// from the repository after the cluster state update
private final Collection<SnapshotsInProgress.Entry> completedWithCleanup = new ArrayList<>();
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
final Version minNodeVersion = currentState.nodes().getMinNodeVersion();
if (minNodeVersion.before(FULL_CONCURRENCY_VERSION)) {
if (deletionsInProgress.hasDeletionsInProgress()) {
throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotIds.get(0)), "cannot delete - another snapshot is currently being deleted in [" + deletionsInProgress + "]");
}
}
final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY);
if (repositoryCleanupInProgress.hasCleanupInProgress()) {
throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotIds.get(0)), "cannot delete snapshots while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]");
}
final RestoreInProgress restoreInProgress = currentState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY);
for (RestoreInProgress.Entry entry : restoreInProgress) {
if (repoName.equals(entry.snapshot().getRepository()) && snapshotIds.contains(entry.snapshot().getSnapshotId())) {
throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotIds.get(0)), "cannot delete snapshot during a restore in progress in [" + restoreInProgress + "]");
}
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final Set<SnapshotId> activeCloneSources = snapshots.entries().stream().filter(SnapshotsInProgress.Entry::isClone).map(SnapshotsInProgress.Entry::source).collect(Collectors.toSet());
for (SnapshotId snapshotId : snapshotIds) {
if (activeCloneSources.contains(snapshotId)) {
throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotId), "cannot delete snapshot while it is being cloned");
}
}
// Snapshot ids that will have to be physically deleted from the repository
final Set<SnapshotId> snapshotIdsRequiringCleanup = new HashSet<>(snapshotIds);
final SnapshotsInProgress updatedSnapshots;
if (minNodeVersion.onOrAfter(FULL_CONCURRENCY_VERSION)) {
updatedSnapshots = SnapshotsInProgress.of(snapshots.entries().stream().map(existing -> {
if (existing.state() == State.STARTED && snapshotIdsRequiringCleanup.contains(existing.snapshot().getSnapshotId())) {
// snapshot is started - mark every non completed shard as aborted
final SnapshotsInProgress.Entry abortedEntry = existing.abort();
if (abortedEntry == null) {
// No work has been done for this snapshot yet so we remove it from the cluster state directly
final Snapshot existingNotYetStartedSnapshot = existing.snapshot();
// any leaked listener assertions
if (endingSnapshots.add(existingNotYetStartedSnapshot)) {
completedNoCleanup.add(existingNotYetStartedSnapshot);
}
snapshotIdsRequiringCleanup.remove(existingNotYetStartedSnapshot.getSnapshotId());
} else if (abortedEntry.state().completed()) {
completedWithCleanup.add(abortedEntry);
}
return abortedEntry;
}
return existing;
}).filter(Objects::nonNull).collect(Collectors.toList()));
if (snapshotIdsRequiringCleanup.isEmpty()) {
// We only saw snapshots that could be removed from the cluster state right away, no need to update the deletions
return updateWithSnapshots(currentState, updatedSnapshots, null);
}
} else {
if (snapshots.entries().isEmpty() == false) {
// However other snapshots are running - cannot continue
throw new ConcurrentSnapshotExecutionException(repoName, snapshotIds.toString(), "another snapshot is currently running cannot delete");
}
updatedSnapshots = snapshots;
}
// add the snapshot deletion to the cluster state
final SnapshotDeletionsInProgress.Entry replacedEntry = deletionsInProgress.getEntries().stream().filter(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.WAITING).findFirst().orElse(null);
if (replacedEntry == null) {
final Optional<SnapshotDeletionsInProgress.Entry> foundDuplicate = deletionsInProgress.getEntries().stream().filter(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED && entry.getSnapshots().containsAll(snapshotIds)).findFirst();
if (foundDuplicate.isPresent()) {
newDelete = foundDuplicate.get();
reusedExistingDelete = true;
return currentState;
}
final List<SnapshotId> toDelete = Collections.unmodifiableList(new ArrayList<>(snapshotIdsRequiringCleanup));
ensureBelowConcurrencyLimit(repoName, toDelete.get(0).getName(), snapshots, deletionsInProgress);
newDelete = new SnapshotDeletionsInProgress.Entry(toDelete, repoName, threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), updatedSnapshots.entries().stream().filter(entry -> repoName.equals(entry.repository())).noneMatch(SnapshotsService::isWritingToRepository) && deletionsInProgress.getEntries().stream().noneMatch(entry -> repoName.equals(entry.repository()) && entry.state() == SnapshotDeletionsInProgress.State.STARTED) ? SnapshotDeletionsInProgress.State.STARTED : SnapshotDeletionsInProgress.State.WAITING);
} else {
newDelete = replacedEntry.withAddedSnapshots(snapshotIdsRequiringCleanup);
}
return updateWithSnapshots(currentState, updatedSnapshots, (replacedEntry == null ? deletionsInProgress : deletionsInProgress.withRemovedEntry(replacedEntry.uuid())).withAddedEntry(newDelete));
}
@Override
public void onFailure(String source, Exception e) {
endingSnapshots.removeAll(completedNoCleanup);
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (completedNoCleanup.isEmpty() == false) {
logger.info("snapshots {} aborted", completedNoCleanup);
}
for (Snapshot snapshot : completedNoCleanup) {
failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, SnapshotsInProgress.ABORTED_FAILURE_TEXT));
}
if (newDelete == null) {
listener.onResponse(null);
} else {
addDeleteListener(newDelete.uuid(), listener);
if (reusedExistingDelete) {
return;
}
if (newDelete.state() == SnapshotDeletionsInProgress.State.STARTED) {
if (tryEnterRepoLoop(repoName)) {
deleteSnapshotsFromRepository(newDelete, repositoryData, newState.nodes().getMinNodeVersion());
} else {
logger.trace("Delete [{}] could not execute directly and was queued", newDelete);
}
} else {
for (SnapshotsInProgress.Entry completedSnapshot : completedWithCleanup) {
endSnapshot(completedSnapshot, newState.metadata(), repositoryData);
}
}
}
}
};
}
use of org.opensearch.cluster.RestoreInProgress in project OpenSearch by opensearch-project.
the class SharedClusterSnapshotRestoreIT method unrestorableUseCase.
/**
* Execute the unrestorable test use case *
*/
private void unrestorableUseCase(final String indexName, final Settings createIndexSettings, final Settings repositorySettings, final Settings restoreIndexSettings, final Consumer<UnassignedInfo> checkUnassignedInfo, final Runnable fixUpAction) throws Exception {
// create a test repository
final Path repositoryLocation = randomRepoPath();
createRepository("test-repo", "fs", repositoryLocation);
// create a test index
assertAcked(prepareCreate(indexName, Settings.builder().put(createIndexSettings)));
// index some documents
final int nbDocs = scaledRandomIntBetween(10, 100);
indexRandomDocs(indexName, nbDocs);
// create a snapshot
final NumShards numShards = getNumShards(indexName);
final SnapshotInfo snapshotInfo = createSnapshot("test-repo", "test-snap", Collections.singletonList(indexName));
assertThat(snapshotInfo.successfulShards(), equalTo(numShards.numPrimaries));
// delete the test index
assertAcked(client().admin().indices().prepareDelete(indexName));
// update the test repository
assertAcked(clusterAdmin().preparePutRepository("test-repo").setType("mock").setSettings(Settings.builder().put("location", repositoryLocation).put(repositorySettings).build()));
// attempt to restore the snapshot with the given settings
RestoreSnapshotResponse restoreResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap").setIndices(indexName).setIndexSettings(restoreIndexSettings).setWaitForCompletion(true).get();
// check that all shards failed during restore
assertThat(restoreResponse.getRestoreInfo().totalShards(), equalTo(numShards.numPrimaries));
assertThat(restoreResponse.getRestoreInfo().successfulShards(), equalTo(0));
ClusterStateResponse clusterStateResponse = clusterAdmin().prepareState().setCustoms(true).setRoutingTable(true).get();
// check that there is no restore in progress
RestoreInProgress restoreInProgress = clusterStateResponse.getState().custom(RestoreInProgress.TYPE);
assertNotNull("RestoreInProgress must be not null", restoreInProgress);
assertTrue("RestoreInProgress must be empty but found entries in " + restoreInProgress, restoreInProgress.isEmpty());
// check that the shards have been created but are not assigned
assertThat(clusterStateResponse.getState().getRoutingTable().allShards(indexName), hasSize(numShards.totalNumShards));
// check that every primary shard is unassigned
for (ShardRouting shard : clusterStateResponse.getState().getRoutingTable().allShards(indexName)) {
if (shard.primary()) {
assertThat(shard.state(), equalTo(ShardRoutingState.UNASSIGNED));
assertThat(shard.recoverySource().getType(), equalTo(RecoverySource.Type.SNAPSHOT));
assertThat(shard.unassignedInfo().getLastAllocationStatus(), equalTo(UnassignedInfo.AllocationStatus.DECIDERS_NO));
checkUnassignedInfo.accept(shard.unassignedInfo());
}
}
// update the test repository in order to make it work
createRepository("test-repo", "fs", repositoryLocation);
// execute action to eventually fix the situation
fixUpAction.run();
// delete the index and restore again
assertAcked(client().admin().indices().prepareDelete(indexName));
restoreResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap").setWaitForCompletion(true).get();
assertThat(restoreResponse.getRestoreInfo().totalShards(), equalTo(numShards.numPrimaries));
assertThat(restoreResponse.getRestoreInfo().successfulShards(), equalTo(numShards.numPrimaries));
// Wait for the shards to be assigned
ensureGreen(indexName);
refresh(indexName);
assertDocCount(indexName, nbDocs);
}
use of org.opensearch.cluster.RestoreInProgress in project OpenSearch by opensearch-project.
the class MetadataDeleteIndexService method deleteIndices.
/**
* Delete some indices from the cluster state.
*/
public ClusterState deleteIndices(ClusterState currentState, Set<Index> indices) {
final Metadata meta = currentState.metadata();
final Set<Index> indicesToDelete = new HashSet<>();
final Map<Index, DataStream> backingIndices = new HashMap<>();
for (Index index : indices) {
IndexMetadata im = meta.getIndexSafe(index);
IndexAbstraction.DataStream parent = meta.getIndicesLookup().get(im.getIndex().getName()).getParentDataStream();
if (parent != null) {
if (parent.getWriteIndex().equals(im)) {
throw new IllegalArgumentException("index [" + index.getName() + "] is the write index for data stream [" + parent.getName() + "] and cannot be deleted");
} else {
backingIndices.put(index, parent.getDataStream());
}
}
indicesToDelete.add(im.getIndex());
}
// Check if index deletion conflicts with any running snapshots
Set<Index> snapshottingIndices = SnapshotsService.snapshottingIndices(currentState, indicesToDelete);
if (snapshottingIndices.isEmpty() == false) {
throw new SnapshotInProgressException("Cannot delete indices that are being snapshotted: " + snapshottingIndices + ". Try again after snapshot finishes or cancel the currently running snapshot.");
}
RoutingTable.Builder routingTableBuilder = RoutingTable.builder(currentState.routingTable());
Metadata.Builder metadataBuilder = Metadata.builder(meta);
ClusterBlocks.Builder clusterBlocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks());
final IndexGraveyard.Builder graveyardBuilder = IndexGraveyard.builder(metadataBuilder.indexGraveyard());
final int previousGraveyardSize = graveyardBuilder.tombstones().size();
for (final Index index : indices) {
String indexName = index.getName();
logger.info("{} deleting index", index);
routingTableBuilder.remove(indexName);
clusterBlocksBuilder.removeIndexBlocks(indexName);
metadataBuilder.remove(indexName);
if (backingIndices.containsKey(index)) {
DataStream parent = metadataBuilder.dataStream(backingIndices.get(index).getName());
metadataBuilder.put(parent.removeBackingIndex(index));
}
}
// add tombstones to the cluster state for each deleted index
final IndexGraveyard currentGraveyard = graveyardBuilder.addTombstones(indices).build(settings);
// the new graveyard set on the metadata
metadataBuilder.indexGraveyard(currentGraveyard);
logger.trace("{} tombstones purged from the cluster state. Previous tombstone size: {}. Current tombstone size: {}.", graveyardBuilder.getNumPurged(), previousGraveyardSize, currentGraveyard.getTombstones().size());
Metadata newMetadata = metadataBuilder.build();
ClusterBlocks blocks = clusterBlocksBuilder.build();
// update snapshot restore entries
ImmutableOpenMap<String, ClusterState.Custom> customs = currentState.getCustoms();
final RestoreInProgress restoreInProgress = currentState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY);
RestoreInProgress updatedRestoreInProgress = RestoreService.updateRestoreStateWithDeletedIndices(restoreInProgress, indices);
if (updatedRestoreInProgress != restoreInProgress) {
ImmutableOpenMap.Builder<String, ClusterState.Custom> builder = ImmutableOpenMap.builder(customs);
builder.put(RestoreInProgress.TYPE, updatedRestoreInProgress);
customs = builder.build();
}
return allocationService.reroute(ClusterState.builder(currentState).routingTable(routingTableBuilder.build()).metadata(newMetadata).blocks(blocks).customs(customs).build(), "deleted indices [" + indices + "]");
}
Aggregations