Search in sources :

Example 1 with SnapshotDeletionsInProgress

Use of org.opensearch.cluster.SnapshotDeletionsInProgress in project OpenSearch by opensearch-project.

The class TransportCleanupRepositoryAction, method cleanupRepo.

/**
 * Runs cleanup operations on the given repository.
 * @param repositoryName Repository to clean up
 * @param listener Listener for cleanup result
 */
private void cleanupRepo(String repositoryName, ActionListener<RepositoryCleanupResult> listener) {
    final Repository repository = repositoriesService.repository(repositoryName);
    if (repository instanceof BlobStoreRepository == false) {
        listener.onFailure(new IllegalArgumentException("Repository [" + repositoryName + "] does not support repository cleanup"));
        return;
    }
    final BlobStoreRepository blobStoreRepository = (BlobStoreRepository) repository;
    final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
    repository.getRepositoryData(repositoryDataListener);
    repositoryDataListener.whenComplete(repositoryData -> {
        final long repositoryStateId = repositoryData.getGenId();
        logger.info("Running cleanup operations on repository [{}][{}]", repositoryName, repositoryStateId);
        clusterService.submitStateUpdateTask("cleanup repository [" + repositoryName + "][" + repositoryStateId + ']', new ClusterStateUpdateTask() {

            private boolean startedCleanup = false;

            @Override
            public ClusterState execute(ClusterState currentState) {
                final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY);
                if (repositoryCleanupInProgress.hasCleanupInProgress()) {
                    throw new IllegalStateException("Cannot cleanup [" + repositoryName + "] - a repository cleanup is already in-progress in [" + repositoryCleanupInProgress + "]");
                }
                final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
                if (deletionsInProgress.hasDeletionsInProgress()) {
                    throw new IllegalStateException("Cannot cleanup [" + repositoryName + "] - a snapshot is currently being deleted in [" + deletionsInProgress + "]");
                }
                SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
                if (snapshots.entries().isEmpty() == false) {
                    throw new IllegalStateException("Cannot cleanup [" + repositoryName + "] - a snapshot is currently running in [" + snapshots + "]");
                }
                return ClusterState.builder(currentState).putCustom(RepositoryCleanupInProgress.TYPE, new RepositoryCleanupInProgress(Collections.singletonList(RepositoryCleanupInProgress.startedEntry(repositoryName, repositoryStateId)))).build();
            }

            @Override
            public void onFailure(String source, Exception e) {
                after(e, null);
            }

            @Override
            public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                startedCleanup = true;
                logger.debug("Initialized repository cleanup in cluster state for [{}][{}]", repositoryName, repositoryStateId);
                threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> blobStoreRepository.cleanup(repositoryStateId, snapshotsService.minCompatibleVersion(newState.nodes().getMinNodeVersion(), repositoryData, null), ActionListener.wrap(result -> after(null, result), e -> after(e, null)))));
            }

            private void after(@Nullable Exception failure, @Nullable RepositoryCleanupResult result) {
                if (failure == null) {
                    logger.debug("Finished repository cleanup operations on [{}][{}]", repositoryName, repositoryStateId);
                } else {
                    logger.debug(() -> new ParameterizedMessage("Failed to finish repository cleanup operations on [{}][{}]", repositoryName, repositoryStateId), failure);
                }
                assert failure != null || result != null;
                if (startedCleanup == false) {
                    logger.debug("No cleanup task to remove from cluster state because we failed to start one", failure);
                    listener.onFailure(failure);
                    return;
                }
                clusterService.submitStateUpdateTask("remove repository cleanup task [" + repositoryName + "][" + repositoryStateId + ']', new ClusterStateUpdateTask() {

                    @Override
                    public ClusterState execute(ClusterState currentState) {
                        return removeInProgressCleanup(currentState);
                    }

                    @Override
                    public void onFailure(String source, Exception e) {
                        if (failure != null) {
                            e.addSuppressed(failure);
                        }
                        logger.warn(() -> new ParameterizedMessage("[{}] failed to remove repository cleanup task", repositoryName), e);
                        listener.onFailure(e);
                    }

                    @Override
                    public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                        if (failure == null) {
                            logger.info("Done with repository cleanup on [{}][{}] with result [{}]", repositoryName, repositoryStateId, result);
                            listener.onResponse(result);
                        } else {
                            logger.warn(() -> new ParameterizedMessage("Failed to run repository cleanup operations on [{}][{}]", repositoryName, repositoryStateId), failure);
                            listener.onFailure(failure);
                        }
                    }
                });
            }
        });
    }, listener::onFailure);
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) RepositoryCleanupInProgress(org.opensearch.cluster.RepositoryCleanupInProgress) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) IOException(java.io.IOException) RepositoryData(org.opensearch.repositories.RepositoryData) SnapshotDeletionsInProgress(org.opensearch.cluster.SnapshotDeletionsInProgress) RepositoryCleanupResult(org.opensearch.repositories.RepositoryCleanupResult) BlobStoreRepository(org.opensearch.repositories.blobstore.BlobStoreRepository) Repository(org.opensearch.repositories.Repository) BlobStoreRepository(org.opensearch.repositories.blobstore.BlobStoreRepository) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress) StepListener(org.opensearch.action.StepListener) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage)
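
Read together, the cluster state update above only admits a cleanup when no other repository-level operation is tracked in the cluster state. The helper below is a minimal sketch of those three guards in isolation, assuming access to the current ClusterState; the class and method names are illustrative, not part of OpenSearch.

import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.RepositoryCleanupInProgress;
import org.opensearch.cluster.SnapshotDeletionsInProgress;
import org.opensearch.cluster.SnapshotsInProgress;

// Illustrative helper, not part of OpenSearch.
final class RepositoryCleanupGuards {

    private RepositoryCleanupGuards() {}

    /**
     * Mirrors the checks in cleanupRepo: a cleanup may only start while no other cleanup,
     * snapshot deletion, or snapshot is in flight in the cluster.
     */
    static void ensureNoConflictingOperations(ClusterState currentState, String repositoryName) {
        final RepositoryCleanupInProgress cleanups = currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY);
        if (cleanups.hasCleanupInProgress()) {
            throw new IllegalStateException("Cannot cleanup [" + repositoryName + "] - a repository cleanup is already in-progress");
        }
        final SnapshotDeletionsInProgress deletions = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
        if (deletions.hasDeletionsInProgress()) {
            throw new IllegalStateException("Cannot cleanup [" + repositoryName + "] - a snapshot is currently being deleted");
        }
        final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
        if (snapshots.entries().isEmpty() == false) {
            throw new IllegalStateException("Cannot cleanup [" + repositoryName + "] - a snapshot is currently running");
        }
    }
}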

Example 2 with SnapshotDeletionsInProgress

Use of org.opensearch.cluster.SnapshotDeletionsInProgress in project OpenSearch by opensearch-project.

The class ConcurrentSnapshotsIT, method testMasterFailOverWithQueuedDeletes.

public void testMasterFailOverWithQueuedDeletes() throws Exception {
    internalCluster().startMasterOnlyNodes(3);
    final String dataNode = internalCluster().startDataOnlyNode();
    final String repoName = "test-repo";
    createRepository(repoName, "mock");
    final String firstIndex = "index-one";
    createIndexWithContent(firstIndex);
    final String firstSnapshot = "snapshot-one";
    blockDataNode(repoName, dataNode);
    final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromNonMasterClient(repoName, firstSnapshot);
    waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
    final String dataNode2 = internalCluster().startDataOnlyNode();
    ensureStableCluster(5);
    final String secondIndex = "index-two";
    createIndexWithContent(secondIndex, dataNode2, dataNode);
    final String secondSnapshot = "snapshot-two";
    final ActionFuture<CreateSnapshotResponse> secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot);
    logger.info("--> wait for snapshot on second data node to finish");
    awaitClusterState(state -> {
        final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
        return snapshotsInProgress.entries().size() == 2 && snapshotHasCompletedShard(secondSnapshot, snapshotsInProgress);
    });
    final ActionFuture<AcknowledgedResponse> firstDeleteFuture = startDeleteFromNonMasterClient(repoName, firstSnapshot);
    awaitNDeletionsInProgress(1);
    blockNodeOnAnyFiles(repoName, dataNode2);
    final ActionFuture<CreateSnapshotResponse> snapshotThreeFuture = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
    waitForBlock(dataNode2, repoName, TimeValue.timeValueSeconds(30L));
    assertThat(firstSnapshotResponse.isDone(), is(false));
    assertThat(secondSnapshotResponse.isDone(), is(false));
    logger.info("--> waiting for all three snapshots to show up as in-progress");
    assertBusy(() -> assertThat(currentSnapshots(repoName), hasSize(3)), 30L, TimeUnit.SECONDS);
    final ActionFuture<AcknowledgedResponse> deleteAllSnapshots = startDeleteFromNonMasterClient(repoName, "*");
    logger.info("--> wait for delete to be enqueued in cluster state");
    awaitClusterState(state -> {
        final SnapshotDeletionsInProgress deletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
        return deletionsInProgress.getEntries().size() == 1 && deletionsInProgress.getEntries().get(0).getSnapshots().size() == 3;
    });
    logger.info("--> waiting for second snapshot to finish and the other two snapshots to become aborted");
    assertBusy(() -> {
        assertThat(currentSnapshots(repoName), hasSize(2));
        for (SnapshotsInProgress.Entry entry : clusterService().state().custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) {
            assertThat(entry.state(), is(SnapshotsInProgress.State.ABORTED));
            assertThat(entry.snapshot().getSnapshotId().getName(), not(secondSnapshot));
        }
    }, 30L, TimeUnit.SECONDS);
    logger.info("--> stopping current master node");
    internalCluster().stopCurrentMasterNode();
    unblockNode(repoName, dataNode);
    unblockNode(repoName, dataNode2);
    for (ActionFuture<AcknowledgedResponse> deleteFuture : Arrays.asList(firstDeleteFuture, deleteAllSnapshots)) {
        try {
            assertAcked(deleteFuture.actionGet());
        } catch (RepositoryException rex) {
            // rarely the master node fails over twice when shutting down the initial master and fails the transport listener
            assertThat(rex.repository(), is("_all"));
            assertThat(rex.getMessage(), endsWith("Failed to update cluster state during repository operation"));
        } catch (SnapshotMissingException sme) {
            // very rarely a master node fail-over happens at such a time that the client on the data-node sees a disconnect exception
            // after the master has already started the delete, leading to the delete retry to run into a situation where the
            // snapshot has already been deleted potentially
            assertThat(sme.getSnapshotName(), is(firstSnapshot));
        }
    }
    expectThrows(SnapshotException.class, snapshotThreeFuture::actionGet);
    logger.info("--> verify that all snapshots are gone and no more work is left in the cluster state");
    assertBusy(() -> {
        assertThat(client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(), empty());
        final ClusterState state = clusterService().state();
        final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE);
        assertThat(snapshotsInProgress.entries(), empty());
        final SnapshotDeletionsInProgress snapshotDeletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
        assertThat(snapshotDeletionsInProgress.getEntries(), empty());
    }, 30L, TimeUnit.SECONDS);
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) CreateSnapshotResponse(org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse) AcknowledgedResponse(org.opensearch.action.support.master.AcknowledgedResponse) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress) RepositoryException(org.opensearch.repositories.RepositoryException) Matchers.containsString(org.hamcrest.Matchers.containsString) SnapshotDeletionsInProgress(org.opensearch.cluster.SnapshotDeletionsInProgress)
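
The synchronization point that involves SnapshotDeletionsInProgress in the test above is the awaitClusterState predicate waiting for the queued "*" delete to cover all three snapshots. A standalone version of that predicate might look like the sketch below; it relies only on accessors that appear in the test (custom, getEntries, getSnapshots), and the class and method names are illustrative.

import java.util.function.Predicate;

import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.SnapshotDeletionsInProgress;

// Illustrative helper, not part of OpenSearch.
final class DeletePredicates {

    private DeletePredicates() {}

    /** True once exactly one delete entry is enqueued and it covers the expected number of snapshots. */
    static Predicate<ClusterState> singleQueuedDelete(int expectedSnapshotCount) {
        return state -> {
            final SnapshotDeletionsInProgress deletions = state.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
            return deletions.getEntries().size() == 1
                && deletions.getEntries().get(0).getSnapshots().size() == expectedSnapshotCount;
        };
    }
}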

Example 3 with SnapshotDeletionsInProgress

Use of org.opensearch.cluster.SnapshotDeletionsInProgress in project OpenSearch by opensearch-project.

The class SnapshotsService, method shards.

/**
 * Calculates the assignment of shards to data nodes for a new snapshot based on the given cluster state and the
 * indices that should be included in the snapshot.
 *
 * @param snapshotsInProgress snapshots currently in progress in the cluster state
 * @param deletionsInProgress snapshot deletions currently in progress, or {@code null} if none are tracked
 * @param metadata            current cluster metadata
 * @param routingTable        current routing table
 * @param indices             indices to snapshot
 * @param useShardGenerations whether to write {@link ShardGenerations} during the snapshot
 * @param repositoryData      current {@link RepositoryData} for the target repository
 * @param repoName            name of the repository the snapshot is targeting
 * @return map of shards to be included in the current snapshot, keyed by shard id
 */
private static ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shards(SnapshotsInProgress snapshotsInProgress, @Nullable SnapshotDeletionsInProgress deletionsInProgress, Metadata metadata, RoutingTable routingTable, List<IndexId> indices, boolean useShardGenerations, RepositoryData repositoryData, String repoName) {
    ImmutableOpenMap.Builder<ShardId, SnapshotsInProgress.ShardSnapshotStatus> builder = ImmutableOpenMap.builder();
    final ShardGenerations shardGenerations = repositoryData.shardGenerations();
    final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(repoName, snapshotsInProgress.entries());
    final boolean readyToExecute = deletionsInProgress == null || deletionsInProgress.getEntries().stream().noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED);
    for (IndexId index : indices) {
        final String indexName = index.getName();
        final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false;
        IndexMetadata indexMetadata = metadata.index(indexName);
        if (indexMetadata == null) {
            // The index was deleted before we managed to start the snapshot - mark it as missing.
            builder.put(new ShardId(indexName, IndexMetadata.INDEX_UUID_NA_VALUE, 0), ShardSnapshotStatus.MISSING);
        } else {
            final IndexRoutingTable indexRoutingTable = routingTable.index(indexName);
            for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
                final ShardId shardId = indexRoutingTable.shard(i).shardId();
                final String shardRepoGeneration;
                if (useShardGenerations) {
                    final String inFlightGeneration = inFlightShardStates.generationForShard(index, shardId.id(), shardGenerations);
                    if (inFlightGeneration == null && isNewIndex) {
                        assert shardGenerations.getShardGen(index, shardId.getId()) == null : "Found shard generation for new index [" + index + "]";
                        shardRepoGeneration = ShardGenerations.NEW_SHARD_GEN;
                    } else {
                        shardRepoGeneration = inFlightGeneration;
                    }
                } else {
                    shardRepoGeneration = null;
                }
                final ShardSnapshotStatus shardSnapshotStatus;
                if (indexRoutingTable == null) {
                    shardSnapshotStatus = new SnapshotsInProgress.ShardSnapshotStatus(null, ShardState.MISSING, "missing routing table", shardRepoGeneration);
                } else {
                    ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
                    if (readyToExecute == false || inFlightShardStates.isActive(indexName, i)) {
                        shardSnapshotStatus = ShardSnapshotStatus.UNASSIGNED_QUEUED;
                    } else if (primary == null || !primary.assignedToNode()) {
                        shardSnapshotStatus = new ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", shardRepoGeneration);
                    } else if (primary.relocating() || primary.initializing()) {
                        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardRepoGeneration);
                    } else if (!primary.started()) {
                        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, "primary shard hasn't been started yet", shardRepoGeneration);
                    } else {
                        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), shardRepoGeneration);
                    }
                }
                builder.put(shardId, shardSnapshotStatus);
            }
        }
    }
    return builder.build();
}
Also used : RepositoryMissingException(org.opensearch.repositories.RepositoryMissingException) ImmutableOpenMap(org.opensearch.common.collect.ImmutableOpenMap) Arrays(java.util.Arrays) Metadata(org.opensearch.cluster.metadata.Metadata) Collections.unmodifiableList(java.util.Collections.unmodifiableList) DataStream(org.opensearch.cluster.metadata.DataStream) Version(org.opensearch.Version) ClusterStateApplier(org.opensearch.cluster.ClusterStateApplier) Regex(org.opensearch.common.regex.Regex) Strings(org.opensearch.common.Strings) GroupedActionListener(org.opensearch.action.support.GroupedActionListener) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) EnumSet(java.util.EnumSet) Repository(org.opensearch.repositories.Repository) TimeValue(org.opensearch.common.unit.TimeValue) Index(org.opensearch.index.Index) ExceptionsHelper(org.opensearch.ExceptionsHelper) Set(java.util.Set) ClusterStateTaskExecutor(org.opensearch.cluster.ClusterStateTaskExecutor) Settings(org.opensearch.common.settings.Settings) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) TransportService(org.opensearch.transport.TransportService) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ActionFilters(org.opensearch.action.support.ActionFilters) AbstractLifecycleComponent(org.opensearch.common.component.AbstractLifecycleComponent) ShardState(org.opensearch.cluster.SnapshotsInProgress.ShardState) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) StepListener(org.opensearch.action.StepListener) State(org.opensearch.cluster.SnapshotsInProgress.State) IndexNameExpressionResolver(org.opensearch.cluster.metadata.IndexNameExpressionResolver) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) RepositoriesService(org.opensearch.repositories.RepositoriesService) ThreadPool(org.opensearch.threadpool.ThreadPool) Priority(org.opensearch.common.Priority) TransportMasterNodeAction(org.opensearch.action.support.master.TransportMasterNodeAction) ArrayList(java.util.ArrayList) ClusterState(org.opensearch.cluster.ClusterState) LegacyESVersion(org.opensearch.LegacyESVersion) ClusterStateTaskConfig(org.opensearch.cluster.ClusterStateTaskConfig) RepositoryCleanupInProgress(org.opensearch.cluster.RepositoryCleanupInProgress) RepositoriesMetadata(org.opensearch.cluster.metadata.RepositoriesMetadata) Executor(java.util.concurrent.Executor) IOException(java.io.IOException) DeleteSnapshotRequest(org.opensearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest) ClusterService(org.opensearch.cluster.service.ClusterService) RestoreInProgress(org.opensearch.cluster.RestoreInProgress) RoutingTable(org.opensearch.cluster.routing.RoutingTable) SnapshotsInProgress.completed(org.opensearch.cluster.SnapshotsInProgress.completed) ClusterChangedEvent(org.opensearch.cluster.ClusterChangedEvent) ShardGenerations(org.opensearch.repositories.ShardGenerations) AbstractRunnable(org.opensearch.common.util.concurrent.AbstractRunnable) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) IndexId(org.opensearch.repositories.IndexId) Locale(java.util.Locale) NotMasterException(org.opensearch.cluster.NotMasterException) ShardSnapshotStatus(org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) RepositoryException(org.opensearch.repositories.RepositoryException) 
IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) Collectors(java.util.stream.Collectors) Nullable(org.opensearch.common.Nullable) Tuple(org.opensearch.common.collect.Tuple) Objects(java.util.Objects) List(java.util.List) Optional(java.util.Optional) ClusterStateTaskListener(org.opensearch.cluster.ClusterStateTaskListener) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ActionRunnable(org.opensearch.action.ActionRunnable) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress) CloneSnapshotRequest(org.opensearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest) HashMap(java.util.HashMap) SnapshotDeletionsInProgress(org.opensearch.cluster.SnapshotDeletionsInProgress) Deque(java.util.Deque) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Function(java.util.function.Function) HashSet(java.util.HashSet) IndexRoutingTable(org.opensearch.cluster.routing.IndexRoutingTable) UUIDs(org.opensearch.common.UUIDs) LinkedList(java.util.LinkedList) StreamInput(org.opensearch.common.io.stream.StreamInput) RepositoryData(org.opensearch.repositories.RepositoryData) Setting(org.opensearch.common.settings.Setting) Iterator(java.util.Iterator) Collections.emptySet(java.util.Collections.emptySet) RepositoryShardId(org.opensearch.repositories.RepositoryShardId) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) Consumer(java.util.function.Consumer) CreateSnapshotRequest(org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest) LogManager(org.apache.logging.log4j.LogManager) Collections(java.util.Collections) IndexId(org.opensearch.repositories.IndexId) IndexRoutingTable(org.opensearch.cluster.routing.IndexRoutingTable) ShardSnapshotStatus(org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) ShardGenerations(org.opensearch.repositories.ShardGenerations) ImmutableOpenMap(org.opensearch.common.collect.ImmutableOpenMap) RepositoryShardId(org.opensearch.repositories.RepositoryShardId) ShardId(org.opensearch.index.shard.ShardId) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ShardRouting(org.opensearch.cluster.routing.ShardRouting)
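
The only place the shards method consults SnapshotDeletionsInProgress is the readyToExecute flag: while a STARTED delete runs against the same repository, new shard snapshots are queued (UNASSIGNED_QUEUED) rather than assigned to a node. Extracted as a small helper, this is a minimal sketch assuming the same nullable parameter contract as the original; the class and method names are illustrative.

import org.opensearch.cluster.SnapshotDeletionsInProgress;
import org.opensearch.common.Nullable;

// Illustrative helper, not part of OpenSearch.
final class ShardAssignmentGuards {

    private ShardAssignmentGuards() {}

    /**
     * A repository is ready for new shard snapshots unless a delete against it has already reached
     * the STARTED state; a null deletions custom means no deletes are tracked in the cluster state.
     */
    static boolean isRepositoryReady(@Nullable SnapshotDeletionsInProgress deletionsInProgress, String repoName) {
        return deletionsInProgress == null
            || deletionsInProgress.getEntries()
                .stream()
                .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED);
    }
}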

Example 4 with SnapshotDeletionsInProgress

Use of org.opensearch.cluster.SnapshotDeletionsInProgress in project OpenSearch by opensearch-project.

The class SnapshotsService, method createDeleteStateUpdate.

private ClusterStateUpdateTask createDeleteStateUpdate(List<SnapshotId> snapshotIds, String repoName, RepositoryData repositoryData, Priority priority, ActionListener<Void> listener) {
    // Short circuit to noop state update if there isn't anything to delete
    if (snapshotIds.isEmpty()) {
        return new ClusterStateUpdateTask() {

            @Override
            public ClusterState execute(ClusterState currentState) {
                return currentState;
            }

            @Override
            public void onFailure(String source, Exception e) {
                listener.onFailure(e);
            }

            @Override
            public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                listener.onResponse(null);
            }
        };
    }
    return new ClusterStateUpdateTask(priority) {

        private SnapshotDeletionsInProgress.Entry newDelete;

        private boolean reusedExistingDelete = false;

        // Snapshots that had all of their shard snapshots in queued state and thus were removed from the
        // cluster state right away
        private final Collection<Snapshot> completedNoCleanup = new ArrayList<>();

        // Snapshots that were aborted and that already wrote data to the repository and now have to be deleted
        // from the repository after the cluster state update
        private final Collection<SnapshotsInProgress.Entry> completedWithCleanup = new ArrayList<>();

        @Override
        public ClusterState execute(ClusterState currentState) {
            final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
            final Version minNodeVersion = currentState.nodes().getMinNodeVersion();
            if (minNodeVersion.before(FULL_CONCURRENCY_VERSION)) {
                if (deletionsInProgress.hasDeletionsInProgress()) {
                    throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotIds.get(0)), "cannot delete - another snapshot is currently being deleted in [" + deletionsInProgress + "]");
                }
            }
            final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY);
            if (repositoryCleanupInProgress.hasCleanupInProgress()) {
                throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotIds.get(0)), "cannot delete snapshots while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]");
            }
            final RestoreInProgress restoreInProgress = currentState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY);
            for (RestoreInProgress.Entry entry : restoreInProgress) {
                if (repoName.equals(entry.snapshot().getRepository()) && snapshotIds.contains(entry.snapshot().getSnapshotId())) {
                    throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotIds.get(0)), "cannot delete snapshot during a restore in progress in [" + restoreInProgress + "]");
                }
            }
            final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
            final Set<SnapshotId> activeCloneSources = snapshots.entries().stream().filter(SnapshotsInProgress.Entry::isClone).map(SnapshotsInProgress.Entry::source).collect(Collectors.toSet());
            for (SnapshotId snapshotId : snapshotIds) {
                if (activeCloneSources.contains(snapshotId)) {
                    throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotId), "cannot delete snapshot while it is being cloned");
                }
            }
            // Snapshot ids that will have to be physically deleted from the repository
            final Set<SnapshotId> snapshotIdsRequiringCleanup = new HashSet<>(snapshotIds);
            final SnapshotsInProgress updatedSnapshots;
            if (minNodeVersion.onOrAfter(FULL_CONCURRENCY_VERSION)) {
                updatedSnapshots = SnapshotsInProgress.of(snapshots.entries().stream().map(existing -> {
                    if (existing.state() == State.STARTED && snapshotIdsRequiringCleanup.contains(existing.snapshot().getSnapshotId())) {
                        // snapshot is started - mark every non completed shard as aborted
                        final SnapshotsInProgress.Entry abortedEntry = existing.abort();
                        if (abortedEntry == null) {
                            // No work has been done for this snapshot yet so we remove it from the cluster state directly
                            final Snapshot existingNotYetStartedSnapshot = existing.snapshot();
                            // Track the snapshot as ending before removing it from the cluster state, so we don't trip
                            // any leaked listener assertions
                            if (endingSnapshots.add(existingNotYetStartedSnapshot)) {
                                completedNoCleanup.add(existingNotYetStartedSnapshot);
                            }
                            snapshotIdsRequiringCleanup.remove(existingNotYetStartedSnapshot.getSnapshotId());
                        } else if (abortedEntry.state().completed()) {
                            completedWithCleanup.add(abortedEntry);
                        }
                        return abortedEntry;
                    }
                    return existing;
                }).filter(Objects::nonNull).collect(Collectors.toList()));
                if (snapshotIdsRequiringCleanup.isEmpty()) {
                    // We only saw snapshots that could be removed from the cluster state right away, no need to update the deletions
                    return updateWithSnapshots(currentState, updatedSnapshots, null);
                }
            } else {
                if (snapshots.entries().isEmpty() == false) {
                    // However other snapshots are running - cannot continue
                    throw new ConcurrentSnapshotExecutionException(repoName, snapshotIds.toString(), "another snapshot is currently running cannot delete");
                }
                updatedSnapshots = snapshots;
            }
            // add the snapshot deletion to the cluster state
            final SnapshotDeletionsInProgress.Entry replacedEntry = deletionsInProgress.getEntries().stream().filter(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.WAITING).findFirst().orElse(null);
            if (replacedEntry == null) {
                final Optional<SnapshotDeletionsInProgress.Entry> foundDuplicate = deletionsInProgress.getEntries().stream().filter(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED && entry.getSnapshots().containsAll(snapshotIds)).findFirst();
                if (foundDuplicate.isPresent()) {
                    newDelete = foundDuplicate.get();
                    reusedExistingDelete = true;
                    return currentState;
                }
                final List<SnapshotId> toDelete = Collections.unmodifiableList(new ArrayList<>(snapshotIdsRequiringCleanup));
                ensureBelowConcurrencyLimit(repoName, toDelete.get(0).getName(), snapshots, deletionsInProgress);
                newDelete = new SnapshotDeletionsInProgress.Entry(toDelete, repoName, threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), updatedSnapshots.entries().stream().filter(entry -> repoName.equals(entry.repository())).noneMatch(SnapshotsService::isWritingToRepository) && deletionsInProgress.getEntries().stream().noneMatch(entry -> repoName.equals(entry.repository()) && entry.state() == SnapshotDeletionsInProgress.State.STARTED) ? SnapshotDeletionsInProgress.State.STARTED : SnapshotDeletionsInProgress.State.WAITING);
            } else {
                newDelete = replacedEntry.withAddedSnapshots(snapshotIdsRequiringCleanup);
            }
            return updateWithSnapshots(currentState, updatedSnapshots, (replacedEntry == null ? deletionsInProgress : deletionsInProgress.withRemovedEntry(replacedEntry.uuid())).withAddedEntry(newDelete));
        }

        @Override
        public void onFailure(String source, Exception e) {
            endingSnapshots.removeAll(completedNoCleanup);
            listener.onFailure(e);
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            if (completedNoCleanup.isEmpty() == false) {
                logger.info("snapshots {} aborted", completedNoCleanup);
            }
            for (Snapshot snapshot : completedNoCleanup) {
                failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, SnapshotsInProgress.ABORTED_FAILURE_TEXT));
            }
            if (newDelete == null) {
                listener.onResponse(null);
            } else {
                addDeleteListener(newDelete.uuid(), listener);
                if (reusedExistingDelete) {
                    return;
                }
                if (newDelete.state() == SnapshotDeletionsInProgress.State.STARTED) {
                    if (tryEnterRepoLoop(repoName)) {
                        deleteSnapshotsFromRepository(newDelete, repositoryData, newState.nodes().getMinNodeVersion());
                    } else {
                        logger.trace("Delete [{}] could not execute directly and was queued", newDelete);
                    }
                } else {
                    for (SnapshotsInProgress.Entry completedSnapshot : completedWithCleanup) {
                        endSnapshot(completedSnapshot, newState.metadata(), repositoryData);
                    }
                }
            }
        }
    };
}
Also used : RepositoryMissingException(org.opensearch.repositories.RepositoryMissingException) ImmutableOpenMap(org.opensearch.common.collect.ImmutableOpenMap) Arrays(java.util.Arrays) Metadata(org.opensearch.cluster.metadata.Metadata) Collections.unmodifiableList(java.util.Collections.unmodifiableList) DataStream(org.opensearch.cluster.metadata.DataStream) Version(org.opensearch.Version) ClusterStateApplier(org.opensearch.cluster.ClusterStateApplier) Regex(org.opensearch.common.regex.Regex) Strings(org.opensearch.common.Strings) GroupedActionListener(org.opensearch.action.support.GroupedActionListener) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) EnumSet(java.util.EnumSet) Repository(org.opensearch.repositories.Repository) TimeValue(org.opensearch.common.unit.TimeValue) Index(org.opensearch.index.Index) ExceptionsHelper(org.opensearch.ExceptionsHelper) Set(java.util.Set) ClusterStateTaskExecutor(org.opensearch.cluster.ClusterStateTaskExecutor) Settings(org.opensearch.common.settings.Settings) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) TransportService(org.opensearch.transport.TransportService) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) ActionFilters(org.opensearch.action.support.ActionFilters) AbstractLifecycleComponent(org.opensearch.common.component.AbstractLifecycleComponent) ShardState(org.opensearch.cluster.SnapshotsInProgress.ShardState) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) StepListener(org.opensearch.action.StepListener) State(org.opensearch.cluster.SnapshotsInProgress.State) IndexNameExpressionResolver(org.opensearch.cluster.metadata.IndexNameExpressionResolver) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) RepositoriesService(org.opensearch.repositories.RepositoriesService) ThreadPool(org.opensearch.threadpool.ThreadPool) Priority(org.opensearch.common.Priority) TransportMasterNodeAction(org.opensearch.action.support.master.TransportMasterNodeAction) ArrayList(java.util.ArrayList) ClusterState(org.opensearch.cluster.ClusterState) LegacyESVersion(org.opensearch.LegacyESVersion) ClusterStateTaskConfig(org.opensearch.cluster.ClusterStateTaskConfig) RepositoryCleanupInProgress(org.opensearch.cluster.RepositoryCleanupInProgress) RepositoriesMetadata(org.opensearch.cluster.metadata.RepositoriesMetadata) Executor(java.util.concurrent.Executor) IOException(java.io.IOException) DeleteSnapshotRequest(org.opensearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest) ClusterService(org.opensearch.cluster.service.ClusterService) RestoreInProgress(org.opensearch.cluster.RestoreInProgress) RoutingTable(org.opensearch.cluster.routing.RoutingTable) SnapshotsInProgress.completed(org.opensearch.cluster.SnapshotsInProgress.completed) ClusterChangedEvent(org.opensearch.cluster.ClusterChangedEvent) ShardGenerations(org.opensearch.repositories.ShardGenerations) AbstractRunnable(org.opensearch.common.util.concurrent.AbstractRunnable) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) IndexId(org.opensearch.repositories.IndexId) Locale(java.util.Locale) NotMasterException(org.opensearch.cluster.NotMasterException) ShardSnapshotStatus(org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) RepositoryException(org.opensearch.repositories.RepositoryException) 
IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) Collectors(java.util.stream.Collectors) Nullable(org.opensearch.common.Nullable) Tuple(org.opensearch.common.collect.Tuple) Objects(java.util.Objects) List(java.util.List) Optional(java.util.Optional) ClusterStateTaskListener(org.opensearch.cluster.ClusterStateTaskListener) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ActionRunnable(org.opensearch.action.ActionRunnable) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress) CloneSnapshotRequest(org.opensearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest) HashMap(java.util.HashMap) SnapshotDeletionsInProgress(org.opensearch.cluster.SnapshotDeletionsInProgress) Deque(java.util.Deque) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Function(java.util.function.Function) HashSet(java.util.HashSet) IndexRoutingTable(org.opensearch.cluster.routing.IndexRoutingTable) UUIDs(org.opensearch.common.UUIDs) LinkedList(java.util.LinkedList) StreamInput(org.opensearch.common.io.stream.StreamInput) RepositoryData(org.opensearch.repositories.RepositoryData) Setting(org.opensearch.common.settings.Setting) Iterator(java.util.Iterator) Collections.emptySet(java.util.Collections.emptySet) RepositoryShardId(org.opensearch.repositories.RepositoryShardId) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) Consumer(java.util.function.Consumer) CreateSnapshotRequest(org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest) LogManager(org.apache.logging.log4j.LogManager) Collections(java.util.Collections) RepositoryCleanupInProgress(org.opensearch.cluster.RepositoryCleanupInProgress) SnapshotDeletionsInProgress(org.opensearch.cluster.SnapshotDeletionsInProgress) Version(org.opensearch.Version) LegacyESVersion(org.opensearch.LegacyESVersion) HashSet(java.util.HashSet) ClusterState(org.opensearch.cluster.ClusterState) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) RepositoryMissingException(org.opensearch.repositories.RepositoryMissingException) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) IOException(java.io.IOException) NotMasterException(org.opensearch.cluster.NotMasterException) RepositoryException(org.opensearch.repositories.RepositoryException) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) RestoreInProgress(org.opensearch.cluster.RestoreInProgress) Collection(java.util.Collection) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress)
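
The heart of createDeleteStateUpdate is deciding whether the requested snapshot ids can be folded into an existing WAITING delete for the repository, whether an equivalent STARTED delete already covers them, or whether a new entry has to be created. The sketch below condenses that branch using only the Entry accessors visible above (repository, state, getSnapshots, withAddedSnapshots) and the five-argument Entry constructor. It assumes SnapshotId resolves from org.opensearch.snapshots and that withAddedSnapshots accepts a Collection of snapshot ids, as its use above suggests; the class and method names are illustrative.

import java.util.List;
import java.util.Optional;

import org.opensearch.cluster.SnapshotDeletionsInProgress;
import org.opensearch.snapshots.SnapshotId; // assumed package for SnapshotId

// Illustrative helper, not part of OpenSearch.
final class DeleteEntryResolution {

    private DeleteEntryResolution() {}

    static SnapshotDeletionsInProgress.Entry resolveDeleteEntry(
        SnapshotDeletionsInProgress deletionsInProgress,
        String repoName,
        List<SnapshotId> toDelete,
        long startTimeMillis,
        long repositoryStateId,
        SnapshotDeletionsInProgress.State initialState
    ) {
        // Prefer folding the new snapshot ids into a delete that is still WAITING for this repository.
        final Optional<SnapshotDeletionsInProgress.Entry> waiting = deletionsInProgress.getEntries()
            .stream()
            .filter(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.WAITING)
            .findFirst();
        if (waiting.isPresent()) {
            return waiting.get().withAddedSnapshots(toDelete);
        }
        // Otherwise reuse a STARTED delete that already covers every requested snapshot id.
        final Optional<SnapshotDeletionsInProgress.Entry> duplicate = deletionsInProgress.getEntries()
            .stream()
            .filter(entry -> entry.repository().equals(repoName)
                && entry.state() == SnapshotDeletionsInProgress.State.STARTED
                && entry.getSnapshots().containsAll(toDelete))
            .findFirst();
        if (duplicate.isPresent()) {
            return duplicate.get();
        }
        // Fall back to a brand-new entry, mirroring the constructor call in createDeleteStateUpdate.
        return new SnapshotDeletionsInProgress.Entry(toDelete, repoName, startTimeMillis, repositoryStateId, initialState);
    }
}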

Example 5 with SnapshotDeletionsInProgress

Use of org.opensearch.cluster.SnapshotDeletionsInProgress in project OpenSearch by opensearch-project.

The class SnapshotsService, method createSnapshotLegacy.

/**
 * Initializes the snapshotting process.
 * <p>
 * This method is used by clients to start a snapshot. It makes sure that no snapshots are currently running and
 * creates a snapshot record in the cluster state metadata.
 * Note: This method is only used in clusters that contain a node older than {@link #NO_REPO_INITIALIZE_VERSION} to ensure a backwards
 * compatible path for initializing the snapshot in the repository is executed.
 *
 * @param request  snapshot request
 * @param listener snapshot creation listener
 */
public void createSnapshotLegacy(final CreateSnapshotRequest request, final ActionListener<Snapshot> listener) {
    final String repositoryName = request.repository();
    final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.snapshot());
    validate(repositoryName, snapshotName);
    // new UUID for the snapshot
    final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID());
    Repository repository = repositoriesService.repository(request.repository());
    final Map<String, Object> userMeta = repository.adaptUserMetadata(request.userMetadata());
    clusterService.submitStateUpdateTask("create_snapshot [" + snapshotName + ']', new ClusterStateUpdateTask() {

        private List<String> indices;

        private SnapshotsInProgress.Entry newEntry;

        @Override
        public ClusterState execute(ClusterState currentState) {
            validate(repositoryName, snapshotName, currentState);
            SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE);
            if (deletionsInProgress != null && deletionsInProgress.hasDeletionsInProgress()) {
                throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, "cannot snapshot while a snapshot deletion is in-progress in [" + deletionsInProgress + "]");
            }
            final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(RepositoryCleanupInProgress.TYPE);
            if (repositoryCleanupInProgress != null && repositoryCleanupInProgress.hasCleanupInProgress()) {
                throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]");
            }
            SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
            // Fail if any other snapshot is currently running. The only exception is a stale INIT-state entry from a previous
            // master, which this node would clean up from the cluster state anyway in #applyClusterState.
            if (snapshots != null && snapshots.entries().stream().anyMatch(entry -> (entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false) == false)) {
                throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, " a snapshot is already running");
            }
            // Store newSnapshot here to be processed in clusterStateProcessed
            indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, request));
            final List<String> dataStreams = indexNameExpressionResolver.dataStreamNames(currentState, request.indicesOptions(), request.indices());
            logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices);
            newEntry = new SnapshotsInProgress.Entry(new Snapshot(repositoryName, snapshotId), request.includeGlobalState(), request.partial(), State.INIT, // We'll resolve the list of indices when moving to the STARTED state in #beginSnapshot
            Collections.emptyList(), dataStreams, threadPool.absoluteTimeInMillis(), RepositoryData.UNKNOWN_REPO_GEN, ImmutableOpenMap.of(), userMeta, Version.CURRENT);
            initializingSnapshots.add(newEntry.snapshot());
            snapshots = SnapshotsInProgress.of(Collections.singletonList(newEntry));
            return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, snapshots).build();
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot", repositoryName, snapshotName), e);
            if (newEntry != null) {
                initializingSnapshots.remove(newEntry.snapshot());
            }
            newEntry = null;
            listener.onFailure(e);
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) {
            if (newEntry != null) {
                final Snapshot current = newEntry.snapshot();
                assert initializingSnapshots.contains(current);
                assert indices != null;
                beginSnapshot(newState, newEntry, request.partial(), indices, repository, new ActionListener<Snapshot>() {

                    @Override
                    public void onResponse(final Snapshot snapshot) {
                        initializingSnapshots.remove(snapshot);
                        listener.onResponse(snapshot);
                    }

                    @Override
                    public void onFailure(final Exception e) {
                        initializingSnapshots.remove(current);
                        listener.onFailure(e);
                    }
                });
            }
        }

        @Override
        public TimeValue timeout() {
            return request.masterNodeTimeout();
        }
    });
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) RepositoryCleanupInProgress(org.opensearch.cluster.RepositoryCleanupInProgress) RepositoryMissingException(org.opensearch.repositories.RepositoryMissingException) FailedToCommitClusterStateException(org.opensearch.cluster.coordination.FailedToCommitClusterStateException) IOException(java.io.IOException) NotMasterException(org.opensearch.cluster.NotMasterException) RepositoryException(org.opensearch.repositories.RepositoryException) ClusterBlockException(org.opensearch.cluster.block.ClusterBlockException) SnapshotDeletionsInProgress(org.opensearch.cluster.SnapshotDeletionsInProgress) Repository(org.opensearch.repositories.Repository) GroupedActionListener(org.opensearch.action.support.GroupedActionListener) ActionListener(org.opensearch.action.ActionListener) SnapshotsInProgress(org.opensearch.cluster.SnapshotsInProgress) Collections.unmodifiableList(java.util.Collections.unmodifiableList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) TimeValue(org.opensearch.common.unit.TimeValue)
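
Unlike the newer code paths above, this legacy path reads the in-progress customs without the EMPTY default and therefore has to null-check them before consulting their state. A minimal sketch of that guard, assuming access to the current ClusterState; the class and method names are illustrative.

import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.RepositoryCleanupInProgress;
import org.opensearch.cluster.SnapshotDeletionsInProgress;

// Illustrative helper, not part of OpenSearch.
final class LegacySnapshotGuards {

    private LegacySnapshotGuards() {}

    /** True if either a snapshot deletion or a repository cleanup is currently tracked in the cluster state. */
    static boolean deleteOrCleanupInProgress(ClusterState currentState) {
        final SnapshotDeletionsInProgress deletions = currentState.custom(SnapshotDeletionsInProgress.TYPE);
        if (deletions != null && deletions.hasDeletionsInProgress()) {
            return true;
        }
        final RepositoryCleanupInProgress cleanups = currentState.custom(RepositoryCleanupInProgress.TYPE);
        return cleanups != null && cleanups.hasCleanupInProgress();
    }
}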

Aggregations

SnapshotDeletionsInProgress (org.opensearch.cluster.SnapshotDeletionsInProgress): 15
SnapshotsInProgress (org.opensearch.cluster.SnapshotsInProgress): 13
ClusterState (org.opensearch.cluster.ClusterState): 10
ArrayList (java.util.ArrayList): 9
HashSet (java.util.HashSet): 9
Repository (org.opensearch.repositories.Repository): 9
IOException (java.io.IOException): 8
List (java.util.List): 8
ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage): 8
ActionListener (org.opensearch.action.ActionListener): 8
Set (java.util.Set): 7
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 7
Version (org.opensearch.Version): 7
StepListener (org.opensearch.action.StepListener): 7
RepositoryData (org.opensearch.repositories.RepositoryData): 7
ObjectObjectCursor (com.carrotsearch.hppc.cursors.ObjectObjectCursor): 6
Arrays (java.util.Arrays): 6
Collections (java.util.Collections): 6
Collections.emptySet (java.util.Collections.emptySet): 6
HashMap (java.util.HashMap): 6