use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class SnapshotResiliencyTests method testSnapshotPrimaryRelocations.
/**
 * Starts and then deletes a snapshot while the node holding a primary shard is stopped, the master node is
 * (for multi-master clusters) stopped as well, and an empty primary is force-allocated on another data node.
 * Afterwards verifies that no snapshot is left in progress and that the repository holds at most one snapshot.
 */
public void testSnapshotPrimaryRelocations() {
    final int masterNodeCount = randomFrom(1, 3, 5);
    setupTestCluster(masterNodeCount, randomIntBetween(2, 5));

    final String repoName = "repo";
    final String snapshotName = "snapshot";
    final String index = "test";
    final int shards = randomIntBetween(1, 5);

    final TestClusterNodes.TestClusterNode masterNode = testClusterNodes.currentMaster(
        testClusterNodes.nodes.values().iterator().next().clusterService.state()
    );
    final AtomicBoolean createdSnapshot = new AtomicBoolean();
    final AdminClient masterAdminClient = masterNode.client.admin();

    // Step 1: create repository and index, then fetch the cluster state to find a shard to work with.
    final StepListener<ClusterStateResponse> initialStateListener = new StepListener<>();
    continueOrDie(
        createRepoAndIndex(repoName, index, shards),
        createIndexResponse -> client().admin().cluster().state(new ClusterStateRequest(), initialStateListener)
    );

    // Step 2: stop the node holding the chosen shard and poll until its primary is reported as lost.
    continueOrDie(initialStateListener, initialStateResponse -> {
        final ShardRouting shardToRelocate = initialStateResponse.getState().routingTable().allShards(index).get(0);
        final TestClusterNodes.TestClusterNode currentPrimaryNode = testClusterNodes.nodeById(shardToRelocate.currentNodeId());
        final TestClusterNodes.TestClusterNode otherNode = testClusterNodes.randomDataNodeSafe(currentPrimaryNode.node.getName());
        scheduleNow(() -> testClusterNodes.stopNode(currentPrimaryNode));
        scheduleNow(new Runnable() {
            @Override
            public void run() {
                final StepListener<ClusterStateResponse> polledStateListener = new StepListener<>();
                masterAdminClient.cluster().state(new ClusterStateRequest(), polledStateListener);
                continueOrDie(polledStateListener, polledState -> {
                    final ShardRouting primary = polledState.getState()
                        .routingTable()
                        .shardRoutingTable(shardToRelocate.shardId())
                        .primaryShard();
                    final boolean primaryLostWithNode = primary.unassigned()
                        && primary.unassignedInfo().getReason() == UnassignedInfo.Reason.NODE_LEFT;
                    if (primaryLostWithNode == false) {
                        // Not there yet: re-run this very task a little later.
                        scheduleSoon(this);
                        return;
                    }
                    if (masterNodeCount > 1) {
                        scheduleNow(() -> testClusterNodes.stopNode(masterNode));
                    }
                    // Create the snapshot and delete it again as soon as the create call completes (either way).
                    testClusterNodes.randomDataNodeSafe().client.admin()
                        .cluster()
                        .prepareCreateSnapshot(repoName, snapshotName)
                        .execute(ActionListener.wrap(() -> {
                            createdSnapshot.set(true);
                            testClusterNodes.randomDataNodeSafe().client.admin()
                                .cluster()
                                .deleteSnapshot(new DeleteSnapshotRequest(repoName, snapshotName), noopListener());
                        }));
                    // Concurrently force-allocate an empty primary on the other data node.
                    scheduleNow(
                        () -> testClusterNodes.randomMasterNodeSafe().client.admin()
                            .cluster()
                            .reroute(
                                new ClusterRerouteRequest().add(
                                    new AllocateEmptyPrimaryAllocationCommand(
                                        index,
                                        primary.shardId().id(),
                                        otherNode.node.getName(),
                                        true
                                    )
                                ),
                                noopListener()
                            )
                    );
                });
            }
        });
    });

    // Run until the snapshot call has returned and some master no longer reports snapshots in progress.
    runUntil(
        () -> testClusterNodes.randomMasterNode()
            .map(
                master -> createdSnapshot.get()
                    && master.clusterService.state().custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries().isEmpty()
            )
            .orElse(false),
        TimeUnit.MINUTES.toMillis(1L)
    );

    clearDisruptionsAndAwaitSync();

    assertTrue(createdSnapshot.get());
    assertThat(
        testClusterNodes.randomDataNodeSafe().clusterService.state()
            .custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY)
            .entries(),
        empty()
    );
    final Repository repository = testClusterNodes.randomMasterNodeSafe().repositoriesService.repository(repoName);
    final Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
    // Depending on how the delete raced with the create, the snapshot may or may not remain in the repository.
    assertThat(snapshotIds, either(hasSize(1)).or(hasSize(0)));
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class SnapshotsService method beginSnapshot.
/**
* Starts snapshot.
* <p>
* Creates snapshot in repository and updates snapshot metadata record with list of shards that need to be processed.
* Note: This method is only used in clusters that contain a node older than {@link #NO_REPO_INITIALIZE_VERSION} to ensure a backwards
* compatible path for initializing the snapshot in the repository is executed.
* <p>
* The work is executed on the {@code SNAPSHOT} thread pool. Any failure is logged and handed to
* {@code removeFailedSnapshotFromClusterState} together with a {@code CleanupAfterErrorListener} wrapping the user listener.
*
* @param clusterState cluster state
* @param snapshot snapshot meta data
* @param partial allow partial snapshots; if {@code false} the snapshot entry is marked as failed when indices are missing
*                primary shards or are closed
* @param indices indices to snapshot; if empty the snapshot is ended right after it was started
* @param repository repository to create the snapshot in; a read-only repository is rejected with a {@code RepositoryException}
* @param userCreateSnapshotListener listener that receives the snapshot once it was started, or a failure if the master changed
*                                   during initialization
*/
private void beginSnapshot(final ClusterState clusterState, final SnapshotsInProgress.Entry snapshot, final boolean partial, final List<String> indices, final Repository repository, final ActionListener<Snapshot> userCreateSnapshotListener) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new AbstractRunnable() {
// Set by the cluster state update task below when the entry of this snapshot was found in ABORTED state;
// read in clusterStateProcessed to decide how to finish.
boolean hadAbortedInitializations;
@Override
protected void doRun() {
assert initializingSnapshots.contains(snapshot.snapshot());
if (repository.isReadOnly()) {
throw new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository");
}
final String snapshotName = snapshot.snapshot().getSnapshotId().getName();
// Load the repository data first; the remaining steps run in the listener, failures are passed to onFailure below.
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repository.getRepositoryData(repositoryDataListener);
repositoryDataListener.whenComplete(repositoryData -> {
// check if the snapshot name already exists in the repository
if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists");
}
if (clusterState.nodes().getMinNodeVersion().onOrAfter(NO_REPO_INITIALIZE_VERSION) == false) {
// In mixed version clusters we initialize the snapshot in the repository so that in case of a master failover to an
// older version master node snapshot finalization (that assumes initializeSnapshot was called) produces a valid
// snapshot.
repository.initializeSnapshot(snapshot.snapshot().getSnapshotId(), snapshot.indices(), metadataForSnapshot(snapshot, clusterState.metadata()));
}
logger.info("snapshot [{}] started", snapshot.snapshot());
final Version version = minCompatibleVersion(clusterState.nodes().getMinNodeVersion(), repositoryData, null);
if (indices.isEmpty()) {
// No indices in this snapshot - we are done
userCreateSnapshotListener.onResponse(snapshot.snapshot());
endSnapshot(SnapshotsInProgress.startedEntry(snapshot.snapshot(), snapshot.includeGlobalState(), snapshot.partial(), Collections.emptyList(), Collections.emptyList(), threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), ImmutableOpenMap.of(), snapshot.userMetadata(), version), clusterState.metadata(), repositoryData);
return;
}
// Replace the initializing entry of this snapshot in the cluster state with a started, failed or (unchanged) aborted one.
clusterService.submitStateUpdateTask("update_snapshot [" + snapshot.snapshot() + "]", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
// Entries that belong to other snapshots are carried over unchanged.
if (entry.snapshot().equals(snapshot.snapshot()) == false) {
entries.add(entry);
continue;
}
if (entry.state() == State.ABORTED) {
// The snapshot was aborted in the meantime: keep the entry as is and remember to end it once the state is processed.
entries.add(entry);
assert entry.shards().isEmpty();
hadAbortedInitializations = true;
} else {
final List<IndexId> indexIds = repositoryData.resolveNewIndices(indices, Collections.emptyMap());
// Replace the snapshot that was just initialized
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards = shards(snapshots, currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY), currentState.metadata(), currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, entry.repository());
if (!partial) {
// Non-partial snapshots are failed up front when any index lacks primary shards or is closed.
Tuple<Set<String>, Set<String>> indicesWithMissingShards = indicesWithMissingShards(shards, currentState.metadata());
Set<String> missing = indicesWithMissingShards.v1();
Set<String> closed = indicesWithMissingShards.v2();
if (missing.isEmpty() == false || closed.isEmpty() == false) {
final StringBuilder failureMessage = new StringBuilder();
if (missing.isEmpty() == false) {
failureMessage.append("Indices don't have primary shards ");
failureMessage.append(missing);
}
if (closed.isEmpty() == false) {
if (failureMessage.length() > 0) {
failureMessage.append("; ");
}
failureMessage.append("Indices are closed ");
failureMessage.append(closed);
}
entries.add(new SnapshotsInProgress.Entry(entry, State.FAILED, indexIds, repositoryData.getGenId(), shards, version, failureMessage.toString()));
continue;
}
}
entries.add(new SnapshotsInProgress.Entry(entry, State.STARTED, indexIds, repositoryData.getGenId(), shards, version, null));
}
}
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(entries))).build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] failed to create snapshot", snapshot.snapshot().getSnapshotId()), e);
removeFailedSnapshotFromClusterState(snapshot.snapshot(), e, null, new CleanupAfterErrorListener(userCreateSnapshotListener, e));
}
@Override
public void onNoLongerMaster(String source) {
// We are no longer a master - we shouldn't try to do any cleanup
// The new master will take care of it
logger.warn("[{}] failed to create snapshot - no longer a master", snapshot.snapshot().getSnapshotId());
userCreateSnapshotListener.onFailure(new SnapshotException(snapshot.snapshot(), "master changed during snapshot initialization"));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
// The userCreateSnapshotListener.onResponse() notifies caller that the snapshot was accepted
// for processing. If client wants to wait for the snapshot completion, it can register snapshot
// completion listener in this method. For the snapshot completion to work properly, the snapshot
// should still exist when listener is registered.
userCreateSnapshotListener.onResponse(snapshot.snapshot());
if (hadAbortedInitializations) {
// The aborted entry was left untouched by execute(); end it explicitly now.
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
assert entry != null;
endSnapshot(entry, newState.metadata(), repositoryData);
} else {
endCompletedSnapshots(newState);
}
}
});
}, this::onFailure);
}
@Override
public void onFailure(Exception e) {
logger.warn(() -> new ParameterizedMessage("failed to create snapshot [{}]", snapshot.snapshot().getSnapshotId()), e);
removeFailedSnapshotFromClusterState(snapshot.snapshot(), e, null, new CleanupAfterErrorListener(userCreateSnapshotListener, e));
}
});
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class SnapshotsService method startCloning.
/**
* Determine the number of shards in each index of a clone operation and update the cluster state accordingly.
* <p>
* Works in three steps: load the {@code SnapshotInfo} of the source snapshot and verify none of the indices to clone failed in
* it, load the shard count of every index from the index metadata in the repository, and finally submit a cluster state update
* that adds the per-shard clone entries to the clone operation. Shard clones that ended up in {@code INIT} state are started
* once that cluster state update was processed.
*
* @param repository repository to run operation on
* @param cloneEntry clone operation in the cluster state
*/
private void startCloning(Repository repository, SnapshotsInProgress.Entry cloneEntry) {
final List<IndexId> indices = cloneEntry.indices();
final SnapshotId sourceSnapshot = cloneEntry.source();
final Snapshot targetSnapshot = cloneEntry.snapshot();
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
// Exception handler for IO exceptions with loading index and repo metadata:
// stops tracking the clone as initializing, logs the failure and removes the failed snapshot from the cluster state.
final Consumer<Exception> onFailure = e -> {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
removeFailedSnapshotFromClusterState(targetSnapshot, e, null, null);
};
// 1. step, load SnapshotInfo to make sure that source snapshot was successful for the indices we want to clone
// TODO: we could skip this step for snapshots with state SUCCESS
final StepListener<SnapshotInfo> snapshotInfoListener = new StepListener<>();
executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshot)));
final StepListener<Collection<Tuple<IndexId, Integer>>> allShardCountsListener = new StepListener<>();
// Grouped listener sized to the number of indices; one (index, shard count) tuple is supplied per index in step 2.
final GroupedActionListener<Tuple<IndexId, Integer>> shardCountListener = new GroupedActionListener<>(allShardCountsListener, indices.size());
snapshotInfoListener.whenComplete(snapshotInfo -> {
for (IndexId indexId : indices) {
if (RestoreService.failed(snapshotInfo, indexId.getName())) {
throw new SnapshotException(targetSnapshot, "Can't clone index [" + indexId + "] because its snapshot was not successful.");
}
}
// 2. step, load the number of shards we have in each index to be cloned from the index metadata.
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
for (IndexId index : indices) {
executor.execute(ActionRunnable.supply(shardCountListener, () -> {
final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index);
return Tuple.tuple(index, metadata.getNumberOfShards());
}));
}
}, onFailure));
}, onFailure);
// 3. step, we have all the shard counts, now update the cluster state to have clone jobs in the snap entry
allShardCountsListener.whenComplete(counts -> repository.executeConsistentStateUpdate(repoData -> new ClusterStateUpdateTask() {
// The clone entry as written to the cluster state by execute(); stays null if the entry was not found there.
private SnapshotsInProgress.Entry updatedEntry;
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> updatedEntries = new ArrayList<>(snapshotsInProgress.entries());
boolean changed = false;
final String localNodeId = currentState.nodes().getLocalNodeId();
final String repoName = cloneEntry.repository();
final ShardGenerations shardGenerations = repoData.shardGenerations();
for (int i = 0; i < updatedEntries.size(); i++) {
if (cloneEntry.snapshot().equals(updatedEntries.get(i).snapshot())) {
final ImmutableOpenMap.Builder<RepositoryShardId, ShardSnapshotStatus> clonesBuilder = ImmutableOpenMap.builder();
final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(repoName, snapshotsInProgress.entries());
for (Tuple<IndexId, Integer> count : counts) {
for (int shardId = 0; shardId < count.v2(); shardId++) {
final RepositoryShardId repoShardId = new RepositoryShardId(count.v1(), shardId);
final String indexName = repoShardId.indexName();
// A shard that is active in another in-flight operation is queued; otherwise it is assigned to the local node.
if (inFlightShardStates.isActive(indexName, shardId)) {
clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED);
} else {
clonesBuilder.put(repoShardId, new ShardSnapshotStatus(localNodeId, inFlightShardStates.generationForShard(repoShardId.index(), shardId, shardGenerations)));
}
}
}
updatedEntry = cloneEntry.withClones(clonesBuilder.build());
updatedEntries.set(i, updatedEntry);
changed = true;
break;
}
}
// Passes null for the snapshots when the clone entry was not found, i.e. nothing was changed.
return updateWithSnapshots(currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null);
}
@Override
public void onFailure(String source, Exception e) {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
failAllListenersOnMasterFailOver(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
initializingClones.remove(targetSnapshot);
if (updatedEntry != null) {
final Snapshot target = updatedEntry.snapshot();
final SnapshotId sourceSnapshot = updatedEntry.source();
for (ObjectObjectCursor<RepositoryShardId, ShardSnapshotStatus> indexClone : updatedEntry.clones()) {
final ShardSnapshotStatus shardStatusBefore = indexClone.value;
// Only shard clones in INIT state are started here; all others are skipped.
if (shardStatusBefore.state() != ShardState.INIT) {
continue;
}
final RepositoryShardId repoShardId = indexClone.key;
runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository);
}
} else {
// Extremely unlikely corner case of master failing over between starting the clone and
// starting shard clones.
logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry);
}
}
}, "start snapshot clone", onFailure), onFailure);
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class SnapshotsService method finalizeSnapshotEntry.
/**
 * Finalizes the given snapshot entry in its repository.
 * <p>
 * Collects the failures of all shards that failed or did not complete, builds the {@link SnapshotInfo}, resolves the metadata
 * to store (loaded from the repository for clones, otherwise the given {@code metadata}) and then calls
 * {@code finalizeSnapshot} on the repository. On success the listeners waiting on the snapshot are completed and the next
 * queued operation is run; every failure is passed to {@code handleFinalizationFailure}.
 *
 * @param entry          snapshot entry to finalize; its repository must be marked as currently finalizing
 * @param metadata       metadata used to build the shard generations and, for non-clone snapshots, stored with the snapshot
 * @param repositoryData repository data whose generation the finalization is based on
 */
private void finalizeSnapshotEntry(SnapshotsInProgress.Entry entry, Metadata metadata, RepositoryData repositoryData) {
    assert currentlyFinalizing.contains(entry.repository());
    try {
        final String failure = entry.failure();
        final Snapshot snapshot = entry.snapshot();
        logger.trace("[{}] finalizing snapshot in repository, state: [{}], failure[{}]", snapshot, entry.state(), failure);

        // Record a failure for every shard that failed outright or never completed.
        final ArrayList<SnapshotShardFailure> shardFailures = new ArrayList<>();
        for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> cursor : entry.shards()) {
            final ShardId shardId = cursor.key;
            final ShardSnapshotStatus status = cursor.value;
            final ShardState shardState = status.state();
            if (shardState.failed()) {
                shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, status.reason()));
                continue;
            }
            if (shardState.completed()) {
                assert shardState == ShardState.SUCCESS;
                continue;
            }
            shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, "skipped"));
        }

        final ShardGenerations shardGenerations = buildGenerations(entry, metadata);
        final String repository = snapshot.getRepository();
        final List<String> indexNames = shardGenerations.indices().stream().map(IndexId::getName).collect(Collectors.toList());
        final SnapshotInfo snapshotInfo = new SnapshotInfo(
            snapshot.getSnapshotId(),
            indexNames,
            entry.dataStreams(),
            entry.startTime(),
            failure,
            threadPool.absoluteTimeInMillis(),
            entry.partial() ? shardGenerations.totalShards() : entry.shards().size(),
            shardFailures,
            entry.includeGlobalState(),
            entry.userMetadata()
        );

        final StepListener<Metadata> metadataListener = new StepListener<>();
        final Repository repo = repositoriesService.repository(repository);
        if (entry.isClone() == false) {
            metadataListener.onResponse(metadata);
        } else {
            // Clones take their metadata from the source snapshot in the repository, loaded on the snapshot pool.
            threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(metadataListener, () -> {
                final Metadata.Builder cloneMetadata = Metadata.builder(repo.getSnapshotGlobalMetadata(entry.source()));
                for (IndexId indexId : entry.indices()) {
                    cloneMetadata.put(repo.getSnapshotIndexMetaData(repositoryData, entry.source(), indexId), false);
                }
                return cloneMetadata.build();
            }));
        }

        final Consumer<Exception> onFinalizationFailure = e -> handleFinalizationFailure(e, entry, repositoryData);
        metadataListener.whenComplete(
            meta -> repo.finalizeSnapshot(
                shardGenerations,
                repositoryData.getGenId(),
                metadataForSnapshot(entry, meta),
                snapshotInfo,
                entry.version(),
                state -> stateWithoutSnapshot(state, snapshot),
                ActionListener.wrap(newRepoData -> {
                    completeListenersIgnoringException(endAndGetListenersToResolve(snapshot), Tuple.tuple(newRepoData, snapshotInfo));
                    logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state());
                    runNextQueuedOperation(newRepoData, repository, true);
                }, onFinalizationFailure)
            ),
            onFinalizationFailure
        );
    } catch (Exception e) {
        // Not expected to happen; trips when assertions are enabled, otherwise handled like any other failure.
        assert false : new AssertionError(e);
        handleFinalizationFailure(e, entry, repositoryData);
    }
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class RecoverySourceHandlerTests method testCancelRecoveryDuringPhase1.
/**
 * Runs phase 1 of a recovery against a target handler that randomly cancels the recovery while file infos are received,
 * file chunks are written or files are cleaned. If phase 1 ends with an exception, the recovery must have been cancelled
 * and the exception must wrap an {@code ExecutionCancelledException}.
 */
public void testCancelRecoveryDuringPhase1() throws Exception {
    final Store store = newStore(createTempDir("source"), false);
    final IndexShard shard = mock(IndexShard.class);
    when(shard.store()).thenReturn(store);

    // Index a random number of small documents so that phase 1 has files to send.
    final Directory dir = store.directory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    final int numDocs = randomIntBetween(10, 100);
    int docId = 0;
    while (docId < numDocs) {
        final Document document = new Document();
        document.add(new StringField("id", Integer.toString(docId), Field.Store.YES));
        document.add(newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
        writer.addDocument(document);
        docId++;
    }
    writer.commit();
    writer.close();

    final AtomicBoolean wasCancelled = new AtomicBoolean();
    final SetOnce<Runnable> cancelRecovery = new SetOnce<>();
    // Shared by all target callbacks: remember that we cancelled, then cancel through the handler set further down.
    final Runnable markAndCancel = () -> {
        wasCancelled.set(true);
        cancelRecovery.get().run();
    };

    final TestRecoveryTargetHandler recoveryTarget = new TestRecoveryTargetHandler() {
        @Override
        public void receiveFileInfo(
            List<String> phase1FileNames,
            List<Long> phase1FileSizes,
            List<String> phase1ExistingFileNames,
            List<Long> phase1ExistingFileSizes,
            int totalTranslogOps,
            ActionListener<Void> listener
        ) {
            recoveryExecutor.execute(() -> listener.onResponse(null));
            if (randomBoolean()) {
                markAndCancel.run();
            }
        }

        @Override
        public void writeFileChunk(
            StoreFileMetadata md,
            long position,
            BytesReference content,
            boolean lastChunk,
            int totalTranslogOps,
            ActionListener<Void> listener
        ) {
            recoveryExecutor.execute(() -> listener.onResponse(null));
            if (rarely()) {
                markAndCancel.run();
            }
        }

        @Override
        public void cleanFiles(
            int totalTranslogOps,
            long globalCheckpoint,
            Store.MetadataSnapshot sourceMetadata,
            ActionListener<Void> listener
        ) {
            recoveryExecutor.execute(() -> listener.onResponse(null));
            if (randomBoolean()) {
                markAndCancel.run();
            }
        }
    };

    final StartRecoveryRequest startRecoveryRequest = getStartRecoveryRequest();
    final RecoverySourceHandler handler = new RecoverySourceHandler(
        shard,
        recoveryTarget,
        threadPool,
        startRecoveryRequest,
        between(1, 16),
        between(1, 4),
        between(1, 4)
    ) {
        @Override
        void createRetentionLease(long startingSeqNo, ActionListener<RetentionLease> listener) {
            // Answer immediately with a lease built for the target node of the request.
            final String leaseId = ReplicationTracker.getPeerRecoveryRetentionLeaseId(startRecoveryRequest.targetNode().getId());
            final RetentionLease lease = new RetentionLease(
                leaseId,
                startingSeqNo,
                threadPool.absoluteTimeInMillis(),
                ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE
            );
            listener.onResponse(lease);
        }
    };
    cancelRecovery.set(() -> handler.cancel("test"));

    final StepListener<RecoverySourceHandler.SendFileResult> phase1Listener = new StepListener<>();
    final CountDownLatch phase1Done = new CountDownLatch(1);
    try {
        handler.phase1(DirectoryReader.listCommits(dir).get(0), 0, () -> 0, new LatchedActionListener<>(phase1Listener, phase1Done));
        phase1Done.await();
        phase1Listener.result();
    } catch (Exception e) {
        // A failure is only acceptable if we cancelled, and it must be caused by the cancellation.
        assertTrue(wasCancelled.get());
        assertNotNull(ExceptionsHelper.unwrap(e, CancellableThreads.ExecutionCancelledException.class));
    }
    store.close();
}
Aggregations