Use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
Class RecoverySourceHandler, method phase1:
/**
* Perform phase1 of the recovery operations. Once this {@link IndexCommit}
* snapshot has been performed no commit operations (files being fsync'd)
* are effectively allowed on this index until all recovery phases are done
* <p>
* Phase1 examines the segment files on the target node and copies over the
* segments that are missing. Only segments that have the same size and
* checksum can be reused
*/
void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
    cancellableThreads.checkForCancel();
    final Store store = shard.store();
    try {
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            shard.failShard("recovery", ex);
            throw ex;
        }
        for (String name : snapshot.getFileNames()) {
            final StoreFileMetadata md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException(
                    "Snapshot differs from actual index - maybe index was removed metadata has "
                        + recoverySourceMetadata.asMap().size() + " files",
                    name
                );
            }
        }
        if (canSkipPhase1(recoverySourceMetadata, request.metadataSnapshot()) == false) {
            final List<String> phase1FileNames = new ArrayList<>();
            final List<Long> phase1FileSizes = new ArrayList<>();
            final List<String> phase1ExistingFileNames = new ArrayList<>();
            final List<Long> phase1ExistingFileSizes = new ArrayList<>();
            // Total size of segment files that are recovered
            long totalSizeInBytes = 0;
            // Total size of segment files that were able to be re-used
            long existingTotalSizeInBytes = 0;
            // Generate a "diff" of all the identical, different, and missing
            // segment files on the target node, using the existing files on
            // the source node
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            for (StoreFileMetadata md : diff.identical) {
                phase1ExistingFileNames.add(md.name());
                phase1ExistingFileSizes.add(md.length());
                existingTotalSizeInBytes += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]",
                        md.name(), md.checksum(), md.length());
                }
                totalSizeInBytes += md.length();
            }
            List<StoreFileMetadata> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
            phase1Files.addAll(diff.different);
            phase1Files.addAll(diff.missing);
            for (StoreFileMetadata md : phase1Files) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]",
                        md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
                }
                phase1FileNames.add(md.name());
                phase1FileSizes.add(md.length());
                totalSizeInBytes += md.length();
            }
            logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]",
                phase1FileNames.size(), new ByteSizeValue(totalSizeInBytes),
                phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSizeInBytes));
            final StepListener<Void> sendFileInfoStep = new StepListener<>();
            final StepListener<Void> sendFilesStep = new StepListener<>();
            final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
            final StepListener<Void> cleanFilesStep = new StepListener<>();
            cancellableThreads.checkForCancel();
            recoveryTarget.receiveFileInfo(phase1FileNames, phase1FileSizes, phase1ExistingFileNames, phase1ExistingFileSizes,
                translogOps.getAsInt(), sendFileInfoStep);
            sendFileInfoStep.whenComplete(
                r -> sendFiles(store, phase1Files.toArray(new StoreFileMetadata[0]), translogOps, sendFilesStep),
                listener::onFailure
            );
            sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure);
            createRetentionLeaseStep.whenComplete(retentionLease -> {
                final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint();
                assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint
                    : retentionLease + " vs " + lastKnownGlobalCheckpoint;
                // Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want
                // the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica
                // to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on
                // the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint.
                cleanFiles(store, recoverySourceMetadata, translogOps, lastKnownGlobalCheckpoint, cleanFilesStep);
            }, listener::onFailure);
            final long totalSize = totalSizeInBytes;
            final long existingTotalSize = existingTotalSizeInBytes;
            cleanFilesStep.whenComplete(r -> {
                final TimeValue took = stopWatch.totalTime();
                logger.trace("recovery [phase1]: took [{}]", took);
                listener.onResponse(new SendFileResult(phase1FileNames, phase1FileSizes, totalSize,
                    phase1ExistingFileNames, phase1ExistingFileSizes, existingTotalSize, took));
            }, listener::onFailure);
        } else {
            logger.trace("skipping [phase1] since source and target have identical sync id [{}]", recoverySourceMetadata.getSyncId());
            // but we must still create a retention lease
            final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
            createRetentionLease(startingSeqNo, createRetentionLeaseStep);
            createRetentionLeaseStep.whenComplete(retentionLease -> {
                final TimeValue took = stopWatch.totalTime();
                logger.trace("recovery [phase1]: took [{}]", took);
                listener.onResponse(new SendFileResult(Collections.emptyList(), Collections.emptyList(), 0L,
                    Collections.emptyList(), Collections.emptyList(), 0L, took));
            }, listener::onFailure);
        }
    } catch (Exception e) {
        throw new RecoverFilesRecoveryException(request.shardId(), 0, new ByteSizeValue(0L), e);
    }
}
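The chain of StepListener steps above (sendFileInfoStep, sendFilesStep, createRetentionLeaseStep, cleanFilesStep) is the core pattern this page illustrates: each step is handed to an asynchronous call as its ActionListener, and its whenComplete callback starts the next step while forwarding any failure to the caller's listener. The following is a minimal, self-contained sketch of that pattern, assuming only the StepListener and ActionListener APIs already shown above; the step methods and class name are hypothetical stand-ins, not OpenSearch APIs.

import org.opensearch.action.ActionListener;
import org.opensearch.action.StepListener;

public class StepListenerChainSketch {
    // Hypothetical asynchronous steps standing in for calls such as
    // receiveFileInfo/sendFiles/createRetentionLease; here they complete synchronously.
    static void stepOne(ActionListener<Void> listener) {
        listener.onResponse(null);
    }

    static void stepTwo(ActionListener<String> listener) {
        listener.onResponse("step-two-result");
    }

    public static void main(String[] args) {
        // The caller's listener, analogous to the phase1 "listener" parameter.
        ActionListener<String> finalListener = ActionListener.wrap(
            result -> System.out.println("pipeline finished: " + result),
            e -> System.err.println("pipeline failed: " + e)
        );

        final StepListener<Void> firstStep = new StepListener<>();
        final StepListener<String> secondStep = new StepListener<>();

        // Each whenComplete starts the next step and routes failures
        // straight to the final listener, mirroring the phase1 chain.
        firstStep.whenComplete(ignored -> stepTwo(secondStep), finalListener::onFailure);
        secondStep.whenComplete(finalListener::onResponse, finalListener::onFailure);

        // Kick off the chain.
        stepOne(firstStep);
    }
}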
Use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
Class SnapshotResiliencyTests, method testConcurrentSnapshotDeleteAndDeleteIndex:
public void testConcurrentSnapshotDeleteAndDeleteIndex() throws IOException {
    setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));
    String repoName = "repo";
    String snapshotName = "snapshot";
    final String index = "test";
    TestClusterNodes.TestClusterNode masterNode =
        testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state());
    final StepListener<Collection<CreateIndexResponse>> createIndicesListener = new StepListener<>();
    final int indices = randomIntBetween(5, 20);
    final SetOnce<Index> firstIndex = new SetOnce<>();
    continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> {
        firstIndex.set(masterNode.clusterService.state().metadata().index(index).getIndex());
        // create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot
        // finalization
        final GroupedActionListener<CreateIndexResponse> listener = new GroupedActionListener<>(createIndicesListener, indices);
        for (int i = 0; i < indices; ++i) {
            client().admin().indices().create(new CreateIndexRequest("index-" + i), listener);
        }
    });
    final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();
    final boolean partialSnapshot = randomBoolean();
    continueOrDie(createIndicesListener, createIndexResponses -> client().admin().cluster()
        .prepareCreateSnapshot(repoName, snapshotName)
        .setWaitForCompletion(false)
        .setPartial(partialSnapshot)
        .setIncludeGlobalState(randomBoolean())
        .execute(createSnapshotResponseStepListener));
    continueOrDie(createSnapshotResponseStepListener, createSnapshotResponse -> client().admin().indices()
        .delete(new DeleteIndexRequest(index), new ActionListener<AcknowledgedResponse>() {
            @Override
            public void onResponse(AcknowledgedResponse acknowledgedResponse) {
                if (partialSnapshot) {
                    // Recreate index by the same name to test that we don't snapshot conflicting metadata in this scenario
                    client().admin().indices().create(new CreateIndexRequest(index), noopListener());
                }
            }

            @Override
            public void onFailure(Exception e) {
                if (partialSnapshot) {
                    throw new AssertionError("Delete index should always work during partial snapshots", e);
                }
            }
        }));
    deterministicTaskQueue.runAllRunnableTasks();
    SnapshotsInProgress finalSnapshotsInProgress = masterNode.clusterService.state().custom(SnapshotsInProgress.TYPE);
    assertFalse(finalSnapshotsInProgress.entries().stream().anyMatch(entry -> entry.state().completed() == false));
    final Repository repository = masterNode.repositoriesService.repository(repoName);
    final RepositoryData repositoryData = getRepositoryData(repository);
    Collection<SnapshotId> snapshotIds = repositoryData.getSnapshotIds();
    assertThat(snapshotIds, hasSize(1));
    final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next());
    if (partialSnapshot) {
        assertThat(snapshotInfo.state(), either(is(SnapshotState.SUCCESS)).or(is(SnapshotState.PARTIAL)));
        // Single shard for each index so we either get all indices or all except for the deleted index
        assertThat(snapshotInfo.successfulShards(), either(is(indices + 1)).or(is(indices)));
        if (snapshotInfo.successfulShards() == indices + 1) {
            final IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(
                repositoryData, snapshotInfo.snapshotId(), repositoryData.resolveIndexId(index));
            // Make sure we snapshotted the metadata of this index and not the recreated version
            assertEquals(indexMetadata.getIndex(), firstIndex.get());
        }
    } else {
        assertEquals(snapshotInfo.state(), SnapshotState.SUCCESS);
        // Index delete must be blocked for non-partial snapshots and we get a snapshot for every index
        assertEquals(snapshotInfo.successfulShards(), indices + 1);
    }
    assertEquals(0, snapshotInfo.failedShards());
}
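The continueOrDie and noopListener helpers used throughout these tests are defined elsewhere in SnapshotResiliencyTests and are not shown on this page. A plausible shape for continueOrDie, assuming it simply registers the next step on a StepListener and turns any failure into a test failure, is sketched below; the actual helper in the test class may differ in detail, and the CheckedConsumer import location can vary across OpenSearch versions.

import org.opensearch.action.StepListener;
import org.opensearch.common.CheckedConsumer;

final class StepListenerTestHelpers {
    // Assumed sketch of continueOrDie: run the next step when the listener
    // completes successfully, and fail the test immediately on any error.
    static <T> void continueOrDie(StepListener<T> listener, CheckedConsumer<T, Exception> onResponse) {
        listener.whenComplete(onResponse, e -> {
            throw new AssertionError("unexpected failure in test step", e);
        });
    }
}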
Use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
Class SnapshotResiliencyTests, method indexNDocuments:
private void indexNDocuments(int documents, String index, Runnable afterIndexing) {
    if (documents == 0) {
        afterIndexing.run();
        return;
    }
    final BulkRequest bulkRequest = new BulkRequest().setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
    for (int i = 0; i < documents; ++i) {
        bulkRequest.add(new IndexRequest(index).source(Collections.singletonMap("foo", "bar" + i)));
    }
    final StepListener<BulkResponse> bulkResponseStepListener = new StepListener<>();
    client().bulk(bulkRequest, bulkResponseStepListener);
    continueOrDie(bulkResponseStepListener, bulkResponse -> {
        assertFalse("Failures in bulk response: " + bulkResponse.buildFailureMessage(), bulkResponse.hasFailures());
        assertEquals(documents, bulkResponse.getItems().length);
        afterIndexing.run();
    });
}
Use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
Class SnapshotResiliencyTests, method testConcurrentSnapshotRestoreAndDeleteOther:
public void testConcurrentSnapshotRestoreAndDeleteOther() {
    setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));
    String repoName = "repo";
    String snapshotName = "snapshot";
    final String index = "test";
    final int shards = randomIntBetween(1, 10);
    TestClusterNodes.TestClusterNode masterNode =
        testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state());
    final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();
    final int documentsFirstSnapshot = randomIntBetween(0, 100);
    continueOrDie(createRepoAndIndex(repoName, index, shards), createIndexResponse ->
        indexNDocuments(documentsFirstSnapshot, index, () -> client().admin().cluster()
            .prepareCreateSnapshot(repoName, snapshotName)
            .setWaitForCompletion(true)
            .execute(createSnapshotResponseStepListener)));
    final int documentsSecondSnapshot = randomIntBetween(0, 100);
    final StepListener<CreateSnapshotResponse> createOtherSnapshotResponseStepListener = new StepListener<>();
    final String secondSnapshotName = "snapshot-2";
    continueOrDie(createSnapshotResponseStepListener, createSnapshotResponse ->
        indexNDocuments(documentsSecondSnapshot, index, () -> client().admin().cluster()
            .prepareCreateSnapshot(repoName, secondSnapshotName)
            .setWaitForCompletion(true)
            .execute(createOtherSnapshotResponseStepListener)));
    final StepListener<AcknowledgedResponse> deleteSnapshotStepListener = new StepListener<>();
    final StepListener<RestoreSnapshotResponse> restoreSnapshotResponseListener = new StepListener<>();
    continueOrDie(createOtherSnapshotResponseStepListener, createSnapshotResponse -> {
        scheduleNow(() -> client().admin().cluster().prepareDeleteSnapshot(repoName, snapshotName).execute(deleteSnapshotStepListener));
        scheduleNow(() -> client().admin().cluster().restoreSnapshot(
            new RestoreSnapshotRequest(repoName, secondSnapshotName).waitForCompletion(true)
                .renamePattern("(.+)").renameReplacement("restored_$1"),
            restoreSnapshotResponseListener));
    });
    final StepListener<SearchResponse> searchResponseListener = new StepListener<>();
    continueOrDie(restoreSnapshotResponseListener, restoreSnapshotResponse -> {
        assertEquals(shards, restoreSnapshotResponse.getRestoreInfo().totalShards());
        client().search(new SearchRequest("restored_" + index).source(new SearchSourceBuilder().size(0).trackTotalHits(true)),
            searchResponseListener);
    });
    deterministicTaskQueue.runAllRunnableTasks();
    assertEquals(documentsFirstSnapshot + documentsSecondSnapshot,
        Objects.requireNonNull(searchResponseListener.result().getHits().getTotalHits()).value);
    assertThat(deleteSnapshotStepListener.result().isAcknowledged(), is(true));
    assertThat(restoreSnapshotResponseListener.result().getRestoreInfo().failedShards(), is(0));
    final Repository repository = masterNode.repositoriesService.repository(repoName);
    Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
    assertThat(snapshotIds, contains(createOtherSnapshotResponseStepListener.result().getSnapshotInfo().snapshotId()));
    for (SnapshotId snapshotId : snapshotIds) {
        final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId);
        assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
        assertThat(snapshotInfo.indices(), containsInAnyOrder(index));
        assertEquals(shards, snapshotInfo.successfulShards());
        assertEquals(0, snapshotInfo.failedShards());
    }
}
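Note how the assertions after deterministicTaskQueue.runAllRunnableTasks() read the listeners' values synchronously via StepListener.result(). Once the deterministic task queue has drained, every chained step has completed, so result() returns the captured response and fails with an exception if the step never completed. A tiny sketch of that usage, with a hypothetical asynchronous call standing in for the cluster client:

import org.opensearch.action.ActionListener;
import org.opensearch.action.StepListener;

public class StepListenerResultSketch {
    // Hypothetical stand-in for an async client call; completes immediately here.
    static void asyncCall(ActionListener<Integer> listener) {
        listener.onResponse(42);
    }

    public static void main(String[] args) {
        final StepListener<Integer> step = new StepListener<>();
        asyncCall(step);
        // Safe only after the asynchronous work has finished; result() throws
        // if the step has not yet been completed.
        System.out.println("captured response: " + step.result());
    }
}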
Use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
Class SnapshotResiliencyTests, method testSnapshotDeleteWithMasterFailover:
public void testSnapshotDeleteWithMasterFailover() {
    final int dataNodes = randomIntBetween(2, 10);
    final int masterNodes = randomFrom(3, 5);
    setupTestCluster(masterNodes, dataNodes);
    String repoName = "repo";
    String snapshotName = "snapshot";
    final String index = "test";
    final int shards = randomIntBetween(1, 10);
    final boolean waitForSnapshot = randomBoolean();
    final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();
    continueOrDie(createRepoAndIndex(repoName, index, shards), createIndexResponse ->
        testClusterNodes.randomMasterNodeSafe().client.admin().cluster()
            .prepareCreateSnapshot(repoName, snapshotName)
            .setWaitForCompletion(waitForSnapshot)
            .execute(createSnapshotResponseStepListener));
    final AtomicBoolean snapshotDeleteResponded = new AtomicBoolean(false);
    continueOrDie(createSnapshotResponseStepListener, createSnapshotResponse -> {
        scheduleNow(this::disconnectOrRestartMasterNode);
        testClusterNodes.randomDataNodeSafe().client.admin().cluster()
            .prepareDeleteSnapshot(repoName, snapshotName)
            .execute(ActionListener.wrap(() -> snapshotDeleteResponded.set(true)));
    });
    runUntil(
        () -> testClusterNodes.randomMasterNode()
            .map(master -> snapshotDeleteResponded.get()
                && master.clusterService.state()
                    .custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY)
                    .getEntries()
                    .isEmpty())
            .orElse(false),
        TimeUnit.MINUTES.toMillis(1L)
    );
    clearDisruptionsAndAwaitSync();
    final TestClusterNodes.TestClusterNode randomMaster = testClusterNodes.randomMasterNode()
        .orElseThrow(() -> new AssertionError("expected to find at least one active master node"));
    SnapshotsInProgress finalSnapshotsInProgress = randomMaster.clusterService.state().custom(SnapshotsInProgress.TYPE);
    assertThat(finalSnapshotsInProgress.entries(), empty());
    final Repository repository = randomMaster.repositoriesService.repository(repoName);
    Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
    assertThat(snapshotIds, hasSize(0));
}
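One detail worth noting in the test above: the delete request's listener is built with ActionListener.wrap(Runnable), which runs the same callback for both onResponse and onFailure. The test therefore only records that the delete request received some reply while the master was being disrupted, not that it succeeded. A minimal sketch of that behaviour, using the same wrap(Runnable) overload the test itself calls:

import java.util.concurrent.atomic.AtomicBoolean;
import org.opensearch.action.ActionListener;

public class WrapRunnableSketch {
    public static void main(String[] args) {
        final AtomicBoolean responded = new AtomicBoolean(false);
        // wrap(Runnable) invokes the runnable on completion,
        // whether the request succeeded or failed.
        ActionListener<String> listener = ActionListener.wrap(() -> responded.set(true));
        listener.onFailure(new RuntimeException("master failed over"));
        System.out.println("responded = " + responded.get()); // prints true
    }
}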