Use of org.opensearch.index.seqno.RetentionLease in the OpenSearch project by opensearch-project.
From the class ReplicaShardAllocatorIT, method ensureActivePeerRecoveryRetentionLeasesAdvanced:
public static void ensureActivePeerRecoveryRetentionLeasesAdvanced(String indexName) throws Exception {
    final ClusterService clusterService = internalCluster().clusterService();
    assertBusy(() -> {
        Index index = resolveIndex(indexName);
        Set<String> activeRetentionLeaseIds = clusterService.state()
            .routingTable()
            .index(index)
            .shard(0)
            .shards()
            .stream()
            .map(shardRouting -> ReplicationTracker.getPeerRecoveryRetentionLeaseId(shardRouting.currentNodeId()))
            .collect(Collectors.toSet());
        for (String node : internalCluster().nodesInclude(indexName)) {
            IndexService indexService = internalCluster().getInstance(IndicesService.class, node).indexService(index);
            if (indexService != null) {
                for (IndexShard shard : indexService) {
                    assertThat(shard.getLastSyncedGlobalCheckpoint(), equalTo(shard.seqNoStats().getMaxSeqNo()));
                    Set<RetentionLease> activeRetentionLeases = shard.getPeerRecoveryRetentionLeases()
                        .stream()
                        .filter(lease -> activeRetentionLeaseIds.contains(lease.id()))
                        .collect(Collectors.toSet());
                    assertThat(activeRetentionLeases, hasSize(activeRetentionLeaseIds.size()));
                    for (RetentionLease lease : activeRetentionLeases) {
                        assertThat(lease.retainingSequenceNumber(), equalTo(shard.getLastSyncedGlobalCheckpoint() + 1));
                    }
                }
            }
        }
    });
}
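The assertion above leans on a naming convention: each active shard copy is expected to hold a peer recovery retention lease whose ID is a deterministic function of the ID of the node hosting it, which is what ReplicationTracker.getPeerRecoveryRetentionLeaseId computes. A minimal, self-contained sketch of that mapping follows; the "peer_recovery/" prefix is an assumption for illustration, since the test only requires the mapping to be deterministic per node.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class PeerRecoveryLeaseIds {

    // Assumed prefix, for illustration only; the test depends solely on
    // this being a deterministic function of the hosting node's ID.
    static String getPeerRecoveryRetentionLeaseId(String nodeId) {
        return "peer_recovery/" + nodeId;
    }

    public static void main(String[] args) {
        // Node IDs of the shard copies listed in the routing table.
        List<String> nodeIdsOfActiveCopies = List.of("nodeA", "nodeB");
        Set<String> expectedLeaseIds = nodeIdsOfActiveCopies.stream()
            .map(PeerRecoveryLeaseIds::getPeerRecoveryRetentionLeaseId)
            .collect(Collectors.toSet());
        // Each shard copy must then hold exactly these leases, each retaining
        // lastSyncedGlobalCheckpoint + 1 (the first sequence number not yet
        // fully replicated), which is what the assertBusy block verifies.
        System.out.println(expectedLeaseIds);
    }
}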
Use of org.opensearch.index.seqno.RetentionLease in the OpenSearch project by opensearch-project.
From the class RecoverySourceHandler, method phase1:
/**
 * Perform phase1 of the recovery operations. Once this {@link IndexCommit}
 * snapshot has been performed, no commit operations (files being fsync'd)
 * are effectively allowed on this index until all recovery phases are done.
 * <p>
 * Phase1 examines the segment files on the target node and copies over the
 * segments that are missing. Only segments that have the same size and
 * checksum can be reused.
 */
void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
    cancellableThreads.checkForCancel();
    final Store store = shard.store();
    try {
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            shard.failShard("recovery", ex);
            throw ex;
        }
        for (String name : snapshot.getFileNames()) {
            final StoreFileMetadata md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException(
                    "Snapshot differs from actual index - maybe index was removed metadata has "
                        + recoverySourceMetadata.asMap().size()
                        + " files",
                    name
                );
            }
        }
        if (canSkipPhase1(recoverySourceMetadata, request.metadataSnapshot()) == false) {
            final List<String> phase1FileNames = new ArrayList<>();
            final List<Long> phase1FileSizes = new ArrayList<>();
            final List<String> phase1ExistingFileNames = new ArrayList<>();
            final List<Long> phase1ExistingFileSizes = new ArrayList<>();
            // Total size of segment files that are recovered
            long totalSizeInBytes = 0;
            // Total size of segment files that were able to be re-used
            long existingTotalSizeInBytes = 0;
            // Generate a "diff" of all the identical, different, and missing
            // segment files on the target node, using the existing files on
            // the source node
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            for (StoreFileMetadata md : diff.identical) {
                phase1ExistingFileNames.add(md.name());
                phase1ExistingFileSizes.add(md.length());
                existingTotalSizeInBytes += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace(
                        "recovery [phase1]: not recovering [{}], exists in local store and has checksum [{}], size [{}]",
                        md.name(),
                        md.checksum(),
                        md.length()
                    );
                }
                totalSizeInBytes += md.length();
            }
            List<StoreFileMetadata> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
            phase1Files.addAll(diff.different);
            phase1Files.addAll(diff.missing);
            for (StoreFileMetadata md : phase1Files) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace(
                        "recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]",
                        md.name(),
                        request.metadataSnapshot().asMap().get(md.name()),
                        md
                    );
                } else {
                    logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
                }
                phase1FileNames.add(md.name());
                phase1FileSizes.add(md.length());
                totalSizeInBytes += md.length();
            }
            logger.trace(
                "recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]",
                phase1FileNames.size(),
                new ByteSizeValue(totalSizeInBytes),
                phase1ExistingFileNames.size(),
                new ByteSizeValue(existingTotalSizeInBytes)
            );
            final StepListener<Void> sendFileInfoStep = new StepListener<>();
            final StepListener<Void> sendFilesStep = new StepListener<>();
            final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
            final StepListener<Void> cleanFilesStep = new StepListener<>();
            cancellableThreads.checkForCancel();
            recoveryTarget.receiveFileInfo(
                phase1FileNames,
                phase1FileSizes,
                phase1ExistingFileNames,
                phase1ExistingFileSizes,
                translogOps.getAsInt(),
                sendFileInfoStep
            );
            sendFileInfoStep.whenComplete(
                r -> sendFiles(store, phase1Files.toArray(new StoreFileMetadata[0]), translogOps, sendFilesStep),
                listener::onFailure
            );
            sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure);
            createRetentionLeaseStep.whenComplete(retentionLease -> {
                final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint();
                assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint
                    : retentionLease + " vs " + lastKnownGlobalCheckpoint;
                // Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want
                // the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica
                // to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on
                // the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint.
                cleanFiles(store, recoverySourceMetadata, translogOps, lastKnownGlobalCheckpoint, cleanFilesStep);
            }, listener::onFailure);
            final long totalSize = totalSizeInBytes;
            final long existingTotalSize = existingTotalSizeInBytes;
            cleanFilesStep.whenComplete(r -> {
                final TimeValue took = stopWatch.totalTime();
                logger.trace("recovery [phase1]: took [{}]", took);
                listener.onResponse(
                    new SendFileResult(
                        phase1FileNames,
                        phase1FileSizes,
                        totalSize,
                        phase1ExistingFileNames,
                        phase1ExistingFileSizes,
                        existingTotalSize,
                        took
                    )
                );
            }, listener::onFailure);
        } else {
            logger.trace("skipping [phase1] since source and target have identical sync id [{}]", recoverySourceMetadata.getSyncId());
            // but we must still create a retention lease
            final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
            createRetentionLease(startingSeqNo, createRetentionLeaseStep);
            createRetentionLeaseStep.whenComplete(retentionLease -> {
                final TimeValue took = stopWatch.totalTime();
                logger.trace("recovery [phase1]: took [{}]", took);
                listener.onResponse(
                    new SendFileResult(
                        Collections.emptyList(),
                        Collections.emptyList(),
                        0L,
                        Collections.emptyList(),
                        Collections.emptyList(),
                        0L,
                        took
                    )
                );
            }, listener::onFailure);
        }
    } catch (Exception e) {
        throw new RecoverFilesRecoveryException(request.shardId(), 0, new ByteSizeValue(0L), e);
    }
}
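The control flow above is a chain of four StepListener stages: send file metadata, stream the files, establish a retention lease, then clean up target files and set the global checkpoint, with any failure short-circuiting to the outer listener. A minimal sketch of the same chaining pattern follows, using java.util.concurrent.CompletableFuture in place of OpenSearch's StepListener purely so the example runs standalone; the stage names mirror the steps above but are otherwise illustrative.

import java.util.concurrent.CompletableFuture;

public class Phase1StepsSketch {
    public static void main(String[] args) {
        // Stage 1: tell the target which files are coming and which it can keep.
        CompletableFuture<Void> sendFileInfo = CompletableFuture.runAsync(
            () -> System.out.println("1. send file names/sizes to target"));
        // Stage 2: stream only the missing/different segment files.
        CompletableFuture<Void> sendFiles = sendFileInfo.thenRun(
            () -> System.out.println("2. stream missing/different segment files"));
        // Stage 3: make sure history from startingSeqNo onward is retained.
        CompletableFuture<Void> createLease = sendFiles.thenRun(
            () -> System.out.println("3. establish a peer recovery retention lease"));
        // Stage 4: clean leftover target files and set the global checkpoint.
        CompletableFuture<Void> cleanFiles = createLease.thenRun(
            () -> System.out.println("4. clean target files, set global checkpoint"));
        // Any exception in an earlier stage propagates here, just as each
        // whenComplete call above routes failures to listener::onFailure.
        cleanFiles.join();
    }
}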
Use of org.opensearch.index.seqno.RetentionLease in the OpenSearch project by opensearch-project.
From the class RetentionLeasesReplicationTests, method testSimpleSyncRetentionLeases:
public void testSimpleSyncRetentionLeases() throws Exception {
    Settings settings = Settings.builder().put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).build();
    try (ReplicationGroup group = createGroup(between(0, 2), settings)) {
        group.startAll();
        List<RetentionLease> leases = new ArrayList<>();
        int iterations = between(1, 100);
        CountDownLatch latch = new CountDownLatch(iterations);
        for (int i = 0; i < iterations; i++) {
            if (leases.isEmpty() == false && rarely()) {
                RetentionLease leaseToRemove = randomFrom(leases);
                leases.remove(leaseToRemove);
                group.removeRetentionLease(leaseToRemove.id(), ActionListener.wrap(latch::countDown));
            } else {
                RetentionLease newLease = group.addRetentionLease(
                    Integer.toString(i),
                    randomNonNegativeLong(),
                    "test-" + i,
                    ActionListener.wrap(latch::countDown)
                );
                leases.add(newLease);
            }
        }
        RetentionLeases leasesOnPrimary = group.getPrimary().getRetentionLeases();
        assertThat(leasesOnPrimary.version(), equalTo(iterations + group.getReplicas().size() + 1L));
        assertThat(leasesOnPrimary.primaryTerm(), equalTo(group.getPrimary().getOperationPrimaryTerm()));
        assertThat(
            RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(leasesOnPrimary).values(),
            containsInAnyOrder(leases.toArray(new RetentionLease[0]))
        );
        latch.await();
        for (IndexShard replica : group.getReplicas()) {
            assertThat(replica.getRetentionLeases(), equalTo(leasesOnPrimary));
        }
    }
}
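For readers unfamiliar with the object being added and removed above: a retention lease is essentially an (id, retainingSequenceNumber, source) triple that keeps operations at or above the retaining sequence number from being discarded. A stripped-down model follows, matching only the accessors the tests use (id(), retainingSequenceNumber(), source()); timestamps and serialization are deliberately elided, so this is a sketch rather than the real class.

public class RetentionLeaseModel {

    // Field set mirrors the accessors used in the snippets above.
    record Lease(String id, long retainingSequenceNumber, String source) {}

    public static void main(String[] args) {
        // Mirrors group.addRetentionLease(Integer.toString(i), seqNo, "test-" + i, ...):
        Lease lease = new Lease("0", 42L, "test-0");
        // A lease retains all operations with seqNo >= retainingSequenceNumber,
        // so here sequence numbers 42 and above cannot be merged away.
        System.out.println(lease);
    }
}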
Use of org.opensearch.index.seqno.RetentionLease in the OpenSearch project by opensearch-project.
From the class IndexShardRetentionLeaseTests, method assertRetentionLeases:
private void assertRetentionLeases(
    final RetentionLeases retentionLeases,
    final int size,
    final long[] minimumRetainingSequenceNumbers,
    final long primaryTerm,
    final long version
) {
    assertThat(retentionLeases.primaryTerm(), equalTo(primaryTerm));
    assertThat(retentionLeases.version(), equalTo(version));
    final Map<String, RetentionLease> idToRetentionLease = new HashMap<>();
    for (final RetentionLease retentionLease : retentionLeases.leases()) {
        if (ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE.equals(retentionLease.source()) == false) {
            idToRetentionLease.put(retentionLease.id(), retentionLease);
        }
    }
    assertThat(idToRetentionLease.entrySet(), hasSize(size));
    for (int i = 0; i < size; i++) {
        assertThat(idToRetentionLease.keySet(), hasItem(Integer.toString(i)));
        final RetentionLease retentionLease = idToRetentionLease.get(Integer.toString(i));
        assertThat(retentionLease.retainingSequenceNumber(), equalTo(minimumRetainingSequenceNumbers[i]));
        assertThat(retentionLease.source(), equalTo("test-" + i));
    }
}
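The primaryTerm and version checks above reflect how the RetentionLeases container is versioned: mutations bump the version under the current primary term, which is consistent with the version arithmetic asserted in testSimpleSyncRetentionLeases earlier. A toy model of that bookkeeping follows; the real class is immutable and replicated to the shard copies, so this mutable sketch only illustrates the version counter, not the actual implementation.

import java.util.HashMap;
import java.util.Map;

public class RetentionLeasesModel {
    long primaryTerm = 1;
    long version = 0;
    final Map<String, String> leases = new HashMap<>(); // id -> source

    void add(String id, String source) {
        leases.put(id, source);
        version++; // each add or remove produces a new, higher version
    }

    public static void main(String[] args) {
        RetentionLeasesModel m = new RetentionLeasesModel();
        m.add("0", "test-0");
        m.add("1", "test-1");
        // Replicas compare (primaryTerm, version) pairs to detect stale state.
        System.out.println(m.primaryTerm + "/" + m.version + " " + m.leases);
    }
}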
Use of org.opensearch.index.seqno.RetentionLease in the OpenSearch project by opensearch-project.
From the class ReplicaShardAllocatorTests, method testPreferCopyWithHighestMatchingOperations:
public void testPreferCopyWithHighestMatchingOperations() {
    RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
    long retainingSeqNoOnPrimary = randomLongBetween(1, Integer.MAX_VALUE);
    long retainingSeqNoForNode2 = randomLongBetween(0, retainingSeqNoOnPrimary - 1);
    // Rarely use a seqNo above retainingSeqNoOnPrimary, which could in theory happen when the primary fails and comes back quickly.
    long retainingSeqNoForNode3 = randomLongBetween(retainingSeqNoForNode2 + 1, retainingSeqNoOnPrimary + 100);
    List<RetentionLease> retentionLeases = Arrays.asList(
        newRetentionLease(node1, retainingSeqNoOnPrimary),
        newRetentionLease(node2, retainingSeqNoForNode2),
        newRetentionLease(node3, retainingSeqNoForNode3)
    );
    testAllocator.addData(node1, retentionLeases, "MATCH", new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
    testAllocator.addData(node2, "NOT_MATCH", new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
    testAllocator.addData(node3, randomSyncId(), new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
    allocateAllUnassigned(allocation);
    assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
    assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(), equalTo(node3.getId()));
}
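The expectation this test encodes: among candidate copies, the allocator should prefer the node whose peer recovery retention lease on the primary retains the highest sequence number, because that copy needs the fewest operations replayed during recovery. A small illustrative sketch of that preference follows; the names and selection logic are hypothetical, not the allocator's internals.

import java.util.Map;

public class PreferHighestRetainedSeqNo {
    public static void main(String[] args) {
        // Retaining sequence number per candidate node, as granted by the
        // leases held on the primary (node2 and node3 mirror the test above).
        Map<String, Long> retainingSeqNoByNode = Map.of(
            "node2", 10L,   // furthest behind: most operations to replay
            "node3", 120L   // closest to the primary: fewest to replay
        );
        String best = retainingSeqNoByNode.entrySet().stream()
            .max(Map.Entry.comparingByValue())
            .map(Map.Entry::getKey)
            .orElseThrow();
        System.out.println("allocate replica to " + best); // node3
    }
}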