Use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.
Example from the class ReplicaShardAllocatorIT, method testRecentPrimaryInformation.
/**
* Ensure that we fetch the latest shard store from the primary when a new node joins so we won't cancel the current recovery
* for the copy on the newly joined node unless we can perform a noop recovery with that node.
*/
public void testRecentPrimaryInformation() throws Exception {
    String indexName = "test";
    String nodeWithPrimary = internalCluster().startNode();
    assertAcked(
        client().admin()
            .indices()
            .prepareCreate(indexName)
            .setSettings(
                Settings.builder()
                    .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                    .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
                    .put(IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING.getKey(), 0.1f)
                    .put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms")
                    .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms")
                    .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "1ms")
            )
    );
    String nodeWithReplica = internalCluster().startDataOnlyNode();
    DiscoveryNode discoNodeWithReplica = internalCluster().getInstance(ClusterService.class, nodeWithReplica).localNode();
    Settings nodeWithReplicaSettings = internalCluster().dataPathSettings(nodeWithReplica);
    ensureGreen(indexName);
    indexRandom(
        randomBoolean(),
        false,
        randomBoolean(),
        IntStream.range(0, between(10, 100)).mapToObj(n -> client().prepareIndex(indexName).setSource("f", "v")).collect(Collectors.toList())
    );
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeWithReplica));
    if (randomBoolean()) {
        indexRandom(
            randomBoolean(),
            false,
            randomBoolean(),
            IntStream.range(0, between(10, 100)).mapToObj(n -> client().prepareIndex(indexName).setSource("f", "v")).collect(Collectors.toList())
        );
    }
    CountDownLatch blockRecovery = new CountDownLatch(1);
    CountDownLatch recoveryStarted = new CountDownLatch(1);
    MockTransportService transportServiceOnPrimary = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeWithPrimary);
    transportServiceOnPrimary.addSendBehavior((connection, requestId, action, request, options) -> {
        if (PeerRecoveryTargetService.Actions.FILES_INFO.equals(action)) {
            recoveryStarted.countDown();
            try {
                blockRecovery.await();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
        connection.sendRequest(requestId, action, request, options);
    });
    String newNode = internalCluster().startDataOnlyNode();
    recoveryStarted.await();
    // Index more documents and flush to destroy the sync_id and remove the retention lease (as file_based_recovery_threshold is reached).
    indexRandom(
        randomBoolean(),
        randomBoolean(),
        randomBoolean(),
        IntStream.range(0, between(50, 200)).mapToObj(n -> client().prepareIndex(indexName).setSource("f", "v")).collect(Collectors.toList())
    );
    client().admin().indices().prepareFlush(indexName).get();
    assertBusy(() -> {
        for (ShardStats shardStats : client().admin().indices().prepareStats(indexName).get().getShards()) {
            for (RetentionLease lease : shardStats.getRetentionLeaseStats().retentionLeases().leases()) {
                assertThat(lease.id(), not(equalTo(ReplicationTracker.getPeerRecoveryRetentionLeaseId(discoNodeWithReplica.getId()))));
            }
        }
    });
    // AllocationService only calls GatewayAllocator if there are unassigned shards.
    assertAcked(
        client().admin()
            .indices()
            .prepareCreate("dummy-index")
            .setWaitForActiveShards(0)
            .setSettings(Settings.builder().put("index.routing.allocation.require.attr", "not-found"))
    );
    internalCluster().startDataOnlyNode(nodeWithReplicaSettings);
    // Wait for events to ensure the reroute has happened, since it is performed asynchronously when a new node joins.
    client().admin().cluster().prepareHealth(indexName).setWaitForYellowStatus().setWaitForEvents(Priority.LANGUID).get();
    blockRecovery.countDown();
    ensureGreen(indexName);
    assertThat(internalCluster().nodesInclude(indexName), hasItem(newNode));
    for (RecoveryState recovery : client().admin().indices().prepareRecoveries(indexName).get().shardRecoveryStates().get(indexName)) {
        if (recovery.getPrimary() == false) {
            assertThat(recovery.getIndex().fileDetails(), not(empty()));
        }
    }
    transportServiceOnPrimary.clearAllRules();
}
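The test above leans on IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING (set to 0.1f here): once enough new operations have been indexed and flushed, an operation-based (no-op) recovery of the old replica copy is no longer worthwhile, and its peer-recovery retention lease can be discarded. The following is a minimal sketch of that decision rule as I understand it; the class and method names are invented for illustration, and the exact rule used inside OpenSearch may differ.

// Hypothetical illustration only, not the OpenSearch implementation.
final class FileBasedRecoveryThresholdSketch {
    // Assumed decision rule: operation-based recovery is preferred while the number of
    // operations to replay stays below threshold * documents in the shard.
    static boolean preferOperationBasedRecovery(long opsToReplay, long docsInShard, double threshold) {
        return opsToReplay <= threshold * docsInShard;
    }

    public static void main(String[] args) {
        double threshold = 0.1; // mirrors the 0.1f used in the test
        // Shortly after the replica left: few missing ops, a no-op/ops-based recovery is still attractive.
        System.out.println(preferOperationBasedRecovery(5, 100, threshold));   // true
        // After indexing between(50, 200) more docs and flushing: too many missing ops.
        System.out.println(preferOperationBasedRecovery(150, 250, threshold)); // false
    }
}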
Use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.
Example from the class SoftDeletesPolicy, method getMinRetainedSeqNo.
/**
 * Returns the min seqno that is retained in the Lucene index.
 * Operations whose seq# is at least this value should exist in the Lucene index.
 */
synchronized long getMinRetainedSeqNo() {
    /*
     * When an engine is flushed, we need to provide it the latest collection of retention leases even when the soft deletes policy is
     * locked for peer recovery.
     */
    final RetentionLeases retentionLeases = retentionLeasesSupplier.get();
    // do not advance if the retention lock is held
    if (retentionLockCount == 0) {
        /*
         * This policy retains operations for two purposes: peer recovery and querying the changes history.
         * - Peer recovery is driven by the local checkpoint of the safe commit. In peer recovery, the primary transfers a safe commit,
         *   then sends operations after the local checkpoint of that commit. This requires keeping all ops after
         *   localCheckpointOfSafeCommit.
         * - The changes APIs are driven by a combination of the global checkpoint, retention operations, and retention leases. Here we
         *   prefer using the global checkpoint instead of the maximum sequence number because only operations up to the global
         *   checkpoint are exposed in the changes APIs.
         */
        // calculate the minimum sequence number to retain based on retention leases
        final long minimumRetainingSequenceNumber = retentionLeases.leases()
            .stream()
            .mapToLong(RetentionLease::retainingSequenceNumber)
            .min()
            .orElse(Long.MAX_VALUE);
        /*
         * The minimum sequence number to retain is the minimum of the minimum based on retention leases, and the number of operations
         * below the global checkpoint to retain (index.soft_deletes.retention.operations). The additional increments on the global
         * checkpoint and the local checkpoint of the safe commit are due to the fact that we want to retain all operations above
         * those checkpoints.
         */
        final long minSeqNoForQueryingChanges = Math.min(1 + globalCheckpointSupplier.getAsLong() - retentionOperations, minimumRetainingSequenceNumber);
        final long minSeqNoToRetain = Math.min(minSeqNoForQueryingChanges, 1 + localCheckpointOfSafeCommit);
        /*
         * We take the maximum because minSeqNoToRetain can go backwards: the retention operations value can be changed in settings,
         * or leases can be added with a retaining sequence number lower than previous retaining sequence numbers.
         */
        minRetainedSeqNo = Math.max(minRetainedSeqNo, minSeqNoToRetain);
    }
    return minRetainedSeqNo;
}
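A small worked example of the arithmetic described in the comments above (plain Java, all values invented for illustration): with a global checkpoint of 100, 30 retained operations, a safe-commit local checkpoint of 80, and leases retaining from sequence numbers 60 and 95, the policy must keep every operation with seq# >= 60.

// Plain-Java illustration of the computation above; the numbers are made up.
long globalCheckpoint = 100;
long retentionOperations = 30;                             // index.soft_deletes.retention.operations
long localCheckpointOfSafeCommit = 80;
long minimumRetainingSequenceNumber = Math.min(60, 95);    // smallest lease bound -> 60

long minSeqNoForQueryingChanges =
    Math.min(1 + globalCheckpoint - retentionOperations,   // 71 (changes history)
             minimumRetainingSequenceNumber);               // 60 (leases) -> 60
long minSeqNoToRetain =
    Math.min(minSeqNoForQueryingChanges, 1 + localCheckpointOfSafeCommit); // min(60, 81) = 60

long previousMinRetainedSeqNo = 0;
long minRetainedSeqNo = Math.max(previousMinRetainedSeqNo, minSeqNoToRetain); // 60, never moves backwards
// every operation with seq# >= 60 must remain reachable in the Lucene index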
Use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.
Example from the class RelocationIT, method assertActiveCopiesEstablishedPeerRecoveryRetentionLeases.
private void assertActiveCopiesEstablishedPeerRecoveryRetentionLeases() throws Exception {
    assertBusy(() -> {
        for (ObjectCursor<String> it : client().admin().cluster().prepareState().get().getState().metadata().indices().keys()) {
            Map<ShardId, List<ShardStats>> byShardId = Stream.of(client().admin().indices().prepareStats(it.value).get().getShards())
                .collect(Collectors.groupingBy(l -> l.getShardRouting().shardId()));
            for (List<ShardStats> shardStats : byShardId.values()) {
                Set<String> expectedLeaseIds = shardStats.stream()
                    .map(s -> ReplicationTracker.getPeerRecoveryRetentionLeaseId(s.getShardRouting()))
                    .collect(Collectors.toSet());
                for (ShardStats shardStat : shardStats) {
                    Set<String> actualLeaseIds = shardStat.getRetentionLeaseStats()
                        .retentionLeases()
                        .leases()
                        .stream()
                        .map(RetentionLease::id)
                        .collect(Collectors.toSet());
                    assertThat(expectedLeaseIds, everyItem(in(actualLeaseIds)));
                }
            }
        }
    });
}
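The assertion verifies that every active copy of each shard holds a peer-recovery retention lease for every other active copy. Below is a simplified stand-alone sketch of the same set-containment check; the "peer_recovery/" id prefix is an assumption made purely for illustration, not a documented detail of ReplicationTracker.

import java.util.*;

// Simplified stand-in for the assertion above; the lease id scheme is hypothetical.
final class PeerRecoveryLeaseCheck {
    static String leaseIdFor(String nodeId) {
        return "peer_recovery/" + nodeId; // assumed id scheme, for illustration only
    }

    public static void main(String[] args) {
        List<String> nodesWithACopy = List.of("node-1", "node-2");
        Set<String> expectedLeaseIds = new HashSet<>();
        for (String nodeId : nodesWithACopy) {
            expectedLeaseIds.add(leaseIdFor(nodeId));
        }
        // leases actually held by one copy (would come from RetentionLeaseStats in the test)
        Set<String> actualLeaseIds = Set.of(leaseIdFor("node-1"), leaseIdFor("node-2"));
        // same check as everyItem(in(actualLeaseIds)): every expected id must be present
        if (!actualLeaseIds.containsAll(expectedLeaseIds)) {
            throw new AssertionError("missing peer recovery retention lease(s)");
        }
    }
}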
Use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.
Example from the class InternalEngineTests, method testKeepMinRetainedSeqNoByMergePolicy.
public void testKeepMinRetainedSeqNoByMergePolicy() throws IOException {
    IOUtils.close(engine, store);
    Settings.Builder settings = Settings.builder()
        .put(defaultSettings.getSettings())
        .put(IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING.getKey(), randomLongBetween(0, 10));
    final IndexMetadata indexMetadata = IndexMetadata.builder(defaultSettings.getIndexMetadata()).settings(settings).build();
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(indexMetadata);
    final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED);
    final long primaryTerm = randomLongBetween(1, Long.MAX_VALUE);
    final AtomicLong retentionLeasesVersion = new AtomicLong();
    final AtomicReference<RetentionLeases> retentionLeasesHolder = new AtomicReference<>(
        new RetentionLeases(primaryTerm, retentionLeasesVersion.get(), Collections.emptyList())
    );
    final List<Engine.Operation> operations = generateSingleDocHistory(true, randomFrom(VersionType.INTERNAL, VersionType.EXTERNAL), 2, 10, 300, "2");
    Randomness.shuffle(operations);
    Set<Long> existingSeqNos = new HashSet<>();
    store = createStore();
    engine = createEngine(config(indexSettings, store, createTempDir(), newMergePolicy(), null, null, globalCheckpoint::get, retentionLeasesHolder::get));
    assertThat(engine.getMinRetainedSeqNo(), equalTo(0L));
    long lastMinRetainedSeqNo = engine.getMinRetainedSeqNo();
    for (Engine.Operation op : operations) {
        final Engine.Result result;
        if (op instanceof Engine.Index) {
            result = engine.index((Engine.Index) op);
        } else {
            result = engine.delete((Engine.Delete) op);
        }
        existingSeqNos.add(result.getSeqNo());
        if (randomBoolean()) {
            // advance persisted local checkpoint
            engine.syncTranslog();
            assertEquals(engine.getProcessedLocalCheckpoint(), engine.getPersistedLocalCheckpoint());
            globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpointTracker().getPersistedCheckpoint()));
        }
        if (randomBoolean()) {
            retentionLeasesVersion.incrementAndGet();
            final int length = randomIntBetween(0, 8);
            final List<RetentionLease> leases = new ArrayList<>(length);
            for (int i = 0; i < length; i++) {
                final String id = randomAlphaOfLength(8);
                final long retainingSequenceNumber = randomLongBetween(0, Math.max(0, globalCheckpoint.get()));
                final long timestamp = randomLongBetween(0L, Long.MAX_VALUE);
                final String source = randomAlphaOfLength(8);
                leases.add(new RetentionLease(id, retainingSequenceNumber, timestamp, source));
            }
            retentionLeasesHolder.set(new RetentionLeases(primaryTerm, retentionLeasesVersion.get(), leases));
        }
        if (rarely()) {
            settings.put(IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING.getKey(), randomLongBetween(0, 10));
            indexSettings.updateIndexMetadata(IndexMetadata.builder(defaultSettings.getIndexMetadata()).settings(settings).build());
            engine.onSettingsChanged(indexSettings.getTranslogRetentionAge(), indexSettings.getTranslogRetentionSize(), indexSettings.getSoftDeleteRetentionOperations());
        }
        if (rarely()) {
            engine.refresh("test");
        }
        if (rarely()) {
            engine.flush(true, true);
            assertThat(
                Long.parseLong(engine.getLastCommittedSegmentInfos().userData.get(Engine.MIN_RETAINED_SEQNO)),
                equalTo(engine.getMinRetainedSeqNo())
            );
        }
        if (rarely()) {
            engine.forceMerge(randomBoolean(), 1, false, false, false, UUIDs.randomBase64UUID());
        }
        try (Closeable ignored = engine.acquireHistoryRetentionLock()) {
            long minRetainSeqNos = engine.getMinRetainedSeqNo();
            assertThat(minRetainSeqNos, lessThanOrEqualTo(globalCheckpoint.get() + 1));
            Long[] expectedOps = existingSeqNos.stream().filter(seqno -> seqno >= minRetainSeqNos).toArray(Long[]::new);
            Set<Long> actualOps = readAllOperationsInLucene(engine, createMapperService("test")).stream()
                .map(Translog.Operation::seqNo)
                .collect(Collectors.toSet());
            assertThat(actualOps, containsInAnyOrder(expectedOps));
        }
        try (Engine.IndexCommitRef commitRef = engine.acquireSafeIndexCommit()) {
            IndexCommit safeCommit = commitRef.getIndexCommit();
            if (safeCommit.getUserData().containsKey(Engine.MIN_RETAINED_SEQNO)) {
                lastMinRetainedSeqNo = Long.parseLong(safeCommit.getUserData().get(Engine.MIN_RETAINED_SEQNO));
            }
        }
    }
    if (randomBoolean()) {
        engine.close();
    } else {
        engine.flushAndClose();
    }
    try (InternalEngine recoveringEngine = new InternalEngine(engine.config())) {
        assertThat(recoveringEngine.getMinRetainedSeqNo(), equalTo(lastMinRetainedSeqNo));
    }
}
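The flush branch of this test asserts that the engine records its minimum retained seq# in the Lucene commit user data under Engine.MIN_RETAINED_SEQNO, which is what lets the recovering engine at the end restore lastMinRetainedSeqNo. Here is a hedged sketch of reading commit user data with plain Lucene; the literal key "min_retained_seq_no" is an assumption standing in for Engine.MIN_RETAINED_SEQNO.

import java.util.List;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;

// Sketch: read the user data of the most recent commit in a directory.
final class CommitUserDataSketch {
    static Long readMinRetainedSeqNo(Directory directory) throws java.io.IOException {
        List<IndexCommit> commits = DirectoryReader.listCommits(directory);
        Map<String, String> userData = commits.get(commits.size() - 1).getUserData();
        // "min_retained_seq_no" is an assumed key name, used here for illustration
        String value = userData.get("min_retained_seq_no");
        return value == null ? null : Long.parseLong(value);
    }
}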
Use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.
Example from the class SoftDeletesPolicyTests, method testSoftDeletesRetentionLock.
/**
* Makes sure we won't advance the retained seq# if the retention lock is held
*/
public void testSoftDeletesRetentionLock() {
    long retainedOps = between(0, 10000);
    AtomicLong globalCheckpoint = new AtomicLong(NO_OPS_PERFORMED);
    final AtomicLong[] retainingSequenceNumbers = new AtomicLong[randomIntBetween(0, 8)];
    for (int i = 0; i < retainingSequenceNumbers.length; i++) {
        retainingSequenceNumbers[i] = new AtomicLong();
    }
    final Supplier<RetentionLeases> retentionLeasesSupplier = () -> {
        final List<RetentionLease> leases = new ArrayList<>(retainingSequenceNumbers.length);
        for (int i = 0; i < retainingSequenceNumbers.length; i++) {
            leases.add(new RetentionLease(Integer.toString(i), retainingSequenceNumbers[i].get(), 0L, "test"));
        }
        return new RetentionLeases(1, 1, leases);
    };
    long safeCommitCheckpoint = globalCheckpoint.get();
    SoftDeletesPolicy policy = new SoftDeletesPolicy(globalCheckpoint::get, between(1, 10000), retainedOps, retentionLeasesSupplier);
    long minRetainedSeqNo = policy.getMinRetainedSeqNo();
    List<Releasable> locks = new ArrayList<>();
    int iters = scaledRandomIntBetween(10, 1000);
    for (int i = 0; i < iters; i++) {
        if (randomBoolean()) {
            locks.add(policy.acquireRetentionLock());
        }
        // Advances the global checkpoint and the local checkpoint of a safe commit
        globalCheckpoint.addAndGet(between(0, 1000));
        for (final AtomicLong retainingSequenceNumber : retainingSequenceNumbers) {
            retainingSequenceNumber.set(randomLongBetween(retainingSequenceNumber.get(), Math.max(globalCheckpoint.get(), 0L)));
        }
        safeCommitCheckpoint = randomLongBetween(safeCommitCheckpoint, globalCheckpoint.get());
        policy.setLocalCheckpointOfSafeCommit(safeCommitCheckpoint);
        if (rarely()) {
            retainedOps = between(0, 10000);
            policy.setRetentionOperations(retainedOps);
        }
        // Release some locks
        List<Releasable> releasingLocks = randomSubsetOf(locks);
        locks.removeAll(releasingLocks);
        releasingLocks.forEach(Releasable::close);
        // getting the query has side effects, updating the internal state of the policy
        final Query query = policy.getRetentionQuery();
        assertThat(query, instanceOf(PointRangeQuery.class));
        final PointRangeQuery retentionQuery = (PointRangeQuery) query;
        // we only expose the minimum sequence number to the merge policy if the retention lock is not held
        if (locks.isEmpty()) {
            final long minimumRetainingSequenceNumber = Arrays.stream(retainingSequenceNumbers).mapToLong(AtomicLong::get).min().orElse(Long.MAX_VALUE);
            long retainedSeqNo = Math.min(1 + safeCommitCheckpoint, Math.min(minimumRetainingSequenceNumber, 1 + globalCheckpoint.get() - retainedOps));
            minRetainedSeqNo = Math.max(minRetainedSeqNo, retainedSeqNo);
        }
        assertThat(retentionQuery.getNumDims(), equalTo(1));
        assertThat(LongPoint.decodeDimension(retentionQuery.getLowerPoint(), 0), equalTo(minRetainedSeqNo));
        assertThat(LongPoint.decodeDimension(retentionQuery.getUpperPoint(), 0), equalTo(Long.MAX_VALUE));
        assertThat(policy.getMinRetainedSeqNo(), equalTo(minRetainedSeqNo));
    }
    locks.forEach(Releasable::close);
    final long minimumRetainingSequenceNumber = Arrays.stream(retainingSequenceNumbers).mapToLong(AtomicLong::get).min().orElse(Long.MAX_VALUE);
    long retainedSeqNo = Math.min(1 + safeCommitCheckpoint, Math.min(minimumRetainingSequenceNumber, 1 + globalCheckpoint.get() - retainedOps));
    minRetainedSeqNo = Math.max(minRetainedSeqNo, retainedSeqNo);
    assertThat(policy.getMinRetainedSeqNo(), equalTo(minRetainedSeqNo));
}
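To make the locking behavior concrete, here is a minimal fragment (assuming the same imports and static helpers as the test above, with made-up numbers) showing that getMinRetainedSeqNo() is frozen while a retention lock is open and may only advance once all locks are released.

// Sketch only; the concrete values are invented for illustration.
AtomicLong globalCheckpoint = new AtomicLong(100);
Supplier<RetentionLeases> noLeases = () -> new RetentionLeases(1, 1, Collections.emptyList());
// localCheckpointOfSafeCommit = 100, retentionOperations = 0
SoftDeletesPolicy policy = new SoftDeletesPolicy(globalCheckpoint::get, 100, 0, noLeases);

long before = policy.getMinRetainedSeqNo();                        // 101 with the values above
try (Releasable lock = policy.acquireRetentionLock()) {
    globalCheckpoint.set(1000);                                     // history moves on...
    policy.setLocalCheckpointOfSafeCommit(1000);
    assertThat(policy.getMinRetainedSeqNo(), equalTo(before));      // ...but the retained seq# is frozen
}
// once the last lock is released, the next call may advance the value again
assertThat(policy.getMinRetainedSeqNo(), greaterThanOrEqualTo(before));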