Search in sources:

Example 16 with RetentionLease

use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.

the class ReplicaShardAllocatorIT method ensureActivePeerRecoveryRetentionLeasesAdvanced.

/**
 * Asserts, through {@code assertBusy}, that the peer-recovery retention leases belonging to the
 * copies of shard 0 currently in the routing table have caught up on every shard copy of the index.
 * <p>
 * For each node reported by {@code nodesInclude(indexName)} that holds an {@link IndexService} for
 * the index, every shard must satisfy:
 * <ul>
 *   <li>its last synced global checkpoint equals its max sequence number;</li>
 *   <li>it holds exactly one peer-recovery lease per expected lease id;</li>
 *   <li>each of those leases retains {@code lastSyncedGlobalCheckpoint + 1}.</li>
 * </ul>
 *
 * @param indexName name of the index to check
 * @throws Exception propagated from {@code assertBusy}
 */
public static void ensureActivePeerRecoveryRetentionLeasesAdvanced(String indexName) throws Exception {
    final ClusterService clusterService = internalCluster().clusterService();
    assertBusy(() -> {
        final Index resolved = resolveIndex(indexName);
        // Lease ids derived from the node ids of the shard-0 copies in the current routing table.
        final Set<String> expectedLeaseIds = clusterService.state()
            .routingTable()
            .index(resolved)
            .shard(0)
            .shards()
            .stream()
            .map(copy -> ReplicationTracker.getPeerRecoveryRetentionLeaseId(copy.currentNodeId()))
            .collect(Collectors.toSet());
        for (String nodeName : internalCluster().nodesInclude(indexName)) {
            final IndexService service = internalCluster().getInstance(IndicesService.class, nodeName).indexService(resolved);
            if (service == null) {
                // This node has no service for the index; nothing to verify here.
                continue;
            }
            for (IndexShard indexShard : service) {
                assertThat(indexShard.getLastSyncedGlobalCheckpoint(), equalTo(indexShard.seqNoStats().getMaxSeqNo()));
                // Keep only the leases that correspond to copies present in the routing table.
                final Set<RetentionLease> matchingLeases = indexShard.getPeerRecoveryRetentionLeases()
                    .stream()
                    .filter(candidate -> expectedLeaseIds.contains(candidate.id()))
                    .collect(Collectors.toSet());
                assertThat(matchingLeases, hasSize(expectedLeaseIds.size()));
                for (RetentionLease active : matchingLeases) {
                    assertThat(active.retainingSequenceNumber(), equalTo(indexShard.getLastSyncedGlobalCheckpoint() + 1));
                }
            }
        }
    });
}
Also used : IntStream(java.util.stream.IntStream) SequenceNumbers(org.opensearch.index.seqno.SequenceNumbers) Arrays(java.util.Arrays) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Matchers.not(org.hamcrest.Matchers.not) Priority(org.opensearch.common.Priority) CircuitBreaker(org.opensearch.common.breaker.CircuitBreaker) MockTransportService(org.opensearch.test.transport.MockTransportService) InternalTestCluster(org.opensearch.test.InternalTestCluster) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) IndexShard(org.opensearch.index.shard.IndexShard) PeerRecoveryTargetService(org.opensearch.indices.recovery.PeerRecoveryTargetService) InternalSettingsPlugin(org.opensearch.test.InternalSettingsPlugin) LegacyESVersion(org.opensearch.LegacyESVersion) RecoveryState(org.opensearch.indices.recovery.RecoveryState) Matchers.hasSize(org.hamcrest.Matchers.hasSize) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) RetentionLease(org.opensearch.index.seqno.RetentionLease) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) OpenSearchAssertions.assertAcked(org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked) Matchers.empty(org.hamcrest.Matchers.empty) Index(org.opensearch.index.Index) Semaphore(java.util.concurrent.Semaphore) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) IndicesService(org.opensearch.indices.IndicesService) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) ReplicationTracker(org.opensearch.index.seqno.ReplicationTracker) IndexService(org.opensearch.index.IndexService) TransportService(org.opensearch.transport.TransportService) Collectors(java.util.stream.Collectors) Plugin(org.opensearch.plugins.Plugin) CountDownLatch(java.util.concurrent.CountDownLatch) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ClusterService(org.opensearch.cluster.service.ClusterService) 
IndexSettings(org.opensearch.index.IndexSettings) RecoveryCleanFilesRequest(org.opensearch.indices.recovery.RecoveryCleanFilesRequest) ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) OpenSearchIntegTestCase(org.opensearch.test.OpenSearchIntegTestCase) ClusterService(org.opensearch.cluster.service.ClusterService) IndexService(org.opensearch.index.IndexService) RetentionLease(org.opensearch.index.seqno.RetentionLease) IndexShard(org.opensearch.index.shard.IndexShard) IndicesService(org.opensearch.indices.IndicesService) Index(org.opensearch.index.Index)

Example 17 with RetentionLease

use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.

the class RecoverySourceHandler method phase1.

/**
 * Perform phase1 of the recovery operations. Once this {@link IndexCommit}
 * snapshot has been performed no commit operations (files being fsync'd)
 * are effectively allowed on this index until all recovery phases are done.
 * <p>
 * Phase1 examines the segment files on the target node and copies over the
 * segments that are missing. Only segments that have the same size and
 * checksum can be reused.
 * <p>
 * When phase1 is not skipped, the work is chained as: send file info, send files,
 * create retention lease, clean files, then respond to {@code listener}. When it is
 * skipped, only the retention lease is created before responding. A failure reported
 * by any step is forwarded to {@code listener.onFailure}.
 *
 * @param snapshot      the index commit whose file names are validated against the source store metadata
 * @param startingSeqNo sequence number handed to {@code createRetentionLease}
 * @param translogOps   supplier handed to the file-info, send-files and clean-files steps
 *                      (presumably the number of translog operations; confirm against callers)
 * @param listener      receives the {@code SendFileResult} on completion, or the failure of any step
 * @throws RecoverFilesRecoveryException wrapping any exception caught by the outer try block of this method
 */
void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
    cancellableThreads.checkForCancel();
    final Store store = shard.store();
    try {
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // The source store itself is unreadable: fail the shard, then rethrow so the
            // outer catch wraps the exception for the caller.
            shard.failShard("recovery", ex);
            throw ex;
        }
        // Every file named by the commit must have an entry in the source metadata.
        for (String name : snapshot.getFileNames()) {
            final StoreFileMetadata md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
                throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
            }
        }
        if (canSkipPhase1(recoverySourceMetadata, request.metadataSnapshot()) == false) {
            // Names/sizes of files that will be sent, and of files the target already has.
            final List<String> phase1FileNames = new ArrayList<>();
            final List<Long> phase1FileSizes = new ArrayList<>();
            final List<String> phase1ExistingFileNames = new ArrayList<>();
            final List<Long> phase1ExistingFileSizes = new ArrayList<>();
            // Total size of segment files that are recovered
            long totalSizeInBytes = 0;
            // Total size of segment files that were able to be re-used
            long existingTotalSizeInBytes = 0;
            // Generate a "diff" of all the identical, different, and missing
            // segment files on the target node, using the existing files on
            // the source node
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            // Identical files are not sent; their length counts towards both totals.
            for (StoreFileMetadata md : diff.identical) {
                phase1ExistingFileNames.add(md.name());
                phase1ExistingFileSizes.add(md.length());
                existingTotalSizeInBytes += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]", md.name(), md.checksum(), md.length());
                }
                totalSizeInBytes += md.length();
            }
            // Files that differ or are missing are the ones that get sent.
            List<StoreFileMetadata> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
            phase1Files.addAll(diff.different);
            phase1Files.addAll(diff.missing);
            for (StoreFileMetadata md : phase1Files) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
                }
                phase1FileNames.add(md.name());
                phase1FileSizes.add(md.length());
                totalSizeInBytes += md.length();
            }
            logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", phase1FileNames.size(), new ByteSizeValue(totalSizeInBytes), phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSizeInBytes));
            // One listener per step; each step starts the next one from its completion callback.
            final StepListener<Void> sendFileInfoStep = new StepListener<>();
            final StepListener<Void> sendFilesStep = new StepListener<>();
            final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
            final StepListener<Void> cleanFilesStep = new StepListener<>();
            cancellableThreads.checkForCancel();
            // Step 1: tell the target which files will be sent and which are reused.
            recoveryTarget.receiveFileInfo(phase1FileNames, phase1FileSizes, phase1ExistingFileNames, phase1ExistingFileSizes, translogOps.getAsInt(), sendFileInfoStep);
            // Step 2: send the different/missing files.
            sendFileInfoStep.whenComplete(r -> sendFiles(store, phase1Files.toArray(new StoreFileMetadata[0]), translogOps, sendFilesStep), listener::onFailure);
            // Step 3: create the retention lease once the files have been sent.
            sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure);
            // Step 4: clean files on the target, using the last known global checkpoint.
            createRetentionLeaseStep.whenComplete(retentionLease -> {
                final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint();
                assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint : retentionLease + " vs " + lastKnownGlobalCheckpoint;
                // Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want
                // the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica
                // to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on
                // the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint.
                cleanFiles(store, recoverySourceMetadata, translogOps, lastKnownGlobalCheckpoint, cleanFilesStep);
            }, listener::onFailure);
            // The running totals are mutated above, so copy them into final locals for capture by the lambda below.
            final long totalSize = totalSizeInBytes;
            final long existingTotalSize = existingTotalSizeInBytes;
            // Step 5: report the result together with the elapsed time.
            cleanFilesStep.whenComplete(r -> {
                final TimeValue took = stopWatch.totalTime();
                logger.trace("recovery [phase1]: took [{}]", took);
                listener.onResponse(new SendFileResult(phase1FileNames, phase1FileSizes, totalSize, phase1ExistingFileNames, phase1ExistingFileSizes, existingTotalSize, took));
            }, listener::onFailure);
        } else {
            logger.trace("skipping [phase1] since source and target have identical sync id [{}]", recoverySourceMetadata.getSyncId());
            // but we must still create a retention lease
            final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
            createRetentionLease(startingSeqNo, createRetentionLeaseStep);
            // No files were transferred, so respond with an empty result once the lease step completes.
            createRetentionLeaseStep.whenComplete(retentionLease -> {
                final TimeValue took = stopWatch.totalTime();
                logger.trace("recovery [phase1]: took [{}]", took);
                listener.onResponse(new SendFileResult(Collections.emptyList(), Collections.emptyList(), 0L, Collections.emptyList(), Collections.emptyList(), 0L, took));
            }, listener::onFailure);
        }
    } catch (Exception e) {
        // Anything caught here is rethrown wrapped, tagged with the shard id of the recovery request.
        throw new RecoverFilesRecoveryException(request.shardId(), 0, new ByteSizeValue(0L), e);
    }
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) ByteSizeValue(org.opensearch.common.unit.ByteSizeValue) Store(org.opensearch.index.store.Store) StoreFileMetadata(org.opensearch.index.store.StoreFileMetadata) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) TimeValue(org.opensearch.common.unit.TimeValue) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) RecoveryEngineException(org.opensearch.index.engine.RecoveryEngineException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) IndexShardClosedException(org.opensearch.index.shard.IndexShardClosedException) IOException(java.io.IOException) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) IndexShardRelocatedException(org.opensearch.index.shard.IndexShardRelocatedException) RetentionLeaseNotFoundException(org.opensearch.index.seqno.RetentionLeaseNotFoundException) StopWatch(org.opensearch.common.StopWatch) RetentionLease(org.opensearch.index.seqno.RetentionLease) AtomicLong(java.util.concurrent.atomic.AtomicLong) StepListener(org.opensearch.action.StepListener) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException)

Example 18 with RetentionLease

use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.

the class RetentionLeasesReplicationTests method testSimpleSyncRetentionLeases.

/**
 * Adds (and occasionally removes) retention leases on a replication group with soft deletes
 * enabled, then checks the primary's lease version, primary term and non-peer-recovery leases,
 * and finally that every replica holds the same leases as the primary.
 */
public void testSimpleSyncRetentionLeases() throws Exception {
    final Settings softDeletesEnabled = Settings.builder()
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
        .build();
    try (ReplicationGroup replicationGroup = createGroup(between(0, 2), softDeletesEnabled)) {
        replicationGroup.startAll();
        final List<RetentionLease> expectedLeases = new ArrayList<>();
        final int rounds = between(1, 100);
        // Counted down once per add/remove operation via the listener passed to the group.
        final CountDownLatch pendingOperations = new CountDownLatch(rounds);
        for (int round = 0; round < rounds; round++) {
            if (expectedLeases.isEmpty() || rarely() == false) {
                // Common path: register a new lease whose id is the round number.
                final RetentionLease added = replicationGroup.addRetentionLease(
                    Integer.toString(round),
                    randomNonNegativeLong(),
                    "test-" + round,
                    ActionListener.wrap(pendingOperations::countDown)
                );
                expectedLeases.add(added);
            } else {
                // Rare path: drop a randomly chosen lease that was added earlier.
                final RetentionLease victim = randomFrom(expectedLeases);
                expectedLeases.remove(victim);
                replicationGroup.removeRetentionLease(victim.id(), ActionListener.wrap(pendingOperations::countDown));
            }
        }
        final RetentionLeases primaryLeases = replicationGroup.getPrimary().getRetentionLeases();
        assertThat(primaryLeases.version(), equalTo(rounds + replicationGroup.getReplicas().size() + 1L));
        assertThat(primaryLeases.primaryTerm(), equalTo(replicationGroup.getPrimary().getOperationPrimaryTerm()));
        assertThat(
            RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(primaryLeases).values(),
            containsInAnyOrder(expectedLeases.toArray(new RetentionLease[0]))
        );
        // Wait for every operation's listener to fire before inspecting the replicas.
        pendingOperations.await();
        for (IndexShard replicaShard : replicationGroup.getReplicas()) {
            assertThat(replicaShard.getRetentionLeases(), equalTo(primaryLeases));
        }
    }
}
Also used : RetentionLease(org.opensearch.index.seqno.RetentionLease) IndexShard(org.opensearch.index.shard.IndexShard) ArrayList(java.util.ArrayList) CountDownLatch(java.util.concurrent.CountDownLatch) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings) RetentionLeases(org.opensearch.index.seqno.RetentionLeases)

Example 19 with RetentionLease

use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.

the class IndexShardRetentionLeaseTests method assertRetentionLeases.

/**
 * Checks a {@link RetentionLeases} instance against expected values, ignoring leases whose
 * source is {@code ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE}.
 *
 * @param retentionLeases                 the leases to verify
 * @param size                            expected number of remaining leases; their ids must be {@code "0"} to {@code size - 1}
 * @param minimumRetainingSequenceNumbers expected retaining sequence number for the lease with id {@code i}, at index {@code i}
 * @param primaryTerm                     expected primary term
 * @param version                         expected version
 */
private void assertRetentionLeases(final RetentionLeases retentionLeases, final int size, final long[] minimumRetainingSequenceNumbers, final long primaryTerm, final long version) {
    assertThat(retentionLeases.primaryTerm(), equalTo(primaryTerm));
    assertThat(retentionLeases.version(), equalTo(version));

    // Index the leases by id, skipping the peer-recovery ones.
    final Map<String, RetentionLease> leasesById = new HashMap<>();
    for (final RetentionLease candidate : retentionLeases.leases()) {
        if (ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE.equals(candidate.source())) {
            continue;
        }
        leasesById.put(candidate.id(), candidate);
    }
    assertThat(leasesById.entrySet(), hasSize(size));

    for (int position = 0; position < size; position++) {
        final String expectedId = Integer.toString(position);
        assertThat(leasesById.keySet(), hasItem(expectedId));
        final RetentionLease found = leasesById.get(expectedId);
        assertThat(found.retainingSequenceNumber(), equalTo(minimumRetainingSequenceNumbers[position]));
        assertThat(found.source(), equalTo("test-" + position));
    }
}
Also used : RetentionLease(org.opensearch.index.seqno.RetentionLease) HashMap(java.util.HashMap)

Example 20 with RetentionLease

use of org.opensearch.index.seqno.RetentionLease in project OpenSearch by opensearch-project.

the class ReplicaShardAllocatorTests method testPreferCopyWithHighestMatchingOperations.

/**
 * Registers retention leases for three nodes, where node3's retaining sequence number is
 * higher than node2's, and expects exactly one shard to be initializing, on node3.
 */
public void testPreferCopyWithHighestMatchingOperations() {
    final RoutingAllocation routingAllocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());

    final long primaryRetainingSeqNo = randomLongBetween(1, Integer.MAX_VALUE);
    final long node2RetainingSeqNo = randomLongBetween(0, primaryRetainingSeqNo - 1);
    // The upper bound deliberately exceeds the primary's seqNo: a value above it could in theory
    // occur when the primary fails and comes back quickly.
    final long node3RetainingSeqNo = randomLongBetween(node2RetainingSeqNo + 1, primaryRetainingSeqNo + 100);

    final List<RetentionLease> leasesOnPrimary = Arrays.asList(
        newRetentionLease(node1, primaryRetainingSeqNo),
        newRetentionLease(node2, node2RetainingSeqNo),
        newRetentionLease(node3, node3RetainingSeqNo)
    );

    // Only node1's data carries the leases; all three nodes report the same single file.
    testAllocator.addData(
        node1,
        leasesOnPrimary,
        "MATCH",
        new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
    );
    testAllocator.addData(
        node2,
        "NOT_MATCH",
        new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
    );
    testAllocator.addData(
        node3,
        randomSyncId(),
        new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
    );

    allocateAllUnassigned(routingAllocation);

    assertThat(routingAllocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
    assertThat(
        routingAllocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(),
        equalTo(node3.getId())
    );
}
Also used : RetentionLease(org.opensearch.index.seqno.RetentionLease) StoreFileMetadata(org.opensearch.index.store.StoreFileMetadata) RoutingAllocation(org.opensearch.cluster.routing.allocation.RoutingAllocation)

Aggregations

RetentionLease (org.opensearch.index.seqno.RetentionLease): 23; ArrayList (java.util.ArrayList): 16; RetentionLeases (org.opensearch.index.seqno.RetentionLeases): 11; AtomicLong (java.util.concurrent.atomic.AtomicLong): 9; StoreFileMetadata (org.opensearch.index.store.StoreFileMetadata): 9; IOException (java.io.IOException): 7; List (java.util.List): 7; CountDownLatch (java.util.concurrent.CountDownLatch): 7; Settings (org.opensearch.common.settings.Settings): 7; IndexSettings (org.opensearch.index.IndexSettings): 7; Arrays (java.util.Arrays): 6; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 6; IndexShard (org.opensearch.index.shard.IndexShard): 6; LongPoint (org.apache.lucene.document.LongPoint): 5; CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 5; ActionListener (org.opensearch.action.ActionListener): 5; TimeValue (org.opensearch.common.unit.TimeValue): 5; ReplicationTracker (org.opensearch.index.seqno.ReplicationTracker): 5; Store (org.opensearch.index.store.Store): 5; Collections (java.util.Collections): 4