Example use of org.opensearch.index.engine.DocIdSeqNoAndSource in the OpenSearch project (opensearch-project).
From the class RecoveryDuringReplicationTests, method testRollbackOnPromotion.
/**
 * Promotes a random replica to primary while some operations have only reached a subset of the
 * replicas, and checks that every shard copy ends up with the same documents.
 *
 * <p>Outline:
 * <ol>
 *   <li>Index an initial batch of documents through the group, then index extra operations on the
 *       primary and replay each of them on a random subset of the replicas, counting how many
 *       reach the replica that will be promoted.</li>
 *   <li>Snapshot the documents on the future primary whose sequence number is at or below its last
 *       known global checkpoint.</li>
 *   <li>While the promotion runs, a background thread repeatedly reads one of the other replicas
 *       and asserts that every snapshotted document is still exposed.</li>
 *   <li>After promotion, all copies must hold {@code initDocs + inFlightOpsOnNewPrimary} documents,
 *       and must stay in sync after more documents are indexed.</li>
 *   <li>Finally every shard is force-flushed and must report zero uncommitted translog operations.</li>
 * </ol>
 *
 * @throws Exception if any step of the replication group setup, promotion or verification fails
 */
public void testRollbackOnPromotion() throws Exception {
    try (ReplicationGroup shards = createGroup(between(2, 3))) {
        shards.startAll();
        IndexShard newPrimary = randomFrom(shards.getReplicas());
        int initDocs = shards.indexDocs(randomInt(100));
        int inFlightOpsOnNewPrimary = 0;
        int inFlightOps = scaledRandomIntBetween(10, 200);
        for (int i = 0; i < inFlightOps; i++) {
            String id = "extra-" + i;
            IndexRequest primaryRequest = new IndexRequest(index.getName()).id(id).source("{}", XContentType.JSON);
            BulkShardRequest replicationRequest = indexOnPrimary(primaryRequest, shards.getPrimary());
            for (IndexShard replica : shards.getReplicas()) {
                // Each operation reaches only a random subset of replicas.
                if (randomBoolean()) {
                    indexOnReplica(replicationRequest, shards, replica);
                    if (replica == newPrimary) {
                        inFlightOpsOnNewPrimary++;
                    }
                }
            }
            if (randomBoolean()) {
                shards.syncGlobalCheckpoint();
            }
            if (rarely()) {
                shards.flush();
            }
        }
        shards.refresh("test");
        // Read the global checkpoint once; no operations are in flight at this point.
        final long globalCheckpointOnNewPrimary = newPrimary.getLastKnownGlobalCheckpoint();
        List<DocIdSeqNoAndSource> docsBelowGlobalCheckpoint = EngineTestCase.getDocIds(getEngine(newPrimary), randomBoolean())
            .stream()
            .filter(doc -> doc.getSeqNo() <= globalCheckpointOnNewPrimary)
            .collect(Collectors.toList());
        CountDownLatch latch = new CountDownLatch(1);
        final AtomicBoolean done = new AtomicBoolean();
        Thread thread = new Thread(() -> {
            List<IndexShard> replicas = new ArrayList<>(shards.getReplicas());
            replicas.remove(newPrimary);
            latch.countDown();
            while (done.get() == false) {
                try {
                    List<DocIdSeqNoAndSource> exposedDocs = EngineTestCase.getDocIds(getEngine(randomFrom(replicas)), randomBoolean());
                    assertThat(docsBelowGlobalCheckpoint, everyItem(is(in(exposedDocs))));
                    assertThat(randomFrom(replicas).getLocalCheckpoint(), greaterThanOrEqualTo(initDocs - 1L));
                } catch (AlreadyClosedException ignored) {
                    // replica swaps engine during rollback
                } catch (Exception e) {
                    throw new AssertionError(e);
                }
            }
        });
        thread.start();
        try {
            // Wait until the checker thread is running before starting the promotion.
            latch.await();
            shards.promoteReplicaToPrimary(newPrimary).get();
            shards.assertAllEqual(initDocs + inFlightOpsOnNewPrimary);
            int moreDocsAfterRollback = shards.indexDocs(scaledRandomIntBetween(1, 20));
            shards.assertAllEqual(initDocs + inFlightOpsOnNewPrimary + moreDocsAfterRollback);
        } finally {
            // Always stop the checker thread, even when an assertion above fails; otherwise it would
            // keep spinning against shards that the enclosing try-with-resources is about to close.
            done.set(true);
            thread.join();
        }
        shards.syncGlobalCheckpoint();
        for (IndexShard shard : shards) {
            shard.flush(new FlushRequest().force(true).waitIfOngoing(true));
            assertThat(shard.translogStats().getUncommittedOperations(), equalTo(0));
        }
    }
}
Example use of org.opensearch.index.engine.DocIdSeqNoAndSource in the OpenSearch project (opensearch-project).
From the class IndexShardTests, method testResetEngine.
/**
 * Resets a replica shard's engine to the global checkpoint and verifies the resulting state.
 *
 * <p>The shard is filled with operations (with gaps), a global checkpoint between the last known
 * global checkpoint and the local checkpoint is applied, and the engine is reset while holding all
 * replica operation permits. A background thread reads the engine concurrently and asserts that
 * every document id at or below the global checkpoint stays exposed throughout the reset.
 *
 * <p>After the reset the test expects: only documents at or below the global checkpoint remain,
 * the max sequence number equals the global checkpoint, the translog operation count did not grow
 * (and is unchanged when soft-deletes are disabled), and the max sequence number of updates or
 * deletes equals the max sequence number seen before the rollback.
 *
 * @throws Exception if the shard cannot be created, reset or verified
 */
public void testResetEngine() throws Exception {
    IndexShard shard = newStartedShard(false);
    indexOnReplicaWithGaps(shard, between(0, 1000), Math.toIntExact(shard.getLocalCheckpoint()));
    long maxSeqNoBeforeRollback = shard.seqNoStats().getMaxSeqNo();
    final long globalCheckpoint = randomLongBetween(shard.getLastKnownGlobalCheckpoint(), shard.getLocalCheckpoint());
    shard.updateGlobalCheckpointOnReplica(globalCheckpoint, "test");
    // The filter parses each doc id as a long and compares it with the global checkpoint.
    Set<String> docBelowGlobalCheckpoint = getShardDocUIDs(shard).stream()
        .filter(id -> Long.parseLong(id) <= globalCheckpoint)
        .collect(Collectors.toSet());
    TranslogStats translogStats = shard.translogStats();
    AtomicBoolean done = new AtomicBoolean();
    CountDownLatch latch = new CountDownLatch(1);
    Thread thread = new Thread(() -> {
        latch.countDown();
        int hitClosedExceptions = 0;
        while (done.get() == false) {
            try {
                List<String> exposedDocIds = EngineTestCase.getDocIds(getEngine(shard), rarely())
                    .stream()
                    .map(DocIdSeqNoAndSource::getId)
                    .collect(Collectors.toList());
                assertThat(
                    "every operations before the global checkpoint must be reserved",
                    docBelowGlobalCheckpoint,
                    everyItem(is(in(exposedDocIds)))
                );
            } catch (AlreadyClosedException ignored) {
                hitClosedExceptions++;
            } catch (IOException e) {
                throw new AssertionError(e);
            }
        }
        // engine reference was switched twice: current read/write engine -> read-only engine -> new read/write engine
        assertThat(hitClosedExceptions, lessThanOrEqualTo(2));
    });
    thread.start();
    try {
        // Wait until the reader thread is running before starting the reset.
        latch.await();
        final CountDownLatch engineResetLatch = new CountDownLatch(1);
        // Holds the first failure reported by either the reset itself or the permit acquisition.
        final java.util.concurrent.atomic.AtomicReference<Exception> resetFailure =
            new java.util.concurrent.atomic.AtomicReference<>();
        shard.acquireAllReplicaOperationsPermits(shard.getOperationPrimaryTerm(), globalCheckpoint, 0L, ActionListener.wrap(r -> {
            try {
                shard.resetEngineToGlobalCheckpoint();
            } catch (Exception resetError) {
                // Record before the latch is released so the waiting thread is guaranteed to see it.
                resetFailure.compareAndSet(null, resetError);
            } finally {
                r.close();
                engineResetLatch.countDown();
            }
        }, permitError -> {
            // Previously the failure was swallowed and the latch never released, hanging the test.
            resetFailure.compareAndSet(null, permitError);
            engineResetLatch.countDown();
        }), TimeValue.timeValueMinutes(1L));
        engineResetLatch.await();
        final Exception failure = resetFailure.get();
        if (failure != null) {
            throw new AssertionError("failed to reset engine to global checkpoint", failure);
        }
        assertThat(getShardDocUIDs(shard), equalTo(docBelowGlobalCheckpoint));
        assertThat(shard.seqNoStats().getMaxSeqNo(), equalTo(globalCheckpoint));
        if (shard.indexSettings.isSoftDeleteEnabled()) {
            // we might have trimmed some operations if the translog retention policy is ignored (when soft-deletes enabled).
            assertThat(
                shard.translogStats().estimatedNumberOfOperations(),
                lessThanOrEqualTo(translogStats.estimatedNumberOfOperations())
            );
        } else {
            assertThat(shard.translogStats().estimatedNumberOfOperations(), equalTo(translogStats.estimatedNumberOfOperations()));
        }
        assertThat(shard.getMaxSeqNoOfUpdatesOrDeletes(), equalTo(maxSeqNoBeforeRollback));
    } finally {
        // Always stop the reader thread, even when an assertion above fails.
        done.set(true);
        thread.join();
    }
    closeShard(shard, false);
}
Example use of org.opensearch.index.engine.DocIdSeqNoAndSource in the OpenSearch project (opensearch-project).
From the class RecoveryTests, method testRecoveryTrimsLocalTranslog.
/**
 * Demotes a primary that holds operations not present on every replica, re-adds it as a replica
 * on its existing shard path, recovers it, and checks that all copies agree.
 *
 * <p>Steps: index an initial batch; index extra operations on the primary and replay each on a
 * random subset of replicas; sync the global checkpoint; promote a random replica; close the old
 * primary and its store; add it back from the same path and recover it. Afterwards every shard
 * must have consistent history between translog and Lucene, and every replica must return the same
 * {@code getDocIdAndSeqNos} list as the primary. The recovered shard is then promoted back to
 * primary and the same checks are repeated against the previously captured list.
 *
 * @throws Exception if group setup, promotion, recovery or verification fails
 */
public void testRecoveryTrimsLocalTranslog() throws Exception {
    try (ReplicationGroup shards = createGroup(between(1, 2))) {
        shards.startAll();
        final IndexShard demotedPrimary = shards.getPrimary();
        shards.indexDocs(scaledRandomIntBetween(1, 100));
        if (randomBoolean()) {
            shards.flush();
        }

        // Operations that reach the primary but only some of the replicas.
        final int extraOps = scaledRandomIntBetween(1, 100);
        int opNumber = 0;
        while (opNumber < extraOps) {
            final String docId = "extra_" + opNumber;
            final IndexRequest request = new IndexRequest(index.getName()).id(docId).source("{}", XContentType.JSON);
            final BulkShardRequest replicated = indexOnPrimary(request, demotedPrimary);
            for (IndexShard target : randomSubsetOf(shards.getReplicas())) {
                indexOnReplica(replicated, shards, target);
            }
            if (rarely()) {
                shards.flush();
            }
            opNumber++;
        }

        shards.syncGlobalCheckpoint();
        final IndexShard promoted = randomFrom(shards.getReplicas());
        shards.promoteReplicaToPrimary(promoted).get();

        // Shut the old primary down and bring it back as a replica from the same on-disk path.
        demotedPrimary.close("demoted", false);
        demotedPrimary.store().close();
        final IndexShard rejoined = shards.addReplicaWithExistingPath(
            demotedPrimary.shardPath(),
            demotedPrimary.routingEntry().currentNodeId()
        );
        shards.recoverReplica(rejoined);

        for (IndexShard copy : shards) {
            assertConsistentHistoryBetweenTranslogAndLucene(copy);
        }
        final List<DocIdSeqNoAndSource> expectedDocs = getDocIdAndSeqNos(shards.getPrimary());
        for (IndexShard replica : shards.getReplicas()) {
            final String description = replica.routingEntry().toString();
            assertThat(description, getDocIdAndSeqNos(replica), equalTo(expectedDocs));
        }

        // Promote the recovered shard back to primary; document contents must not change.
        shards.promoteReplicaToPrimary(rejoined).get();
        for (IndexShard copy : shards) {
            final String description = copy.routingEntry().toString();
            assertThat(description, getDocIdAndSeqNos(copy), equalTo(expectedDocs));
            assertConsistentHistoryBetweenTranslogAndLucene(copy);
        }
    }
}
Example use of org.opensearch.index.engine.DocIdSeqNoAndSource in the OpenSearch project (opensearch-project).
From the class InternalTestCluster, method assertSameDocIdsOnShards.
/**
 * Asserts that all shard copies with the same shardId hold the same documents.
 *
 * <p>For every shard in the current cluster state's routing table, the list returned by
 * {@code IndexShardTestCase.getDocIdAndSeqNos} for each replica must equal the list returned for
 * the primary. Copies for which {@code getShardOrNull} returns {@code null}, or whose read throws
 * {@link AlreadyClosedException}, are skipped. The whole check runs inside {@code assertBusy}.
 *
 * @throws Exception if thrown by {@code assertBusy}
 */
public void assertSameDocIdsOnShards() throws Exception {
    assertBusy(() -> {
        final ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
        for (ObjectObjectCursor<String, IndexRoutingTable> indexCursor : clusterState.routingTable().indicesRouting()) {
            for (IntObjectCursor<IndexShardRoutingTable> shardCursor : indexCursor.value.shards()) {
                final IndexShardRoutingTable shardTable = shardCursor.value;
                final ShardRouting primaryRouting = shardTable.primaryShard();
                final IndexShard primary = getShardOrNull(clusterState, primaryRouting);
                if (primary == null) {
                    // No primary shard instance available to compare against.
                    continue;
                }
                final List<DocIdSeqNoAndSource> primaryDocs;
                try {
                    primaryDocs = IndexShardTestCase.getDocIdAndSeqNos(primary);
                } catch (AlreadyClosedException ex) {
                    // The primary was closed while reading; skip this shard.
                    continue;
                }
                for (ShardRouting replicaRouting : shardTable.replicaShards()) {
                    final IndexShard replica = getShardOrNull(clusterState, replicaRouting);
                    if (replica == null) {
                        continue;
                    }
                    final List<DocIdSeqNoAndSource> replicaDocs;
                    try {
                        replicaDocs = IndexShardTestCase.getDocIdAndSeqNos(replica);
                    } catch (AlreadyClosedException ex) {
                        // The replica was closed while reading; skip this copy.
                        continue;
                    }
                    final String reason = "out of sync shards: primary=["
                        + primaryRouting
                        + "] num_docs_on_primary=["
                        + primaryDocs.size()
                        + "] vs replica=["
                        + replicaRouting
                        + "] num_docs_on_replica=["
                        + replicaDocs.size()
                        + "]";
                    assertThat(reason, replicaDocs, equalTo(primaryDocs));
                }
            }
        }
    });
}
Aggregations