Use of org.opensearch.indices.replication.common.ReplicationLuceneIndex in project OpenSearch by opensearch-project.
The class StoreRecovery, method recoveryListener.
private ActionListener<Boolean> recoveryListener(IndexShard indexShard, ActionListener<Boolean> listener) {
    return ActionListener.wrap(res -> {
        if (res) {
            // Check that the gateway didn't leave the shard in init or recovering stage. it is up to the gateway
            // to call post recovery.
            final IndexShardState shardState = indexShard.state();
            final RecoveryState recoveryState = indexShard.recoveryState();
            assert shardState != IndexShardState.CREATED && shardState != IndexShardState.RECOVERING
                : "recovery process of " + shardId + " didn't get to post_recovery. shardState [" + shardState + "]";
            if (logger.isTraceEnabled()) {
                ReplicationLuceneIndex index = recoveryState.getIndex();
                StringBuilder sb = new StringBuilder();
                sb.append(" index : files [").append(index.totalFileCount()).append("] with total_size [")
                    .append(new ByteSizeValue(index.totalBytes())).append("], took[")
                    .append(TimeValue.timeValueMillis(index.time())).append("]\n");
                sb.append(" : recovered_files [").append(index.recoveredFileCount()).append("] with total_size [")
                    .append(new ByteSizeValue(index.recoveredBytes())).append("]\n");
                sb.append(" : reusing_files [").append(index.reusedFileCount()).append("] with total_size [")
                    .append(new ByteSizeValue(index.reusedBytes())).append("]\n");
                sb.append(" verify_index : took [").append(TimeValue.timeValueMillis(recoveryState.getVerifyIndex().time()))
                    .append("], check_index [").append(timeValueMillis(recoveryState.getVerifyIndex().checkIndexTime())).append("]\n");
                sb.append(" translog : number_of_operations [").append(recoveryState.getTranslog().recoveredOperations())
                    .append("], took [").append(TimeValue.timeValueMillis(recoveryState.getTranslog().time())).append("]");
                logger.trace("recovery completed from [shard_store], took [{}]\n{}", timeValueMillis(recoveryState.getTimer().time()), sb);
            } else if (logger.isDebugEnabled()) {
                logger.debug("recovery completed from [shard_store], took [{}]", timeValueMillis(recoveryState.getTimer().time()));
            }
        }
        listener.onResponse(res);
    }, ex -> {
        if (ex instanceof IndexShardRecoveryException) {
            if (indexShard.state() == IndexShardState.CLOSED) {
                // got closed on us, just ignore this recovery
                listener.onResponse(false);
                return;
            }
            if ((ex.getCause() instanceof IndexShardClosedException) || (ex.getCause() instanceof IndexShardNotStartedException)) {
                // got closed on us, just ignore this recovery
                listener.onResponse(false);
                return;
            }
            listener.onFailure(ex);
        } else if (ex instanceof IndexShardClosedException || ex instanceof IndexShardNotStartedException) {
            listener.onResponse(false);
        } else {
            if (indexShard.state() == IndexShardState.CLOSED) {
                // got closed on us, just ignore this recovery
                listener.onResponse(false);
            } else {
                listener.onFailure(new IndexShardRecoveryException(shardId, "failed recovery", ex));
            }
        }
    });
}
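The trace block above only consumes the read side of ReplicationLuceneIndex. A minimal sketch of the same summary as a standalone helper, using only the accessors already visible in the snippet; the helper name summarize is ours, not part of OpenSearch:

static String summarize(ReplicationLuceneIndex index) {
    // Totals cover reused plus recovered files; time() is the elapsed time of the index phase.
    return "files [" + index.totalFileCount() + "] with total_size [" + new ByteSizeValue(index.totalBytes())
        + "], recovered_files [" + index.recoveredFileCount() + "] with total_size [" + new ByteSizeValue(index.recoveredBytes())
        + "], reusing_files [" + index.reusedFileCount() + "] with total_size [" + new ByteSizeValue(index.reusedBytes())
        + "], took [" + TimeValue.timeValueMillis(index.time()) + "]";
}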
Use of org.opensearch.indices.replication.common.ReplicationLuceneIndex in project OpenSearch by opensearch-project.
The class RecoveryTarget, method receiveFileInfo.
@Override
public void receiveFileInfo(
    List<String> phase1FileNames,
    List<Long> phase1FileSizes,
    List<String> phase1ExistingFileNames,
    List<Long> phase1ExistingFileSizes,
    int totalTranslogOps,
    ActionListener<Void> listener
) {
    ActionListener.completeWith(listener, () -> {
        indexShard.resetRecoveryStage();
        indexShard.prepareForIndexRecovery();
        final ReplicationLuceneIndex index = state().getIndex();
        for (int i = 0; i < phase1ExistingFileNames.size(); i++) {
            index.addFileDetail(phase1ExistingFileNames.get(i), phase1ExistingFileSizes.get(i), true);
        }
        for (int i = 0; i < phase1FileNames.size(); i++) {
            index.addFileDetail(phase1FileNames.get(i), phase1FileSizes.get(i), false);
        }
        index.setFileDetailsComplete();
        state().getTranslog().totalOperations(totalTranslogOps);
        state().getTranslog().totalOperationsOnStart(totalTranslogOps);
        return null;
    });
}
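receiveFileInfo is the write side of those accessors: files already present locally are registered as reused (true), files still to be transferred as not reused (false), and setFileDetailsComplete() marks the map as final. A minimal sketch of the same population pattern in isolation; the file names and sizes are hypothetical:

ReplicationLuceneIndex index = new ReplicationLuceneIndex();
index.addFileDetail("_0.cfs", 4096L, true);   // hypothetical file reused from the local store
index.addFileDetail("_1.cfs", 8192L, false);  // hypothetical file that must be copied from the source
index.setFileDetailsComplete();               // no further files will be registered
assert index.totalFileCount() == 2;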
Use of org.opensearch.indices.replication.common.ReplicationLuceneIndex in project OpenSearch by opensearch-project.
The class RecoveryTargetTests, method testConcurrentModificationIndexFileDetailsMap.
public void testConcurrentModificationIndexFileDetailsMap() throws InterruptedException {
    final ReplicationLuceneIndex index = new ReplicationLuceneIndex();
    final AtomicBoolean stop = new AtomicBoolean(false);
    Streamer<ReplicationLuceneIndex> readWriteIndex = new Streamer<ReplicationLuceneIndex>(stop, index) {
        @Override
        ReplicationLuceneIndex createObj(StreamInput in) throws IOException {
            return new ReplicationLuceneIndex(in);
        }
    };
    Thread modifyThread = new Thread() {
        @Override
        public void run() {
            for (int i = 0; i < 1000; i++) {
                index.addFileDetail(randomAlphaOfLength(10), 100, true);
            }
            stop.set(true);
        }
    };
    readWriteIndex.start();
    modifyThread.start();
    modifyThread.join();
    readWriteIndex.join();
    assertThat(readWriteIndex.error.get(), equalTo(null));
}
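Here modifyThread mutates the file-details map while the Streamer helper (defined in the same test class) repeatedly serializes the object on another thread, so the assertion only passes if ReplicationLuceneIndex tolerates reads concurrent with writes. A stripped-down version of the same race, assuming ReplicationLuceneIndex implements Writeable, which its StreamInput constructor above suggests:

public void roundTripUnderConcurrentWrites() throws Exception {
    final ReplicationLuceneIndex index = new ReplicationLuceneIndex();
    final AtomicBoolean stop = new AtomicBoolean(false);
    Thread reader = new Thread(() -> {
        try {
            while (stop.get() == false) {
                BytesStreamOutput out = new BytesStreamOutput();
                index.writeTo(out);                                    // serialize while writes race
                new ReplicationLuceneIndex(out.bytes().streamInput()); // and deserialize the copy
            }
        } catch (IOException e) {
            throw new AssertionError(e);
        }
    });
    reader.start();
    for (int i = 0; i < 1000; i++) {
        index.addFileDetail("file_" + i, 100, true); // concurrent mutation
    }
    stop.set(true);
    reader.join();
}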
Use of org.opensearch.indices.replication.common.ReplicationLuceneIndex in project OpenSearch by opensearch-project.
The class RecoverySourceHandlerTests, method testSendFiles.
public void testSendFiles() throws Throwable {
    final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, service);
    final StartRecoveryRequest request = getStartRecoveryRequest();
    Store store = newStore(createTempDir());
    Directory dir = store.directory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    int numDocs = randomIntBetween(10, 100);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        document.add(newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
        writer.addDocument(document);
    }
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata();
    ReplicationLuceneIndex luceneIndex = new ReplicationLuceneIndex();
    List<StoreFileMetadata> metas = new ArrayList<>();
    for (StoreFileMetadata md : metadata) {
        metas.add(md);
        luceneIndex.addFileDetail(md.name(), md.length(), false);
    }
    Store targetStore = newStore(createTempDir());
    MultiFileWriter multiFileWriter = new MultiFileWriter(targetStore, luceneIndex, "", logger, () -> {});
    RecoveryTargetHandler target = new TestRecoveryTargetHandler() {
        @Override
        public void writeFileChunk(
            StoreFileMetadata md,
            long position,
            BytesReference content,
            boolean lastChunk,
            int totalTranslogOps,
            ActionListener<Void> listener
        ) {
            ActionListener.completeWith(listener, () -> {
                multiFileWriter.writeFileChunk(md, position, content, lastChunk);
                return null;
            });
        }
    };
    RecoverySourceHandler handler = new RecoverySourceHandler(
        null,
        new AsyncRecoveryTarget(target, recoveryExecutor),
        threadPool,
        request,
        Math.toIntExact(recoverySettings.getChunkSize().getBytes()),
        between(1, 5),
        between(1, 5)
    );
    PlainActionFuture<Void> sendFilesFuture = new PlainActionFuture<>();
    handler.sendFiles(store, metas.toArray(new StoreFileMetadata[0]), () -> 0, sendFilesFuture);
    sendFilesFuture.actionGet();
    Store.MetadataSnapshot targetStoreMetadata = targetStore.getMetadata();
    Store.RecoveryDiff recoveryDiff = targetStoreMetadata.recoveryDiff(metadata);
    assertEquals(metas.size(), recoveryDiff.identical.size());
    assertEquals(0, recoveryDiff.different.size());
    assertEquals(0, recoveryDiff.missing.size());
    IndexReader reader = DirectoryReader.open(targetStore.directory());
    assertEquals(numDocs, reader.maxDoc());
    IOUtils.close(reader, store, multiFileWriter, targetStore);
}
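Both RecoverySourceHandlerTests methods repeat the same preparation step: every file in the source commit's Store.MetadataSnapshot is mirrored into the ReplicationLuceneIndex that the MultiFileWriter updates as chunks arrive. Factored out as a sketch; the helper name indexFor is ours:

static ReplicationLuceneIndex indexFor(Store.MetadataSnapshot snapshot) {
    ReplicationLuceneIndex luceneIndex = new ReplicationLuceneIndex();
    for (StoreFileMetadata md : snapshot) {
        // false = not reused: every byte of the file must be transferred
        luceneIndex.addFileDetail(md.name(), md.length(), false);
    }
    return luceneIndex;
}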
Use of org.opensearch.indices.replication.common.ReplicationLuceneIndex in project OpenSearch by opensearch-project.
The class RecoverySourceHandlerTests, method testHandleCorruptedIndexOnSendSendFiles.
public void testHandleCorruptedIndexOnSendSendFiles() throws Throwable {
    Settings settings = Settings.builder()
        .put("indices.recovery.concurrent_streams", 1)
        .put("indices.recovery.concurrent_small_file_streams", 1)
        .build();
    final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
    final StartRecoveryRequest request = getStartRecoveryRequest();
    Path tempDir = createTempDir();
    Store store = newStore(tempDir, false);
    AtomicBoolean failedEngine = new AtomicBoolean(false);
    Directory dir = store.directory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    int numDocs = randomIntBetween(10, 100);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        document.add(newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
        writer.addDocument(document);
    }
    writer.commit();
    writer.close();
    ReplicationLuceneIndex luceneIndex = new ReplicationLuceneIndex();
    Store.MetadataSnapshot metadata = store.getMetadata();
    List<StoreFileMetadata> metas = new ArrayList<>();
    for (StoreFileMetadata md : metadata) {
        metas.add(md);
        luceneIndex.addFileDetail(md.name(), md.length(), false);
    }
    CorruptionUtils.corruptFile(
        random(),
        FileSystemUtils.files(tempDir, (p) -> (p.getFileName().toString().equals("write.lock") || p.getFileName().toString().startsWith("extra")) == false)
    );
    Store targetStore = newStore(createTempDir(), false);
    MultiFileWriter multiFileWriter = new MultiFileWriter(targetStore, luceneIndex, "", logger, () -> {});
    RecoveryTargetHandler target = new TestRecoveryTargetHandler() {
        @Override
        public void writeFileChunk(
            StoreFileMetadata md,
            long position,
            BytesReference content,
            boolean lastChunk,
            int totalTranslogOps,
            ActionListener<Void> listener
        ) {
            ActionListener.completeWith(listener, () -> {
                multiFileWriter.writeFileChunk(md, position, content, lastChunk);
                return null;
            });
        }
    };
    RecoverySourceHandler handler = new RecoverySourceHandler(
        null,
        new AsyncRecoveryTarget(target, recoveryExecutor),
        threadPool,
        request,
        Math.toIntExact(recoverySettings.getChunkSize().getBytes()),
        between(1, 8),
        between(1, 8)
    ) {
        @Override
        protected void failEngine(IOException cause) {
            assertFalse(failedEngine.get());
            failedEngine.set(true);
        }
    };
    SetOnce<Exception> sendFilesError = new SetOnce<>();
    CountDownLatch latch = new CountDownLatch(1);
    handler.sendFiles(
        store,
        metas.toArray(new StoreFileMetadata[0]),
        () -> 0,
        new LatchedActionListener<>(ActionListener.wrap(r -> sendFilesError.set(null), e -> sendFilesError.set(e)), latch)
    );
    latch.await();
    assertThat(sendFilesError.get(), instanceOf(IOException.class));
    assertNotNull(ExceptionsHelper.unwrapCorruption(sendFilesError.get()));
    assertTrue(failedEngine.get());
    // ensure all chunk requests have been completed; otherwise some files on the target are left open.
    IOUtils.close(() -> terminate(threadPool), () -> threadPool = null);
    IOUtils.close(store, multiFileWriter, targetStore);
}
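The corruption test hinges on ExceptionsHelper.unwrapCorruption, which walks the exception's cause chain and returns the nested corruption exception, or null when the failure is not a corruption error. A minimal sketch of the same decision in a caller's error handler; the surrounding method and its name are ours, and failEngine stands in for the handler hook the test overrides above:

void onSendFilesFailure(Exception e) throws IOException {
    if (ExceptionsHelper.unwrapCorruption(e) != null) {
        // Corruption on the source: fail the engine (the test above overrides
        // this hook to record that it was called exactly once).
        failEngine(new IOException("corrupted source files", e));
    } else {
        // Transient failure: surface it so the caller can retry instead of failing the shard.
        throw new IOException("recovery failed", e);
    }
}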