Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class ReplicationOperationTests, method testReplication:
public void testReplication() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);
    ClusterState state = stateWithActivePrimary(index, true, randomInt(5));
    IndexMetaData indexMetaData = state.getMetaData().index(index);
    final long primaryTerm = indexMetaData.primaryTerm(0);
    final IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = indexShardRoutingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
        state = ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes()).localNodeId(primaryShard.relocatingNodeId())).build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }
    // add a few in-sync allocation ids that don't have corresponding routing entries
    Set<String> staleAllocationIds = Sets.newHashSet(generateRandomStringArray(4, 10, false));
    state = ClusterState.builder(state)
        .metaData(MetaData.builder(state.metaData())
            .put(IndexMetaData.builder(indexMetaData)
                .putInSyncAllocationIds(0, Sets.union(indexMetaData.inSyncAllocationIds(0), staleAllocationIds))))
        .build();
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, state);
    final Map<ShardRouting, Exception> expectedFailures = new HashMap<>();
    final Set<ShardRouting> expectedFailedShards = new HashSet<>();
    for (ShardRouting replica : expectedReplicas) {
        if (randomBoolean()) {
            Exception t;
            boolean criticalFailure = randomBoolean();
            if (criticalFailure) {
                t = new CorruptIndexException("simulated", (String) null);
            } else {
                t = new IndexShardNotStartedException(shardId, IndexShardState.RECOVERING);
            }
            logger.debug("--> simulating failure on {} with [{}]", replica, t.getClass().getSimpleName());
            expectedFailures.put(replica, t);
            if (criticalFailure) {
                expectedFailedShards.add(replica);
            }
        }
    }
    Request request = new Request(shardId);
    PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final ClusterState finalState = state;
    final TestReplicaProxy replicasProxy = new TestReplicaProxy(expectedFailures);
    final TestPrimary primary = new TestPrimary(primaryShard, primaryTerm);
    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, () -> finalState);
    op.execute();
    assertThat(request.primaryTerm(), equalTo(primaryTerm));
    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    assertThat(replicasProxy.failedReplicas, equalTo(expectedFailedShards));
    assertThat(replicasProxy.markedAsStaleCopies, equalTo(staleAllocationIds));
    assertTrue("listener is not marked as done", listener.isDone());
    ShardInfo shardInfo = listener.actionGet().getShardInfo();
    assertThat(shardInfo.getFailed(), equalTo(expectedFailedShards.size()));
    assertThat(shardInfo.getFailures(), arrayWithSize(expectedFailedShards.size()));
    assertThat(shardInfo.getSuccessful(), equalTo(1 + expectedReplicas.size() - expectedFailures.size()));
    final List<ShardRouting> unassignedShards = indexShardRoutingTable.shardsWithState(ShardRoutingState.UNASSIGNED);
    final int totalShards = 1 + expectedReplicas.size() + unassignedShards.size();
    assertThat(shardInfo.getTotal(), equalTo(totalShards));
    assertThat(primary.knownLocalCheckpoints.remove(primaryShard.allocationId().getId()), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(replicasProxy.generatedLocalCheckpoints));
}
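A note on what this test pins down (a sketch, not part of the Elasticsearch listing): only failures that do not merely signal a temporarily unavailable shard are "critical", which is why the CorruptIndexException branch feeds expectedFailedShards while IndexShardNotStartedException does not. The isCritical helper below is hypothetical and stands in for the TransportActions.isShardNotAvailableException check that Elasticsearch itself uses:

import org.apache.lucene.index.CorruptIndexException;

// Self-contained sketch: corruption is a "critical" failure that must fail
// the replica shard copy, while a transient not-yet-started state is reported
// but leaves the copy alive. isCritical is a hypothetical stand-in.
public class ReplicaFailureClassificationSketch {

    static boolean isCritical(Exception e) {
        // Corruption is never transient -- the shard copy has to be failed.
        return e instanceof CorruptIndexException;
    }

    public static void main(String[] args) {
        Exception corrupt = new CorruptIndexException("simulated", (String) null);
        Exception notStarted = new IllegalStateException("shard is still recovering");
        System.out.println(isCritical(corrupt));    // true  -> would land in expectedFailedShards
        System.out.println(isCritical(notStarted)); // false -> counted as a failure, copy not failed
    }
}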
Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class ReplicationOperationTests, method testDemotedPrimary:
public void testDemotedPrimary() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);
    ClusterState state = stateWithActivePrimary(index, true, 1 + randomInt(2), randomInt(2));
    IndexMetaData indexMetaData = state.getMetaData().index(index);
    final long primaryTerm = indexMetaData.primaryTerm(0);
    ShardRouting primaryShard = state.getRoutingTable().shardRoutingTable(shardId).primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
        state = ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes()).localNodeId(primaryShard.relocatingNodeId())).build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }
    // add an in-sync allocation id that doesn't have a corresponding routing entry
    state = ClusterState.builder(state)
        .metaData(MetaData.builder(state.metaData())
            .put(IndexMetaData.builder(indexMetaData)
                .putInSyncAllocationIds(0, Sets.union(indexMetaData.inSyncAllocationIds(0), Sets.newHashSet(randomAsciiOfLength(10))))))
        .build();
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, state);
    final Map<ShardRouting, Exception> expectedFailures = new HashMap<>();
    final ShardRouting failedReplica = randomFrom(new ArrayList<>(expectedReplicas));
    expectedFailures.put(failedReplica, new CorruptIndexException("simulated", (String) null));
    Request request = new Request(shardId);
    PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final ClusterState finalState = state;
    final boolean testPrimaryDemotedOnStaleShardCopies = randomBoolean();
    final TestReplicaProxy replicasProxy = new TestReplicaProxy(expectedFailures) {

        @Override
        public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception,
                                      Runnable onSuccess, Consumer<Exception> onPrimaryDemoted, Consumer<Exception> onIgnoredFailure) {
            if (testPrimaryDemotedOnStaleShardCopies) {
                super.failShardIfNeeded(replica, primaryTerm, message, exception, onSuccess, onPrimaryDemoted, onIgnoredFailure);
            } else {
                assertThat(replica, equalTo(failedReplica));
                onPrimaryDemoted.accept(new ElasticsearchException("the king is dead"));
            }
        }

        @Override
        public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm,
                                                 Runnable onSuccess, Consumer<Exception> onPrimaryDemoted, Consumer<Exception> onIgnoredFailure) {
            if (testPrimaryDemotedOnStaleShardCopies) {
                onPrimaryDemoted.accept(new ElasticsearchException("the king is dead"));
            } else {
                super.markShardCopyAsStaleIfNeeded(shardId, allocationId, primaryTerm, onSuccess, onPrimaryDemoted, onIgnoredFailure);
            }
        }
    };
    AtomicBoolean primaryFailed = new AtomicBoolean();
    final TestPrimary primary = new TestPrimary(primaryShard, primaryTerm) {

        @Override
        public void failShard(String message, Exception exception) {
            assertTrue(primaryFailed.compareAndSet(false, true));
        }
    };
    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, () -> finalState);
    op.execute();
    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertTrue("listener is not marked as done", listener.isDone());
    assertTrue(primaryFailed.get());
    assertListenerThrows("should throw exception to trigger retry", listener, ReplicationOperation.RetryOnPrimaryException.class);
}
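Both overridden proxy methods above converge on the same demotion path. A stripped-down sketch of that callback contract, with hypothetical names, shows why the operation must end in RetryOnPrimaryException: once onPrimaryDemoted fires, the primary fails itself and the request has to be retried against whichever shard copy is promoted next:

import java.util.function.Consumer;

// Hypothetical sketch of the demotion callback contract: invoking
// onPrimaryDemoted signals that the cluster no longer recognizes this node as
// the primary, so the caller fails the local primary shard and retries.
public class PrimaryDemotionSketch {

    static void failShardIfNeeded(Runnable onSuccess, Consumer<Exception> onPrimaryDemoted) {
        // Simulate the master rejecting the shard-failed request because the
        // reporting primary's term is stale.
        onPrimaryDemoted.accept(new IllegalStateException("the king is dead"));
    }

    public static void main(String[] args) {
        failShardIfNeeded(
            () -> System.out.println("replica failed, replication continues"),
            e -> System.out.println("primary demoted, fail shard and retry: " + e.getMessage()));
    }
}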
Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class RecoverySourceHandlerTests, method testHandleExceptinoOnSendSendFiles:
public void testHandleExceptinoOnSendSendFiles() throws Throwable {
    Settings settings = Settings.builder()
        .put("indices.recovery.concurrent_streams", 1)
        .put("indices.recovery.concurrent_small_file_streams", 1)
        .build();
    final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
    final StartRecoveryRequest request = new StartRecoveryRequest(shardId,
        new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT),
        new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT),
        null, randomBoolean(), randomNonNegativeLong(),
        randomBoolean() ? SequenceNumbersService.UNASSIGNED_SEQ_NO : 0L);
    Path tempDir = createTempDir();
    Store store = newStore(tempDir, false);
    AtomicBoolean failedEngine = new AtomicBoolean(false);
    RecoverySourceHandler handler = new RecoverySourceHandler(null, null, request, () -> 0L, e -> () -> {
    }, recoverySettings.getChunkSize().bytesAsInt(), Settings.EMPTY) {

        @Override
        protected void failEngine(IOException cause) {
            assertFalse(failedEngine.get());
            failedEngine.set(true);
        }
    };
    Directory dir = store.directory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    int numDocs = randomIntBetween(10, 100);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        document.add(newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
        writer.addDocument(document);
    }
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata(null);
    List<StoreFileMetaData> metas = new ArrayList<>();
    for (StoreFileMetaData md : metadata) {
        metas.add(md);
    }
    final boolean throwCorruptedIndexException = randomBoolean();
    Store targetStore = newStore(createTempDir(), false);
    try {
        handler.sendFiles(store, metas.toArray(new StoreFileMetaData[0]), (md) -> {
            if (throwCorruptedIndexException) {
                throw new RuntimeException(new CorruptIndexException("foo", "bar"));
            } else {
                throw new RuntimeException("boom");
            }
        });
        fail("exception index");
    } catch (RuntimeException ex) {
        assertNull(ExceptionsHelper.unwrapCorruption(ex));
        if (throwCorruptedIndexException) {
            assertEquals(ex.getMessage(), "[File corruption occurred on recovery but checksums are ok]");
        } else {
            assertEquals(ex.getMessage(), "boom");
        }
    } catch (CorruptIndexException ex) {
        fail("not expected here");
    }
    assertFalse(failedEngine.get());
    IOUtils.close(store, targetStore);
}
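The key assertion is assertNull(ExceptionsHelper.unwrapCorruption(ex)): even when the lambda throws a wrapped CorruptIndexException, the handler re-verifies the local checksums, finds them intact, and surfaces a plain RuntimeException whose cause chain no longer carries the corruption. A minimal sketch of the cause-chain walk a helper like unwrapCorruption performs (an assumed simplification, not the exact Elasticsearch implementation):

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;

// Assumed simplification: return the first corruption-type throwable in the
// cause chain, or null if the chain is clean.
public class UnwrapCorruptionSketch {

    static Throwable unwrapCorruption(Throwable t) {
        for (Throwable cause = t; cause != null; cause = cause.getCause()) {
            if (cause instanceof CorruptIndexException
                    || cause instanceof IndexFormatTooOldException
                    || cause instanceof IndexFormatTooNewException) {
                return cause;
            }
        }
        return null;
    }

    public static void main(String[] args) {
        RuntimeException wrapped = new RuntimeException(new CorruptIndexException("foo", "bar"));
        System.out.println(unwrapCorruption(wrapped) != null);              // true
        System.out.println(unwrapCorruption(new RuntimeException("boom"))); // null
    }
}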
Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class ShardFailedClusterStateTaskExecutorTests, method createNonExistentShards:
private List<ShardStateAction.ShardEntry> createNonExistentShards(ClusterState currentState, String reason) {
    // add shards from a non-existent index
    String nonExistentIndexUUID = "non-existent";
    Index index = new Index("non-existent", nonExistentIndexUUID);
    List<String> nodeIds = new ArrayList<>();
    for (ObjectCursor<String> nodeId : currentState.nodes().getNodes().keys()) {
        nodeIds.add(nodeId.toString());
    }
    List<ShardRouting> nonExistentShards = new ArrayList<>();
    nonExistentShards.add(nonExistentShardRouting(index, nodeIds, true));
    for (int i = 0; i < numberOfReplicas; i++) {
        nonExistentShards.add(nonExistentShardRouting(index, nodeIds, false));
    }
    List<ShardStateAction.ShardEntry> existingShards = createExistingShards(currentState, reason);
    List<ShardStateAction.ShardEntry> shardsWithMismatchedAllocationIds = new ArrayList<>();
    for (ShardStateAction.ShardEntry existingShard : existingShards) {
        shardsWithMismatchedAllocationIds.add(new ShardStateAction.ShardEntry(
            existingShard.shardId, UUIDs.randomBase64UUID(), 0L, existingShard.message, existingShard.failure));
    }
    List<ShardStateAction.ShardEntry> tasks = new ArrayList<>();
    nonExistentShards.forEach(shard -> tasks.add(new ShardStateAction.ShardEntry(
        shard.shardId(), shard.allocationId().getId(), 0L, reason,
        new CorruptIndexException("simulated", nonExistentIndexUUID))));
    tasks.addAll(shardsWithMismatchedAllocationIds);
    return tasks;
}
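A note on the constructor used for the simulated failure: Lucene's CorruptIndexException(String message, String resourceDescription) folds the resource description into the final message, so reusing the fake index UUID as the resource keeps the simulated failures traceable in task listings. A tiny standalone example:

import org.apache.lucene.index.CorruptIndexException;

// The second argument is a resource description that Lucene appends to the
// message, producing something like "simulated (resource=non-existent)".
public class CorruptIndexExceptionMessageExample {
    public static void main(String[] args) {
        CorruptIndexException e = new CorruptIndexException("simulated", "non-existent");
        System.out.println(e.getMessage());
    }
}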
Use of org.apache.lucene.index.CorruptIndexException in project elasticsearch by elastic.
The class MetaDataStateFormat, method read:
/**
 * Reads the state from a given file and compares the expected version against the actual version of
 * the state.
 */
public final T read(NamedXContentRegistry namedXContentRegistry, Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        try (IndexInput indexInput = dir.openInput(file.getFileName().toString(), IOContext.DEFAULT)) {
            // We checksum the entire file before we even go and parse it. If it's corrupted we barf right here.
            CodecUtil.checksumEntireFile(indexInput);
            final int fileVersion = CodecUtil.checkHeader(indexInput, STATE_FILE_CODEC, MIN_COMPATIBLE_STATE_FILE_VERSION, STATE_FILE_VERSION);
            final XContentType xContentType = XContentType.values()[indexInput.readInt()];
            if (fileVersion == STATE_FILE_VERSION_ES_2X_AND_BELOW) {
                // format version 0 wrote a version that always came from the content state file and was never used
                indexInput.readLong(); // version currently unused
            }
            long filePointer = indexInput.getFilePointer();
            long contentSize = indexInput.length() - CodecUtil.footerLength() - filePointer;
            try (IndexInput slice = indexInput.slice("state_xcontent", filePointer, contentSize)) {
                try (XContentParser parser = XContentFactory.xContent(xContentType)
                        .createParser(namedXContentRegistry, new InputStreamIndexInput(slice, contentSize))) {
                    return fromXContent(parser);
                }
            }
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // we trick this into a dedicated exception with the original stacktrace
            throw new CorruptStateException(ex);
        }
    }
}
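For orientation, read() implies a matching on-disk layout: a Lucene codec header, an int identifying the XContentType, the serialized content, and a Lucene footer whose checksum CodecUtil.checksumEntireFile verifies before any parsing happens. A hedged write-side sketch of that layout; the codec name, version, and file name below are placeholders, not the real MetaDataStateFormat constants:

import java.nio.file.Paths;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

// Writes a file in the shape read() expects: header, xcontent-type int,
// payload, checksum footer. Codec name/version and paths are assumptions.
public class StateFileLayoutSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/state"))) {
            try (IndexOutput out = dir.createOutput("state-0.st", IOContext.DEFAULT)) {
                CodecUtil.writeHeader(out, "state", 1);     // stand-ins for STATE_FILE_CODEC / STATE_FILE_VERSION
                out.writeInt(0);                            // ordinal of the XContentType used for the payload
                out.writeBytes(new byte[] { '{', '}' }, 2); // the serialized xcontent payload
                CodecUtil.writeFooter(out);                 // checksum verified by checksumEntireFile on read
            }
        }
    }
}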