use of org.apache.lucene.index.CorruptIndexException in project crate by crate.
the class ExceptionsHelperTests method testSuppressedCycle.
@Test
public void testSuppressedCycle() {
RuntimeException e1 = new RuntimeException();
RuntimeException e2 = new RuntimeException();
e1.addSuppressed(e2);
e2.addSuppressed(e1);
ExceptionsHelper.unwrapCorruption(e1);
final CorruptIndexException corruptIndexException = new CorruptIndexException("corrupt", "resource");
RuntimeException e3 = new RuntimeException(corruptIndexException);
e3.addSuppressed(e1);
assertThat(ExceptionsHelper.unwrapCorruption(e3), equalTo(corruptIndexException));
RuntimeException e4 = new RuntimeException(e1);
e4.addSuppressed(corruptIndexException);
assertThat(ExceptionsHelper.unwrapCorruption(e4), equalTo(corruptIndexException));
}
use of org.apache.lucene.index.CorruptIndexException in project crate by crate.
the class TranslogHeader method read.
/**
* Read a translog header from the given path and file channel
*/
static TranslogHeader read(final String translogUUID, final Path path, final FileChannel channel) throws IOException {
try {
// This input is intentionally not closed because closing it will close the FileChannel.
final BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(new InputStreamStreamInput(java.nio.channels.Channels.newInputStream(channel), channel.size()), path.toString());
final int version;
try {
version = CodecUtil.checkHeader(new InputStreamDataInput(in), TRANSLOG_CODEC, VERSION_CHECKSUMS, VERSION_PRIMARY_TERM);
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
tryReportOldVersionError(path, channel);
throw new TranslogCorruptedException(path.toString(), "translog header corrupted", e);
}
if (version == VERSION_CHECKSUMS) {
throw new IllegalStateException("pre-2.0 translog found [" + path + "]");
}
// Read the translogUUID
final int uuidLen = in.readInt();
if (uuidLen > channel.size()) {
throw new TranslogCorruptedException(path.toString(), "UUID length can't be larger than the translog");
}
if (uuidLen <= 0) {
throw new TranslogCorruptedException(path.toString(), "UUID length must be positive");
}
final BytesRef uuid = new BytesRef(uuidLen);
uuid.length = uuidLen;
in.read(uuid.bytes, uuid.offset, uuid.length);
// Read the primary term
final long primaryTerm;
if (version == VERSION_PRIMARY_TERM) {
primaryTerm = in.readLong();
} else {
// This can be dropped with 5.0 as BWC with older versions is not required anymore
assert version == VERSION_CHECKPOINTS : "Unknown header version [" + version + "]";
primaryTerm = UNASSIGNED_PRIMARY_TERM;
}
// Verify the checksum (can be always verified on >= 5.0 as version must be primary term based without BWC)
if (version >= VERSION_PRIMARY_TERM) {
Translog.verifyChecksum(in);
}
assert primaryTerm >= 0 : "Primary term must be non-negative [" + primaryTerm + "]; translog path [" + path + "]";
final int headerSizeInBytes = headerSizeInBytes(version, uuid.length);
assert channel.position() == headerSizeInBytes : "Header is not fully read; header size [" + headerSizeInBytes + "], position [" + channel.position() + "]";
// verify UUID only after checksum, to ensure that UUID is not corrupted
final BytesRef expectedUUID = new BytesRef(translogUUID);
if (uuid.bytesEquals(expectedUUID) == false) {
throw new TranslogCorruptedException(path.toString(), "expected shard UUID " + expectedUUID + " but got: " + uuid + " this translog file belongs to a different translog");
}
return new TranslogHeader(translogUUID, primaryTerm, headerSizeInBytes);
} catch (EOFException e) {
throw new TranslogCorruptedException(path.toString(), "translog header truncated", e);
}
}
use of org.apache.lucene.index.CorruptIndexException in project crate by crate.
the class Store method failIfCorrupted.
private static void failIfCorrupted(Directory directory) throws IOException {
final String[] files = directory.listAll();
List<CorruptIndexException> ex = new ArrayList<>();
for (String file : files) {
if (file.startsWith(CORRUPTED_MARKER_NAME_PREFIX)) {
try (ChecksumIndexInput input = directory.openChecksumInput(file, IOContext.READONCE)) {
CodecUtil.checkHeader(input, CODEC, CORRUPTED_MARKER_CODEC_VERSION, CORRUPTED_MARKER_CODEC_VERSION);
final int size = input.readVInt();
final byte[] buffer = new byte[size];
input.readBytes(buffer, 0, buffer.length);
StreamInput in = StreamInput.wrap(buffer);
Exception t = in.readException();
if (t instanceof CorruptIndexException) {
ex.add((CorruptIndexException) t);
} else {
ex.add(new CorruptIndexException(t.getMessage(), "preexisting_corruption", t));
}
CodecUtil.checkFooter(input);
}
}
}
if (ex.isEmpty() == false) {
ExceptionsHelper.rethrowAndSuppress(ex);
}
}
use of org.apache.lucene.index.CorruptIndexException in project crate by crate.
the class ClusterDisruptionIT method testSendingShardFailure.
// simulate handling of sending shard failure during an isolation
@Test
public void testSendingShardFailure() throws Exception {
List<String> nodes = startCluster(3);
String masterNode = internalCluster().getMasterName();
List<String> nonMasterNodes = nodes.stream().filter(node -> !node.equals(masterNode)).collect(Collectors.toList());
String nonMasterNode = randomFrom(nonMasterNodes);
execute("create table t (id int primary key, x string) clustered into 3 shards with (number_of_replicas = 2)");
ensureGreen();
String nonMasterNodeId = internalCluster().clusterService(nonMasterNode).localNode().getId();
// fail a random shard
ShardRouting failedShard = randomFrom(clusterService().state().getRoutingNodes().node(nonMasterNodeId).shardsWithState(ShardRoutingState.STARTED));
ShardStateAction service = internalCluster().getInstance(ShardStateAction.class, nonMasterNode);
CountDownLatch latch = new CountDownLatch(1);
AtomicBoolean success = new AtomicBoolean();
String isolatedNode = randomBoolean() ? masterNode : nonMasterNode;
NetworkDisruption.TwoPartitions partitions = isolateNode(isolatedNode);
// we cannot use the NetworkUnresponsive disruption type here as it will swallow the "shard failed" request, calling neither
// onSuccess nor onFailure on the provided listener.
NetworkLinkDisruptionType disruptionType = new NetworkDisruption.NetworkDisconnect();
NetworkDisruption networkDisruption = new NetworkDisruption(partitions, disruptionType);
setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
service.localShardFailed(failedShard, "simulated", new CorruptIndexException("simulated", (String) null), new ActionListener<>() {
@Override
public void onResponse(Void aVoid) {
success.set(true);
latch.countDown();
}
@Override
public void onFailure(Exception e) {
success.set(false);
latch.countDown();
assert false;
}
});
if (isolatedNode.equals(nonMasterNode)) {
assertNoMaster(nonMasterNode);
} else {
ensureStableCluster(2, nonMasterNode);
}
// heal the partition
networkDisruption.removeAndEnsureHealthy(internalCluster());
// the cluster should stabilize
ensureStableCluster(3);
latch.await();
// the listener should be notified
assertTrue(success.get());
// the failed shard should be gone
List<ShardRouting> shards = clusterService().state().getRoutingTable().allShards(toIndexName(sqlExecutor.getCurrentSchema(), "t", null));
for (ShardRouting shard : shards) {
assertThat(shard.allocationId(), not(equalTo(failedShard.allocationId())));
}
}
use of org.apache.lucene.index.CorruptIndexException in project crate by crate.
the class BlobStoreRepository method restoreShard.
@Override
public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState, ActionListener<Void> listener) {
final ShardId shardId = store.shardId();
final ActionListener<Void> restoreListener = ActionListener.delegateResponse(listener, (l, e) -> l.onFailure(new IndexShardRestoreFailedException(shardId, "failed to restore snapshot [" + snapshotId + "]", e)));
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
final BlobContainer container = shardContainer(indexId, snapshotShardId);
executor.execute(ActionRunnable.wrap(restoreListener, l -> {
final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId);
final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles());
new FileRestoreContext(metadata.name(), shardId, snapshotId, recoveryState) {
@Override
protected void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store, ActionListener<Void> listener) {
if (filesToRecover.isEmpty()) {
listener.onResponse(null);
} else {
// Start as many workers as fit into the snapshot pool at once at the most
int maxPoolSize = executor instanceof ThreadPoolExecutor ? ((ThreadPoolExecutor) executor).getMaximumPoolSize() : 1;
final int workers = Math.min(maxPoolSize, snapshotFiles.indexFiles().size());
final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> files = new LinkedBlockingQueue<>(filesToRecover);
final ActionListener<Void> allFilesListener = fileQueueListener(files, workers, ActionListener.map(listener, v -> null));
// restore the files from the snapshot to the Lucene store
for (int i = 0; i < workers; ++i) {
executor.execute(ActionRunnable.run(allFilesListener, () -> {
store.incRef();
try {
BlobStoreIndexShardSnapshot.FileInfo fileToRecover;
while ((fileToRecover = files.poll(0L, TimeUnit.MILLISECONDS)) != null) {
restoreFile(fileToRecover, store);
}
} finally {
store.decRef();
}
}));
}
}
}
private void restoreFile(BlobStoreIndexShardSnapshot.FileInfo fileInfo, Store store) throws IOException {
boolean success = false;
try (InputStream stream = maybeRateLimit(new SlicedInputStream(fileInfo.numberOfParts()) {
@Override
protected InputStream openSlice(long slice) throws IOException {
return container.readBlob(fileInfo.partName(slice));
}
}, restoreRateLimiter, restoreRateLimitingTimeInNanos)) {
try (IndexOutput indexOutput = store.createVerifyingOutput(fileInfo.physicalName(), fileInfo.metadata(), IOContext.DEFAULT)) {
final byte[] buffer = new byte[BUFFER_SIZE];
int length;
while ((length = stream.read(buffer)) > 0) {
indexOutput.writeBytes(buffer, 0, length);
recoveryState.getIndex().addRecoveredBytesToFile(fileInfo.physicalName(), length);
}
Store.verify(indexOutput);
indexOutput.close();
store.directory().sync(Collections.singleton(fileInfo.physicalName()));
success = true;
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
try {
store.markStoreCorrupted(ex);
} catch (IOException e) {
LOGGER.warn("store cannot be marked as corrupted", e);
}
throw ex;
} finally {
if (success == false) {
store.deleteQuiet(fileInfo.physicalName());
}
}
}
}
}.restore(snapshotFiles, store, l);
}));
}
Aggregations