use of io.pravega.test.common.ErrorInjector in project pravega by pravega.
the class DurableLogTests method testRecoveryFailures.
/**
* Tests the DurableLog recovery process in a scenario when there are failures during the process
* (these may or may not be DataCorruptionExceptions).
*/
@Test
public void testRecoveryFailures() throws Exception {
int streamSegmentCount = 50;
int appendsPerStreamSegment = 20;
// Fail DataLog reads after X reads.
int failReadAfter = 2;
// Setup a DurableLog and start it.
AtomicReference<TestDurableDataLog> dataLog = new AtomicReference<>();
@Cleanup TestDurableDataLogFactory dataLogFactory = new TestDurableDataLogFactory(new InMemoryDurableDataLogFactory(MAX_DATA_LOG_APPEND_SIZE, executorService()), dataLog::set);
@Cleanup Storage storage = InMemoryStorageFactory.newStorage(executorService());
storage.initialize(1);
HashSet<Long> streamSegmentIds;
List<OperationWithCompletion> completionFutures;
// First DurableLog. We use this for generating data.
UpdateableContainerMetadata metadata = new MetadataBuilder(CONTAINER_ID).build();
@Cleanup InMemoryCacheFactory cacheFactory = new InMemoryCacheFactory();
@Cleanup CacheManager cacheManager = new CacheManager(DEFAULT_READ_INDEX_CONFIG.getCachePolicy(), executorService());
try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
durableLog.startAsync().awaitRunning();
// Generate some test data (we need to do this after we started the DurableLog because in the process of
// recovery, it wipes away all existing metadata).
streamSegmentIds = createStreamSegmentsWithOperations(streamSegmentCount, metadata, durableLog, storage);
List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
// Process all generated operations and wait for them to complete
completionFutures = processOperations(operations, durableLog);
OperationWithCompletion.allOf(completionFutures).join();
// Stop the processor.
durableLog.stopAsync().awaitTerminated();
}
// Recovery failure due to DataLog Failures.
metadata = new MetadataBuilder(CONTAINER_ID).build();
dataLog.set(null);
try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
// Inject some artificial error into the DataLogRead after a few reads.
ErrorInjector<Exception> readNextInjector = new ErrorInjector<>(count -> count > failReadAfter, () -> new DataLogNotAvailableException("intentional"));
dataLog.get().setReadErrorInjectors(null, readNextInjector);
// Verify the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
// multiple layers, so we need to dig deep into it.
AssertExtensions.assertThrows("Recovery did not fail properly when expecting DurableDataLogException.", () -> durableLog.startAsync().awaitRunning(), ex -> {
if (ex instanceof IllegalStateException) {
ex = ex.getCause();
}
if (ex == null) {
try {
// We need this to enter a FAILED state to get its failure cause.
durableLog.awaitTerminated();
} catch (Exception ex2) {
ex = durableLog.failureCause();
}
}
ex = Exceptions.unwrap(ex);
return ex instanceof DataLogNotAvailableException && ex.getMessage().equals("intentional");
});
}
// Recovery failure due to DataCorruptionException.
metadata = new MetadataBuilder(CONTAINER_ID).build();
dataLog.set(null);
try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
// Reset error injectors to nothing.
dataLog.get().setReadErrorInjectors(null, null);
AtomicInteger readCounter = new AtomicInteger();
dataLog.get().setReadInterceptor(readItem -> {
if (readCounter.incrementAndGet() > failReadAfter && readItem.getLength() > DataFrame.MIN_ENTRY_LENGTH_NEEDED) {
// Mangle with the payload and overwrite its contents with a DataFrame having a bogus
// previous sequence number.
DataFrame df = DataFrame.ofSize(readItem.getLength());
df.seal();
ArrayView serialization = df.getData();
return new InjectedReadItem(serialization.getReader(), serialization.getLength(), readItem.getAddress());
}
return readItem;
});
// Verify the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
// multiple layers, so we need to dig deep into it.
AssertExtensions.assertThrows("Recovery did not fail properly when expecting DataCorruptionException.", () -> durableLog.startAsync().awaitRunning(), ex -> {
if (ex instanceof IllegalStateException) {
ex = ex.getCause();
}
return Exceptions.unwrap(ex) instanceof DataCorruptionException;
});
// Verify that the underlying DurableDataLog has been disabled.
val disabledDataLog = dataLogFactory.createDurableDataLog(CONTAINER_ID);
AssertExtensions.assertThrows("DurableDataLog has not been disabled following a recovery failure with DataCorruptionException.", () -> disabledDataLog.initialize(TIMEOUT), ex -> ex instanceof DataLogDisabledException);
}
}
use of io.pravega.test.common.ErrorInjector in project pravega by pravega.
the class DurableLogTests method testAddWithDataLogFailures.
/**
* Tests the ability of the DurableLog to process Operations when there are DataLog write failures.
*/
@Test
public void testAddWithDataLogFailures() throws Exception {
int streamSegmentCount = 10;
int appendsPerStreamSegment = 80;
// Fail (asynchronously) after X DataFrame commits (to DataLog).
int failAsyncAfter = 5;
// Setup a DurableLog and start it.
@Cleanup ContainerSetup setup = new ContainerSetup(executorService());
@Cleanup DurableLog durableLog = setup.createDurableLog();
durableLog.startAsync().awaitRunning();
Assert.assertNotNull("Internal error: could not grab a pointer to the created TestDurableDataLog.", setup.dataLog.get());
// Generate some test data (we need to do this after we started the DurableLog because in the process of
// recovery, it wipes away all existing metadata).
HashSet<Long> streamSegmentIds = createStreamSegmentsInMetadata(streamSegmentCount, setup.metadata);
List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
ErrorInjector<Exception> aSyncErrorInjector = new ErrorInjector<>(count -> count >= failAsyncAfter, () -> new DurableDataLogException("intentional"));
setup.dataLog.get().setAppendErrorInjectors(null, aSyncErrorInjector);
// Process all generated operations.
List<OperationWithCompletion> completionFutures = processOperations(operations, durableLog);
// Wait for all such operations to complete. We are expecting exceptions, so verify that we do.
AssertExtensions.assertThrows("No operations failed.", OperationWithCompletion.allOf(completionFutures)::join, super::isExpectedExceptionForNonDataCorruption);
// Wait for the DurableLog to shutdown with failure.
ServiceListeners.awaitShutdown(durableLog, TIMEOUT, false);
Assert.assertEquals("Expected the DurableLog to fail after DurableDataLogException encountered.", Service.State.FAILED, durableLog.state());
// We can't really check the DurableLog or the DurableDataLog contents since they are both closed.
performMetadataChecks(streamSegmentIds, new HashSet<>(), new HashMap<>(), completionFutures, setup.metadata, false, false);
performReadIndexChecks(completionFutures, setup.readIndex);
}
use of io.pravega.test.common.ErrorInjector in project pravega by pravega.
the class OperationProcessorTests method testWithDataLogFailures.
/**
* Tests the ability of the OperationProcessor to process Operations when there are DataLog write failures. The expected
* outcome is that the OperationProcessor will auto-shutdown when such errors are encountered.
*/
@Test
public void testWithDataLogFailures() throws Exception {
int streamSegmentCount = 10;
int appendsPerStreamSegment = 80;
// Fail (asynchronously) after X DataFrame commits (to DataLog).
int failAfterCommits = 5;
@Cleanup TestContext context = new TestContext();
// Generate some test data (no need to complicate ourselves with Transactions here; that is tested in the no-failure test).
HashSet<Long> streamSegmentIds = createStreamSegmentsInMetadata(streamSegmentCount, context.metadata);
List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
// Setup an OperationProcessor and start it.
@Cleanup TestDurableDataLog dataLog = TestDurableDataLog.create(CONTAINER_ID, MAX_DATA_LOG_APPEND_SIZE, executorService());
dataLog.initialize(TIMEOUT);
@Cleanup OperationProcessor operationProcessor = new OperationProcessor(context.metadata, context.stateUpdater, dataLog, getNoOpCheckpointPolicy(), executorService());
operationProcessor.startAsync().awaitRunning();
ErrorInjector<Exception> aSyncErrorInjector = new ErrorInjector<>(count -> count >= failAfterCommits, () -> new DurableDataLogException("intentional"));
dataLog.setAppendErrorInjectors(null, aSyncErrorInjector);
// Process all generated operations.
List<OperationWithCompletion> completionFutures = processOperations(operations, operationProcessor);
// Wait for all such operations to complete. We are expecting exceptions, so verify that we do.
AssertExtensions.assertThrows("No operations failed.", OperationWithCompletion.allOf(completionFutures)::join, super::isExpectedExceptionForNonDataCorruption);
// Wait for the OperationProcessor to shutdown with failure.
ServiceListeners.awaitShutdown(operationProcessor, TIMEOUT, false);
Assert.assertEquals("Expected the OperationProcessor to fail after DurableDataLogException encountered.", Service.State.FAILED, operationProcessor.state());
performLogOperationChecks(completionFutures, context.memoryLog, dataLog, context.metadata);
performMetadataChecks(streamSegmentIds, new HashSet<>(), new HashMap<>(), completionFutures, context.metadata, false, false);
performReadIndexChecks(completionFutures, context.readIndex);
}
use of io.pravega.test.common.ErrorInjector in project pravega by pravega.
the class DataFrameBuilderTests method testAppendWithCommitFailure.
/**
* Tests the case when the DataLog fails to commit random frames.
* Commit errors should affect only the LogItems that were part of it. It should cause data to be dropped
* and affected appends failed.
* This should be done both with large and with small LogItems. Large items span multiple frames.
*/
@Test
public void testAppendWithCommitFailure() throws Exception {
// Fail the commit to DurableDataLog after this many writes.
int failAt = 7;
List<TestLogItem> records = DataFrameTestHelpers.generateLogItems(RECORD_COUNT / 2, SMALL_RECORD_MIN_SIZE, SMALL_RECORD_MAX_SIZE, 0);
records.addAll(DataFrameTestHelpers.generateLogItems(RECORD_COUNT / 2, LARGE_RECORD_MIN_SIZE, LARGE_RECORD_MAX_SIZE, records.size()));
@Cleanup TestDurableDataLog dataLog = TestDurableDataLog.create(CONTAINER_ID, FRAME_SIZE, executorService());
dataLog.initialize(TIMEOUT);
val asyncInjector = new ErrorInjector<Exception>(count -> count >= failAt, IntentionalException::new);
dataLog.setAppendErrorInjectors(null, asyncInjector);
AtomicInteger failCount = new AtomicInteger();
List<DataFrameBuilder.CommitArgs> successCommits = Collections.synchronizedList(new ArrayList<>());
// Keep a reference to the builder (once created) so we can inspect its failure cause).
val builderRef = new AtomicReference<DataFrameBuilder>();
val attemptCount = new AtomicInteger();
BiConsumer<Throwable, DataFrameBuilder.CommitArgs> errorCallback = (ex, a) -> {
attemptCount.decrementAndGet();
// Check that we actually did want an exception to happen.
Throwable expectedError = Exceptions.unwrap(asyncInjector.getLastCycleException());
Assert.assertNotNull("An error happened but none was expected: " + ex, expectedError);
Throwable actualError = Exceptions.unwrap(ex);
if (!(ex instanceof ObjectClosedException)) {
// First failure.
Assert.assertEquals("Unexpected error occurred upon commit.", expectedError, actualError);
}
if (builderRef.get().failureCause() != null) {
checkFailureCause(builderRef.get(), ce -> ce instanceof IntentionalException);
}
failCount.incrementAndGet();
};
val args = new DataFrameBuilder.Args(ca -> attemptCount.incrementAndGet(), successCommits::add, errorCallback, executorService());
try (DataFrameBuilder<TestLogItem> b = new DataFrameBuilder<>(dataLog, SERIALIZER, args)) {
builderRef.set(b);
try {
for (val r : records) {
b.append(r);
}
b.close();
} catch (ObjectClosedException ex) {
await(() -> b.failureCause() != null, 20);
// If DataFrameBuilder is closed, then we must have had an exception thrown via the callback before.
Assert.assertNotNull("DataFrameBuilder is closed, yet failure cause is not set yet.", b.failureCause());
checkFailureCause(b, ce -> ce instanceof IntentionalException);
}
}
await(() -> successCommits.size() >= attemptCount.get(), 20);
// Read all committed items.
val reader = new DataFrameReader<TestLogItem>(dataLog, new TestSerializer(), CONTAINER_ID);
val readItems = new ArrayList<TestLogItem>();
DataFrameRecord<TestLogItem> readItem;
while ((readItem = reader.getNext()) != null) {
readItems.add(readItem.getItem());
}
val lastCommitSeqNo = successCommits.stream().mapToLong(DataFrameBuilder.CommitArgs::getLastFullySerializedSequenceNumber).max().orElse(-1);
val expectedItems = records.stream().filter(r -> r.getSequenceNumber() <= lastCommitSeqNo).collect(Collectors.toList());
AssertExtensions.assertListEquals("Items read back do not match expected values.", expectedItems, readItems, TestLogItem::equals);
// Read all entries in the Log and interpret them as DataFrames, then verify the records can be reconstructed.
val frames = dataLog.getAllEntries(ri -> DataFrame.read(ri.getPayload(), ri.getLength(), ri.getAddress()));
// Check the correctness of the commit callback.
AssertExtensions.assertGreaterThan("Not enough Data Frames were generated.", 1, frames.size());
Assert.assertEquals("Unexpected number of frames generated.", successCommits.size(), frames.size());
}
use of io.pravega.test.common.ErrorInjector in project pravega by pravega.
the class DurableLogTests method testAddWithDataLogWriterNotPrimaryException.
/**
* Tests the ability of the DurableLog to handle a DataLogWriterNotPrimaryException.
*/
@Test
public void testAddWithDataLogWriterNotPrimaryException() throws Exception {
int streamSegmentCount = 1;
int appendsPerStreamSegment = 1;
// Setup a DurableLog and start it.
@Cleanup ContainerSetup setup = new ContainerSetup(executorService());
@Cleanup DurableLog durableLog = setup.createDurableLog();
durableLog.startAsync().awaitRunning();
HashSet<Long> streamSegmentIds = createStreamSegmentsInMetadata(streamSegmentCount, setup.metadata);
List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
ErrorInjector<Exception> aSyncErrorInjector = new ErrorInjector<>(count -> true, () -> new CompletionException(new DataLogWriterNotPrimaryException("intentional")));
setup.dataLog.get().setAppendErrorInjectors(null, aSyncErrorInjector);
// Process all generated operations.
List<OperationWithCompletion> completionFutures = processOperations(operations, durableLog);
// Wait for all such operations to complete. We are expecting exceptions, so verify that we do.
AssertExtensions.assertThrows("No operations failed.", OperationWithCompletion.allOf(completionFutures)::join, ex -> ex instanceof IOException || ex instanceof DataLogWriterNotPrimaryException);
// Verify that the OperationProcessor automatically shuts down and that it has the right failure cause.
ServiceListeners.awaitShutdown(durableLog, TIMEOUT, false);
Assert.assertEquals("DurableLog is not in a failed state after fence-out detected.", Service.State.FAILED, durableLog.state());
Assert.assertTrue("DurableLog did not fail with the correct exception.", Exceptions.unwrap(durableLog.failureCause()) instanceof DataLogWriterNotPrimaryException);
}
Aggregations