Use of io.pravega.segmentstore.storage.DataLogDisabledException in project pravega by pravega.
The class DurableLogTests, method testRecoveryFailures.
/**
 * Tests the DurableLog recovery process in a scenario where there are failures during the process
 * (these may or may not be DataCorruptionExceptions).
 */
@Test
public void testRecoveryFailures() throws Exception {
    int streamSegmentCount = 50;
    int appendsPerStreamSegment = 20;

    // Fail DataLog reads after X reads.
    int failReadAfter = 2;

    // Set up a DurableLog and start it.
    AtomicReference<TestDurableDataLog> dataLog = new AtomicReference<>();
    @Cleanup
    TestDurableDataLogFactory dataLogFactory = new TestDurableDataLogFactory(new InMemoryDurableDataLogFactory(MAX_DATA_LOG_APPEND_SIZE, executorService()), dataLog::set);
    @Cleanup
    Storage storage = InMemoryStorageFactory.newStorage(executorService());
    storage.initialize(1);

    Set<Long> streamSegmentIds;
    List<OperationWithCompletion> completionFutures;

    // First DurableLog. We use this for generating data.
    UpdateableContainerMetadata metadata = new MetadataBuilder(CONTAINER_ID).build();
    @Cleanup
    CacheStorage cacheStorage = new DirectMemoryCache(Integer.MAX_VALUE);
    @Cleanup
    CacheManager cacheManager = new CacheManager(CachePolicy.INFINITE, cacheStorage, executorService());
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, storage, cacheManager, executorService());
         DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        durableLog.startAsync().awaitRunning();

        // Generate some test data (we need to do this after we start the DurableLog because, as part of
        // recovery, it wipes away all existing metadata).
        streamSegmentIds = createStreamSegmentsWithOperations(streamSegmentCount, durableLog);
        List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);

        // Process all generated operations and wait for them to complete.
        completionFutures = processOperations(operations, durableLog);
        OperationWithCompletion.allOf(completionFutures).join();

        // Stop the processor.
        durableLog.stopAsync().awaitTerminated();
    }

    // Recovery failure due to DataLog failures.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, storage, cacheManager, executorService());
         DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Inject an artificial error into DataLog reads after a few successful reads.
        ErrorInjector<Exception> readNextInjector = new ErrorInjector<>(count -> count > failReadAfter, () -> new DataLogNotAvailableException("intentional"));
        dataLog.get().setReadErrorInjectors(null, readNextInjector);

        // Verify that the exception thrown from startAsync() is of the right kind. This exception will be
        // wrapped in multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows(
                "Recovery did not fail properly when expecting DurableDataLogException.",
                () -> durableLog.startAsync().awaitRunning(),
                ex -> {
                    if (ex instanceof IllegalStateException) {
                        ex = ex.getCause();
                    }
                    if (ex == null) {
                        try {
                            // We need the DurableLog to enter a FAILED state before we can get its failure cause.
                            durableLog.awaitTerminated();
                        } catch (Exception ex2) {
                            ex = durableLog.failureCause();
                        }
                    }
                    ex = Exceptions.unwrap(ex);
                    return ex instanceof DataLogNotAvailableException && ex.getMessage().equals("intentional");
                });
    }

    // Recovery failure due to DataCorruptionException.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, storage, cacheManager, executorService());
         DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Reset the error injectors.
        dataLog.get().setReadErrorInjectors(null, null);
        AtomicInteger readCounter = new AtomicInteger();
        dataLog.get().setReadInterceptor(readItem -> {
            if (readCounter.incrementAndGet() > failReadAfter && readItem.getLength() > DataFrame.MIN_ENTRY_LENGTH_NEEDED) {
                // Mangle the payload: overwrite its contents with a DataFrame that has a bogus
                // previous sequence number.
                DataFrame df = DataFrame.ofSize(readItem.getLength());
                df.seal();
                CompositeArrayView serialization = df.getData();
                return new InjectedReadItem(serialization.getReader(), serialization.getLength(), readItem.getAddress());
            }
            return readItem;
        });

        // Verify that the exception thrown from startAsync() is of the right kind. This exception will be
        // wrapped in multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows(
                "Recovery did not fail properly when expecting DataCorruptionException.",
                () -> durableLog.startAsync().awaitRunning(),
                ex -> {
                    if (ex instanceof IllegalStateException) {
                        ex = ex.getCause();
                    }
                    return Exceptions.unwrap(ex) instanceof DataCorruptionException;
                });

        // Verify that the underlying DurableDataLog has been disabled.
        val disabledDataLog = dataLogFactory.createDurableDataLog(CONTAINER_ID);
        AssertExtensions.assertThrows(
                "DurableDataLog has not been disabled following a recovery failure with DataCorruptionException.",
                () -> disabledDataLog.initialize(TIMEOUT),
                ex -> ex instanceof DataLogDisabledException);
    }
}
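
The last assertion above depends on DurableLog disabling its underlying DurableDataLog once recovery fails with a DataCorruptionException: any fresh handle to the same log must then refuse to initialize with a DataLogDisabledException. Below is a minimal sketch of that check, assuming only the DurableDataLog.initialize(Duration) contract documented in the BookKeeperLog example further down; the helper class and method names are illustrative, not part of Pravega.

import java.time.Duration;

import io.pravega.segmentstore.storage.DataLogDisabledException;
import io.pravega.segmentstore.storage.DurableDataLog;
import io.pravega.segmentstore.storage.DurableDataLogException;

final class DisabledLogProbe {
    // Hypothetical helper: returns true if the log refuses to initialize because it has been disabled.
    static boolean isDisabled(DurableDataLog log, Duration timeout) throws DurableDataLogException {
        try {
            log.initialize(timeout);
            return false; // initialization succeeded, so the log is enabled
        } catch (DataLogDisabledException ex) {
            return true; // the log is disabled; initialize() rejects the call up front
        }
    }
}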
Use of io.pravega.segmentstore.storage.DataLogDisabledException in project pravega by pravega.
The class BookKeeperLog, method initialize.
// endregion
// region DurableDataLog Implementation
/**
 * Open-Fences this BookKeeper log using the following protocol:
 * 1. Read the Log Metadata from ZooKeeper.
 * 2. Fence out at least the last 2 ledgers in the Ledger List.
 * 3. Create a new Ledger.
 * 3.1. If any of the steps so far fails, the process is interrupted at the point of failure, and no cleanup is attempted.
 * 4. Update the Log Metadata using compare-and-set (this update contains the new ledger and the new epoch).
 * 4.1. If the CAS fails on the metadata update, the newly created Ledger is deleted (this means we were fenced out
 * by some other instance) and no other update is performed.
 *
 * @param timeout Timeout for the operation.
 * @throws DataLogWriterNotPrimaryException If we were fenced out during this process.
 * @throws DataLogNotAvailableException     If BookKeeper or ZooKeeper are not available.
 * @throws DataLogDisabledException         If the BookKeeperLog is disabled. No fencing is attempted in this case.
 * @throws DataLogInitializationException   If a general initialization error occurred.
 * @throws DurableDataLogException          If another type of exception occurred.
 */
@Override
public void initialize(Duration timeout) throws DurableDataLogException {
    List<Long> ledgersToDelete;
    LogMetadata newMetadata;
    synchronized (this.lock) {
        Preconditions.checkState(this.writeLedger == null, "BookKeeperLog is already initialized.");
        assert this.logMetadata == null : "writeLedger == null but logMetadata != null";

        // Get metadata about the current state of the log, if any.
        LogMetadata oldMetadata = loadMetadata();
        if (oldMetadata != null) {
            if (!oldMetadata.isEnabled()) {
                throw new DataLogDisabledException("BookKeeperLog is disabled. Cannot initialize.");
            }

            // Fence out the existing ledgers.
            val emptyLedgerIds = Ledgers.fenceOut(oldMetadata.getLedgers(), this.bookKeeper, this.config, this.traceObjectId);

            // Update the metadata to reflect any newly found empty ledgers.
            oldMetadata = oldMetadata.updateLedgerStatus(emptyLedgerIds);
        }

        // Create a new ledger.
        WriteHandle newLedger = Ledgers.create(this.bookKeeper, this.config, this.logId);
        log.info("{}: Created Ledger {}.", this.traceObjectId, newLedger.getId());

        // Update the metadata with the new Ledger and persist it to ZooKeeper.
        newMetadata = updateMetadata(oldMetadata, newLedger, true);
        LedgerMetadata ledgerMetadata = newMetadata.getLedger(newLedger.getId());
        assert ledgerMetadata != null : "cannot find newly added ledger metadata";
        this.writeLedger = new WriteLedger(newLedger, ledgerMetadata);
        this.logMetadata = newMetadata;
        ledgersToDelete = getLedgerIdsToDelete(oldMetadata, newMetadata);
    }

    // Delete the orphaned ledgers from BookKeeper.
    ledgersToDelete.forEach(id -> {
        try {
            Ledgers.delete(id, this.bookKeeper);
            log.info("{}: Deleted orphan empty ledger {}.", this.traceObjectId, id);
        } catch (DurableDataLogException ex) {
            // A failure here has no effect on the initialization of the BookKeeperLog. In this case, the
            // (empty) Ledger will remain in BookKeeper until it is manually deleted by a cleanup tool.
            log.warn("{}: Unable to delete orphan empty ledger {}.", this.traceObjectId, id, ex);
        }
    });
    log.info("{}: Initialized (Epoch = {}, UpdateVersion = {}).", this.traceObjectId, newMetadata.getEpoch(), newMetadata.getUpdateVersion());
}
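
Because initialize() distinguishes a disabled log from a transient outage, a caller can treat DataLogDisabledException as terminal (the log must be re-enabled before it can be used) and DataLogNotAvailableException as retryable. The following is a hedged sketch of such a caller; the retry loop, its parameters, and the method name are illustrative assumptions, not Pravega's actual container-startup logic.

import java.time.Duration;

import io.pravega.segmentstore.storage.DataLogDisabledException;
import io.pravega.segmentstore.storage.DataLogNotAvailableException;
import io.pravega.segmentstore.storage.DurableDataLog;
import io.pravega.segmentstore.storage.DurableDataLogException;

final class LogInitializer {
    // Hypothetical wrapper: retries transient failures, but gives up immediately on a disabled log.
    static boolean tryInitialize(DurableDataLog log, Duration timeout, int maxAttempts) throws DurableDataLogException {
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                log.initialize(timeout);
                return true; // fencing succeeded; this instance is now the primary writer
            } catch (DataLogDisabledException ex) {
                return false; // disabled; retrying cannot help until the log is re-enabled
            } catch (DataLogNotAvailableException ex) {
                // BookKeeper or ZooKeeper is unreachable; this is transient, so try again.
            }
        }
        return false; // still unavailable after maxAttempts tries
    }
}

Note that other DurableDataLogException subtypes (for example, DataLogWriterNotPrimaryException when another instance fenced us out) propagate to the caller unchanged.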