use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.
the class BookKeeperLog method persistMetadata.
/**
* Persists the given metadata into ZooKeeper.
*
* @param metadata The LogMetadata to persist. At the end of this method, this metadata will have its Version updated
* to the one in ZooKeeper.
* @param create Whether to create (true) or update (false) the data in ZooKeeper.
* @throws DataLogWriterNotPrimaryException If the metadata update failed (if we were asked to create and the node
* already exists or if we had to update and there was a version mismatch).
* @throws DurableDataLogException If another kind of exception occurred.
*/
private void persistMetadata(LogMetadata metadata, boolean create) throws DurableDataLogException {
    try {
        byte[] serializedMetadata = LogMetadata.SERIALIZER.serialize(metadata).getCopy();
        if (create) {
            this.zkClient.create().creatingParentsIfNeeded().forPath(this.logNodePath, serializedMetadata);
            // Set version to 0, as that will match the ZNode's version.
            metadata.withUpdateVersion(0);
        } else {
            this.zkClient.setData().withVersion(metadata.getUpdateVersion()).forPath(this.logNodePath, serializedMetadata);
            // Increment the version to keep up with the ZNode's value (after writing it to ZK).
            metadata.withUpdateVersion(metadata.getUpdateVersion() + 1);
        }
    } catch (KeeperException.NodeExistsException | KeeperException.BadVersionException keeperEx) {
        // We were fenced out. Clean up and throw the appropriate exception.
        throw new DataLogWriterNotPrimaryException(String.format("Unable to acquire exclusive write lock for log (path = '%s%s').", this.zkClient.getNamespace(), this.logNodePath), keeperEx);
    } catch (Exception generalEx) {
        // General exception. Clean up and rethrow as the appropriate exception type.
        throw new DataLogInitializationException(String.format("Unable to update ZNode for path '%s%s'.", this.zkClient.getNamespace(), this.logNodePath), generalEx);
    }

    log.info("{} Metadata persisted ({}).", this.traceObjectId, metadata);
}
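The version handling above is standard ZooKeeper optimistic concurrency: a freshly created ZNode has version 0, each successful setData() bumps the version by 1, and a NodeExistsException or BadVersionException therefore means another writer owns the log. A minimal Curator sketch of the same pattern (VersionedZkWriter is a hypothetical helper for illustration, not part of Pravega):

import org.apache.curator.framework.CuratorFramework;
import org.apache.zookeeper.KeeperException;

final class VersionedZkWriter {
    private final CuratorFramework zkClient;
    private final String path;
    private int knownVersion = -1; // -1 == we have not created the node yet.

    VersionedZkWriter(CuratorFramework zkClient, String path) {
        this.zkClient = zkClient;
        this.path = path;
    }

    /**
     * Creates or conditionally updates the ZNode. Returns the new version;
     * throws IllegalStateException if another writer has fenced us out.
     */
    int write(byte[] data) throws Exception {
        try {
            if (this.knownVersion < 0) {
                this.zkClient.create().creatingParentsIfNeeded().forPath(this.path, data);
                this.knownVersion = 0; // A freshly created ZNode always has version 0.
            } else {
                // Succeeds only if the ZNode is still at the version we last observed.
                this.zkClient.setData().withVersion(this.knownVersion).forPath(this.path, data);
                this.knownVersion++;   // Each successful setData() increments the ZNode version.
            }
            return this.knownVersion;
        } catch (KeeperException.NodeExistsException | KeeperException.BadVersionException ex) {
            throw new IllegalStateException("Lost exclusive ownership of " + this.path, ex);
        }
    }
}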
use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.
the class BookKeeperLog method processPendingWrites.
/**
* Executes pending Writes to BookKeeper. This method is not thread-safe and should only be invoked as part of
* the Write Processor.
*
* @return True if no errors occurred; false if at least one write failed.
*/
private boolean processPendingWrites() {
    long traceId = LoggerHelpers.traceEnterWithContext(log, this.traceObjectId, "processPendingWrites");

    // Clean up the write queue of all finished writes (whether they completed successfully or failed for good).
    val cs = this.writes.removeFinishedWrites();
    if (cs.contains(WriteQueue.CleanupStatus.WriteFailed)) {
        // We encountered a failed write. As such, we must close immediately and not process anything else.
        // Closing will automatically cancel all pending writes.
        close();
        LoggerHelpers.traceLeave(log, this.traceObjectId, "processPendingWrites", traceId, WriteQueue.CleanupStatus.WriteFailed);
        return false;
    } else if (cs.contains(WriteQueue.CleanupStatus.QueueEmpty)) {
        // Queue is empty - nothing else to do.
        LoggerHelpers.traceLeave(log, this.traceObjectId, "processPendingWrites", traceId, WriteQueue.CleanupStatus.QueueEmpty);
        return true;
    }

    // Calculate how much estimated space is left in the current ledger.
    final long maxTotalSize = this.config.getBkLedgerMaxSize() - getWriteLedger().ledger.getLength();

    // Get the writes to execute from the queue.
    List<Write> toExecute = this.writes.getWritesToExecute(maxTotalSize);

    // Check whether any writes were assigned to closed ledgers; those need to be failed (if
    // appropriate) or retried.
    if (handleClosedLedgers(toExecute)) {
        // If any changes were made to the Writes in the list, re-do the search to get a more accurate list of Writes
        // to execute (since some may have changed Ledgers, more writes may not be eligible for execution).
        toExecute = this.writes.getWritesToExecute(maxTotalSize);
    }

    // Execute the writes.
    log.debug("{}: Executing {} writes.", this.traceObjectId, toExecute.size());
    for (int i = 0; i < toExecute.size(); i++) {
        Write w = toExecute.get(i);
        try {
            // Record the beginning of a new attempt.
            int attemptCount = w.beginAttempt();
            if (attemptCount > this.config.getMaxWriteAttempts()) {
                // Retried too many times.
                throw new RetriesExhaustedException(w.getFailureCause());
            }

            // Invoke the BookKeeper write.
            w.getWriteLedger().ledger.asyncAddEntry(w.data.array(), w.data.arrayOffset(), w.data.getLength(), this::addCallback, w);
        } catch (Throwable ex) {
            // Synchronous failure (or RetriesExhausted). Fail the current write.
            boolean isFinal = !isRetryable(ex);
            w.fail(ex, isFinal);

            // And fail all remaining writes as well.
            for (int j = i + 1; j < toExecute.size(); j++) {
                toExecute.get(j).fail(new DurableDataLogException("Previous write failed.", ex), isFinal);
            }

            LoggerHelpers.traceLeave(log, this.traceObjectId, "processPendingWrites", traceId, i);
            return false;
        }
    }

    // After every run where we did write, check if we need to trigger a rollover.
    this.rolloverProcessor.runAsync();
    LoggerHelpers.traceLeave(log, this.traceObjectId, "processPendingWrites", traceId, toExecute.size());
    return true;
}
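The asyncAddEntry() call above reports its outcome through BookKeeper's AsyncCallback.AddCallback interface, which this code binds via this::addCallback. The sketch below shows the general shape of such a callback (a hypothetical stand-in completing a per-write future, not Pravega's actual addCallback):

import java.util.concurrent.CompletableFuture;
import org.apache.bookkeeper.client.AsyncCallback;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.LedgerHandle;

final class SketchAddCallback implements AsyncCallback.AddCallback {
    @Override
    public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) {
        // 'ctx' is the opaque object passed as the last argument to asyncAddEntry();
        // in this sketch it is a future tracking a single write.
        @SuppressWarnings("unchecked")
        CompletableFuture<Long> writeResult = (CompletableFuture<Long>) ctx;
        if (rc == BKException.Code.OK) {
            // Entry is durably written; entryId is its address within the ledger.
            writeResult.complete(entryId);
        } else {
            // Any non-OK code (e.g., a closed ledger) fails the write; a retry loop
            // such as processPendingWrites() then decides whether to re-attempt it.
            writeResult.completeExceptionally(BKException.create(rc));
        }
    }
}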
use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.
the class DurableLogTests method testRecoveryFailures.
/**
* Tests the DurableLog recovery process in a scenario where failures occur during recovery
* (these may or may not be DataCorruptionExceptions).
*/
@Test
public void testRecoveryFailures() throws Exception {
    int streamSegmentCount = 50;
    int appendsPerStreamSegment = 20;

    // Fail DataLog reads after X reads.
    int failReadAfter = 2;

    // Set up a DurableLog and start it.
    AtomicReference<TestDurableDataLog> dataLog = new AtomicReference<>();
    @Cleanup
    TestDurableDataLogFactory dataLogFactory = new TestDurableDataLogFactory(new InMemoryDurableDataLogFactory(MAX_DATA_LOG_APPEND_SIZE, executorService()), dataLog::set);
    @Cleanup
    Storage storage = InMemoryStorageFactory.newStorage(executorService());
    storage.initialize(1);
    HashSet<Long> streamSegmentIds;
    List<OperationWithCompletion> completionFutures;

    // First DurableLog. We use this for generating data.
    UpdateableContainerMetadata metadata = new MetadataBuilder(CONTAINER_ID).build();
    @Cleanup
    InMemoryCacheFactory cacheFactory = new InMemoryCacheFactory();
    @Cleanup
    CacheManager cacheManager = new CacheManager(DEFAULT_READ_INDEX_CONFIG.getCachePolicy(), executorService());
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
         DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        durableLog.startAsync().awaitRunning();

        // Generate some test data (we need to do this after we started the DurableLog because, in the process of
        // recovery, it wipes away all existing metadata).
        streamSegmentIds = createStreamSegmentsWithOperations(streamSegmentCount, metadata, durableLog, storage);
        List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);

        // Process all generated operations and wait for them to complete.
        completionFutures = processOperations(operations, durableLog);
        OperationWithCompletion.allOf(completionFutures).join();

        // Stop the processor.
        durableLog.stopAsync().awaitTerminated();
    }

    // Recovery failure due to DataLog failures.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
         DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Inject an artificial error into the DataLog read after a few reads.
        ErrorInjector<Exception> readNextInjector = new ErrorInjector<>(count -> count > failReadAfter, () -> new DataLogNotAvailableException("intentional"));
        dataLog.get().setReadErrorInjectors(null, readNextInjector);

        // Verify that the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
        // multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows(
                "Recovery did not fail properly when expecting DurableDataLogException.",
                () -> durableLog.startAsync().awaitRunning(),
                ex -> {
                    if (ex instanceof IllegalStateException) {
                        ex = ex.getCause();
                    }

                    if (ex == null) {
                        try {
                            // We need this to enter a FAILED state so we can get its failure cause.
                            durableLog.awaitTerminated();
                        } catch (Exception ex2) {
                            ex = durableLog.failureCause();
                        }
                    }

                    ex = Exceptions.unwrap(ex);
                    return ex instanceof DataLogNotAvailableException && ex.getMessage().equals("intentional");
                });
    }

    // Recovery failure due to DataCorruptionException.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
         DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Reset error injectors to nothing.
        dataLog.get().setReadErrorInjectors(null, null);
        AtomicInteger readCounter = new AtomicInteger();
        dataLog.get().setReadInterceptor(readItem -> {
            if (readCounter.incrementAndGet() > failReadAfter && readItem.getLength() > DataFrame.MIN_ENTRY_LENGTH_NEEDED) {
                // Tamper with the payload and overwrite its contents with a DataFrame having a bogus
                // previous sequence number.
                DataFrame df = DataFrame.ofSize(readItem.getLength());
                df.seal();
                ArrayView serialization = df.getData();
                return new InjectedReadItem(serialization.getReader(), serialization.getLength(), readItem.getAddress());
            }

            return readItem;
        });

        // Verify that the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
        // multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows(
                "Recovery did not fail properly when expecting DataCorruptionException.",
                () -> durableLog.startAsync().awaitRunning(),
                ex -> {
                    if (ex instanceof IllegalStateException) {
                        ex = ex.getCause();
                    }

                    return Exceptions.unwrap(ex) instanceof DataCorruptionException;
                });

        // Verify that the underlying DurableDataLog has been disabled.
        val disabledDataLog = dataLogFactory.createDurableDataLog(CONTAINER_ID);
        AssertExtensions.assertThrows(
                "DurableDataLog has not been disabled following a recovery failure with DataCorruptionException.",
                () -> disabledDataLog.initialize(TIMEOUT),
                ex -> ex instanceof DataLogDisabledException);
    }
}
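Both failure scenarios above lean on the ErrorInjector test utility, which counts invocations and throws a supplied exception once a count predicate matches. A rough sketch of the idea (a hypothetical reimplementation for illustration, not Pravega's actual class):

import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.function.Supplier;

final class CountingErrorInjector<T extends Exception> {
    private final Predicate<Integer> countTrigger;
    private final Supplier<T> exceptionFactory;
    private final AtomicInteger count = new AtomicInteger();

    CountingErrorInjector(Predicate<Integer> countTrigger, Supplier<T> exceptionFactory) {
        this.countTrigger = countTrigger;
        this.exceptionFactory = exceptionFactory;
    }

    // Called from the intercepted code path; throws once the trigger condition holds.
    void throwIfNecessary() throws T {
        if (this.countTrigger.test(this.count.incrementAndGet())) {
            throw this.exceptionFactory.get();
        }
    }
}

Constructed as new CountingErrorInjector<>(count -> count > failReadAfter, () -> new DataLogNotAvailableException("intentional")), this mirrors the readNextInjector above: the first failReadAfter reads succeed and every read after that throws.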
use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.
the class DurableLogTests method testAddWithDataLogFailures.
/**
* Tests the ability of the DurableLog to process Operations when there are DataLog write failures.
*/
@Test
public void testAddWithDataLogFailures() throws Exception {
    int streamSegmentCount = 10;
    int appendsPerStreamSegment = 80;

    // Fail (asynchronously) after X DataFrame commits (to the DataLog).
    int failAsyncAfter = 5;

    // Set up a DurableLog and start it.
    @Cleanup
    ContainerSetup setup = new ContainerSetup(executorService());
    @Cleanup
    DurableLog durableLog = setup.createDurableLog();
    durableLog.startAsync().awaitRunning();
    Assert.assertNotNull("Internal error: could not grab a pointer to the created TestDurableDataLog.", setup.dataLog.get());

    // Generate some test data (we need to do this after we started the DurableLog because, in the process of
    // recovery, it wipes away all existing metadata).
    HashSet<Long> streamSegmentIds = createStreamSegmentsInMetadata(streamSegmentCount, setup.metadata);
    List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
    ErrorInjector<Exception> asyncErrorInjector = new ErrorInjector<>(count -> count >= failAsyncAfter, () -> new DurableDataLogException("intentional"));
    setup.dataLog.get().setAppendErrorInjectors(null, asyncErrorInjector);

    // Process all generated operations.
    List<OperationWithCompletion> completionFutures = processOperations(operations, durableLog);

    // Wait for all such operations to complete. We are expecting exceptions, so verify that we get them.
    AssertExtensions.assertThrows("No operations failed.", OperationWithCompletion.allOf(completionFutures)::join, super::isExpectedExceptionForNonDataCorruption);

    // Wait for the DurableLog to shut down with failure.
    ServiceListeners.awaitShutdown(durableLog, TIMEOUT, false);
    Assert.assertEquals("Expected the DurableLog to fail after a DurableDataLogException was encountered.", Service.State.FAILED, durableLog.state());

    // We can't really check the DurableLog or the DurableDataLog contents since they are both closed.
    performMetadataChecks(streamSegmentIds, new HashSet<>(), new HashMap<>(), completionFutures, setup.metadata, false, false);
    performReadIndexChecks(completionFutures, setup.readIndex);
}
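The shutdown check at the end relies on Guava's Service lifecycle: a service that fails internally transitions to State.FAILED, after which awaitTerminated() throws IllegalStateException and failureCause() exposes the original exception. A minimal sketch of that interaction (awaitFailure is a hypothetical helper; Pravega's ServiceListeners.awaitShutdown plays a similar role):

import com.google.common.util.concurrent.Service;

final class ServiceFailures {
    /**
     * Blocks until the given service terminates, then returns its failure cause,
     * e.g. the injected DurableDataLogException in the test above.
     */
    static Throwable awaitFailure(Service service) {
        try {
            service.awaitTerminated();
            throw new AssertionError("Service terminated cleanly; expected State.FAILED.");
        } catch (IllegalStateException ex) {
            // awaitTerminated() throws IllegalStateException when the service reaches State.FAILED.
            assert service.state() == Service.State.FAILED;
            return service.failureCause();
        }
    }
}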
use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.
the class DataFrameInputStream method fetchNextEntry.
private void fetchNextEntry() throws IOException {
    Exceptions.checkNotClosed(this.closed, this);
    if (this.prefetchedEntry) {
        assert this.currentEntry != null : "prefetchedEntry == true, but currentEntry == null";
        this.prefetchedEntry = false;
        return;
    }

    while (!this.closed) {
        DataFrame.DataFrameEntry nextEntry;
        try {
            nextEntry = getNextFrameEntry();
        } catch (DurableDataLogException ex) {
            throw new IOException(ex);
        }

        if (nextEntry == null) {
            // 'null' means no more entries (or frames). Since we are still in the while loop, it means we were in the
            // middle of an entry that hasn't been fully committed. We need to discard it and mark the end of the
            // DataFrameInputStream.
            close();
            throw new NoMoreRecordsException();
        }

        if (nextEntry.isFirstRecordEntry()) {
            // We encountered an entry marked 'First Record Entry' while another record was still in progress. This
            // happens if the previous record was committed only partially and the writer was unable to write the
            // rest of it. Discard the partial record and start over from this entry.
            if (this.currentEntry != null && !this.currentEntry.isLastRecordEntry()) {
                resetContext();
                setCurrentFrameEntry(nextEntry);
                this.prefetchedEntry = true;
                throw new RecordResetException();
            }
        } else if (this.currentEntry == null) {
            // This entry is not marked 'First Record Entry', yet we have no record in progress.
            // In this case, this entry is garbage, so it should be skipped.
            if (this.hasReadAnyData) {
                // However, that should only happen at the very beginning of a read. If we encounter something like
                // this in the middle of a log, we very likely have some sort of corruption.
                throw new SerializationException(String.format("Found a DataFrameRecord which is not marked as 'First Record Entry', but no active record is being read. DataFrameAddress = %s", nextEntry.getFrameAddress()));
            }

            continue;
        }

        setCurrentFrameEntry(nextEntry);
        break;
    }
}
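fetchNextEntry() enforces the framing invariant that one logical record spans one or more frame entries, with the first flagged isFirstRecordEntry() and the final one isLastRecordEntry(). The sketch below illustrates record reassembly under that invariant (RecordAssembler and its Entry interface are hypothetical simplifications of DataFrame.DataFrameEntry):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;

final class RecordAssembler {
    interface Entry {
        boolean isFirstRecordEntry();
        boolean isLastRecordEntry();
        byte[] payload();
    }

    // Concatenates entry payloads from 'first' through 'last' into one record.
    byte[] readRecord(Iterator<Entry> entries) throws IOException {
        ByteArrayOutputStream record = new ByteArrayOutputStream();
        boolean started = false;
        while (entries.hasNext()) {
            Entry e = entries.next();
            if (e.isFirstRecordEntry()) {
                // A new 'first' entry discards any partial record (cf. RecordResetException above).
                record.reset();
                started = true;
            } else if (!started) {
                // Garbage before the first complete record (e.g., left over after truncation): skip it.
                continue;
            }
            record.write(e.payload());
            if (e.isLastRecordEntry()) {
                return record.toByteArray();
            }
        }
        throw new IOException("Log ended in the middle of a record that was never fully committed.");
    }
}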