Search in sources :

Example 1 with DurableDataLogException

use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.

the class BookKeeperLog method persistMetadata.

/**
 * Writes the serialized form of the given metadata to the log's ZooKeeper node and then aligns the
 * in-memory metadata version with the version of that node.
 *
 * @param metadata The LogMetadata to persist. On successful return, its update version is 0 (when created) or
 *                 its previous update version plus one (when updated).
 * @param create   True to create the node (including any missing parents); false to overwrite the existing
 *                 node's data, conditional on the node's version matching the metadata's update version.
 * @throws DataLogWriterNotPrimaryException If ZooKeeper reports that the node already exists (on create) or that
 *                                          the version did not match (on update).
 * @throws DataLogInitializationException   If any other exception occurred while serializing or writing.
 * @throws DurableDataLogException          Declared supertype of the failures listed above.
 */
private void persistMetadata(LogMetadata metadata, boolean create) throws DurableDataLogException {
    try {
        final byte[] payload = LogMetadata.SERIALIZER.serialize(metadata).getCopy();
        final int persistedVersion;
        if (!create) {
            // Conditional update: only succeeds if the node is still at the version we last saw.
            this.zkClient.setData()
                         .withVersion(metadata.getUpdateVersion())
                         .forPath(this.logNodePath, payload);
            // The write bumped the node's version by one; mirror that locally.
            persistedVersion = metadata.getUpdateVersion() + 1;
        } else {
            this.zkClient.create()
                         .creatingParentsIfNeeded()
                         .forPath(this.logNodePath, payload);
            // A freshly created node starts at version 0.
            persistedVersion = 0;
        }

        metadata.withUpdateVersion(persistedVersion);
    } catch (KeeperException.NodeExistsException | KeeperException.BadVersionException fencedEx) {
        // Somebody else created or modified the node: we have been fenced out.
        String message = String.format("Unable to acquire exclusive write lock for log (path = '%s%s').",
                this.zkClient.getNamespace(), this.logNodePath);
        throw new DataLogWriterNotPrimaryException(message, fencedEx);
    } catch (Exception otherEx) {
        // Anything else is reported as an initialization failure, keeping the original cause.
        String message = String.format("Unable to update ZNode for path '%s%s'.",
                this.zkClient.getNamespace(), this.logNodePath);
        throw new DataLogInitializationException(message, otherEx);
    }

    log.info("{} Metadata persisted ({}).", this.traceObjectId, metadata);
}
Also used : DataLogWriterNotPrimaryException(io.pravega.segmentstore.storage.DataLogWriterNotPrimaryException) ObjectClosedException(io.pravega.common.ObjectClosedException) DataLogNotAvailableException(io.pravega.segmentstore.storage.DataLogNotAvailableException) DataLogInitializationException(io.pravega.segmentstore.storage.DataLogInitializationException) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException) KeeperException(org.apache.zookeeper.KeeperException) CancellationException(java.util.concurrent.CancellationException) DataLogWriterNotPrimaryException(io.pravega.segmentstore.storage.DataLogWriterNotPrimaryException) RetriesExhaustedException(io.pravega.common.util.RetriesExhaustedException) BKException(org.apache.bookkeeper.client.BKException) WriteFailureException(io.pravega.segmentstore.storage.WriteFailureException) WriteTooLongException(io.pravega.segmentstore.storage.WriteTooLongException) DataLogInitializationException(io.pravega.segmentstore.storage.DataLogInitializationException)

Example 2 with DurableDataLogException

use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.

the class BookKeeperLog method processPendingWrites.

/**
 * Executes pending Writes to BookKeeper. This method is not thread safe and should only be invoked as part of
 * the Write Processor.
 *
 * @return True if no errors were encountered, false if at least one write failed.
 */
private boolean processPendingWrites() {
    final String methodName = "processPendingWrites";
    long traceId = LoggerHelpers.traceEnterWithContext(log, this.traceObjectId, methodName);

    // Drop every write that has finished (either successfully or failed for good) from the queue.
    val cleanupResult = this.writes.removeFinishedWrites();
    if (cleanupResult.contains(WriteQueue.CleanupStatus.WriteFailed)) {
        // A write has failed: close right away without processing anything else.
        // Closing will automatically cancel all pending writes.
        close();
        LoggerHelpers.traceLeave(log, this.traceObjectId, methodName, traceId, WriteQueue.CleanupStatus.WriteFailed);
        return false;
    }

    if (cleanupResult.contains(WriteQueue.CleanupStatus.QueueEmpty)) {
        // Nothing is queued, so there is nothing to execute.
        LoggerHelpers.traceLeave(log, this.traceObjectId, methodName, traceId, WriteQueue.CleanupStatus.QueueEmpty);
        return true;
    }

    // Estimate how much room is left in the current ledger and pick the writes that fit.
    final long remainingCapacity = this.config.getBkLedgerMaxSize() - getWriteLedger().ledger.getLength();
    List<Write> batch = this.writes.getWritesToExecute(remainingCapacity);

    // Give handleClosedLedgers a chance to adjust the selected writes. If it reports that it changed anything,
    // select again, since writes may have changed Ledgers and the set of eligible writes may differ.
    if (handleClosedLedgers(batch)) {
        batch = this.writes.getWritesToExecute(remainingCapacity);
    }

    log.debug("{}: Executing {} writes.", this.traceObjectId, batch.size());
    for (int i = 0; i < batch.size(); i++) {
        Write current = batch.get(i);
        try {
            // Register a new attempt; give up if the configured maximum has been exceeded.
            if (current.beginAttempt() > this.config.getMaxWriteAttempts()) {
                throw new RetriesExhaustedException(current.getFailureCause());
            }

            // Hand the payload to BookKeeper; completion is reported through addCallback.
            current.getWriteLedger().ledger.asyncAddEntry(
                    current.data.array(), current.data.arrayOffset(), current.data.getLength(), this::addCallback, current);
        } catch (Throwable ex) {
            // Synchronous failure (or RetriesExhausted): fail this write ...
            boolean isFinal = !isRetryable(ex);
            current.fail(ex, isFinal);

            // ... and every write after it in this batch.
            for (Write remaining : batch.subList(i + 1, batch.size())) {
                remaining.fail(new DurableDataLogException("Previous write failed.", ex), isFinal);
            }

            LoggerHelpers.traceLeave(log, this.traceObjectId, methodName, traceId, i);
            return false;
        }
    }

    // After every run where we did write, check whether a rollover needs to be triggered.
    this.rolloverProcessor.runAsync();
    LoggerHelpers.traceLeave(log, this.traceObjectId, methodName, traceId, batch.size());
    return true;
}
Also used : lombok.val(lombok.val) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) RetriesExhaustedException(io.pravega.common.util.RetriesExhaustedException)

Example 3 with DurableDataLogException

use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.

the class DurableLogTests method testRecoveryFailures.

/**
 * Tests the DurableLog recovery process in a scenario when there are failures during the process
 * (these may or may not be DataCorruptionExceptions).
 *
 * <p>The test runs in three phases, each using its own DurableLog instance built over the same
 * TestDurableDataLogFactory and Storage:
 * <ol>
 * <li>Start a DurableLog, generate and process operations, then stop it.</li>
 * <li>Start a new DurableLog with a read error injector that throws a DataLogNotAvailableException
 * ("intentional") and verify that startup fails with exactly that exception.</li>
 * <li>Start a new DurableLog with a read interceptor that substitutes read items with a sealed DataFrame
 * created on the spot, verify that startup fails with a DataCorruptionException, and verify that initializing a
 * newly created DurableDataLog then fails with a DataLogDisabledException.</li>
 * </ol>
 *
 * @throws Exception If any step throws an exception that the assertions do not expect.
 */
@Test
public void testRecoveryFailures() throws Exception {
    int streamSegmentCount = 50;
    int appendsPerStreamSegment = 20;
    // Fail DataLog reads after X reads.
    int failReadAfter = 2;
    // Setup a DurableLog and start it.
    // The factory callback (dataLog::set) captures each TestDurableDataLog the factory hands out, so the test
    // can attach error injectors/interceptors to it later.
    AtomicReference<TestDurableDataLog> dataLog = new AtomicReference<>();
    @Cleanup TestDurableDataLogFactory dataLogFactory = new TestDurableDataLogFactory(new InMemoryDurableDataLogFactory(MAX_DATA_LOG_APPEND_SIZE, executorService()), dataLog::set);
    @Cleanup Storage storage = InMemoryStorageFactory.newStorage(executorService());
    storage.initialize(1);
    HashSet<Long> streamSegmentIds;
    List<OperationWithCompletion> completionFutures;
    // First DurableLog. We use this for generating data.
    UpdateableContainerMetadata metadata = new MetadataBuilder(CONTAINER_ID).build();
    @Cleanup InMemoryCacheFactory cacheFactory = new InMemoryCacheFactory();
    @Cleanup CacheManager cacheManager = new CacheManager(DEFAULT_READ_INDEX_CONFIG.getCachePolicy(), executorService());
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
        DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        durableLog.startAsync().awaitRunning();
        // Generate some test data (we need to do this after we started the DurableLog because in the process of
        // recovery, it wipes away all existing metadata).
        streamSegmentIds = createStreamSegmentsWithOperations(streamSegmentCount, metadata, durableLog, storage);
        List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
        // Process all generated operations and wait for them to complete
        completionFutures = processOperations(operations, durableLog);
        OperationWithCompletion.allOf(completionFutures).join();
        // Stop the processor.
        durableLog.stopAsync().awaitTerminated();
    }
    // Recovery failure due to DataLog Failures.
    // Start from fresh metadata and clear the captured data log reference before creating the second DurableLog.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
        DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Inject some artificial error into the DataLogRead after a few reads.
        // The injector fires once its count exceeds failReadAfter.
        ErrorInjector<Exception> readNextInjector = new ErrorInjector<>(count -> count > failReadAfter, () -> new DataLogNotAvailableException("intentional"));
        dataLog.get().setReadErrorInjectors(null, readNextInjector);
        // Verify the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
        // multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows("Recovery did not fail properly when expecting DurableDataLogException.", () -> durableLog.startAsync().awaitRunning(), ex -> {
            // awaitRunning() may surface the failure as an IllegalStateException; look at its cause instead.
            if (ex instanceof IllegalStateException) {
                ex = ex.getCause();
            }
            if (ex == null) {
                // No cause was attached: fall back to the DurableLog's own failure cause.
                try {
                    // We need this to enter a FAILED state to get its failure cause.
                    durableLog.awaitTerminated();
                } catch (Exception ex2) {
                    ex = durableLog.failureCause();
                }
            }
            ex = Exceptions.unwrap(ex);
            // Only the injected exception (matched by type and message) counts as the expected failure.
            return ex instanceof DataLogNotAvailableException && ex.getMessage().equals("intentional");
        });
    }
    // Recovery failure due to DataCorruptionException.
    // Again start from fresh metadata and a cleared data log reference for the third DurableLog.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, cacheFactory, storage, cacheManager, executorService());
        DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Reset error injectors to nothing.
        dataLog.get().setReadErrorInjectors(null, null);
        // Counts how many items have passed through the read interceptor so far.
        AtomicInteger readCounter = new AtomicInteger();
        dataLog.get().setReadInterceptor(readItem -> {
            // Only tamper with items after the first failReadAfter reads, and only with items longer than
            // DataFrame.MIN_ENTRY_LENGTH_NEEDED.
            if (readCounter.incrementAndGet() > failReadAfter && readItem.getLength() > DataFrame.MIN_ENTRY_LENGTH_NEEDED) {
                // Mangle with the payload and overwrite its contents with a DataFrame having a bogus
                // previous sequence number.
                // The replacement is a newly created, sealed DataFrame of the same length, served at the
                // original item's address.
                DataFrame df = DataFrame.ofSize(readItem.getLength());
                df.seal();
                ArrayView serialization = df.getData();
                return new InjectedReadItem(serialization.getReader(), serialization.getLength(), readItem.getAddress());
            }
            // Leave all other items untouched.
            return readItem;
        });
        // Verify the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
        // multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows("Recovery did not fail properly when expecting DataCorruptionException.", () -> durableLog.startAsync().awaitRunning(), ex -> {
            // As above, look through a wrapping IllegalStateException before unwrapping further.
            if (ex instanceof IllegalStateException) {
                ex = ex.getCause();
            }
            return Exceptions.unwrap(ex) instanceof DataCorruptionException;
        });
        // Verify that the underlying DurableDataLog has been disabled.
        // A DurableDataLog newly created by the same factory for this container must refuse to initialize.
        val disabledDataLog = dataLogFactory.createDurableDataLog(CONTAINER_ID);
        AssertExtensions.assertThrows("DurableDataLog has not been disabled following a recovery failure with DataCorruptionException.", () -> disabledDataLog.initialize(TIMEOUT), ex -> ex instanceof DataLogDisabledException);
    }
}
Also used : TestDurableDataLog(io.pravega.segmentstore.server.TestDurableDataLog) UpdateableContainerMetadata(io.pravega.segmentstore.server.UpdateableContainerMetadata) StorageMetadataCheckpointOperation(io.pravega.segmentstore.server.logs.operations.StorageMetadataCheckpointOperation) ProbeOperation(io.pravega.segmentstore.server.logs.operations.ProbeOperation) Operation(io.pravega.segmentstore.server.logs.operations.Operation) StreamSegmentMapOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentMapOperation) MetadataCheckpointOperation(io.pravega.segmentstore.server.logs.operations.MetadataCheckpointOperation) StorageOperation(io.pravega.segmentstore.server.logs.operations.StorageOperation) StreamSegmentAppendOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentAppendOperation) Cleanup(lombok.Cleanup) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException) CacheManager(io.pravega.segmentstore.server.reading.CacheManager) lombok.val(lombok.val) MetadataBuilder(io.pravega.segmentstore.server.MetadataBuilder) ErrorInjector(io.pravega.test.common.ErrorInjector) ContainerReadIndex(io.pravega.segmentstore.server.reading.ContainerReadIndex) ReadIndex(io.pravega.segmentstore.server.ReadIndex) InMemoryCacheFactory(io.pravega.segmentstore.storage.mocks.InMemoryCacheFactory) AtomicReference(java.util.concurrent.atomic.AtomicReference) InMemoryDurableDataLogFactory(io.pravega.segmentstore.storage.mocks.InMemoryDurableDataLogFactory) ContainerReadIndex(io.pravega.segmentstore.server.reading.ContainerReadIndex) StreamSegmentNotExistsException(io.pravega.segmentstore.contracts.StreamSegmentNotExistsException) TimeoutException(java.util.concurrent.TimeoutException) DataLogNotAvailableException(io.pravega.segmentstore.storage.DataLogNotAvailableException) StreamSegmentSealedException(io.pravega.segmentstore.contracts.StreamSegmentSealedException) 
ContainerOfflineException(io.pravega.segmentstore.server.ContainerOfflineException) CompletionException(java.util.concurrent.CompletionException) DataLogWriterNotPrimaryException(io.pravega.segmentstore.storage.DataLogWriterNotPrimaryException) StreamSegmentException(io.pravega.segmentstore.contracts.StreamSegmentException) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException) IntentionalException(io.pravega.test.common.IntentionalException) IOException(java.io.IOException) DataCorruptionException(io.pravega.segmentstore.server.DataCorruptionException) DataLogNotAvailableException(io.pravega.segmentstore.storage.DataLogNotAvailableException) Storage(io.pravega.segmentstore.storage.Storage) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestDurableDataLogFactory(io.pravega.segmentstore.server.TestDurableDataLogFactory) ArrayView(io.pravega.common.util.ArrayView) DataCorruptionException(io.pravega.segmentstore.server.DataCorruptionException) Test(org.junit.Test)

Example 4 with DurableDataLogException

use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.

the class DurableLogTests method testAddWithDataLogFailures.

/**
 * Verifies how the DurableLog processes Operations when appends to the underlying DataLog start failing:
 * the pending operations must fail with an expected (non-data-corruption) exception and the DurableLog
 * must end up in the FAILED state.
 */
@Test
public void testAddWithDataLogFailures() throws Exception {
    final int segmentCount = 10;
    final int appendsPerSegment = 80;
    // Threshold (in DataFrame commits to the DataLog) at which appends begin failing asynchronously.
    final int asyncFailureThreshold = 5;

    // Bring up a DurableLog and make sure the test has a handle on its TestDurableDataLog.
    @Cleanup ContainerSetup setup = new ContainerSetup(executorService());
    @Cleanup DurableLog durableLog = setup.createDurableLog();
    durableLog.startAsync().awaitRunning();
    Assert.assertNotNull("Internal error: could not grab a pointer to the created TestDurableDataLog.", setup.dataLog.get());

    // Test data must be generated only after the DurableLog has started, because recovery wipes away all
    // existing metadata.
    HashSet<Long> segmentIds = createStreamSegmentsInMetadata(segmentCount, setup.metadata);
    List<Operation> generatedOperations = generateOperations(
            segmentIds, new HashMap<>(), appendsPerSegment, METADATA_CHECKPOINT_EVERY, false, false);

    // Arm the asynchronous append failure.
    ErrorInjector<Exception> asyncAppendInjector = new ErrorInjector<>(
            commitCount -> commitCount >= asyncFailureThreshold,
            () -> new DurableDataLogException("intentional"));
    setup.dataLog.get().setAppendErrorInjectors(null, asyncAppendInjector);

    // Submit everything, then wait for completion; at least one operation is expected to fail.
    List<OperationWithCompletion> submitted = processOperations(generatedOperations, durableLog);
    AssertExtensions.assertThrows(
            "No operations failed.",
            OperationWithCompletion.allOf(submitted)::join,
            super::isExpectedExceptionForNonDataCorruption);

    // The DurableLog is expected to shut down in a failed state.
    ServiceListeners.awaitShutdown(durableLog, TIMEOUT, false);
    Assert.assertEquals("Expected the DurableLog to fail after DurableDataLogException encountered.", Service.State.FAILED, durableLog.state());

    // The DurableLog and DurableDataLog are both closed at this point, so only the metadata and the read index
    // can be inspected.
    performMetadataChecks(segmentIds, new HashSet<>(), new HashMap<>(), submitted, setup.metadata, false, false);
    performReadIndexChecks(submitted, setup.readIndex);
}
Also used : ErrorInjector(io.pravega.test.common.ErrorInjector) StorageMetadataCheckpointOperation(io.pravega.segmentstore.server.logs.operations.StorageMetadataCheckpointOperation) ProbeOperation(io.pravega.segmentstore.server.logs.operations.ProbeOperation) Operation(io.pravega.segmentstore.server.logs.operations.Operation) StreamSegmentMapOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentMapOperation) MetadataCheckpointOperation(io.pravega.segmentstore.server.logs.operations.MetadataCheckpointOperation) StorageOperation(io.pravega.segmentstore.server.logs.operations.StorageOperation) StreamSegmentAppendOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentAppendOperation) Cleanup(lombok.Cleanup) StreamSegmentNotExistsException(io.pravega.segmentstore.contracts.StreamSegmentNotExistsException) TimeoutException(java.util.concurrent.TimeoutException) DataLogNotAvailableException(io.pravega.segmentstore.storage.DataLogNotAvailableException) StreamSegmentSealedException(io.pravega.segmentstore.contracts.StreamSegmentSealedException) ContainerOfflineException(io.pravega.segmentstore.server.ContainerOfflineException) CompletionException(java.util.concurrent.CompletionException) DataLogWriterNotPrimaryException(io.pravega.segmentstore.storage.DataLogWriterNotPrimaryException) StreamSegmentException(io.pravega.segmentstore.contracts.StreamSegmentException) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException) IntentionalException(io.pravega.test.common.IntentionalException) IOException(java.io.IOException) DataCorruptionException(io.pravega.segmentstore.server.DataCorruptionException) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) Test(org.junit.Test)

Example 5 with DurableDataLogException

use of io.pravega.segmentstore.storage.DurableDataLogException in project pravega by pravega.

the class DataFrameInputStream method fetchNextEntry.

/**
 * Advances to the next DataFrameEntry that should be read and makes it the current entry.
 *
 * If an entry was prefetched by a previous call, that entry is used as-is and only the prefetch flag is cleared.
 * Otherwise entries are pulled via getNextFrameEntry() until one is accepted or an exception ends the search.
 *
 * @throws IOException If getNextFrameEntry() failed with a DurableDataLogException (wrapped as the cause). The
 *                     NoMoreRecordsException, RecordResetException and SerializationException thrown below
 *                     are presumably IOException subtypes as well - confirm against their declarations.
 */
private void fetchNextEntry() throws IOException {
    Exceptions.checkNotClosed(this.closed, this);

    // A previous call already positioned us on the entry to use; consume the flag and stop here.
    if (this.prefetchedEntry) {
        assert this.currentEntry != null : "prefetchEntry==true, but currentEntry==null";
        this.prefetchedEntry = false;
        return;
    }

    while (!this.closed) {
        final DataFrame.DataFrameEntry candidate;
        try {
            candidate = getNextFrameEntry();
        } catch (DurableDataLogException ex) {
            throw new IOException(ex);
        }

        if (candidate == null) {
            // 'null' means there are no more entries (or frames). Reaching this point inside the loop means we
            // were in the middle of an entry that was never fully committed: discard it and mark the end of the
            // DataFrameInputStream.
            close();
            throw new NoMoreRecordsException();
        }

        if (candidate.isFirstRecordEntry()) {
            // A new record begins here. If the record being read never reached its last entry, abandon it,
            // keep this entry as the prefetched start of the next record, and signal the reset.
            boolean previousRecordIncomplete = !(this.currentEntry == null || this.currentEntry.isLastRecordEntry());
            if (previousRecordIncomplete) {
                resetContext();
                setCurrentFrameEntry(candidate);
                this.prefetchedEntry = true;
                throw new RecordResetException();
            }
        } else if (this.currentEntry == null) {
            // This entry continues a record, but no record is being read. That is tolerated (the entry is
            // skipped) only while no data has been read yet; afterwards it is reported as an error.
            if (this.hasReadAnyData) {
                throw new SerializationException(String.format("Found a DataFrameRecord which is not marked as " + "'First Record Entry', but no active record is being read. DataFrameAddress = %s", candidate.getFrameAddress()));
            }

            continue;
        }

        // Entry accepted: make it current and finish.
        setCurrentFrameEntry(candidate);
        return;
    }
}
Also used : DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) SerializationException(io.pravega.common.io.SerializationException) IOException(java.io.IOException)

Aggregations

DurableDataLogException (io.pravega.segmentstore.storage.DurableDataLogException)13 lombok.val (lombok.val)8 DataLogDisabledException (io.pravega.segmentstore.storage.DataLogDisabledException)5 DataLogWriterNotPrimaryException (io.pravega.segmentstore.storage.DataLogWriterNotPrimaryException)5 IOException (java.io.IOException)5 RetriesExhaustedException (io.pravega.common.util.RetriesExhaustedException)4 DataLogNotAvailableException (io.pravega.segmentstore.storage.DataLogNotAvailableException)4 CancellationException (java.util.concurrent.CancellationException)4 ObjectClosedException (io.pravega.common.ObjectClosedException)3 StreamSegmentException (io.pravega.segmentstore.contracts.StreamSegmentException)3 StreamSegmentNotExistsException (io.pravega.segmentstore.contracts.StreamSegmentNotExistsException)3 StreamSegmentSealedException (io.pravega.segmentstore.contracts.StreamSegmentSealedException)3 Operation (io.pravega.segmentstore.server.logs.operations.Operation)3 ProbeOperation (io.pravega.segmentstore.server.logs.operations.ProbeOperation)3 StorageOperation (io.pravega.segmentstore.server.logs.operations.StorageOperation)3 StreamSegmentAppendOperation (io.pravega.segmentstore.server.logs.operations.StreamSegmentAppendOperation)3 DurableDataLog (io.pravega.segmentstore.storage.DurableDataLog)3 ErrorInjector (io.pravega.test.common.ErrorInjector)3 IntentionalException (io.pravega.test.common.IntentionalException)3 CompletionException (java.util.concurrent.CompletionException)3