Search in sources:

Example 1 with DataLogDisabledException

use of io.pravega.segmentstore.storage.DataLogDisabledException in project pravega by pravega.

The class DurableLogTests, method testRecoveryFailures.

/**
 * Tests the DurableLog recovery process in a scenario when there are failures during the process
 * (these may or may not be DataCorruptionExceptions).
 *
 * <p>The test runs in three phases that share the same data log factory, Storage and cache:
 * <ol>
 * <li>Start a DurableLog, process a batch of generated operations through it, then stop it, so that there is
 * data available for a subsequent recovery.</li>
 * <li>Start a second DurableLog with a read error injector installed on the data log and verify that startup
 * fails with the injected DataLogNotAvailableException.</li>
 * <li>Start a third DurableLog with a read interceptor that replaces some read items with freshly created,
 * sealed DataFrames, verify that startup fails with a DataCorruptionException, and then verify that
 * initializing a new data log obtained from the same factory fails with a DataLogDisabledException.</li>
 * </ol>
 *
 * @throws Exception If any part of the setup or verification fails unexpectedly.
 */
@Test
public void testRecoveryFailures() throws Exception {
    int streamSegmentCount = 50;
    int appendsPerStreamSegment = 20;
    // Fail DataLog reads after X reads.
    int failReadAfter = 2;
    // Setup a DurableLog and start it.
    // The factory is handed dataLog::set as a callback, which is how this test gets hold of the data log instance.
    // NOTE(review): presumably the callback is invoked for each TestDurableDataLog the factory creates - confirm
    // against TestDurableDataLogFactory.
    AtomicReference<TestDurableDataLog> dataLog = new AtomicReference<>();
    @Cleanup TestDurableDataLogFactory dataLogFactory = new TestDurableDataLogFactory(new InMemoryDurableDataLogFactory(MAX_DATA_LOG_APPEND_SIZE, executorService()), dataLog::set);
    @Cleanup Storage storage = InMemoryStorageFactory.newStorage(executorService());
    storage.initialize(1);
    // Declared outside the try block below; they are assigned inside it.
    Set<Long> streamSegmentIds;
    List<OperationWithCompletion> completionFutures;
    // First DurableLog. We use this for generating data.
    UpdateableContainerMetadata metadata = new MetadataBuilder(CONTAINER_ID).build();
    @Cleanup CacheStorage cacheStorage = new DirectMemoryCache(Integer.MAX_VALUE);
    @Cleanup CacheManager cacheManager = new CacheManager(CachePolicy.INFINITE, cacheStorage, executorService());
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, storage, cacheManager, executorService());
        DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        durableLog.startAsync().awaitRunning();
        // Generate some test data (we need to do this after we started the DurableLog because in the process of
        // recovery, it wipes away all existing metadata).
        streamSegmentIds = createStreamSegmentsWithOperations(streamSegmentCount, durableLog);
        List<Operation> operations = generateOperations(streamSegmentIds, new HashMap<>(), appendsPerStreamSegment, METADATA_CHECKPOINT_EVERY, false, false);
        // Process all generated operations and wait for them to complete
        completionFutures = processOperations(operations, durableLog);
        OperationWithCompletion.allOf(completionFutures).join();
        // Stop the processor.
        durableLog.stopAsync().awaitTerminated();
    }
    // Recovery failure due to DataLog Failures.
    // Start from fresh metadata and clear the captured data log reference, so that dataLog.get() below can only
    // return an instance that was set after this point.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, storage, cacheManager, executorService());
        DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Inject some artificial error into the DataLogRead after a few reads: the injector's condition holds once
        // its count exceeds failReadAfter, and it supplies a DataLogNotAvailableException with message "intentional".
        ErrorInjector<Exception> readNextInjector = new ErrorInjector<>(count -> count > failReadAfter, () -> new DataLogNotAvailableException("intentional"));
        // NOTE(review): dataLog.get() must be non-null here, so constructing the DurableLog above presumably made
        // the factory create (and report) a data log - confirm.
        dataLog.get().setReadErrorInjectors(null, readNextInjector);
        // Verify the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
        // multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows("Recovery did not fail properly when expecting DurableDataLogException.", () -> durableLog.startAsync().awaitRunning(), ex -> {
            // Look past an IllegalStateException wrapper (if any) to its cause.
            if (ex instanceof IllegalStateException) {
                ex = ex.getCause();
            }
            // Nothing to inspect yet: fall back to the failure cause reported by the DurableLog itself.
            if (ex == null) {
                try {
                    // We need this to enter a FAILED state to get its failure cause.
                    durableLog.awaitTerminated();
                } catch (Exception ex2) {
                    ex = durableLog.failureCause();
                }
            }
            ex = Exceptions.unwrap(ex);
            // NOTE(review): getMessage() is dereferenced directly; a DataLogNotAvailableException with a null
            // message would throw here instead of making the check return false.
            return ex instanceof DataLogNotAvailableException && ex.getMessage().equals("intentional");
        });
    }
    // Recovery failure due to DataCorruptionException.
    // Same reset as for the previous phase: fresh metadata and a cleared data log reference.
    metadata = new MetadataBuilder(CONTAINER_ID).build();
    dataLog.set(null);
    try (ReadIndex readIndex = new ContainerReadIndex(DEFAULT_READ_INDEX_CONFIG, metadata, storage, cacheManager, executorService());
        DurableLog durableLog = new DurableLog(ContainerSetup.defaultDurableLogConfig(), metadata, dataLogFactory, readIndex, executorService())) {
        // Reset error injectors to nothing.
        dataLog.get().setReadErrorInjectors(null, null);
        // Counts the items seen by the interceptor below.
        AtomicInteger readCounter = new AtomicInteger();
        dataLog.get().setReadInterceptor(readItem -> {
            // Only items seen after the first failReadAfter ones, and whose length exceeds
            // DataFrame.MIN_ENTRY_LENGTH_NEEDED, are replaced; everything else is passed through unchanged.
            if (readCounter.incrementAndGet() > failReadAfter && readItem.getLength() > DataFrame.MIN_ENTRY_LENGTH_NEEDED) {
                // Mangle with the payload and overwrite its contents with a DataFrame having a bogus
                // previous sequence number.
                // The replacement keeps the original item's length and address; only its contents differ.
                DataFrame df = DataFrame.ofSize(readItem.getLength());
                df.seal();
                CompositeArrayView serialization = df.getData();
                return new InjectedReadItem(serialization.getReader(), serialization.getLength(), readItem.getAddress());
            }
            return readItem;
        });
        // Verify the exception thrown from startAsync() is of the right kind. This exception will be wrapped in
        // multiple layers, so we need to dig deep into it.
        AssertExtensions.assertThrows("Recovery did not fail properly when expecting DataCorruptionException.", () -> durableLog.startAsync().awaitRunning(), ex -> {
            // Look past an IllegalStateException wrapper (if any) to its cause.
            if (ex instanceof IllegalStateException) {
                ex = ex.getCause();
            }
            return Exceptions.unwrap(ex) instanceof DataCorruptionException;
        });
        // Verify that the underlying DurableDataLog has been disabled: a new data log obtained from the same
        // factory for this container must refuse to initialize.
        val disabledDataLog = dataLogFactory.createDurableDataLog(CONTAINER_ID);
        AssertExtensions.assertThrows("DurableDataLog has not been disabled following a recovery failure with DataCorruptionException.", () -> disabledDataLog.initialize(TIMEOUT), ex -> ex instanceof DataLogDisabledException);
    }
}
Also used : DirectMemoryCache(io.pravega.segmentstore.storage.cache.DirectMemoryCache) TestDurableDataLog(io.pravega.segmentstore.server.TestDurableDataLog) CompositeArrayView(io.pravega.common.util.CompositeArrayView) UpdateableContainerMetadata(io.pravega.segmentstore.server.UpdateableContainerMetadata) StorageMetadataCheckpointOperation(io.pravega.segmentstore.server.logs.operations.StorageMetadataCheckpointOperation) MergeSegmentOperation(io.pravega.segmentstore.server.logs.operations.MergeSegmentOperation) Operation(io.pravega.segmentstore.server.logs.operations.Operation) StreamSegmentMapOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentMapOperation) MetadataCheckpointOperation(io.pravega.segmentstore.server.logs.operations.MetadataCheckpointOperation) CachedStreamSegmentAppendOperation(io.pravega.segmentstore.server.logs.operations.CachedStreamSegmentAppendOperation) StorageOperation(io.pravega.segmentstore.server.logs.operations.StorageOperation) StreamSegmentAppendOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentAppendOperation) DeleteSegmentOperation(io.pravega.segmentstore.server.logs.operations.DeleteSegmentOperation) StreamSegmentSealOperation(io.pravega.segmentstore.server.logs.operations.StreamSegmentSealOperation) Cleanup(lombok.Cleanup) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException) CacheManager(io.pravega.segmentstore.server.CacheManager) CacheStorage(io.pravega.segmentstore.storage.cache.CacheStorage) lombok.val(lombok.val) MetadataBuilder(io.pravega.segmentstore.server.MetadataBuilder) ErrorInjector(io.pravega.test.common.ErrorInjector) ContainerReadIndex(io.pravega.segmentstore.server.reading.ContainerReadIndex) ReadIndex(io.pravega.segmentstore.server.ReadIndex) AtomicReference(java.util.concurrent.atomic.AtomicReference) InMemoryDurableDataLogFactory(io.pravega.segmentstore.storage.mocks.InMemoryDurableDataLogFactory) 
ContainerReadIndex(io.pravega.segmentstore.server.reading.ContainerReadIndex) StreamSegmentNotExistsException(io.pravega.segmentstore.contracts.StreamSegmentNotExistsException) TimeoutException(java.util.concurrent.TimeoutException) DataLogNotAvailableException(io.pravega.segmentstore.storage.DataLogNotAvailableException) StreamSegmentSealedException(io.pravega.segmentstore.contracts.StreamSegmentSealedException) ContainerOfflineException(io.pravega.segmentstore.server.ContainerOfflineException) CompletionException(java.util.concurrent.CompletionException) DataLogWriterNotPrimaryException(io.pravega.segmentstore.storage.DataLogWriterNotPrimaryException) StreamSegmentException(io.pravega.segmentstore.contracts.StreamSegmentException) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException) IntentionalException(io.pravega.test.common.IntentionalException) IOException(java.io.IOException) DataCorruptionException(io.pravega.segmentstore.server.DataCorruptionException) DataLogNotAvailableException(io.pravega.segmentstore.storage.DataLogNotAvailableException) Storage(io.pravega.segmentstore.storage.Storage) CacheStorage(io.pravega.segmentstore.storage.cache.CacheStorage) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestDurableDataLogFactory(io.pravega.segmentstore.server.TestDurableDataLogFactory) DataCorruptionException(io.pravega.segmentstore.server.DataCorruptionException) Test(org.junit.Test)

Example 2 with DataLogDisabledException

use of io.pravega.segmentstore.storage.DataLogDisabledException in project pravega by pravega.

The class BookKeeperLog, method initialize.

// endregion
// region DurableDataLog Implementation
/**
 * Initializes this BookKeeperLog by open-fencing it. The protocol is:
 * 1. Load the Log Metadata from ZooKeeper.
 * 2. Fence out at least the last 2 ledgers in the Ledger List.
 * 3. Create a brand new Ledger.
 * 3.1 Should any step up to this point fail, the process stops right there and no cleanup is attempted.
 * 4. Persist the updated Log Metadata (new ledger, new epoch) using compare-and-set.
 * 4.1 Should the CAS fail, the newly created Ledger is deleted (some other instance has fenced us out) and no
 * further update is made.
 *
 * Once the new state is in place, any ledgers identified as deletable are removed from BookKeeper on a best-effort
 * basis: a failed deletion is logged as a warning and does not fail the initialization.
 *
 * @param timeout Timeout for the operation.
 * @throws DataLogWriterNotPrimaryException If we were fenced-out during this process.
 * @throws DataLogNotAvailableException     If BookKeeper or ZooKeeper are not available.
 * @throws DataLogDisabledException         If the BookKeeperLog is disabled. No fencing is attempted in this case.
 * @throws DataLogInitializationException   If a general initialization error occurred.
 * @throws DurableDataLogException          If another type of exception occurred.
 */
@Override
public void initialize(Duration timeout) throws DurableDataLogException {
    final List<Long> orphanLedgerIds;
    final LogMetadata currentMetadata;
    synchronized (this.lock) {
        Preconditions.checkState(this.writeLedger == null, "BookKeeperLog is already initialized.");
        assert this.logMetadata == null : "writeLedger == null but logMetadata != null";

        // Whatever is currently persisted for this log; null means nothing was found.
        LogMetadata priorMetadata = loadMetadata();

        // A disabled log must not be touched at all: bail out before any fencing takes place.
        if (priorMetadata != null && !priorMetadata.isEnabled()) {
            throw new DataLogDisabledException("BookKeeperLog is disabled. Cannot initialize.");
        }

        if (priorMetadata != null) {
            // Fence out the existing ledgers, then record which of them turned out to be empty.
            val emptyLedgerIds = Ledgers.fenceOut(priorMetadata.getLedgers(), this.bookKeeper, this.config, this.traceObjectId);
            priorMetadata = priorMetadata.updateLedgerStatus(emptyLedgerIds);
        }

        // All writes from this instance go to a fresh ledger.
        WriteHandle freshLedger = Ledgers.create(this.bookKeeper, this.config, this.logId);
        log.info("{}: Created Ledger {}.", this.traceObjectId, freshLedger.getId());

        // Add the fresh ledger to the metadata and persist it to ZooKeeper.
        currentMetadata = updateMetadata(priorMetadata, freshLedger, true);
        LedgerMetadata freshLedgerMetadata = currentMetadata.getLedger(freshLedger.getId());
        assert freshLedgerMetadata != null : "cannot find newly added ledger metadata";

        this.writeLedger = new WriteLedger(freshLedger, freshLedgerMetadata);
        this.logMetadata = currentMetadata;
        orphanLedgerIds = getLedgerIdsToDelete(priorMetadata, currentMetadata);
    }

    // Outside of the lock: remove the orphaned ledgers from BookKeeper.
    for (Long ledgerId : orphanLedgerIds) {
        try {
            Ledgers.delete(ledgerId, this.bookKeeper);
            log.info("{}: Deleted orphan empty ledger {}.", this.traceObjectId, ledgerId);
        } catch (DurableDataLogException ex) {
            // Not fatal for initialization. The (empty) Ledger simply stays in BookKeeper until a cleanup tool
            // removes it manually.
            log.warn("{}: Unable to delete orphan empty ledger {}.", this.traceObjectId, ledgerId, ex);
        }
    }

    log.info("{}: Initialized (Epoch = {}, UpdateVersion = {}).", this.traceObjectId, currentMetadata.getEpoch(), currentMetadata.getUpdateVersion());
}
Also used : lombok.val(lombok.val) DurableDataLogException(io.pravega.segmentstore.storage.DurableDataLogException) WriteHandle(org.apache.bookkeeper.client.api.WriteHandle) DataLogDisabledException(io.pravega.segmentstore.storage.DataLogDisabledException)

Aggregations

DataLogDisabledException (io.pravega.segmentstore.storage.DataLogDisabledException)2 DurableDataLogException (io.pravega.segmentstore.storage.DurableDataLogException)2 lombok.val (lombok.val)2 CompositeArrayView (io.pravega.common.util.CompositeArrayView)1 StreamSegmentException (io.pravega.segmentstore.contracts.StreamSegmentException)1 StreamSegmentNotExistsException (io.pravega.segmentstore.contracts.StreamSegmentNotExistsException)1 StreamSegmentSealedException (io.pravega.segmentstore.contracts.StreamSegmentSealedException)1 CacheManager (io.pravega.segmentstore.server.CacheManager)1 ContainerOfflineException (io.pravega.segmentstore.server.ContainerOfflineException)1 DataCorruptionException (io.pravega.segmentstore.server.DataCorruptionException)1 MetadataBuilder (io.pravega.segmentstore.server.MetadataBuilder)1 ReadIndex (io.pravega.segmentstore.server.ReadIndex)1 TestDurableDataLog (io.pravega.segmentstore.server.TestDurableDataLog)1 TestDurableDataLogFactory (io.pravega.segmentstore.server.TestDurableDataLogFactory)1 UpdateableContainerMetadata (io.pravega.segmentstore.server.UpdateableContainerMetadata)1 CachedStreamSegmentAppendOperation (io.pravega.segmentstore.server.logs.operations.CachedStreamSegmentAppendOperation)1 DeleteSegmentOperation (io.pravega.segmentstore.server.logs.operations.DeleteSegmentOperation)1 MergeSegmentOperation (io.pravega.segmentstore.server.logs.operations.MergeSegmentOperation)1 MetadataCheckpointOperation (io.pravega.segmentstore.server.logs.operations.MetadataCheckpointOperation)1 Operation (io.pravega.segmentstore.server.logs.operations.Operation)1