Search in sources :

Example 56 with LogSequenceNumber

use of herddb.log.LogSequenceNumber in project herddb by diennea.

the class TableManager method checkpoint.

/**
 * Runs a checkpoint of this table: rewrites dirty and under-filled pages, flushes new pages, then
 * checkpoints the key-to-page index, the secondary indexes and the table status, all at the log
 * position read right after the checkpoint write lock has been acquired.
 *
 * @param dirtyThreshold       fraction of {@code maxLogicalPageSize}; a page whose dirt is positive and
 *                             at least this size is rebuilt (when not positive every dirty page is rebuilt)
 * @param fillThreshold        fraction of {@code maxLogicalPageSize}; pages not bigger than this size are
 *                             candidates for compaction
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param cleanupTargetTime    cleanup target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin                  forwarded as is to the key-to-page, index and data storage checkpoints
 * @return the checkpoint result with the post checkpoint actions to execute, or {@code null} when the
 *         table has been created by a transaction which is not committed yet
 * @throws DataStorageManagerException if the checkpoint lock cannot be acquired (timeout or interruption)
 *                                     or if any of the underlying storage operations fails
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long cleanupTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    LOGGER.log(Level.FINE, "tableCheckpoint dirtyThreshold: " + dirtyThreshold + ", {0}.{1} (pin: {2})", new Object[] { tableSpaceUUID, table.name, pin });
    if (createdInTransaction > 0) {
        LOGGER.log(Level.FINE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = dirtyThreshold > 0 ? (long) (dirtyThreshold * maxLogicalPageSize) : -1;
    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;
    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        /* Restore the interrupt flag: the exception is translated and callers would lose the signal */
        Thread.currentThread().interrupt();
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        /*
         * Report the lock state through its toString. Never call writeLock() here: asWriteLock() suggests
         * that checkpointLock is a StampedLock, whose writeLock() would block acquiring the very lock we
         * just timed out on and then leak it (NOTE(review): confirm the declared type of checkpointLock).
         */
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock);
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        checkPointRunning = true;
        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
        long flushedRecords = 0;
        List<CheckpointingPage> flushingDirtyPages = new ArrayList<>();
        List<CheckpointingPage> flushingSmallPages = new ArrayList<>();
        final Set<Long> flushedPages = new HashSet<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;
        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();
            /* Check dirtiness (flush here even small pages if enough dirty) */
            if (dirt > 0 && dirt >= dirtyPageThreshold) {
                flushingDirtyPages.add(new CheckpointingPage(pageId, dirt, dirt > 0));
                continue;
            }
            /* Check emptiness (with a really dirty check to avoid to rewrite an unfillable page) */
            if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new CheckpointingPage(pageId, metadata.size, dirt > 0));
                continue;
            }
        }
        /* Clean dirtier first */
        flushingDirtyPages.sort(CheckpointingPage.DESCENDING_ORDER);
        /* Clean smaller first */
        flushingSmallPages.sort(CheckpointingPage.ASCENDING_ORDER);
        pageAnalysis = System.currentTimeMillis();
        /* Should currently new rebuild page kept on memory or discarded? */
        boolean keepFlushedPageInMemory = false;
        /* New page actually rebuilt */
        DataPage buildingPage = createMutablePage(nextPageId++, 0, 0);
        if (!flushingDirtyPages.isEmpty()) {
            final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(pageAnalysis, cleanupTargetTime));
            /*
             * Do not continue if we have used up all configured cleanup or checkpoint time (but still compact
             * at least the smaller page (normally the leftover from last checkpoint)
             */
            CleanAndCompactResult dirtyResult = cleanAndCompactPages(flushingDirtyPages, buildingPage, keepFlushedPageInMemory, timeLimit);
            flushedDirtyPages = dirtyResult.flushedPages.size();
            flushedPages.addAll(dirtyResult.flushedPages);
            flushedRecords += dirtyResult.flushedRecords;
            keepFlushedPageInMemory = dirtyResult.keepFlushedPageInMemory;
            buildingPage = dirtyResult.buildingPage;
        }
        dirtyPagesFlush = System.currentTimeMillis();
        /* **************************** */
        /* *** Small pages handling *** */
        /* **************************** */
        /*
         * Small pages could be dirty pages too so we need to check every page if has been already handled
         * during dirty pages cleanup. Small pages should be a really small set (normally just last flushed
         * page), the filter is then no critical or heavy to require some optimization
         */
        /* Filter out dirty pages flushed from flushing small pages (a page could be "small" and "dirty") */
        flushingSmallPages = flushingSmallPages.stream().filter(wp -> !flushedPages.contains(wp.pageId)).collect(Collectors.toList());
        /*
         * If there is only one clean small page without additional data to add rebuilding the page make no
         * sense: is too probable to rebuild an identical page!
         */
        if (/* Just one small page */
        flushingSmallPages.size() == 1 && /* Not dirty */
        !flushingSmallPages.get(0).dirty && /* No spare data remaining */
        buildingPage.isEmpty() && /* No new data */
        newPages.values().stream().noneMatch(p -> !p.isEmpty())) {
            /* Avoid small page compaction */
            flushingSmallPages.clear();
        }
        if (!flushingSmallPages.isEmpty()) {
            final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(dirtyPagesFlush, compactionTargetTime));
            /*
             * Do not continue if we have used up all configured compaction or checkpoint time (but still
             * compact at least the smaller page (normally the leftover from last checkpoint)
             */
            CleanAndCompactResult smallResult = cleanAndCompactPages(flushingSmallPages, buildingPage, keepFlushedPageInMemory, timeLimit);
            flushedSmallPages = smallResult.flushedPages.size();
            flushedPages.addAll(smallResult.flushedPages);
            flushedRecords += smallResult.flushedRecords;
            keepFlushedPageInMemory = smallResult.keepFlushedPageInMemory;
            buildingPage = smallResult.buildingPage;
        }
        smallPagesFlush = System.currentTimeMillis();
        /* ************************** */
        /* *** New pages handling *** */
        /* ************************** */
        /*
         * Retrieve the "current" new page. It can be held in memory because no writes are executed during
         * a checkpoint and thus the page cannot change (nor be flushed due to an unload because it isn't
         * known to page replacement policy)
         */
        final long lastKnownPageId = currentDirtyRecordsPage.get();
        /*
         * Flush dirty records (and remaining records from previous step).
         *
         * Any newpage remaining here is unflushed and is not set as dirty (if "dirty" were unloaded!).
         * Just write the pages as they are.
         *
         * New empty pages won't be written
         */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            /* Flush every dirty page (but not the "current" dirty page if empty) */
            if (lastKnownPageId != dataPage.pageId || !dataPage.isEmpty()) {
                flushNewPageForCheckpoint(dataPage, buildingPage);
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }
        /*
         * Flush remaining records.
         *
         * To keep or not flushed page in memory is a "best guess" here: we don't known if records that
         * needed to be kept in memory were already be flushed during newPage filling (see
         * flushNewPageForCheckpoint). So we still use keepFlushedPageInMemory (possibily true) even if
         * remaining records came from an old unused page.
         */
        if (!buildingPage.isEmpty()) {
            flushMutablePage(buildingPage, keepFlushedPageInMemory);
        } else {
            /* Remove unused empty building page from memory */
            pages.remove(buildingPage.pageId);
        }
        /*
         * Never Never Never revert unused nextPageId! Even if we didn't used booked nextPageId is better to
         * throw it away, reverting generated id could be "strange" for now but simply wrong in the future
         * (if checkpoint will permit concurrent page creation for example..)
         */
        newPagesFlush = System.currentTimeMillis();
        if (flushedDirtyPages > 0 || flushedSmallPages > 0 || flushedNewPages > 0 || flushedRecords > 0) {
            LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }
        /* Checkpoint the key to page too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();
        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // Checkpoint at the same position of current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();
        pageSet.checkpointDone(flushedPages);
        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.longToByteArray(nextPrimaryKeyValue.get()), nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();
        /*
         * Can happen when at checkpoint start all pages are set as dirty or immutable (immutable or
         * unloaded) due do a deletion: all pages will be removed and no page will remain alive.
         */
        if (newPages.isEmpty()) {
            /* Allocate live handles the correct policy load/unload of last dirty page */
            allocateLivePage(lastKnownPageId);
        }
        result = new TableCheckpoint(table.name, sequenceNumber, actions);
        end = System.currentTimeMillis();
        if (flushedRecords > 0) {
            LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        /* Reset the flag on every exit path: a failed checkpoint must not leave it stuck to true */
        checkPointRunning = false;
        checkpointLock.asWriteLock().unlock();
    }
    /* Report a breakdown of the time spent in each phase when the checkpoint took more than one second */
    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }
    return result;
}
Also used : DataStorageManagerException(herddb.storage.DataStorageManagerException) ArrayList(java.util.ArrayList) LogSequenceNumber(herddb.log.LogSequenceNumber) DataPageMetaData(herddb.core.PageSet.DataPageMetaData) AtomicLong(java.util.concurrent.atomic.AtomicLong) TableStatus(herddb.storage.TableStatus) HashSet(java.util.HashSet)

Example 57 with LogSequenceNumber

use of herddb.log.LogSequenceNumber in project herddb by diennea.

the class TableManager method apply.

/**
 * Applies a log entry to this table, either directly or by registering the change on the owning
 * transaction when the entry carries a transaction id.
 * <p>
 * During recovery an entry whose position is not after the dump position (when a dump position is set) or
 * not after the boot position is applied only if it belongs to a transaction still known to the
 * tablespace; otherwise it is skipped.
 *
 * @param writeResult result of the write of the entry on the log
 * @param entry       the entry to apply
 * @param recovery    {@code true} when the entry is being replayed during recovery
 * @throws DataStorageManagerException on a deferred result during recovery or on an unknown transaction
 *                                     (unless missing transactions are ignored during recovery)
 * @throws IllegalArgumentException    if the entry type is not handled by the table
 */
@Override
public void apply(CommitLogResult writeResult, LogEntry entry, boolean recovery) throws DataStorageManagerException, LogNotAvailableException {
    if (recovery) {
        if (writeResult.deferred) {
            throw new DataStorageManagerException("impossibile to have a deferred CommitLogResult during recovery");
        }
        final LogSequenceNumber position = writeResult.getLogSequenceNumber();
        /* "restore mode": the position comes from the 'old' transaction log of the dumped table */
        final boolean coveredByDump = dumpLogSequenceNumber != null && !position.after(dumpLogSequenceNumber);
        /* otherwise plain recovery mode: compare against the position the table has been booted at */
        if (coveredByDump || !position.after(bootSequenceNumber)) {
            final Transaction owner = entry.transactionId > 0
                    ? tableSpaceManager.getTransaction(entry.transactionId)
                    : null;
            if (owner == null) {
                /* No pending transaction needs this entry */
                if (coveredByDump) {
                    LOGGER.log(Level.FINER, "{0}.{1} skip {2} at {3}, table restored from position {4}", new Object[] { table.tablespace, table.name, entry, position, dumpLogSequenceNumber });
                } else {
                    LOGGER.log(Level.FINER, "{0}.{1} skip {2} at {3}, table booted at {4}", new Object[] { table.tablespace, table.name, entry, position, bootSequenceNumber });
                }
                return;
            }
            owner.touch();
            if (coveredByDump) {
                LOGGER.log(Level.FINER, "{0}.{1} keep {2} at {3}, table restored from position {4}, it belongs to transaction {5} which was in progress during the dump of the table", new Object[] { table.tablespace, table.name, entry, position, dumpLogSequenceNumber, entry.transactionId });
            } else {
                LOGGER.log(Level.FINER, "{0}.{1} keep {2} at {3}, table booted at {4}, it belongs to transaction {5} which was in progress during the flush of the table", new Object[] { table.tablespace, table.name, entry, position, bootSequenceNumber, entry.transactionId });
            }
        }
    }
    if (writeResult.sync) {
        // wait for data to be stored to log
        writeResult.getLogSequenceNumber();
    }
    final long txId = entry.transactionId;
    switch(entry.type) {
        case LogEntryType.DELETE:
            {
                // remove the record from the set of existing records
                final Bytes recordKey = entry.key;
                if (txId <= 0) {
                    applyDelete(recordKey);
                    break;
                }
                final Transaction tx = tableSpaceManager.getTransaction(txId);
                if (tx != null) {
                    tx.registerDeleteOnTable(this.table.name, recordKey, writeResult);
                    break;
                }
                /* Ignore missing transaction only if during recovery and ignore property is active */
                if (!recovery || !ignoreMissingTransactionsOnRecovery) {
                    throw new DataStorageManagerException("no such transaction " + entry.transactionId);
                }
                LOGGER.log(Level.WARNING, "Ignoring delete of {0} due to missing transaction {1}", new Object[] { entry.key, entry.transactionId });
                break;
            }
        case LogEntryType.UPDATE:
            {
                final Bytes recordKey = entry.key;
                final Bytes recordValue = entry.value;
                if (txId <= 0) {
                    applyUpdate(recordKey, recordValue);
                    break;
                }
                final Transaction tx = tableSpaceManager.getTransaction(txId);
                if (tx != null) {
                    tx.registerRecordUpdate(this.table.name, recordKey, recordValue, writeResult);
                    break;
                }
                /* Ignore missing transaction only if during recovery and ignore property is active */
                if (!recovery || !ignoreMissingTransactionsOnRecovery) {
                    throw new DataStorageManagerException("no such transaction " + entry.transactionId);
                }
                LOGGER.log(Level.WARNING, "Ignoring update of {0} due to missing transaction {1}", new Object[] { entry.key, entry.transactionId });
                break;
            }
        case LogEntryType.INSERT:
            {
                final Bytes recordKey = entry.key;
                final Bytes recordValue = entry.value;
                if (txId <= 0) {
                    applyInsert(recordKey, recordValue, false);
                    break;
                }
                final Transaction tx = tableSpaceManager.getTransaction(txId);
                if (tx != null) {
                    tx.registerInsertOnTable(table.name, recordKey, recordValue, writeResult);
                    break;
                }
                /* Ignore missing transaction only if during recovery and ignore property is active */
                if (!recovery || !ignoreMissingTransactionsOnRecovery) {
                    throw new DataStorageManagerException("no such transaction " + entry.transactionId);
                }
                LOGGER.log(Level.WARNING, "Ignoring insert of {0} due to missing transaction {1}", new Object[] { entry.key, entry.transactionId });
                break;
            }
        case LogEntryType.TRUNCATE_TABLE:
            applyTruncate();
            break;
        default:
            throw new IllegalArgumentException("unhandled entry type " + entry.type);
    }
}
Also used : Bytes(herddb.utils.Bytes) DataStorageManagerException(herddb.storage.DataStorageManagerException) Transaction(herddb.model.Transaction) LogSequenceNumber(herddb.log.LogSequenceNumber)

Example 58 with LogSequenceNumber

use of herddb.log.LogSequenceNumber in project herddb by diennea.

the class TableSpaceManager method apply.

/**
 * Applies a log entry to the tablespace: handles transaction lifecycle and DDL entries here and then
 * forwards table-scoped entries to the manager of the target table.
 *
 * @param position result of the write of the entry on the log
 * @param entry    the entry to apply
 * @param recovery {@code true} when the entry is being replayed during recovery
 * @throws DataStorageManagerException if the entry refers to a transaction which is not known to this
 *                                     tablespace, or on storage errors
 * @throws DDLException                on DDL failures (for instance a consistency check on a table which
 *                                     does not exist on a follower)
 */
void apply(CommitLogResult position, LogEntry entry, boolean recovery) throws DataStorageManagerException, DDLException {
    if (!position.deferred || position.sync) {
        // this will wait for the write to be acknowledged by the log
        // it can throw LogNotAvailableException
        this.actualLogSequenceNumber = position.getLogSequenceNumber();
        if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "apply {0} {1}", new Object[] { position.getLogSequenceNumber(), entry });
        }
    } else {
        if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "apply {0} {1}", new Object[] { position, entry });
        }
    }
    switch(entry.type) {
        case LogEntryType.NOOP:
            {
            // NOOP
            }
            break;
        case LogEntryType.BEGINTRANSACTION:
            {
                long id = entry.transactionId;
                Transaction transaction = new Transaction(id, tableSpaceName, position);
                transactions.put(id, transaction);
            }
            break;
        case LogEntryType.ROLLBACKTRANSACTION:
            {
                long id = entry.transactionId;
                Transaction transaction = transactions.get(id);
                if (transaction == null) {
                    throw new DataStorageManagerException("invalid transaction id " + id + ", only " + transactions.keySet());
                }
                /* Work on snapshots: disposing tables/indexes below changes the live collections */
                List<AbstractIndexManager> indexManagers = new ArrayList<>(indexes.values());
                for (AbstractIndexManager indexManager : indexManagers) {
                    if (indexManager.getCreatedInTransaction() == 0 || indexManager.getCreatedInTransaction() == id) {
                        indexManager.onTransactionRollback(transaction);
                    }
                }
                List<AbstractTableManager> managers = new ArrayList<>(tables.values());
                for (AbstractTableManager manager : managers) {
                    if (manager.getCreatedInTransaction() == 0 || manager.getCreatedInTransaction() == id) {
                        Table table = manager.getTable();
                        if (transaction.isNewTable(table.name)) {
                            LOGGER.log(Level.INFO, "rollback CREATE TABLE " + table.tablespace + "." + table.name);
                            disposeTable(manager);
                            Map<String, AbstractIndexManager> tableIndexes = indexesByTable.remove(manager.getTable().name);
                            if (tableIndexes != null) {
                                for (AbstractIndexManager indexManager : tableIndexes.values()) {
                                    disposeIndexManager(indexManager);
                                }
                            }
                        } else {
                            manager.onTransactionRollback(transaction);
                        }
                    }
                }
                transactions.remove(transaction.transactionId);
            }
            break;
        case LogEntryType.COMMITTRANSACTION:
            {
                long id = entry.transactionId;
                Transaction transaction = transactions.get(id);
                if (transaction == null) {
                    throw new DataStorageManagerException("invalid transaction id " + id);
                }
                LogSequenceNumber commit = position.getLogSequenceNumber();
                transaction.sync(commit);
                List<AbstractTableManager> managers = new ArrayList<>(tables.values());
                for (AbstractTableManager manager : managers) {
                    if (manager.getCreatedInTransaction() == 0 || manager.getCreatedInTransaction() == id) {
                        manager.onTransactionCommit(transaction, recovery);
                    }
                }
                List<AbstractIndexManager> indexManagers = new ArrayList<>(indexes.values());
                for (AbstractIndexManager indexManager : indexManagers) {
                    if (indexManager.getCreatedInTransaction() == 0 || indexManager.getCreatedInTransaction() == id) {
                        indexManager.onTransactionCommit(transaction, recovery);
                    }
                }
                if ((transaction.droppedTables != null && !transaction.droppedTables.isEmpty()) || (transaction.droppedIndexes != null && !transaction.droppedIndexes.isEmpty())) {
                    if (transaction.droppedTables != null) {
                        for (String dropped : transaction.droppedTables) {
                            for (AbstractTableManager manager : managers) {
                                if (manager.getTable().name.equals(dropped)) {
                                    disposeTable(manager);
                                }
                            }
                        }
                    }
                    if (transaction.droppedIndexes != null) {
                        for (String dropped : transaction.droppedIndexes) {
                            for (AbstractIndexManager manager : indexManagers) {
                                if (manager.getIndex().name.equals(dropped)) {
                                    disposeIndexManager(manager);
                                }
                            }
                        }
                    }
                }
                /* Persist the new schema only if the transaction actually changed tables or indexes */
                if ((transaction.newTables != null && !transaction.newTables.isEmpty()) || (transaction.droppedTables != null && !transaction.droppedTables.isEmpty()) || (transaction.newIndexes != null && !transaction.newIndexes.isEmpty()) || (transaction.droppedIndexes != null && !transaction.droppedIndexes.isEmpty())) {
                    writeTablesOnDataStorageManager(position, false);
                    dbmanager.getPlanner().clearCache();
                }
                transactions.remove(transaction.transactionId);
            }
            break;
        case LogEntryType.CREATE_TABLE:
            {
                Table table = Table.deserialize(entry.value.to_array());
                if (entry.transactionId > 0) {
                    long id = entry.transactionId;
                    Transaction transaction = transactions.get(id);
                    /* Fail like commit/rollback do instead of raising a NullPointerException */
                    if (transaction == null) {
                        throw new DataStorageManagerException("invalid transaction id " + id);
                    }
                    transaction.registerNewTable(table, position);
                }
                bootTable(table, entry.transactionId, null, true);
                if (entry.transactionId <= 0) {
                    writeTablesOnDataStorageManager(position, false);
                }
            }
            break;
        case LogEntryType.CREATE_INDEX:
            {
                Index index = Index.deserialize(entry.value.to_array());
                if (entry.transactionId > 0) {
                    long id = entry.transactionId;
                    Transaction transaction = transactions.get(id);
                    /* Fail like commit/rollback do instead of raising a NullPointerException */
                    if (transaction == null) {
                        throw new DataStorageManagerException("invalid transaction id " + id);
                    }
                    transaction.registerNewIndex(index, position);
                }
                AbstractTableManager tableManager = tables.get(index.table);
                if (tableManager == null) {
                    throw new RuntimeException("table " + index.table + " does not exists");
                }
                bootIndex(index, tableManager, true, entry.transactionId, true, false);
                if (entry.transactionId <= 0) {
                    writeTablesOnDataStorageManager(position, false);
                }
            }
            break;
        case LogEntryType.DROP_TABLE:
            {
                String tableName = entry.tableName;
                if (entry.transactionId > 0) {
                    long id = entry.transactionId;
                    Transaction transaction = transactions.get(id);
                    /* Fail like commit/rollback do instead of raising a NullPointerException */
                    if (transaction == null) {
                        throw new DataStorageManagerException("invalid transaction id " + id);
                    }
                    transaction.registerDropTable(tableName, position);
                } else {
                    AbstractTableManager manager = tables.get(tableName);
                    if (manager != null) {
                        disposeTable(manager);
                        Map<String, AbstractIndexManager> tableIndexes = indexesByTable.get(tableName);
                        if (tableIndexes != null && !tableIndexes.isEmpty()) {
                            LOGGER.log(Level.SEVERE, "It looks like we are dropping a table " + tableName + " with these indexes " + tableIndexes);
                        }
                    }
                }
                if (entry.transactionId <= 0) {
                    writeTablesOnDataStorageManager(position, false);
                }
            }
            break;
        case LogEntryType.DROP_INDEX:
            {
                String indexName = entry.value.to_string();
                if (entry.transactionId > 0) {
                    long id = entry.transactionId;
                    Transaction transaction = transactions.get(id);
                    /* Fail like commit/rollback do instead of raising a NullPointerException */
                    if (transaction == null) {
                        throw new DataStorageManagerException("invalid transaction id " + id);
                    }
                    transaction.registerDropIndex(indexName, position);
                } else {
                    AbstractIndexManager manager = indexes.get(indexName);
                    if (manager != null) {
                        disposeIndexManager(manager);
                    }
                }
                if (entry.transactionId <= 0) {
                    writeTablesOnDataStorageManager(position, false);
                    dbmanager.getPlanner().clearCache();
                }
            }
            break;
        case LogEntryType.ALTER_TABLE:
            {
                Table table = Table.deserialize(entry.value.to_array());
                alterTable(table, null);
                writeTablesOnDataStorageManager(position, false);
            }
            break;
        case LogEntryType.TABLE_CONSISTENCY_CHECK:
            {
                /*
                    In recovery mode, we need to skip the consistency check.
                    The tablespace may not be available yet and therefore calcite will not be able to perform the select query.
                */
                if (recovery) {
                    LOGGER.log(Level.INFO, "skip {0} consistency check LogEntry {1}", new Object[] { tableSpaceName, entry });
                    break;
                }
                try {
                    TableChecksum check = MAPPER.readValue(entry.value.to_array(), TableChecksum.class);
                    String tableSpace = check.getTableSpaceName();
                    String query = check.getQuery();
                    String tableName = entry.tableName;
                    // In the entry type = 14, the follower will have to run the query on the transaction log
                    if (!isLeader()) {
                        AbstractTableManager tablemanager = this.getTableManager(tableName);
                        DBManager manager = this.getDbmanager();
                        if (tablemanager == null || tablemanager.getCreatedInTransaction() > 0) {
                            /* Report the table name: the manager may be null here and is not meaningful to the reader */
                            throw new TableDoesNotExistException(String.format("Table %s does not exist.", tableName));
                        }
                        /*
                            scan = true
                            allowCache = false
                            returnValues = false
                            maxRows = -1
                        */
                        TranslatedQuery translated = manager.getPlanner().translate(tableSpace, query, Collections.emptyList(), true, false, false, -1);
                        TableChecksum scanResult = TableDataChecksum.createChecksum(manager, translated, this, tableSpace, tableName);
                        long followerDigest = scanResult.getDigest();
                        long leaderDigest = check.getDigest();
                        long leaderNumRecords = check.getNumRecords();
                        long followerNumRecords = scanResult.getNumRecords();
                        // the necessary condition to pass the check is to have exactly the same digest and the number of records processed
                        if (followerDigest == leaderDigest && leaderNumRecords == followerNumRecords) {
                            LOGGER.log(Level.INFO, "Data consistency check PASS for table {0}  tablespace {1} with  Checksum {2}", new Object[] { tableName, tableSpace, followerDigest });
                        } else {
                            LOGGER.log(Level.SEVERE, "Data consistency check FAILED for table {0} in tablespace {1} with Checksum {2}", new Object[] { tableName, tableSpace, followerDigest });
                        }
                    } else {
                        long digest = check.getDigest();
                        LOGGER.log(Level.INFO, "Created checksum {0}  for table {1} in tablespace {2} on node {3}", new Object[] { digest, entry.tableName, tableSpace, this.getDbmanager().getNodeId() });
                    }
                } catch (IOException | DataScannerException ex) {
                    LOGGER.log(Level.SEVERE, "Error during table consistency check ", ex);
                }
            }
            break;
        default:
            // other entry types are not important for the tablespacemanager
            break;
    }
    /* Every other entry which names a table is forwarded to the manager of that table */
    if (entry.tableName != null && entry.type != LogEntryType.CREATE_TABLE && entry.type != LogEntryType.CREATE_INDEX && entry.type != LogEntryType.ALTER_TABLE && entry.type != LogEntryType.DROP_TABLE && entry.type != LogEntryType.TABLE_CONSISTENCY_CHECK) {
        AbstractTableManager tableManager = tables.get(entry.tableName);
        tableManager.apply(position, entry, recovery);
    }
}
Also used : DataStorageManagerException(herddb.storage.DataStorageManagerException) Table(herddb.model.Table) TranslatedQuery(herddb.sql.TranslatedQuery) LogSequenceNumber(herddb.log.LogSequenceNumber) Index(herddb.model.Index) TableDoesNotExistException(herddb.model.TableDoesNotExistException) Transaction(herddb.model.Transaction) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) List(java.util.List) TableChecksum(herddb.data.consistency.TableChecksum) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 59 with LogSequenceNumber

use of herddb.log.LogSequenceNumber in project herddb by diennea.

In class TableSpaceManager, method recover:

/**
 * Boots the tablespace from the last checkpoint known to the data storage and then
 * replays the commit log through {@code ApplyEntryOnRecovery}.
 *
 * <p>Steps, in order: load and boot the tables and indexes stored at the checkpoint, reload
 * the transactions that were open at the checkpoint (booting the tables/indexes they
 * created), replay the log, and finally run a checkpoint unless the resulting position
 * is still {@code START_OF_TIME}.
 *
 * @param tableSpaceInfo tablespace descriptor (not read by this method body)
 * @throws HerdDBInternalException if a recovery is already flagged as in progress
 * @throws DataStorageManagerException declared; raised by the storage calls
 * @throws LogNotAvailableException declared; raised by the log calls
 * @throws MetadataStorageManagerException declared by the signature
 */
void recover(TableSpace tableSpaceInfo) throws DataStorageManagerException, LogNotAvailableException, MetadataStorageManagerException {
    if (recoveryInProgress) {
        throw new HerdDBInternalException("Cannot run recovery twice");
    }
    recoveryInProgress = true;

    final LogSequenceNumber checkpointPosition = dataStorageManager.getLastcheckpointSequenceNumber(tableSpaceUUID);
    actualLogSequenceNumber = checkpointPosition;
    LOGGER.log(Level.INFO, "{0} recover {1}, logSequenceNumber from DataStorage: {2}", new Object[] { nodeId, tableSpaceName, checkpointPosition });

    final List<Table> tablesAtBoot = dataStorageManager.loadTables(checkpointPosition, tableSpaceUUID);
    final List<Index> indexesAtBoot = dataStorageManager.loadIndexes(checkpointPosition, tableSpaceUUID);
    final String tableNames = tablesAtBoot.stream()
            .map(loaded -> loaded.name)
            .collect(Collectors.joining(","));
    final String indexNames = indexesAtBoot.stream()
            .map(loaded -> loaded.name + " on table " + loaded.table)
            .collect(Collectors.joining(","));
    if (!tableNames.isEmpty()) {
        LOGGER.log(Level.INFO, "{0} {1} tablesAtBoot: {2}, indexesAtBoot: {3}", new Object[] { nodeId, tableSpaceName, tableNames, indexNames });
    }

    // Boot every checkpointed table, then the indexes that belong to it.
    for (Table table : tablesAtBoot) {
        final TableManager bootedTable = bootTable(table, 0, null, false);
        for (Index index : indexesAtBoot) {
            if (!index.table.equals(table.name)) {
                continue;
            }
            bootIndex(index, bootedTable, false, 0, false, false);
        }
    }

    // Re-register the transactions stored at the checkpoint, booting the tables and
    // indexes they created that are not already known to this manager.
    dataStorageManager.loadTransactions(checkpointPosition, tableSpaceUUID, tx -> {
        transactions.put(tx.transactionId, tx);
        LOGGER.log(Level.FINER, "{0} {1} tx {2} at boot lsn {3}", new Object[] { nodeId, tableSpaceName, tx.transactionId, tx.lastSequenceNumber });
        try {
            if (tx.newTables != null) {
                for (Table pendingTable : tx.newTables.values()) {
                    if (tables.containsKey(pendingTable.name)) {
                        continue;
                    }
                    bootTable(pendingTable, tx.transactionId, null, false);
                }
            }
            if (tx.newIndexes != null) {
                for (Index pendingIndex : tx.newIndexes.values()) {
                    if (indexes.containsKey(pendingIndex.name)) {
                        continue;
                    }
                    AbstractTableManager owner = tables.get(pendingIndex.table);
                    bootIndex(pendingIndex, owner, false, tx.transactionId, false, false);
                }
            }
        } catch (Exception err) {
            LOGGER.log(Level.SEVERE, "error while booting tmp tables " + err, err);
            throw new RuntimeException(err);
        }
    });

    // The configuration flag is only consulted when there is no checkpoint at all.
    final boolean forcedSnapshotDownload = LogSequenceNumber.START_OF_TIME.equals(checkpointPosition)
            && dbmanager.getServerConfiguration().getBoolean(ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT, ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT_DEFAULT);
    if (forcedSnapshotDownload) {
        LOGGER.log(Level.SEVERE, nodeId + " full recovery of data is forced (" + ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT + "=true) for tableSpace " + tableSpaceName);
        downloadTableSpaceData();
        // NOTE(review): replay starts from the field, not the local; presumably
        // downloadTableSpaceData() moves actualLogSequenceNumber forward - confirm.
        log.recovery(actualLogSequenceNumber, new ApplyEntryOnRecovery(), false);
    } else {
        try {
            log.recovery(checkpointPosition, new ApplyEntryOnRecovery(), false);
        } catch (FullRecoveryNeededException fullRecoveryNeeded) {
            LOGGER.log(Level.SEVERE, nodeId + " full recovery of data is needed for tableSpace " + tableSpaceName, fullRecoveryNeeded);
            downloadTableSpaceData();
            log.recovery(actualLogSequenceNumber, new ApplyEntryOnRecovery(), false);
        }
    }

    recoveryInProgress = false;
    if (LogSequenceNumber.START_OF_TIME.equals(actualLogSequenceNumber)) {
        return;
    }
    LOGGER.log(Level.INFO, "Recovery finished for {0} seqNum {1}", new Object[] { tableSpaceName, actualLogSequenceNumber });
    checkpoint(false, false, false);
}
Also used : HDBException(herddb.client.HDBException) SystablesTableManager(herddb.core.system.SystablesTableManager) CommitTransactionStatement(herddb.model.commands.CommitTransactionStatement) TableCheckpoint(herddb.core.AbstractTableManager.TableCheckpoint) Table(herddb.model.Table) ClientConfiguration(herddb.client.ClientConfiguration) SyslogstatusManager(herddb.core.system.SyslogstatusManager) OpStatsLogger(org.apache.bookkeeper.stats.OpStatsLogger) IndexAlreadyExistsException(herddb.model.IndexAlreadyExistsException) Map(java.util.Map) DDLStatementExecutionResult(herddb.model.DDLStatementExecutionResult) LogNotAvailableException(herddb.log.LogNotAvailableException) PduCodec(herddb.proto.PduCodec) CommitLogResult(herddb.log.CommitLogResult) ClientSideMetadataProviderException(herddb.client.ClientSideMetadataProviderException) LogSequenceNumber(herddb.log.LogSequenceNumber) Set(java.util.Set) ScanStatement(herddb.model.commands.ScanStatement) CountDownLatch(java.util.concurrent.CountDownLatch) Stream(java.util.stream.Stream) HDBClient(herddb.client.HDBClient) TranslatedQuery(herddb.sql.TranslatedQuery) StatsLogger(org.apache.bookkeeper.stats.StatsLogger) TableDataChecksum(herddb.data.consistency.TableDataChecksum) DropTableStatement(herddb.model.commands.DropTableStatement) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Bytes(herddb.utils.Bytes) SysdualTableManager(herddb.core.system.SysdualTableManager) LogEntry(herddb.log.LogEntry) SysnodesTableManager(herddb.core.system.SysnodesTableManager) ArrayList(java.util.ArrayList) CreateIndexStatement(herddb.model.commands.CreateIndexStatement) TableSpaceDoesNotExistException(herddb.model.TableSpaceDoesNotExistException) TransactionContext(herddb.model.TransactionContext) Transaction(herddb.model.Transaction) BRINIndexManager(herddb.index.brin.BRINIndexManager) SystablestatsTableManager(herddb.core.system.SystablestatsTableManager) BiConsumer(java.util.function.BiConsumer) 
CommitLogListener(herddb.log.CommitLogListener) ForeignKeyDef(herddb.model.ForeignKeyDef) IndexDoesNotExistException(herddb.model.IndexDoesNotExistException) TableSpaceManagerStats(herddb.core.stats.TableSpaceManagerStats) LogEntryType(herddb.log.LogEntryType) LogEntryFactory(herddb.log.LogEntryFactory) IOException(java.io.IOException) DataStorageManager(herddb.storage.DataStorageManager) DropIndexStatement(herddb.model.commands.DropIndexStatement) ColumnTypes(herddb.model.ColumnTypes) ExecutionException(java.util.concurrent.ExecutionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Column(herddb.model.Column) KeyValue(herddb.utils.KeyValue) FullRecoveryNeededException(herddb.log.FullRecoveryNeededException) StampedLock(java.util.concurrent.locks.StampedLock) ServerHostData(herddb.network.ServerHostData) TableAlreadyExistsException(herddb.model.TableAlreadyExistsException) RollbackTransactionStatement(herddb.model.commands.RollbackTransactionStatement) CreateTableStatement(herddb.model.commands.CreateTableStatement) TimeoutException(java.util.concurrent.TimeoutException) JMXUtils(herddb.jmx.JMXUtils) TransactionResult(herddb.model.TransactionResult) MetadataStorageManager(herddb.metadata.MetadataStorageManager) Channel(herddb.network.Channel) ServerConfiguration(herddb.server.ServerConfiguration) Futures(herddb.utils.Futures) DataStorageManagerException(herddb.storage.DataStorageManagerException) Index(herddb.model.Index) TableDoesNotExistException(herddb.model.TableDoesNotExistException) AlterTableStatement(herddb.model.commands.AlterTableStatement) SysindexcolumnsTableManager(herddb.core.system.SysindexcolumnsTableManager) DataScanner(herddb.model.DataScanner) DDLException(herddb.model.DDLException) StatementExecutionException(herddb.model.StatementExecutionException) Collection(java.util.Collection) SysstatementsTableManager(herddb.core.system.SysstatementsTableManager) BeginTransactionStatement(herddb.model.commands.BeginTransactionStatement) 
ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) CompletionException(java.util.concurrent.CompletionException) TableAwareStatement(herddb.model.TableAwareStatement) Logger(java.util.logging.Logger) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) HDBConnection(herddb.client.HDBConnection) List(java.util.List) FullTableScanConsumer(herddb.storage.FullTableScanConsumer) SystransactionsTableManager(herddb.core.system.SystransactionsTableManager) NodeMetadata(herddb.model.NodeMetadata) Entry(java.util.Map.Entry) Optional(java.util.Optional) TableSpace(herddb.model.TableSpace) SysconfigTableManager(herddb.core.system.SysconfigTableManager) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) SysindexesTableManager(herddb.core.system.SysindexesTableManager) Statement(herddb.model.Statement) MetadataStorageManagerException(herddb.metadata.MetadataStorageManagerException) DataScannerException(herddb.model.DataScannerException) SystablespacesTableManager(herddb.core.system.SystablespacesTableManager) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Pdu(herddb.proto.Pdu) Level(java.util.logging.Level) HashSet(java.util.HashSet) SysclientsTableManager(herddb.core.system.SysclientsTableManager) TableChecksum(herddb.data.consistency.TableChecksum) ExecutorService(java.util.concurrent.ExecutorService) DumpedLogEntry(herddb.backup.DumpedLogEntry) SysforeignkeysTableManager(herddb.core.system.SysforeignkeysTableManager) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) StatementExecutionResult(herddb.model.StatementExecutionResult) TimeUnit(java.util.concurrent.TimeUnit) CommitLog(herddb.log.CommitLog) ClientSideMetadataProvider(herddb.client.ClientSideMetadataProvider) ConcurrentSkipListSet(java.util.concurrent.ConcurrentSkipListSet) SystablespacereplicastateTableManager(herddb.core.system.SystablespacereplicastateTableManager) 
SQLPlannedOperationStatement(herddb.model.commands.SQLPlannedOperationStatement) StatementEvaluationContext(herddb.model.StatementEvaluationContext) SyscolumnsTableManager(herddb.core.system.SyscolumnsTableManager) Comparator(java.util.Comparator) Collections(java.util.Collections) TableManagerStats(herddb.core.stats.TableManagerStats) MemoryHashIndexManager(herddb.index.MemoryHashIndexManager) SystemProperties(herddb.utils.SystemProperties) Table(herddb.model.Table) LogSequenceNumber(herddb.log.LogSequenceNumber) Index(herddb.model.Index) HDBException(herddb.client.HDBException) IndexAlreadyExistsException(herddb.model.IndexAlreadyExistsException) LogNotAvailableException(herddb.log.LogNotAvailableException) ClientSideMetadataProviderException(herddb.client.ClientSideMetadataProviderException) TableSpaceDoesNotExistException(herddb.model.TableSpaceDoesNotExistException) IndexDoesNotExistException(herddb.model.IndexDoesNotExistException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) FullRecoveryNeededException(herddb.log.FullRecoveryNeededException) TableAlreadyExistsException(herddb.model.TableAlreadyExistsException) TimeoutException(java.util.concurrent.TimeoutException) DataStorageManagerException(herddb.storage.DataStorageManagerException) TableDoesNotExistException(herddb.model.TableDoesNotExistException) DDLException(herddb.model.DDLException) StatementExecutionException(herddb.model.StatementExecutionException) CompletionException(java.util.concurrent.CompletionException) EOFException(java.io.EOFException) MetadataStorageManagerException(herddb.metadata.MetadataStorageManagerException) DataScannerException(herddb.model.DataScannerException) SystablesTableManager(herddb.core.system.SystablesTableManager) SysdualTableManager(herddb.core.system.SysdualTableManager) SysnodesTableManager(herddb.core.system.SysnodesTableManager) SystablestatsTableManager(herddb.core.system.SystablestatsTableManager) 
SysindexcolumnsTableManager(herddb.core.system.SysindexcolumnsTableManager) SysstatementsTableManager(herddb.core.system.SysstatementsTableManager) SystransactionsTableManager(herddb.core.system.SystransactionsTableManager) SysconfigTableManager(herddb.core.system.SysconfigTableManager) SysindexesTableManager(herddb.core.system.SysindexesTableManager) SystablespacesTableManager(herddb.core.system.SystablespacesTableManager) SysclientsTableManager(herddb.core.system.SysclientsTableManager) SysforeignkeysTableManager(herddb.core.system.SysforeignkeysTableManager) SystablespacereplicastateTableManager(herddb.core.system.SystablespacereplicastateTableManager) SyscolumnsTableManager(herddb.core.system.SyscolumnsTableManager) FullRecoveryNeededException(herddb.log.FullRecoveryNeededException)

Example 60 with LogSequenceNumber

use of herddb.log.LogSequenceNumber in project herddb by diennea.

In class BookKeeperDataStorageManager, method writeTransactionsAtCheckpoint:

/**
 * Writes the transactions active at a checkpoint to the tablespace's transactions znode
 * and collects cleanup actions for transactions files made obsolete by this checkpoint.
 *
 * <p>The payload layout is: version (1), flags (0), tablespace name, ledger id, offset,
 * transaction count, then each serialized transaction.
 *
 * @param tableSpace     tablespace the checkpoint belongs to
 * @param sequenceNumber log position of the checkpoint
 * @param transactions   transactions to persist; must be empty at start-of-time
 * @return actions that delete transactions files whose log position precedes
 *         {@code sequenceNumber}, plus files that could not be parsed
 * @throws DataStorageManagerException if a non-empty list is written at start-of-time,
 *         or if serializing the payload fails with an {@link IOException}
 */
@Override
public Collection<PostCheckpointAction> writeTransactionsAtCheckpoint(String tableSpace, LogSequenceNumber sequenceNumber, Collection<Transaction> transactions) throws DataStorageManagerException {
    if (sequenceNumber.isStartOfTime() && !transactions.isEmpty()) {
        throw new DataStorageManagerException("impossible to write a non empty transactions list at start-of-time");
    }
    String checkPointFile = getTablespaceTransactionsFile(tableSpace, sequenceNumber);
    LOGGER.log(Level.FINE, "writeTransactionsAtCheckpoint for tableSpace {0} sequenceNumber {1} to {2}, active transactions {3}", new Object[] { tableSpace, sequenceNumber, checkPointFile, transactions.size() });
    try (VisibleByteArrayOutputStream buffer = new VisibleByteArrayOutputStream();
        ExtendedDataOutputStream dout = new ExtendedDataOutputStream(buffer)) {
        // version
        dout.writeVLong(1);
        // flags for future implementations
        dout.writeVLong(0);
        dout.writeUTF(tableSpace);
        dout.writeZLong(sequenceNumber.ledgerId);
        dout.writeZLong(sequenceNumber.offset);
        dout.writeInt(transactions.size());
        for (Transaction t : transactions) {
            t.serialize(dout);
        }
        dout.flush();
        writeZNodeEnforceOwnership(tableSpace, checkPointFile, buffer.toByteArray(), null);
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    Collection<PostCheckpointAction> result = new ArrayList<>();
    String tableSpaceDirectory = getTableSpaceZNode(tableSpace);
    List<String> stream = zkGetChildren(tableSpaceDirectory);
    for (String p : stream) {
        if (!isTransactionsFile(p)) {
            continue;
        }
        try {
            // Read the candidate file itself. Reading checkPointFile here (as the code used
            // to) compared every candidate against the position just written, so
            // sequenceNumber.after(...) could never hold and old files were never cleaned up.
            // NOTE(review): assumes p is a readable znode path, as it is the path handed to
            // DeleteZNodeAction below - confirm against zkGetChildren.
            byte[] content = readZNode(p, new Stat());
            if (content != null) {
                LogSequenceNumber logPositionInFile = readLogSequenceNumberFromTransactionsFile(tableSpace, content, p);
                if (sequenceNumber.after(logPositionInFile)) {
                    LOGGER.log(Level.FINEST, "transactions metadata file " + p + ". will be deleted after checkpoint end");
                    result.add(new DeleteZNodeAction(tableSpace, "transactions", "delete transactions file " + p, p));
                }
            }
        } catch (DataStorageManagerException unparsable) {
            // Best effort: a file that cannot be parsed is logged and scheduled for deletion.
            LOGGER.log(Level.SEVERE, "Unparsable transactions file " + p, unparsable);
            result.add(new DeleteZNodeAction(tableSpace, "transactions", "delete unparsable transactions file " + p, p));
        }
    }
    return result;
}
Also used : DataStorageManagerException(herddb.storage.DataStorageManagerException) ArrayList(java.util.ArrayList) VisibleByteArrayOutputStream(herddb.utils.VisibleByteArrayOutputStream) LogSequenceNumber(herddb.log.LogSequenceNumber) IOException(java.io.IOException) ExtendedDataOutputStream(herddb.utils.ExtendedDataOutputStream) PostCheckpointAction(herddb.core.PostCheckpointAction) Stat(org.apache.zookeeper.data.Stat) Transaction(herddb.model.Transaction)

Aggregations

LogSequenceNumber (herddb.log.LogSequenceNumber) 74, DataStorageManagerException (herddb.storage.DataStorageManagerException) 35, IOException (java.io.IOException) 34, ArrayList (java.util.ArrayList) 30, Table (herddb.model.Table) 21, Test (org.junit.Test) 21, LogEntry (herddb.log.LogEntry) 20, PostCheckpointAction (herddb.core.PostCheckpointAction) 13, CommitLog (herddb.log.CommitLog) 13, Index (herddb.model.Index) 13, ExtendedDataOutputStream (herddb.utils.ExtendedDataOutputStream) 13, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 13, CreateTableStatement (herddb.model.commands.CreateTableStatement) 10, Path (java.nio.file.Path) 10, HashMap (java.util.HashMap) 10, Transaction (herddb.model.Transaction) 9, InsertStatement (herddb.model.commands.InsertStatement) 9, ServerConfiguration (herddb.server.ServerConfiguration) 9, ExtendedDataInputStream (herddb.utils.ExtendedDataInputStream) 9, SimpleByteArrayInputStream (herddb.utils.SimpleByteArrayInputStream) 9