Example 11 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

From class MemoryDataStorageManager, method getLatestTableStatus.

@Override
public TableStatus getLatestTableStatus(String tableSpace, String tableName) throws DataStorageManagerException {
    LogSequenceNumber max = null;
    String prefix = tableSpace + "." + tableName + "_";
    for (String status : tableStatuses.keySet()) {
        if (status.startsWith(prefix)) {
            // evaluate the LogSequenceNumber encoded in this status key
            final LogSequenceNumber log = evaluateLogSequenceNumber(status);
            if (log != null) {
                if (max == null || log.after(max)) {
                    max = log;
                }
            }
        }
    }
    TableStatus latestStatus;
    if (max == null) {
        latestStatus = TableStatus.buildTableStatusForNewCreatedTable(tableName);
    } else {
        byte[] data = tableStatuses.get(checkpointName(tableSpace, tableName, max));
        if (data == null) {
            latestStatus = TableStatus.buildTableStatusForNewCreatedTable(tableName);
        } else {
            try {
                try (InputStream input = new SimpleByteArrayInputStream(data);
                    ExtendedDataInputStream dataIn = new ExtendedDataInputStream(input)) {
                    latestStatus = TableStatus.deserialize(dataIn);
                }
            } catch (IOException err) {
                throw new DataStorageManagerException(err);
            }
        }
    }
    return latestStatus;
}
Also used : ExtendedDataInputStream(herddb.utils.ExtendedDataInputStream) DataStorageManagerException(herddb.storage.DataStorageManagerException) SimpleByteArrayInputStream(herddb.utils.SimpleByteArrayInputStream) InputStream(java.io.InputStream) TableStatus(herddb.storage.TableStatus) LogSequenceNumber(herddb.log.LogSequenceNumber) IOException(java.io.IOException)
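The lookup above relies on a naming convention: each serialized status lives under a key of the form tableSpace.tableName_<logPosition>, and the latest status is the one whose embedded log sequence number is greatest. The following is a minimal, self-contained sketch of that selection pattern; SimpleLsn, the numeric key-suffix format, and parseLsn are illustrative stand-ins, not the real herddb types or key layout.

import java.util.HashMap;
import java.util.Map;

public class LatestStatusScan {

    /** Illustrative stand-in for herddb's LogSequenceNumber (ledger id + offset). */
    static final class SimpleLsn implements Comparable<SimpleLsn> {

        final long ledgerId;
        final long offset;

        SimpleLsn(long ledgerId, long offset) {
            this.ledgerId = ledgerId;
            this.offset = offset;
        }

        boolean after(SimpleLsn other) {
            return compareTo(other) > 0;
        }

        @Override
        public int compareTo(SimpleLsn o) {
            int c = Long.compare(ledgerId, o.ledgerId);
            return c != 0 ? c : Long.compare(offset, o.offset);
        }

        @Override
        public String toString() {
            return ledgerId + "." + offset;
        }
    }

    /** Parse an LSN from a key suffix such as "2.5"; returns null for malformed keys. */
    static SimpleLsn parseLsn(String suffix) {
        int dot = suffix.indexOf('.');
        if (dot <= 0) {
            return null;
        }
        try {
            return new SimpleLsn(Long.parseLong(suffix.substring(0, dot)),
                Long.parseLong(suffix.substring(dot + 1)));
        } catch (NumberFormatException malformed) {
            return null;
        }
    }

    /** Scan every key sharing the table prefix and keep the greatest LSN. */
    static SimpleLsn latest(Map<String, byte[]> statuses, String tableSpace, String tableName) {
        SimpleLsn max = null;
        String prefix = tableSpace + "." + tableName + "_";
        for (String key : statuses.keySet()) {
            if (key.startsWith(prefix)) {
                // evaluate the LSN of the current key, never of the constant prefix
                SimpleLsn lsn = parseLsn(key.substring(prefix.length()));
                if (lsn != null && (max == null || lsn.after(max))) {
                    max = lsn;
                }
            }
        }
        return max;
    }

    public static void main(String[] args) {
        Map<String, byte[]> statuses = new HashMap<>();
        statuses.put("ts1.customers_1.10", new byte[0]);
        statuses.put("ts1.customers_2.5", new byte[0]);
        statuses.put("ts1.orders_9.9", new byte[0]);
        System.out.println(latest(statuses, "ts1", "customers")); // prints 2.5
    }
}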

Example 12 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

From class MemoryDataStorageManager, method fullTableScan.

@Override
public void fullTableScan(String tableSpace, String tableName, FullTableScanConsumer consumer) throws DataStorageManagerException {
    TableStatus status = getLatestTableStatus(tableSpace, tableName);
    fullTableScan(tableSpace, tableName, status, consumer);
}
Also used : TableStatus(herddb.storage.TableStatus)

Example 13 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

From class MemoryDataStorageManager, method fullTableScan.

@Override
public void fullTableScan(String tableSpace, String tableName, LogSequenceNumber sequenceNumber, FullTableScanConsumer consumer) throws DataStorageManagerException {
    TableStatus status = getTableStatus(tableSpace, tableName, sequenceNumber);
    fullTableScan(tableSpace, tableName, status, consumer);
}
Also used : TableStatus(herddb.storage.TableStatus)
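Examples 12 and 13 are the two faces of the same delegation idiom: a convenience overload resolves a TableStatus (either the latest one, or the one selected by a given LogSequenceNumber) and forwards to a single status-driven scan that pushes records into a callback. The sketch below reproduces that shape with a simplified, hypothetical ScanConsumer interface; it is not the real herddb FullTableScanConsumer API, whose callbacks and signatures differ.

import java.util.LinkedHashMap;
import java.util.Map;

public class ScanDelegation {

    /** Simplified, hypothetical scan callback (not herddb's FullTableScanConsumer). */
    interface ScanConsumer {
        void acceptRecord(String key, byte[] value);
    }

    private final Map<String, byte[]> records = new LinkedHashMap<>();

    /** Convenience overload: resolve the latest status, then delegate (as in Example 12). */
    public void fullTableScan(String tableName, ScanConsumer consumer) {
        String status = latestStatus(tableName);
        fullTableScan(tableName, status, consumer);
    }

    /** Overload pinned to an explicit status (as in Example 13, where an LSN selects it). */
    public void fullTableScan(String tableName, String status, ScanConsumer consumer) {
        // a real implementation would only visit the pages listed in the resolved status
        records.forEach(consumer::acceptRecord);
    }

    private String latestStatus(String tableName) {
        return tableName + "@latest"; // placeholder for getLatestTableStatus
    }

    public static void main(String[] args) {
        ScanDelegation storage = new ScanDelegation();
        storage.records.put("k1", new byte[] { 1 });
        storage.records.put("k2", new byte[] { 2 });
        storage.fullTableScan("customers",
            (key, value) -> System.out.println(key + " -> " + value.length + " bytes"));
    }
}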

Example 14 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

From class TableManager, method checkpoint.

/**
 * @param dirtyThreshold       fraction of the max logical page size of dirt above which a page is rewritten
 * @param fillThreshold        fraction of the max logical page size at or below which a page is considered small and compacted
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param cleanupTargetTime    cleanup target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin                  whether to pin the checkpoint (forwarded to key-to-page, index and table checkpoints)
 * @return the completed table checkpoint, or null if the table was created in a transaction that is not committed yet
 * @throws DataStorageManagerException
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long cleanupTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    LOGGER.log(Level.FINE, "tableCheckpoint dirtyThreshold: " + dirtyThreshold + ", {0}.{1} (pin: {2})", new Object[] { tableSpaceUUID, table.name, pin });
    if (createdInTransaction > 0) {
        LOGGER.log(Level.FINE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = dirtyThreshold > 0 ? (long) (dirtyThreshold * maxLogicalPageSize) : -1;
    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;
    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        checkPointRunning = true;
        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
        long flushedRecords = 0;
        List<CheckpointingPage> flushingDirtyPages = new ArrayList<>();
        List<CheckpointingPage> flushingSmallPages = new ArrayList<>();
        final Set<Long> flushedPages = new HashSet<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;
        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();
            /* Check dirtiness (flush even small pages here if they are dirty enough) */
            if (dirt > 0 && dirt >= dirtyPageThreshold) {
                flushingDirtyPages.add(new CheckpointingPage(pageId, dirt, dirt > 0));
                continue;
            }
            /* Check emptiness (with a quick-and-dirty check to avoid rewriting an unfillable page) */
            if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new CheckpointingPage(pageId, metadata.size, dirt > 0));
                continue;
            }
        }
        /* Clean dirtier first */
        flushingDirtyPages.sort(CheckpointingPage.DESCENDING_ORDER);
        /* Clean smaller first */
        flushingSmallPages.sort(CheckpointingPage.ASCENDING_ORDER);
        pageAnalysis = System.currentTimeMillis();
        /* Should the page currently being rebuilt be kept in memory or discarded? */
        boolean keepFlushedPageInMemory = false;
        /* New page actually rebuilt */
        DataPage buildingPage = createMutablePage(nextPageId++, 0, 0);
        if (!flushingDirtyPages.isEmpty()) {
            final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(pageAnalysis, cleanupTargetTime));
            /*
                 * Do not continue if we have used up all of the configured cleanup or checkpoint time, but
                 * still compact at least the smaller page (normally the leftover from the last checkpoint)
                 */
            CleanAndCompactResult dirtyResult = cleanAndCompactPages(flushingDirtyPages, buildingPage, keepFlushedPageInMemory, timeLimit);
            flushedDirtyPages = dirtyResult.flushedPages.size();
            flushedPages.addAll(dirtyResult.flushedPages);
            flushedRecords += dirtyResult.flushedRecords;
            keepFlushedPageInMemory = dirtyResult.keepFlushedPageInMemory;
            buildingPage = dirtyResult.buildingPage;
        }
        dirtyPagesFlush = System.currentTimeMillis();
        /* **************************** */
        /* *** Small pages handling *** */
        /* **************************** */
        /*
             * Small pages could be dirty pages too, so we need to check whether each page has already been
             * handled during the dirty pages cleanup. Small pages should be a really small set (normally just
             * the last flushed page), so this filter is neither critical nor heavy enough to require any
             * optimization
             */
        /* Filter already flushed dirty pages out of the flushing small pages (a page could be both "small" and "dirty") */
        flushingSmallPages = flushingSmallPages.stream().filter(wp -> !flushedPages.contains(wp.pageId)).collect(Collectors.toList());
        /*
             * If there is only one clean small page and no additional data to add, rebuilding the page makes
             * no sense: it is far too likely that we would rebuild an identical page!
             */
        if (/* Just one small page */
        flushingSmallPages.size() == 1 && /* Not dirty */
        !flushingSmallPages.get(0).dirty && /* No spare data remaining */
        buildingPage.isEmpty() && /* No new data */
        !newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent()) {
            /* Avoid small page compaction */
            flushingSmallPages.clear();
        }
        if (!flushingSmallPages.isEmpty()) {
            final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(dirtyPagesFlush, compactionTargetTime));
            /*
                 * Do not continue if we have used up all of the configured compaction or checkpoint time, but
                 * still compact at least the smaller page (normally the leftover from the last checkpoint)
                 */
            CleanAndCompactResult smallResult = cleanAndCompactPages(flushingSmallPages, buildingPage, keepFlushedPageInMemory, timeLimit);
            flushedSmallPages = smallResult.flushedPages.size();
            flushedPages.addAll(smallResult.flushedPages);
            flushedRecords += smallResult.flushedRecords;
            keepFlushedPageInMemory = smallResult.keepFlushedPageInMemory;
            buildingPage = smallResult.buildingPage;
        }
        smallPagesFlush = System.currentTimeMillis();
        /* ************************** */
        /* *** New pages handling *** */
        /* ************************** */
        /*
             * Retrieve the "current" new page. It can be held in memory because no writes are executed during
             * a checkpoint, so the page cannot change (nor be flushed due to an unload, because it isn't
             * known to the page replacement policy)
             */
        final long lastKnownPageId = currentDirtyRecordsPage.get();
        /*
             * Flush dirty records (and remaining records from the previous step).
             *
             * Any new page remaining here is unflushed and is not marked as dirty (had it been "dirty" it
             * would have been unloaded!). Just write the pages as they are.
             *
             * New empty pages won't be written
             */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            /* Flush every dirty page (but not the "current" dirty page if empty) */
            if (lastKnownPageId != dataPage.pageId || !dataPage.isEmpty()) {
                flushNewPageForCheckpoint(dataPage, buildingPage);
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }
        /*
             * Flush remaining records.
             *
             * Whether to keep the flushed page in memory is a "best guess" here: we don't know whether the
             * records that needed to be kept in memory were already flushed during new page filling (see
             * flushNewPageForCheckpoint). So we still use keepFlushedPageInMemory (possibly true) even if the
             * remaining records came from an old unused page.
             */
        if (!buildingPage.isEmpty()) {
            flushMutablePage(buildingPage, keepFlushedPageInMemory);
        } else {
            /* Remove unused empty building page from memory */
            pages.remove(buildingPage.pageId);
        }
        /*
             * Never, never, never revert an unused nextPageId! Even if we didn't use the booked nextPageId it
             * is better to throw it away: reverting a generated id would merely look "strange" for now but
             * would be simply wrong in the future (if checkpoints ever permit concurrent page creation, for
             * example..)
             */
        newPagesFlush = System.currentTimeMillis();
        if (flushedDirtyPages > 0 || flushedSmallPages > 0 || flushedNewPages > 0 || flushedRecords > 0) {
            LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }
        /* Checkpoint the key to page too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();
        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // Checkpoint at the same position of current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();
        pageSet.checkpointDone(flushedPages);
        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.longToByteArray(nextPrimaryKeyValue.get()), nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();
        /*
             * This can happen when, at checkpoint start, all pages are marked as dirty or immutable (immutable
             * or unloaded) due to a deletion: all pages will be removed and no page will remain alive.
             */
        if (newPages.isEmpty()) {
            /* allocateLivePage handles the correct load/unload policy for the last dirty page */
            allocateLivePage(lastKnownPageId);
        }
        checkPointRunning = false;
        result = new TableCheckpoint(table.name, sequenceNumber, actions);
        end = System.currentTimeMillis();
        if (flushedRecords > 0) {
            LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        checkpointLock.asWriteLock().unlock();
    }
    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }
    return result;
}
Also used : DataStorageManagerException(herddb.storage.DataStorageManagerException) ArrayList(java.util.ArrayList) LogSequenceNumber(herddb.log.LogSequenceNumber) DataPageMetaData(herddb.core.PageSet.DataPageMetaData) AtomicLong(java.util.concurrent.atomic.AtomicLong) TableStatus(herddb.storage.TableStatus) HashSet(java.util.HashSet)
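The page selection at the heart of this checkpoint is plain threshold arithmetic: a page is rewritten when its accumulated dirt reaches dirtyThreshold * maxLogicalPageSize, and compacted when its size is at most fillThreshold * maxLogicalPageSize while there is still room to add an average-sized record. The sketch below isolates that classification, plus an overflow-safe deadline sum in the spirit of sumOverflowWise (whose clamping semantics are assumed here, not taken from the herddb source).

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

public class CheckpointSelection {

    /** Minimal page metadata, mirroring the fields the selection loop reads. */
    static final class PageMeta {

        final long pageId;
        final long size;          // bytes of live data in the page
        final long dirt;          // bytes made stale by updates and deletes
        final long avgRecordSize;

        PageMeta(long pageId, long size, long dirt, long avgRecordSize) {
            this.pageId = pageId;
            this.size = size;
            this.dirt = dirt;
            this.avgRecordSize = avgRecordSize;
        }
    }

    /** Classify pages the way TableManager.checkpoint does: dirty first, then small. */
    static void classify(Collection<PageMeta> activePages, double dirtyThreshold, double fillThreshold,
            long maxLogicalPageSize, List<Long> dirtyOut, List<Long> smallOut) {
        final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
        final long dirtyPageThreshold = dirtyThreshold > 0 ? (long) (dirtyThreshold * maxLogicalPageSize) : -1;
        for (PageMeta meta : activePages) {
            // dirty enough to rewrite (even small pages qualify here)
            if (meta.dirt > 0 && meta.dirt >= dirtyPageThreshold) {
                dirtyOut.add(meta.pageId);
                continue;
            }
            // small and still fillable: worth compacting together with other pages
            if (meta.size <= fillPageThreshold
                    && maxLogicalPageSize - meta.avgRecordSize >= fillPageThreshold) {
                smallOut.add(meta.pageId);
            }
        }
    }

    /** Overflow-safe deadline arithmetic (assumed semantics of sumOverflowWise). */
    static long sumOverflowWise(long instant, long delta) {
        long sum = instant + delta;
        return sum < instant ? Long.MAX_VALUE : sum; // clamp instead of wrapping negative
    }

    public static void main(String[] args) {
        List<PageMeta> pages = List.of(
            new PageMeta(1, 900, 600, 90),   // heavily dirty -> rewrite
            new PageMeta(2, 200, 0, 80),     // small and clean -> compact
            new PageMeta(3, 950, 10, 95));   // full and barely dirty -> leave alone
        List<Long> dirty = new ArrayList<>();
        List<Long> small = new ArrayList<>();
        classify(pages, 0.5, 0.3, 1000, dirty, small);
        System.out.println("rewrite: " + dirty + ", compact: " + small);
    }
}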

Example 15 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

From class BookKeeperDataStorageManager, method fullTableScan.

@Override
public void fullTableScan(String tableSpace, String tableName, FullTableScanConsumer consumer) throws DataStorageManagerException {
    try {
        TableStatus status = getLatestTableStatus(tableSpace, tableName);
        fullTableScan(tableSpace, tableName, status, consumer);
    } catch (HerdDBInternalException err) {
        throw new DataStorageManagerException(err);
    }
}
Also used : DataStorageManagerException(herddb.storage.DataStorageManagerException) HerdDBInternalException(herddb.core.HerdDBInternalException) TableStatus(herddb.storage.TableStatus)
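Unlike the in-memory variant of Example 12, the BookKeeper-backed scan wraps any internal runtime failure into the checked DataStorageManagerException, so storage-layer callers deal with a single failure type. A minimal sketch of that exception-translation idiom follows; InternalFailure and StorageFailure are hypothetical stand-ins for HerdDBInternalException and DataStorageManagerException.

public class ExceptionTranslation {

    /** Hypothetical unchecked internal failure (stand-in for HerdDBInternalException). */
    static class InternalFailure extends RuntimeException {
        InternalFailure(String message) {
            super(message);
        }
    }

    /** Hypothetical checked boundary exception (stand-in for DataStorageManagerException). */
    static class StorageFailure extends Exception {
        StorageFailure(Throwable cause) {
            super(cause);
        }
    }

    /** Translate at the boundary: callers only ever have to catch StorageFailure. */
    public void fullTableScan(String tableName) throws StorageFailure {
        try {
            scanInternal(tableName);
        } catch (InternalFailure err) {
            throw new StorageFailure(err); // keep the original failure as the cause
        }
    }

    private void scanInternal(String tableName) {
        throw new InternalFailure("ledger unavailable for " + tableName);
    }
}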

Aggregations

TableStatus (herddb.storage.TableStatus): 16
DataStorageManagerException (herddb.storage.DataStorageManagerException): 10
IOException (java.io.IOException): 5
HerdDBInternalException (herddb.core.HerdDBInternalException): 4
LogSequenceNumber (herddb.log.LogSequenceNumber): 4
Record (herddb.model.Record): 4
FullTableScanConsumer (herddb.storage.FullTableScanConsumer): 4
DataPageMetaData (herddb.core.PageSet.DataPageMetaData): 3
ArrayList (java.util.ArrayList): 3
HashMap (java.util.HashMap): 3
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 3
AtomicLong (java.util.concurrent.atomic.AtomicLong): 3
PostCheckpointAction (herddb.core.PostCheckpointAction): 2
GetResult (herddb.model.GetResult): 2
DeleteStatement (herddb.model.commands.DeleteStatement): 2
GetStatement (herddb.model.commands.GetStatement): 2
InsertStatement (herddb.model.commands.InsertStatement): 2
UpdateStatement (herddb.model.commands.UpdateStatement): 2
ExtendedDataOutputStream (herddb.utils.ExtendedDataOutputStream): 2
Holder (herddb.utils.Holder): 2