Example 1 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

In the class TableManager, method start:

@Override
public void start() throws DataStorageManagerException {
    Map<Long, DataPageMetaData> activePagesAtBoot = new HashMap<>();
    bootSequenceNumber = LogSequenceNumber.START_OF_TIME;
    boolean requireLoadAtStartup = keyToPage.requireLoadAtStartup();
    if (requireLoadAtStartup) {
        // non persistent primary key index, we need a full table scan
        LOGGER.log(Level.SEVERE, "loading in memory all the keys for table {0}", new Object[] { table.name });
        dataStorageManager.fullTableScan(tableSpaceUUID, table.uuid, new FullTableScanConsumer() {

            Long currentPage;

            @Override
            public void acceptTableStatus(TableStatus tableStatus) {
                LOGGER.log(Level.SEVERE, "recovery table at " + tableStatus.sequenceNumber);
                nextPrimaryKeyValue.set(Bytes.toLong(tableStatus.nextPrimaryKeyValue, 0));
                nextPageId = tableStatus.nextPageId;
                bootSequenceNumber = tableStatus.sequenceNumber;
                activePagesAtBoot.putAll(tableStatus.activePages);
            }

            @Override
            public void startPage(long pageId) {
                currentPage = pageId;
            }

            @Override
            public void acceptRecord(Record record) {
                if (currentPage == null) {
                    throw new IllegalStateException();
                }
                keyToPage.put(record.key, currentPage);
            }

            @Override
            public void endPage() {
                currentPage = null;
            }

            @Override
            public void endTable() {
            }
        });
    } else {
        LOGGER.log(Level.SEVERE, "loading table {0}, uuid {1}", new Object[] { table.name, table.uuid });
        TableStatus tableStatus = dataStorageManager.getLatestTableStatus(tableSpaceUUID, table.uuid);
        LOGGER.log(Level.SEVERE, "recovery table at " + tableStatus.sequenceNumber);
        nextPrimaryKeyValue.set(Bytes.toLong(tableStatus.nextPrimaryKeyValue, 0));
        nextPageId = tableStatus.nextPageId;
        bootSequenceNumber = tableStatus.sequenceNumber;
        activePagesAtBoot.putAll(tableStatus.activePages);
    }
    keyToPage.start(bootSequenceNumber);
    dataStorageManager.cleanupAfterBoot(tableSpaceUUID, table.uuid, activePagesAtBoot.keySet());
    pageSet.setActivePagesAtBoot(activePagesAtBoot);
    initNewPage();
    LOGGER.log(Level.SEVERE, "loaded {0} keys for table {1}, newPageId {2}, nextPrimaryKeyValue {3}, activePages {4}", new Object[] { keyToPage.size(), table.name, nextPageId, nextPrimaryKeyValue.get(), pageSet.getActivePages() + "" });
    started = true;
}
Also used: DataPageMetaData (herddb.core.PageSet.DataPageMetaData), FullTableScanConsumer (herddb.storage.FullTableScanConsumer), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap), AtomicLong (java.util.concurrent.atomic.AtomicLong), TableStatus (herddb.storage.TableStatus), Record (herddb.model.Record)
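
To make the callback protocol concrete, here is a minimal sketch of a FullTableScanConsumer that only counts pages and records. The class name is illustrative and not part of herddb; the callback order it assumes (acceptTableStatus first, then startPage / acceptRecord / endPage for each page, endTable last) is the one the example above relies on.

import herddb.model.Record;
import herddb.storage.FullTableScanConsumer;
import herddb.storage.TableStatus;

public class CountingScanConsumer implements FullTableScanConsumer {

    private long pages;
    private long records;

    @Override
    public void acceptTableStatus(TableStatus tableStatus) {
        // status of the checkpoint the scan is based on, delivered before any page
    }

    @Override
    public void startPage(long pageId) {
        pages++;
    }

    @Override
    public void acceptRecord(Record record) {
        records++;
    }

    @Override
    public void endPage() {
    }

    @Override
    public void endTable() {
    }

    public long getPages() {
        return pages;
    }

    public long getRecords() {
        return records;
    }
}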

Example 2 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

In the class FileDataStorageManager, method getLatestTableStatus:

@Override
public TableStatus getLatestTableStatus(String tableSpace, String tableName) throws DataStorageManagerException {
    try {
        Path lastFile = getLastTableCheckpointFile(tableSpace, tableName);
        TableStatus latestStatus;
        if (lastFile == null) {
            latestStatus = new TableStatus(tableName, LogSequenceNumber.START_OF_TIME, Bytes.from_long(1).data, 1, Collections.emptyMap());
        } else {
            latestStatus = readTableStatusFromFile(lastFile);
        }
        return latestStatus;
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
}
Also used: Path (java.nio.file.Path), DataStorageManagerException (herddb.storage.DataStorageManagerException), TableStatus (herddb.storage.TableStatus), IOException (java.io.IOException)
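
A brief usage sketch, reusing the fields seen in Example 1 (dataStorageManager, tableSpaceUUID, table): a table that has never been checkpointed has no checkpoint file, so the method falls back to a synthetic status at LogSequenceNumber.START_OF_TIME with nextPrimaryKeyValue = 1 and nextPageId = 1, which boot code can detect.

TableStatus status = dataStorageManager.getLatestTableStatus(tableSpaceUUID, table.uuid);
if (LogSequenceNumber.START_OF_TIME.equals(status.sequenceNumber)) {
    // no checkpoint was ever taken for this table: boot from scratch
}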

Example 3 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

In the class TableManager, method checkpoint:

/**
 * @param dirtyThreshold fraction of the maximum logical page size (0.0-1.0): pages with more dirty data are rewritten
 * @param fillThreshold fraction of the maximum logical page size (0.0-1.0): pages filled less than this are compacted
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin whether to pin the checkpoint, protecting its files from cleanup
 * @return the checkpoint result, or null if the checkpoint was skipped
 * @throws DataStorageManagerException
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    if (createdInTransaction > 0) {
        LOGGER.log(Level.SEVERE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);
    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;
    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        checkPointRunning = true;
        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
        Map<Bytes, Record> buffer = new HashMap<>();
        long bufferPageSize = 0;
        long flushedRecords = 0;
        final List<WeightedPage> flushingDirtyPages = new ArrayList<>();
        final List<WeightedPage> flushingSmallPages = new ArrayList<>();
        final List<Long> flushedPages = new ArrayList<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;
        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();
            /*
             * Check dirtiness (flush even small pages here if they are dirty:
             * the small-pages flush below ignores dirty data handling).
             */
            if (dirt > 0 && (dirt >= dirtyPageThreshold || metadata.size <= fillPageThreshold)) {
                flushingDirtyPages.add(new WeightedPage(pageId, dirt));
                continue;
            }
            /* Check emptiness (with a quick-and-dirty check to avoid rewriting an unfillable page) */
            if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new WeightedPage(pageId, metadata.size));
                continue;
            }
        }
        /* Clean dirtier first */
        flushingDirtyPages.sort(WeightedPage.DESCENDING_ORDER);
        /* Clean smaller first */
        flushingSmallPages.sort(WeightedPage.ASCENDING_ORDER);
        pageAnalysis = System.currentTimeMillis();
        /* Rebuild dirty pages with only records to be kept */
        for (WeightedPage weighted : flushingDirtyPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedDirtyPages;
            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded dirty page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }
            for (Record record : records) {
                /* Skip the record if it has been modified or deleted */
                final Long currentPageId = keyToPage.get(record.key);
                if (currentPageId == null || !weighted.pageId.equals(currentPageId)) {
                    continue;
                }
                /* Flush the page if it would exceed max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clear the old buffer! It will be used in the generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }
                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }
            /* Do not continue if we have used up all configured checkpoint time */
            if (checkpointLimitInstant <= System.currentTimeMillis()) {
                break;
            }
        }
        dirtyPagesFlush = System.currentTimeMillis();
        /*
         * If there is only one small page and no additional data to add,
         * rebuilding the page makes no sense: it would very likely produce an
         * identical page!
         */
        if (flushingSmallPages.size() == 1 && buffer.isEmpty()) {
            boolean hasNewPagesData = newPages.values().stream().anyMatch(p -> !p.isEmpty());
            if (!hasNewPagesData) {
                flushingSmallPages.clear();
            }
        }
        final long compactionLimitInstant = sumOverflowWise(dirtyPagesFlush, compactionTargetTime);
        /* Rebuild too small pages */
        for (WeightedPage weighted : flushingSmallPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedSmallPages;
            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded small page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }
            for (Record record : records) {
                /* Flush the page if it would exceed max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clear the old buffer! It will be used in the generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }
                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }
            final long now = System.currentTimeMillis();
            /*
             * Do not continue if we have used up all of the configured compaction or
             * checkpoint time, but still compact at least the smallest page (normally
             * the leftover from the last checkpoint).
             */
            if (compactionLimitInstant <= now || checkpointLimitInstant <= now) {
                break;
            }
        }
        flushingSmallPages.clear();
        smallPagesFlush = System.currentTimeMillis();
        /*
         * Flush the new pages and any records remaining from the previous steps.
         *
         * Any new page remaining here is unflushed and is not marked as dirty
         * (pages marked "dirty" have already been unloaded). Just write the pages
         * as they are.
         *
         * New empty pages won't be written.
         */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            if (!dataPage.isEmpty()) {
                bufferPageSize -= flushNewPageForCheckpoint(dataPage, buffer);
                // dataPage.makeImmutable();
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }
        /* Flush remaining records */
        if (!buffer.isEmpty()) {
            createImmutablePage(buffer, bufferPageSize);
            flushedRecords += buffer.size();
            bufferPageSize = 0;
        /* Do not clear the old buffer! It will be used in the generated pages to avoid too many copies! */
        }
        newPagesFlush = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }
        /* Checkpoint the key to page too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();
        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // Checkpoint at the same position as the current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();
        pageSet.checkpointDone(flushedPages);
        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.from_long(nextPrimaryKeyValue.get()).data, nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();
        /* Remove the flushed pages that were handled */
        for (Long pageId : flushedPages) {
            final DataPage page = pages.remove(pageId);
            /* The current dirty records page isn't known to the page replacement policy */
            if (page != null && currentDirtyRecordsPage.get() != page.pageId) {
                pageReplacementPolicy.remove(page);
            }
        }
        /*
         * This can happen when, at checkpoint start, all pages are marked as dirty or
         * immutable (immutable or unloaded) due to a deletion: all pages will be
         * removed and no page will remain alive.
         */
        if (newPages.isEmpty()) {
            /* allocateLivePage handles the correct policy load/unload of the last dirty page */
            allocateLivePage(currentDirtyRecordsPage.get());
        }
        checkPointRunning = false;
        result = new TableCheckpoint(table.name, sequenceNumber, actions);
        end = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        checkpointLock.asWriteLock().unlock();
    }
    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }
    return result;
}
Also used: DataStorageManagerException (herddb.storage.DataStorageManagerException), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), DataPageMetaData (herddb.core.PageSet.DataPageMetaData), Bytes (herddb.utils.Bytes), TableStatus (herddb.storage.TableStatus), Record (herddb.model.Record), LogSequenceNumber (herddb.log.LogSequenceNumber), AtomicLong (java.util.concurrent.atomic.AtomicLong)
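
The page-selection loop above reduces to two thresholds derived from maxLogicalPageSize. The following standalone sketch mirrors those predicates; the class, enum, and method names are hypothetical, and only the two conditions are taken verbatim from the loop over activePages.

public class PageSelectionSketch {

    enum Action { FLUSH_DIRTY, COMPACT_SMALL, KEEP }

    static Action classify(long dirt, long size, long avgRecordSize,
            long maxLogicalPageSize, double dirtyThreshold, double fillThreshold) {
        long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);
        long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
        // dirty pages are rewritten when their dirty data crosses the threshold,
        // or when the page is small anyway
        if (dirt > 0 && (dirt >= dirtyPageThreshold || size <= fillPageThreshold)) {
            return Action.FLUSH_DIRTY;
        }
        // clean but under-filled pages are compacted only if an average-sized record
        // could still fit: rewriting an unfillable page would be wasted work
        if (size <= fillPageThreshold && maxLogicalPageSize - avgRecordSize >= fillPageThreshold) {
            return Action.COMPACT_SMALL;
        }
        return Action.KEEP;
    }

    public static void main(String[] args) {
        // illustrative values: 1 MB pages, rewrite above 25% dirt, compact below 75% fill
        long maxLogicalPageSize = 1024 * 1024;
        // a half-full page carrying ~300 KB of dirty data is a dirty-flush candidate
        System.out.println(classify(300_000, 512_000, 100, maxLogicalPageSize, 0.25, 0.75));
    }
}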

Example 4 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

In the class FileDataStorageManager, method fullTableScan:

@Override
public void fullTableScan(String tableSpace, String tableName, FullTableScanConsumer consumer) throws DataStorageManagerException {
    try {
        TableStatus status = getLatestTableStatus(tableSpace, tableName);
        fullTableScan(tableSpace, tableName, status, consumer);
    } catch (HerdDBInternalException err) {
        throw new DataStorageManagerException(err);
    }
}
Also used: DataStorageManagerException (herddb.storage.DataStorageManagerException), HerdDBInternalException (herddb.core.HerdDBInternalException), TableStatus (herddb.storage.TableStatus)
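
A short usage sketch, reusing the hypothetical CountingScanConsumer from Example 1: fullTableScan resolves the latest TableStatus itself, so the caller only supplies the consumer.

CountingScanConsumer consumer = new CountingScanConsumer();
dataStorageManager.fullTableScan(tableSpaceUUID, table.uuid, consumer);
System.out.println("scanned " + consumer.getRecords() + " records in " + consumer.getPages() + " pages");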

Example 5 with TableStatus

Use of herddb.storage.TableStatus in project herddb by diennea.

In the class FileDataStorageManager, method tableCheckpoint:

@Override
public List<PostCheckpointAction> tableCheckpoint(String tableSpace, String tableName, TableStatus tableStatus, boolean pin) throws DataStorageManagerException {
    LogSequenceNumber logPosition = tableStatus.sequenceNumber;
    Path dir = getTableDirectory(tableSpace, tableName);
    Path checkpointFile = getTableCheckPointsFile(dir, logPosition);
    try {
        Files.createDirectories(dir);
        if (Files.isRegularFile(checkpointFile)) {
            TableStatus actualStatus = readTableStatusFromFile(checkpointFile);
            if (actualStatus != null && actualStatus.equals(tableStatus)) {
                LOGGER.log(Level.INFO, "tableCheckpoint " + tableSpace + ", " + tableName + ": " + tableStatus + " already saved on file " + checkpointFile);
                return Collections.emptyList();
            }
        }
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    Path parent = getParent(checkpointFile);
    Path checkpointFileTemp = parent.resolve(checkpointFile.getFileName() + ".tmp");
    LOGGER.log(Level.FINE, "tableCheckpoint " + tableSpace + ", " + tableName + ": " + tableStatus + " to file " + checkpointFile);
    try (ManagedFile file = ManagedFile.open(checkpointFileTemp);
        SimpleBufferedOutputStream buffer = new SimpleBufferedOutputStream(file.getOutputStream(), COPY_BUFFERS_SIZE);
        XXHash64Utils.HashingOutputStream oo = new XXHash64Utils.HashingOutputStream(buffer);
        ExtendedDataOutputStream dataOutputKeys = new ExtendedDataOutputStream(oo)) {
        // version
        dataOutputKeys.writeVLong(1);
        // flags for future implementations
        dataOutputKeys.writeVLong(0);
        tableStatus.serialize(dataOutputKeys);
        dataOutputKeys.writeLong(oo.hash());
        dataOutputKeys.flush();
        file.sync();
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    try {
        Files.move(checkpointFileTemp, checkpointFile, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    /* Checkpoint pinning */
    final Map<Long, Integer> pins = pinTableAndGetPages(tableSpace, tableName, tableStatus, pin);
    final Set<LogSequenceNumber> checkpoints = pinTableAndGetCheckpoints(tableSpace, tableName, tableStatus, pin);
    long maxPageId = tableStatus.activePages.keySet().stream().max(Comparator.naturalOrder()).orElse(Long.MAX_VALUE);
    List<PostCheckpointAction> result = new ArrayList<>();
    // we can drop old page files now
    List<Path> pageFiles = getTablePageFiles(tableSpace, tableName);
    for (Path p : pageFiles) {
        long pageId = getPageId(p);
        LOGGER.log(Level.FINEST, "checkpoint file {0} pageId {1}", new Object[] { p.toAbsolutePath(), pageId });
        if (pageId > 0 && !pins.containsKey(pageId) && !tableStatus.activePages.containsKey(pageId) && pageId < maxPageId) {
            LOGGER.log(Level.FINEST, "checkpoint file " + p.toAbsolutePath() + " pageId " + pageId + ". will be deleted after checkpoint end");
            result.add(new DeleteFileAction(tableName, "delete page " + pageId + " file " + p.toAbsolutePath(), p));
        }
    }
    try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
        for (Path p : stream) {
            if (isTableOrIndexCheckpointsFile(p) && !p.equals(checkpointFile)) {
                TableStatus status = readTableStatusFromFile(p);
                if (logPosition.after(status.sequenceNumber) && !checkpoints.contains(status.sequenceNumber)) {
                    LOGGER.log(Level.FINEST, "checkpoint metadata file " + p.toAbsolutePath() + ". will be deleted after checkpoint end");
                    result.add(new DeleteFileAction(tableName, "delete checkpoint metadata file " + p.toAbsolutePath(), p));
                }
            }
        }
    } catch (IOException err) {
        LOGGER.log(Level.SEVERE, "Could not list table dir " + dir, err);
    }
    return result;
}
Also used: Path (java.nio.file.Path), DataStorageManagerException (herddb.storage.DataStorageManagerException), ArrayList (java.util.ArrayList), LogSequenceNumber (herddb.log.LogSequenceNumber), IOException (java.io.IOException), XXHash64Utils (herddb.utils.XXHash64Utils), ManagedFile (herddb.utils.ManagedFile), ExtendedDataOutputStream (herddb.utils.ExtendedDataOutputStream), PostCheckpointAction (herddb.core.PostCheckpointAction), SimpleBufferedOutputStream (herddb.utils.SimpleBufferedOutputStream), TableStatus (herddb.storage.TableStatus)
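
The durability pattern above (write a ".tmp" sibling, checksum and sync it, then atomically rename it over the target) can be reproduced with plain java.nio. A minimal sketch, assuming the payload is already serialized to a byte[] and leaving out herddb's ManagedFile, buffering, and XXHash64 checksum:

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public final class AtomicFileWrite {

    public static void writeAtomically(Path target, byte[] payload) throws IOException {
        Path tmp = target.resolveSibling(target.getFileName() + ".tmp");
        try (OutputStream out = Files.newOutputStream(tmp)) {
            out.write(payload);
            out.flush();
            // herddb additionally fsyncs (file.sync()) before the rename
        }
        // readers never observe a half-written checkpoint file: the move is atomic
        Files.move(tmp, target,
                StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
    }
}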

Aggregations

TableStatus (herddb.storage.TableStatus): 10
DataStorageManagerException (herddb.storage.DataStorageManagerException): 6
LogSequenceNumber (herddb.log.LogSequenceNumber): 3
Record (herddb.model.Record): 3
IOException (java.io.IOException): 3
HerdDBInternalException (herddb.core.HerdDBInternalException): 2
DataPageMetaData (herddb.core.PageSet.DataPageMetaData): 2
FullTableScanConsumer (herddb.storage.FullTableScanConsumer): 2
Path (java.nio.file.Path): 2
ArrayList (java.util.ArrayList): 2
HashMap (java.util.HashMap): 2
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2
AtomicLong (java.util.concurrent.atomic.AtomicLong): 2
PostCheckpointAction (herddb.core.PostCheckpointAction): 1
GetResult (herddb.model.GetResult): 1
DeleteStatement (herddb.model.commands.DeleteStatement): 1
GetStatement (herddb.model.commands.GetStatement): 1
InsertStatement (herddb.model.commands.InsertStatement): 1
UpdateStatement (herddb.model.commands.UpdateStatement): 1
Bytes (herddb.utils.Bytes): 1