Use of herddb.storage.TableStatus in project herddb by diennea.
Class MemoryDataStorageManager, method getLatestTableStatus.
@Override
public TableStatus getLatestTableStatus(String tableSpace, String tableName) throws DataStorageManagerException {
    LogSequenceNumber max = null;
    String prefix = tableSpace + "." + tableName + "_";
    for (String status : tableStatuses.keySet()) {
        if (status.startsWith(prefix)) {
            // parse the log position encoded in the key of the stored status
            final LogSequenceNumber log = evaluateLogSequenceNumber(status);
            if (log != null) {
                if (max == null || log.after(max)) {
                    max = log;
                }
            }
        }
    }
    TableStatus latestStatus;
    if (max == null) {
        latestStatus = TableStatus.buildTableStatusForNewCreatedTable(tableName);
    } else {
        byte[] data = tableStatuses.get(checkpointName(tableSpace, tableName, max));
        if (data == null) {
            latestStatus = TableStatus.buildTableStatusForNewCreatedTable(tableName);
        } else {
            try {
                try (InputStream input = new SimpleByteArrayInputStream(data);
                     ExtendedDataInputStream dataIn = new ExtendedDataInputStream(input)) {
                    latestStatus = TableStatus.deserialize(dataIn);
                }
            } catch (IOException err) {
                throw new DataStorageManagerException(err);
            }
        }
    }
    return latestStatus;
}
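For orientation, here is a minimal usage sketch (not part of the project sources) showing how a caller could obtain the latest persisted checkpoint position of a table through this method. The package names and the public sequenceNumber field of TableStatus are assumptions inferred from the snippets on this page.

import herddb.log.LogSequenceNumber;
import herddb.storage.DataStorageManager;
import herddb.storage.DataStorageManagerException;
import herddb.storage.TableStatus;

public class LatestTableStatusExample {

    // Returns the log position of the most recent checkpoint of the given table,
    // falling back to the "new created table" status when no checkpoint exists yet
    // (see buildTableStatusForNewCreatedTable above).
    public static LogSequenceNumber latestCheckpointPosition(
            DataStorageManager storage, String tableSpace, String tableName) throws DataStorageManagerException {
        TableStatus status = storage.getLatestTableStatus(tableSpace, tableName);
        return status.sequenceNumber;
    }
}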
Use of herddb.storage.TableStatus in project herddb by diennea.
Class MemoryDataStorageManager, method fullTableScan.
@Override
public void fullTableScan(String tableSpace, String tableName, FullTableScanConsumer consumer) throws DataStorageManagerException {
    TableStatus status = getLatestTableStatus(tableSpace, tableName);
    fullTableScan(tableSpace, tableName, status, consumer);
}
Use of herddb.storage.TableStatus in project herddb by diennea.
Class MemoryDataStorageManager, method fullTableScan.
@Override
public void fullTableScan(String tableSpace, String tableName, LogSequenceNumber sequenceNumber, FullTableScanConsumer consumer) throws DataStorageManagerException {
    TableStatus status = getTableStatus(tableSpace, tableName, sequenceNumber);
    fullTableScan(tableSpace, tableName, status, consumer);
}
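A hedged sketch of how these two overloads relate at a call site (illustrative only, not project code): the consumer instance is passed in from elsewhere because its callbacks are not shown in the snippets above, and the import locations are assumed.

import herddb.log.LogSequenceNumber;
import herddb.storage.DataStorageManager;
import herddb.storage.DataStorageManagerException;
import herddb.storage.FullTableScanConsumer;

public class FullTableScanExample {

    // Scans the latest checkpointed data of a table, then scans it again as of an
    // older, explicitly chosen checkpoint position.
    public static void scanLatestAndHistorical(
            DataStorageManager storage, String tableSpace, String tableName,
            LogSequenceNumber olderCheckpoint, FullTableScanConsumer consumer) throws DataStorageManagerException {
        // Overload without a sequence number: resolves the latest TableStatus internally
        storage.fullTableScan(tableSpace, tableName, consumer);
        // Overload with a sequence number: resolves the TableStatus recorded at that log position
        storage.fullTableScan(tableSpace, tableName, olderCheckpoint, consumer);
    }
}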
Use of herddb.storage.TableStatus in project herddb by diennea.
Class TableManager, method checkpoint.
/**
 * Executes a checkpoint of this table.
 *
 * @param dirtyThreshold fraction of the maximum logical page size of dirty data beyond which a page gets rewritten
 * @param fillThreshold fraction of the maximum logical page size under which a page is considered small enough to be compacted
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param cleanupTargetTime cleanup target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin whether to pin the produced checkpoint data on the storage manager
 * @return the resulting table checkpoint, or {@code null} if the checkpoint was skipped because the table was created in a still uncommitted transaction
 * @throws DataStorageManagerException
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long cleanupTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    LOGGER.log(Level.FINE, "tableCheckpoint dirtyThreshold: " + dirtyThreshold + ", {0}.{1} (pin: {2})", new Object[] { tableSpaceUUID, table.name, pin });
    if (createdInTransaction > 0) {
        LOGGER.log(Level.FINE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = dirtyThreshold > 0 ? (long) (dirtyThreshold * maxLogicalPageSize) : -1;
    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;
    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        checkPointRunning = true;
        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
        long flushedRecords = 0;
        List<CheckpointingPage> flushingDirtyPages = new ArrayList<>();
        List<CheckpointingPage> flushingSmallPages = new ArrayList<>();
        final Set<Long> flushedPages = new HashSet<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;
        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();
            /* Check dirtiness (flush even small pages here if they are dirty enough) */
            if (dirt > 0 && dirt >= dirtyPageThreshold) {
                flushingDirtyPages.add(new CheckpointingPage(pageId, dirt, dirt > 0));
                continue;
            }
            /* Check emptiness (with a really quick-and-dirty check to avoid rewriting an unfillable page) */
            if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new CheckpointingPage(pageId, metadata.size, dirt > 0));
                continue;
            }
        }
        /* Clean dirtier pages first */
        flushingDirtyPages.sort(CheckpointingPage.DESCENDING_ORDER);
        /* Clean smaller pages first */
        flushingSmallPages.sort(CheckpointingPage.ASCENDING_ORDER);
        pageAnalysis = System.currentTimeMillis();
        /* Should the newly rebuilt page currently in progress be kept in memory or discarded? */
        boolean keepFlushedPageInMemory = false;
        /* New page actually being rebuilt */
        DataPage buildingPage = createMutablePage(nextPageId++, 0, 0);
        if (!flushingDirtyPages.isEmpty()) {
            final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(pageAnalysis, cleanupTargetTime));
            /*
             * Do not continue if we have used up all of the configured cleanup or checkpoint time (but still
             * compact at least the smallest page, normally the leftover from the last checkpoint)
             */
            CleanAndCompactResult dirtyResult = cleanAndCompactPages(flushingDirtyPages, buildingPage, keepFlushedPageInMemory, timeLimit);
            flushedDirtyPages = dirtyResult.flushedPages.size();
            flushedPages.addAll(dirtyResult.flushedPages);
            flushedRecords += dirtyResult.flushedRecords;
            keepFlushedPageInMemory = dirtyResult.keepFlushedPageInMemory;
            buildingPage = dirtyResult.buildingPage;
        }
        dirtyPagesFlush = System.currentTimeMillis();
        /* **************************** */
        /* *** Small pages handling *** */
        /* **************************** */
        /*
         * Small pages could be dirty pages too, so we need to check whether each page has already been handled
         * during dirty pages cleanup. Small pages should be a really small set (normally just the last flushed
         * page), so the filter is not critical or heavy enough to require any optimization
         */
        /* Filter pages already flushed as dirty out of the small pages to flush (a page can be both "small" and "dirty") */
        flushingSmallPages = flushingSmallPages.stream().filter(wp -> !flushedPages.contains(wp.pageId)).collect(Collectors.toList());
        /*
         * If there is only one clean small page without additional data to add, rebuilding the page makes no
         * sense: it is too likely that an identical page would be rebuilt!
         */
        if (/* Just one small page */
                flushingSmallPages.size() == 1 && /* Not dirty */
                !flushingSmallPages.get(0).dirty && /* No spare data remaining */
                buildingPage.isEmpty() && /* No new data */
                !newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent()) {
            /* Avoid small page compaction */
            flushingSmallPages.clear();
        }
        if (!flushingSmallPages.isEmpty()) {
            final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(dirtyPagesFlush, compactionTargetTime));
            /*
             * Do not continue if we have used up all of the configured compaction or checkpoint time (but still
             * compact at least the smallest page, normally the leftover from the last checkpoint)
             */
            CleanAndCompactResult smallResult = cleanAndCompactPages(flushingSmallPages, buildingPage, keepFlushedPageInMemory, timeLimit);
            flushedSmallPages = smallResult.flushedPages.size();
            flushedPages.addAll(smallResult.flushedPages);
            flushedRecords += smallResult.flushedRecords;
            keepFlushedPageInMemory = smallResult.keepFlushedPageInMemory;
            buildingPage = smallResult.buildingPage;
        }
        smallPagesFlush = System.currentTimeMillis();
        /* ************************** */
        /* *** New pages handling *** */
        /* ************************** */
        /*
         * Retrieve the "current" new page. It can be held in memory because no writes are executed during
         * a checkpoint, thus the page cannot change (nor be flushed due to an unload, because it isn't
         * known to the page replacement policy)
         */
        final long lastKnownPageId = currentDirtyRecordsPage.get();
        /*
         * Flush dirty records (and remaining records from the previous step).
         *
         * Any new page remaining here is unflushed and is not marked as dirty (had it been "dirty" it would have been unloaded!).
         * Just write the pages as they are.
         *
         * New empty pages won't be written
         */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            /* Flush every dirty page (but not the "current" dirty page if empty) */
            if (lastKnownPageId != dataPage.pageId || !dataPage.isEmpty()) {
                flushNewPageForCheckpoint(dataPage, buildingPage);
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }
        /*
         * Flush remaining records.
         *
         * Whether to keep the flushed page in memory is a "best guess" here: we don't know if records that
         * needed to be kept in memory have already been flushed during new page filling (see
         * flushNewPageForCheckpoint). So we still use keepFlushedPageInMemory (possibly true) even if the
         * remaining records came from an old unused page.
         */
        if (!buildingPage.isEmpty()) {
            flushMutablePage(buildingPage, keepFlushedPageInMemory);
        } else {
            /* Remove the unused empty building page from memory */
            pages.remove(buildingPage.pageId);
        }
        /*
         * Never, never, never revert an unused nextPageId! Even if we didn't use the booked nextPageId it is better to
         * throw it away: reverting a generated id would merely look "strange" for now but would be simply wrong in the future
         * (if checkpoints permit concurrent page creation, for example..)
         */
        newPagesFlush = System.currentTimeMillis();
        if (flushedDirtyPages > 0 || flushedSmallPages > 0 || flushedNewPages > 0 || flushedRecords > 0) {
            LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }
        /* Checkpoint the key-to-page mapping too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();
        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // Checkpoint at the same position as the current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();
        pageSet.checkpointDone(flushedPages);
        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.longToByteArray(nextPrimaryKeyValue.get()), nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();
        /*
         * This can happen when, at checkpoint start, all pages are marked as dirty or immutable (immutable or
         * unloaded) due to a deletion: all pages will be removed and no page will remain alive.
         */
        if (newPages.isEmpty()) {
            /* allocateLivePage handles the correct load/unload policy for the last dirty page */
            allocateLivePage(lastKnownPageId);
        }
        checkPointRunning = false;
        result = new TableCheckpoint(table.name, sequenceNumber, actions);
        end = System.currentTimeMillis();
        if (flushedRecords > 0) {
            LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        }
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        checkpointLock.asWriteLock().unlock();
    }
    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }
    return result;
}
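The first phase of the checkpoint above classifies active pages using two thresholds derived from maxLogicalPageSize: pages whose dirty bytes exceed the dirty threshold are rewritten, and remaining pages that are small (and still fillable) are queued for compaction. The standalone sketch below uses hypothetical stand-in types, not HerdDB classes, to illustrate that classification rule in isolation.

import java.util.ArrayList;
import java.util.List;

public class PageClassificationSketch {

    // Minimal stand-in for the per-page metadata used by the checkpoint (hypothetical type).
    static final class PageMeta {
        final long pageId;
        final long size;          // bytes currently used by live records
        final long dirt;          // bytes of dirty (superseded) records
        final long avgRecordSize; // average record size on the page

        PageMeta(long pageId, long size, long dirt, long avgRecordSize) {
            this.pageId = pageId;
            this.size = size;
            this.dirt = dirt;
            this.avgRecordSize = avgRecordSize;
        }
    }

    // Mirrors the selection logic of the checkpoint: pages dirty beyond dirtyThreshold are rewritten,
    // remaining pages small enough (and still fillable) are queued for compaction.
    static void classify(List<PageMeta> activePages, long maxLogicalPageSize,
                         double dirtyThreshold, double fillThreshold,
                         List<PageMeta> toRewrite, List<PageMeta> toCompact) {
        final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
        final long dirtyPageThreshold = dirtyThreshold > 0 ? (long) (dirtyThreshold * maxLogicalPageSize) : -1;
        for (PageMeta page : activePages) {
            if (page.dirt > 0 && page.dirt >= dirtyPageThreshold) {
                toRewrite.add(page);
                continue;
            }
            if (page.size <= fillPageThreshold && maxLogicalPageSize - page.avgRecordSize >= fillPageThreshold) {
                toCompact.add(page);
            }
        }
    }

    public static void main(String[] args) {
        List<PageMeta> rewrite = new ArrayList<>();
        List<PageMeta> compact = new ArrayList<>();
        List<PageMeta> pages = List.of(
                new PageMeta(1, 900_000, 600_000, 100),  // very dirty: rewritten
                new PageMeta(2, 100_000, 0, 100),        // small and clean: compacted
                new PageMeta(3, 950_000, 10_000, 100));  // full and barely dirty: left alone
        classify(pages, 1_000_000, 0.5, 0.3, rewrite, compact);
        System.out.println("rewrite=" + rewrite.size() + " compact=" + compact.size());
    }
}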
Use of herddb.storage.TableStatus in project herddb by diennea.
Class BookKeeperDataStorageManager, method fullTableScan.
@Override
public void fullTableScan(String tableSpace, String tableName, FullTableScanConsumer consumer) throws DataStorageManagerException {
    try {
        TableStatus status = getLatestTableStatus(tableSpace, tableName);
        fullTableScan(tableSpace, tableName, status, consumer);
    } catch (HerdDBInternalException err) {
        throw new DataStorageManagerException(err);
    }
}
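Because this implementation rewraps HerdDBInternalException, callers only need to handle DataStorageManagerException. A small call-site sketch follows; the helper class and its names are illustrative and not part of the project.

import herddb.storage.DataStorageManager;
import herddb.storage.DataStorageManagerException;
import herddb.storage.FullTableScanConsumer;

public class ScanWithErrorHandlingExample {

    // Attempts a full scan and reports whether it completed; any internal BookKeeper failure
    // surfaces here already wrapped into a DataStorageManagerException.
    public static boolean tryFullScan(DataStorageManager storage, String tableSpace,
                                      String tableName, FullTableScanConsumer consumer) {
        try {
            storage.fullTableScan(tableSpace, tableName, consumer);
            return true;
        } catch (DataStorageManagerException err) {
            System.err.println("full table scan of " + tableName + " failed: " + err);
            return false;
        }
    }
}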