Use of herddb.core.PageSet.DataPageMetaData in project herddb by diennea.
In class TableStatus, method deserialize:
public static TableStatus deserialize(ExtendedDataInputStream in) throws IOException {
    // version
    long version = in.readVLong();
    // flags for future implementations
    long flags = in.readVLong();
    if (version != 1 || flags != 0) {
        throw new DataStorageManagerException("corrupted table status");
    }
    String tableName = in.readUTF();
    long ledgerId = in.readLong();
    long offset = in.readLong();
    long nextPageId = in.readLong();
    byte[] nextPrimaryKeyValue = in.readArray();
    int numActivePages = in.readVInt();
    Map<Long, DataPageMetaData> activePages = new HashMap<>(numActivePages);
    for (int i = 0; i < numActivePages; i++) {
        activePages.put(in.readVLong(), DataPageMetaData.deserialize(in));
    }
    return new TableStatus(tableName, new LogSequenceNumber(ledgerId, offset), nextPrimaryKeyValue, nextPageId, activePages);
}
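The read order above fully determines the on-disk layout, so the matching writer can be inferred from it. Below is a minimal sketch of that counterpart; the ExtendedDataOutputStream method names (writeVLong, writeUTF, writeLong, writeArray, writeVInt) and the serialize counterpart on DataPageMetaData are assumptions that simply mirror the reader shown above, not verified project APIs.

// Sketch only: writes the same layout that deserialize(...) reads above.
// The ExtendedDataOutputStream method names are assumed to mirror the reader API.
public void serialize(ExtendedDataOutputStream out) throws IOException {
    out.writeVLong(1);                      // version
    out.writeVLong(0);                      // flags, reserved for future implementations
    out.writeUTF(tableName);
    out.writeLong(sequenceNumber.ledgerId);
    out.writeLong(sequenceNumber.offset);
    out.writeLong(nextPageId);
    out.writeArray(nextPrimaryKeyValue);
    out.writeVInt(activePages.size());
    for (Map.Entry<Long, DataPageMetaData> entry : activePages.entrySet()) {
        out.writeVLong(entry.getKey());
        entry.getValue().serialize(out);    // assumed counterpart of DataPageMetaData.deserialize
    }
}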
Use of herddb.core.PageSet.DataPageMetaData in project herddb by diennea.
In class TableManager, method start:
@Override
public void start() throws DataStorageManagerException {
    Map<Long, DataPageMetaData> activePagesAtBoot = new HashMap<>();
    bootSequenceNumber = LogSequenceNumber.START_OF_TIME;
    boolean requireLoadAtStartup = keyToPage.requireLoadAtStartup();
    if (requireLoadAtStartup) {
        // non persistent primary key index, we need a full table scan
        LOGGER.log(Level.SEVERE, "loading in memory all the keys for table {0}", new Object[] { table.name });
        dataStorageManager.fullTableScan(tableSpaceUUID, table.uuid, new FullTableScanConsumer() {

            Long currentPage;

            @Override
            public void acceptTableStatus(TableStatus tableStatus) {
                LOGGER.log(Level.SEVERE, "recovery table at " + tableStatus.sequenceNumber);
                nextPrimaryKeyValue.set(Bytes.toLong(tableStatus.nextPrimaryKeyValue, 0));
                nextPageId = tableStatus.nextPageId;
                bootSequenceNumber = tableStatus.sequenceNumber;
                activePagesAtBoot.putAll(tableStatus.activePages);
            }

            @Override
            public void startPage(long pageId) {
                currentPage = pageId;
            }

            @Override
            public void acceptRecord(Record record) {
                // a record must always arrive between startPage and endPage
                if (currentPage == null) {
                    throw new IllegalStateException();
                }
                keyToPage.put(record.key, currentPage);
            }

            @Override
            public void endPage() {
                currentPage = null;
            }

            @Override
            public void endTable() {
            }
        });
    } else {
        LOGGER.log(Level.SEVERE, "loading table {0}, uuid {1}", new Object[] { table.name, table.uuid });
        TableStatus tableStatus = dataStorageManager.getLatestTableStatus(tableSpaceUUID, table.uuid);
        LOGGER.log(Level.SEVERE, "recovery table at " + tableStatus.sequenceNumber);
        nextPrimaryKeyValue.set(Bytes.toLong(tableStatus.nextPrimaryKeyValue, 0));
        nextPageId = tableStatus.nextPageId;
        bootSequenceNumber = tableStatus.sequenceNumber;
        activePagesAtBoot.putAll(tableStatus.activePages);
    }
    keyToPage.start(bootSequenceNumber);
    dataStorageManager.cleanupAfterBoot(tableSpaceUUID, table.uuid, activePagesAtBoot.keySet());
    pageSet.setActivePagesAtBoot(activePagesAtBoot);
    initNewPage();
    LOGGER.log(Level.SEVERE, "loaded {0} keys for table {1}, newPageId {2}, nextPrimaryKeyValue {3}, activePages {4}",
            new Object[] { keyToPage.size(), table.name, nextPageId, nextPrimaryKeyValue.get(), pageSet.getActivePages() + "" });
    started = true;
}
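The anonymous class above shows the whole FullTableScanConsumer callback contract used during boot: acceptTableStatus, then startPage / acceptRecord / endPage for each page, then endTable. As an illustration, here is a minimal stand-alone sketch of a consumer that only counts records per page; it is a hypothetical diagnostic helper, not part of the project, and assumes the same imports as the snippet above.

// Minimal sketch: counts records per page during a full table scan.
// Uses only the callbacks visible in the anonymous consumer above.
class RecordCountingConsumer implements FullTableScanConsumer {

    final Map<Long, Integer> recordsPerPage = new HashMap<>();
    private Long currentPage;

    @Override
    public void acceptTableStatus(TableStatus tableStatus) {
        // nothing to do here, we only count records
    }

    @Override
    public void startPage(long pageId) {
        currentPage = pageId;
        recordsPerPage.putIfAbsent(pageId, 0);
    }

    @Override
    public void acceptRecord(Record record) {
        if (currentPage == null) {
            throw new IllegalStateException("record received outside of a page");
        }
        recordsPerPage.merge(currentPage, 1, Integer::sum);
    }

    @Override
    public void endPage() {
        currentPage = null;
    }

    @Override
    public void endTable() {
        // scan complete, recordsPerPage now holds the per-page counts
    }
}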
Use of herddb.core.PageSet.DataPageMetaData in project herddb by diennea.
In class TableManager, method checkpoint:
/**
 * @param dirtyThreshold       fraction of the logical page size above which a page's dirty bytes force a rewrite
 * @param fillThreshold        fraction of the logical page size under which a page is considered for compaction
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin                  whether to pin the resulting checkpoint
 * @return the checkpoint result, or null if the table was created in a still uncommitted transaction
 * @throws DataStorageManagerException
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    if (createdInTransaction > 0) {
        LOGGER.log(Level.SEVERE, "checkpoint for table " + table.name + " skipped, "
                + "this table was created in transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);

    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;

    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;

    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        checkPointRunning = true;

        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();

        Map<Bytes, Record> buffer = new HashMap<>();
        long bufferPageSize = 0;
        long flushedRecords = 0;

        final List<WeightedPage> flushingDirtyPages = new ArrayList<>();
        final List<WeightedPage> flushingSmallPages = new ArrayList<>();

        final List<Long> flushedPages = new ArrayList<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;

        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();

            /*
             * Check dirtiness (flush here even small pages if dirty. Small pages flush IGNORES dirty data
             * handling).
             */
            if (dirt > 0 && (dirt >= dirtyPageThreshold || metadata.size <= fillPageThreshold)) {
                flushingDirtyPages.add(new WeightedPage(pageId, dirt));
                continue;
            }

            /* Check emptiness (with a rough check to avoid rewriting an unfillable page) */
            if (metadata.size <= fillPageThreshold
                    && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new WeightedPage(pageId, metadata.size));
                continue;
            }
        }

        /* Clean dirtier first */
        flushingDirtyPages.sort(WeightedPage.DESCENDING_ORDER);
        /* Clean smaller first */
        flushingSmallPages.sort(WeightedPage.ASCENDING_ORDER);

        pageAnalysis = System.currentTimeMillis();

        /* Rebuild dirty pages with only the records to be kept */
        for (WeightedPage weighted : flushingDirtyPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedDirtyPages;

            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded dirty page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }

            for (Record record : records) {
                /* Skip the record if it has been modified or deleted */
                final Long currentPageId = keyToPage.get(record.key);
                if (currentPageId == null || !weighted.pageId.equals(currentPageId)) {
                    continue;
                }

                /* Flush the buffer if this record would exceed the max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clear the old buffer! It will be used by the generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }

                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }

            /* Do not continue if we have used up all the configured checkpoint time */
            if (checkpointLimitInstant <= System.currentTimeMillis()) {
                break;
            }
        }

        dirtyPagesFlush = System.currentTimeMillis();

        /*
         * If there is only one small page and no additional data to add, rebuilding the page makes no
         * sense: it would most likely produce an identical page.
         */
        if (flushingSmallPages.size() == 1 && buffer.isEmpty()) {
            boolean hasNewPagesData = newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent();
            if (!hasNewPagesData) {
                flushingSmallPages.clear();
            }
        }

        final long compactionLimitInstant = sumOverflowWise(dirtyPagesFlush, compactionTargetTime);

        /* Rebuild too small pages */
        for (WeightedPage weighted : flushingSmallPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedSmallPages;

            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded small page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }

            for (Record record : records) {
                /* Flush the buffer if this record would exceed the max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clear the old buffer! It will be used by the generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }

                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }

            final long now = System.currentTimeMillis();
            /*
             * Do not continue if we have used up all the configured compaction or checkpoint time (but still
             * compact at least the smallest page, normally the leftover from the last checkpoint).
             */
            if (compactionLimitInstant <= now || checkpointLimitInstant <= now) {
                break;
            }
        }
        flushingSmallPages.clear();

        smallPagesFlush = System.currentTimeMillis();

        /*
         * Flush dirty records (and remaining records from the previous steps).
         *
         * Any new page remaining here is unflushed and is not set as dirty (if "dirty" it would have been unloaded!).
         * Just write the pages as they are.
         *
         * New empty pages won't be written.
         */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            if (!dataPage.isEmpty()) {
                bufferPageSize -= flushNewPageForCheckpoint(dataPage, buffer);
                // dataPage.makeImmutable();
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }

        /* Flush remaining records */
        if (!buffer.isEmpty()) {
            createImmutablePage(buffer, bufferPageSize);
            flushedRecords += buffer.size();
            bufferPageSize = 0;
            /* Do not clear the old buffer! It will be used by the generated pages to avoid too many copies! */
        }

        newPagesFlush = System.currentTimeMillis();

        LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records",
                new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}",
                    new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }

        /* Checkpoint the key to page too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();

        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // checkpoint at the same position of the current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();

        pageSet.checkpointDone(flushedPages);

        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber,
                Bytes.from_long(nextPrimaryKeyValue.get()).data, nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();

        /* Remove the flushed pages just handled */
        for (Long pageId : flushedPages) {
            final DataPage page = pages.remove(pageId);
            /* The current dirty-records page isn't known to the page replacement policy */
            if (page != null && currentDirtyRecordsPage.get() != page.pageId) {
                pageReplacementPolicy.remove(page);
            }
        }

        /*
         * This can happen when at checkpoint start all pages are set as dirty or immutable (immutable or
         * unloaded) due to a deletion: all pages will be removed and no page will remain alive.
         */
        if (newPages.isEmpty()) {
            /* allocateLivePage handles the correct policy load/unload of the last dirty page */
            allocateLivePage(currentDirtyRecordsPage.get());
        }

        checkPointRunning = false;

        result = new TableCheckpoint(table.name, sequenceNumber, actions);

        end = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, "
                + "flushed {4} records, total time {5} ms",
                new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}",
                    new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        checkpointLock.asWriteLock().unlock();
    }

    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name,
                delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush
                + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint
                + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }

    return result;
}
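The page selection at the top of the loop over activePages reduces to two byte thresholds derived from maxLogicalPageSize. The following stand-alone sketch restates that classification in isolation; the classify method and the PageAction enum are illustrative names, not project APIs.

// Illustrative sketch of the per-page classification performed in the
// activePages loop above; "classify" and "PageAction" are not project APIs.
enum PageAction { REWRITE_DIRTY, COMPACT_SMALL, KEEP }

static PageAction classify(long dirt, long size, long avgRecordSize,
                           long maxLogicalPageSize,
                           double dirtyThreshold, double fillThreshold) {
    final long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);

    // Dirty pages are rewritten; small pages are rewritten too if they carry any dirt.
    if (dirt > 0 && (dirt >= dirtyPageThreshold || size <= fillPageThreshold)) {
        return PageAction.REWRITE_DIRTY;
    }
    // Small clean pages are compacted, unless records are so large that a page
    // could never be filled past the threshold anyway (the "unfillable" case).
    if (size <= fillPageThreshold && maxLogicalPageSize - avgRecordSize >= fillPageThreshold) {
        return PageAction.COMPACT_SMALL;
    }
    return PageAction.KEEP;
}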