Use of herddb.storage.TableStatus in project herddb by diennea.
The class TableManager, method start().
@Override
public void start() throws DataStorageManagerException {
    Map<Long, DataPageMetaData> activePagesAtBoot = new HashMap<>();
    bootSequenceNumber = LogSequenceNumber.START_OF_TIME;
    boolean requireLoadAtStartup = keyToPage.requireLoadAtStartup();
    if (requireLoadAtStartup) {
        // non persistent primary key index, we need a full table scan
        LOGGER.log(Level.SEVERE, "loading in memory all the keys for table {0}", new Object[] { table.name });
        dataStorageManager.fullTableScan(tableSpaceUUID, table.uuid, new FullTableScanConsumer() {
            Long currentPage;
            @Override
            public void acceptTableStatus(TableStatus tableStatus) {
                LOGGER.log(Level.SEVERE, "recovery table at " + tableStatus.sequenceNumber);
                nextPrimaryKeyValue.set(Bytes.toLong(tableStatus.nextPrimaryKeyValue, 0));
                nextPageId = tableStatus.nextPageId;
                bootSequenceNumber = tableStatus.sequenceNumber;
                activePagesAtBoot.putAll(tableStatus.activePages);
            }
            @Override
            public void startPage(long pageId) {
                currentPage = pageId;
            }
            @Override
            public void acceptRecord(Record record) {
                if (currentPage < 0) {
                    throw new IllegalStateException();
                }
                keyToPage.put(record.key, currentPage);
            }
            @Override
            public void endPage() {
                currentPage = null;
            }
            @Override
            public void endTable() {
            }
        });
    } else {
        LOGGER.log(Level.SEVERE, "loading table {0}, uuid {1}", new Object[] { table.name, table.uuid });
        TableStatus tableStatus = dataStorageManager.getLatestTableStatus(tableSpaceUUID, table.uuid);
        LOGGER.log(Level.SEVERE, "recovery table at " + tableStatus.sequenceNumber);
        nextPrimaryKeyValue.set(Bytes.toLong(tableStatus.nextPrimaryKeyValue, 0));
        nextPageId = tableStatus.nextPageId;
        bootSequenceNumber = tableStatus.sequenceNumber;
        activePagesAtBoot.putAll(tableStatus.activePages);
    }
    keyToPage.start(bootSequenceNumber);
    dataStorageManager.cleanupAfterBoot(tableSpaceUUID, table.uuid, activePagesAtBoot.keySet());
    pageSet.setActivePagesAtBoot(activePagesAtBoot);
    initNewPage();
    LOGGER.log(Level.SEVERE, "loaded {0} keys for table {1}, newPageId {2}, nextPrimaryKeyValue {3}, activePages {4}", new Object[] { keyToPage.size(), table.name, nextPageId, nextPrimaryKeyValue.get(), pageSet.getActivePages() + "" });
    started = true;
}
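For reference, the FullTableScanConsumer callbacks used above follow a simple lifecycle: the table status is delivered first, then each page is bracketed by startPage/endPage with its records in between, and endTable closes the scan. The sketch below is a minimal, hypothetical consumer (not part of HerdDB) that only counts records per page; it uses only the callback methods visible in the scan above, and the callback ordering is an assumption inferred from the start() implementation.

// Hypothetical sketch: a FullTableScanConsumer that counts records per page.
// Assumes the herddb.storage types shown above (FullTableScanConsumer, TableStatus, Record).
FullTableScanConsumer recordCounter = new FullTableScanConsumer() {
    private long currentPage = -1;
    private long recordsInPage = 0;
    private long totalRecords = 0;
    @Override
    public void acceptTableStatus(TableStatus tableStatus) {
        System.out.println("scanning table checkpointed at " + tableStatus.sequenceNumber);
    }
    @Override
    public void startPage(long pageId) {
        currentPage = pageId;
        recordsInPage = 0;
    }
    @Override
    public void acceptRecord(Record record) {
        recordsInPage++;
        totalRecords++;
    }
    @Override
    public void endPage() {
        System.out.println("page " + currentPage + ": " + recordsInPage + " records");
        currentPage = -1;
    }
    @Override
    public void endTable() {
        System.out.println("total records: " + totalRecords);
    }
};
// dataStorageManager.fullTableScan(tableSpaceUUID, table.uuid, recordCounter);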
Use of herddb.storage.TableStatus in project herddb by diennea.
The class FileDataStorageManager, method getLatestTableStatus().
@Override
public TableStatus getLatestTableStatus(String tableSpace, String tableName) throws DataStorageManagerException {
    try {
        Path lastFile = getLastTableCheckpointFile(tableSpace, tableName);
        TableStatus latestStatus;
        if (lastFile == null) {
            latestStatus = new TableStatus(tableName, LogSequenceNumber.START_OF_TIME, Bytes.from_long(1).data, 1, Collections.emptyMap());
        } else {
            latestStatus = readTableStatusFromFile(lastFile);
        }
        return latestStatus;
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
}
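Note the fallback when no checkpoint file exists yet: the method fabricates a default TableStatus positioned at START_OF_TIME, with the next primary key encoded as the long value 1, nextPageId 1 and no active pages. A hedged sketch of how a caller decodes such a status, mirroring the reads done in TableManager.start() above (names like dataStorageManager, tableSpaceUUID and table stand for the surrounding manager's fields):

// Sketch mirroring TableManager.start(); assumes the TableStatus, Bytes and
// LogSequenceNumber types shown elsewhere in this page.
TableStatus status = dataStorageManager.getLatestTableStatus(tableSpaceUUID, table.uuid);
long nextPk = Bytes.toLong(status.nextPrimaryKeyValue, 0);   // 1 for a table never checkpointed
long nextPage = status.nextPageId;                           // 1 for a table never checkpointed
LogSequenceNumber bootPosition = status.sequenceNumber;      // START_OF_TIME for a table never checkpointed
Map<Long, DataPageMetaData> activePagesAtBoot = new HashMap<>();
activePagesAtBoot.putAll(status.activePages);                // empty for a table never checkpointed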
Use of herddb.storage.TableStatus in project herddb by diennea.
The class TableManager, method checkpoint().
/**
 * @param dirtyThreshold fraction of the maximum logical page size of dirty data that triggers a page rewrite
 * @param fillThreshold fraction of the maximum logical page size under which a page is considered too small and is compacted
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin whether to pin the produced checkpoint, protecting its files from deletion by later checkpoints
 * @return the checkpoint result, or null if the checkpoint was skipped
 * @throws DataStorageManagerException
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    if (createdInTransaction > 0) {
        LOGGER.log(Level.SEVERE, "checkpoint for table " + table.name + " skipped, this table is created on transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);
    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;
    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        checkPointRunning = true;
        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
        Map<Bytes, Record> buffer = new HashMap<>();
        long bufferPageSize = 0;
        long flushedRecords = 0;
        final List<WeightedPage> flushingDirtyPages = new ArrayList<>();
        final List<WeightedPage> flushingSmallPages = new ArrayList<>();
        final List<Long> flushedPages = new ArrayList<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;
        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();
            /*
             * Check dirtiness (flush here even small pages if dirty. Small pages flush IGNORES dirty data
             * handling).
             */
            if (dirt > 0 && (dirt >= dirtyPageThreshold || metadata.size <= fillPageThreshold)) {
                flushingDirtyPages.add(new WeightedPage(pageId, dirt));
                continue;
            }
            /* Check emptiness (with a really dirty check to avoid rewriting an unfillable page) */
            if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new WeightedPage(pageId, metadata.size));
                continue;
            }
        }
        /* Clean dirtier first */
        flushingDirtyPages.sort(WeightedPage.DESCENDING_ORDER);
        /* Clean smaller first */
        flushingSmallPages.sort(WeightedPage.ASCENDING_ORDER);
        pageAnalysis = System.currentTimeMillis();
        /* Rebuild dirty pages with only records to be kept */
        for (WeightedPage weighted : flushingDirtyPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedDirtyPages;
            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded dirty page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }
            for (Record record : records) {
                /* Skip the record if it has been modified or deleted */
                final Long currentPageId = keyToPage.get(record.key);
                if (currentPageId == null || !weighted.pageId.equals(currentPageId)) {
                    continue;
                }
                /* Flush the page if it would exceed max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clear the old buffer! It will be used in generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }
                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }
            /* Do not continue if we have used up all configured checkpoint time */
            if (checkpointLimitInstant <= System.currentTimeMillis()) {
                break;
            }
        }
        dirtyPagesFlush = System.currentTimeMillis();
        /*
         * If there is only one small page left and no additional data to add, rebuilding the page makes
         * no sense: it is too probable to rebuild an identical page!
         */
        if (flushingSmallPages.size() == 1 && buffer.isEmpty()) {
            boolean hasNewPagesData = newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent();
            if (!hasNewPagesData) {
                flushingSmallPages.clear();
            }
        }
        final long compactionLimitInstant = sumOverflowWise(dirtyPagesFlush, compactionTargetTime);
        /* Rebuild too small pages */
        for (WeightedPage weighted : flushingSmallPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedSmallPages;
            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded small page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }
            for (Record record : records) {
                /* Flush the page if it would exceed max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clear the old buffer! It will be used in generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }
                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }
            final long now = System.currentTimeMillis();
            /*
             * Do not continue if we have used up all configured compaction or checkpoint time (but still
             * compact at least the smallest page, normally the leftover from the last checkpoint).
             */
            if (compactionLimitInstant <= now || checkpointLimitInstant <= now) {
                break;
            }
        }
        flushingSmallPages.clear();
        smallPagesFlush = System.currentTimeMillis();
        /*
         * Flush dirty records (and remaining records from previous step).
         *
         * Any new page remaining here is unflushed and is not set as dirty (if "dirty" it would have been unloaded!).
         * Just write the pages as they are.
         *
         * New empty pages won't be written.
         */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            if (!dataPage.isEmpty()) {
                bufferPageSize -= flushNewPageForCheckpoint(dataPage, buffer);
                // dataPage.makeImmutable();
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }
        /* Flush remaining records */
        if (!buffer.isEmpty()) {
            createImmutablePage(buffer, bufferPageSize);
            flushedRecords += buffer.size();
            bufferPageSize = 0;
            /* Do not clear the old buffer! It will be used in generated pages to avoid too many copies! */
        }
        newPagesFlush = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }
        /* Checkpoint the key to page too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();
        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // Checkpoint at the same position of the current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();
        pageSet.checkpointDone(flushedPages);
        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.from_long(nextPrimaryKeyValue.get()).data, nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();
        /* Remove the flushed pages just handled */
        for (Long pageId : flushedPages) {
            final DataPage page = pages.remove(pageId);
            /* The current dirty record page isn't known to the page replacement policy */
            if (page != null && currentDirtyRecordsPage.get() != page.pageId) {
                pageReplacementPolicy.remove(page);
            }
        }
        /*
         * Can happen when at checkpoint start all pages are set as dirty or immutable (immutable or
         * unloaded) due to a deletion: all pages will be removed and no page will remain alive.
         */
        if (newPages.isEmpty()) {
            /* allocateLivePage handles the correct policy load/unload of the last dirty page */
            allocateLivePage(currentDirtyRecordsPage.get());
        }
        checkPointRunning = false;
        result = new TableCheckpoint(table.name, sequenceNumber, actions);
        end = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        checkpointLock.asWriteLock().unlock();
    }
    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }
    return result;
}
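The page selection loop near the top of the method is the core of the policy: a page is rewritten as "dirty" when it has at least dirtyThreshold * maxLogicalPageSize bytes of dirty data, or when it is both dirty and under the fill threshold; a page that carries no dirty data is scheduled for compaction only if it is under the fill threshold and its average record is small enough that the page could still usefully be filled further. Below is a condensed, hypothetical restatement of those two predicates; the helper names and parameters are illustrative, not HerdDB API.

// Hypothetical helpers restating the page-selection predicates used in checkpoint().
// Thresholds are byte counts derived from fractions of maxLogicalPageSize, as in the code above.
static boolean shouldRewriteAsDirty(long dirtyBytes, long pageSize,
                                    long dirtyPageThreshold, long fillPageThreshold) {
    // rewrite pages with enough dirt; small pages are rewritten as soon as they have any dirt
    return dirtyBytes > 0 && (dirtyBytes >= dirtyPageThreshold || pageSize <= fillPageThreshold);
}

static boolean shouldCompactAsSmall(long pageSize, long avgRecordSize,
                                    long maxLogicalPageSize, long fillPageThreshold) {
    // compact only underfilled pages, and skip "unfillable" pages whose average record is so
    // large that a threshold-filled page could not accept another one
    return pageSize <= fillPageThreshold
            && maxLogicalPageSize - avgRecordSize >= fillPageThreshold;
}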
Use of herddb.storage.TableStatus in project herddb by diennea.
The class FileDataStorageManager, method fullTableScan().
@Override
public void fullTableScan(String tableSpace, String tableName, FullTableScanConsumer consumer) throws DataStorageManagerException {
    try {
        TableStatus status = getLatestTableStatus(tableSpace, tableName);
        fullTableScan(tableSpace, tableName, status, consumer);
    } catch (HerdDBInternalException err) {
        throw new DataStorageManagerException(err);
    }
}
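The method is just glue: it resolves the most recent checkpoint with getLatestTableStatus and hands it to the overload that actually walks the pages. A hedged sketch of the implied call pattern follows; it assumes the code runs inside FileDataStorageManager (where both methods are visible) and that the overload accepts any TableStatus, which is inferred from the delegation above rather than stated by the project.

// Hedged sketch: a caller already holding a TableStatus (for example one retained from a
// pinned checkpoint) could presumably drive the scan at that exact position through the
// overload invoked on the last line of the method above.
TableStatus status = getLatestTableStatus(tableSpace, tableName); // or a previously retained status
fullTableScan(tableSpace, tableName, status, consumer);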
Use of herddb.storage.TableStatus in project herddb by diennea.
The class FileDataStorageManager, method tableCheckpoint().
@Override
public List<PostCheckpointAction> tableCheckpoint(String tableSpace, String tableName, TableStatus tableStatus, boolean pin) throws DataStorageManagerException {
    LogSequenceNumber logPosition = tableStatus.sequenceNumber;
    Path dir = getTableDirectory(tableSpace, tableName);
    Path checkpointFile = getTableCheckPointsFile(dir, logPosition);
    try {
        Files.createDirectories(dir);
        if (Files.isRegularFile(checkpointFile)) {
            TableStatus actualStatus = readTableStatusFromFile(checkpointFile);
            if (actualStatus != null && actualStatus.equals(tableStatus)) {
                LOGGER.log(Level.INFO, "tableCheckpoint " + tableSpace + ", " + tableName + ": " + tableStatus + " already saved on file " + checkpointFile);
                return Collections.emptyList();
            }
        }
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    Path parent = getParent(checkpointFile);
    Path checkpointFileTemp = parent.resolve(checkpointFile.getFileName() + ".tmp");
    LOGGER.log(Level.FINE, "tableCheckpoint " + tableSpace + ", " + tableName + ": " + tableStatus + " to file " + checkpointFile);
    try (ManagedFile file = ManagedFile.open(checkpointFileTemp);
            SimpleBufferedOutputStream buffer = new SimpleBufferedOutputStream(file.getOutputStream(), COPY_BUFFERS_SIZE);
            XXHash64Utils.HashingOutputStream oo = new XXHash64Utils.HashingOutputStream(buffer);
            ExtendedDataOutputStream dataOutputKeys = new ExtendedDataOutputStream(oo)) {
        // version
        dataOutputKeys.writeVLong(1);
        // flags for future implementations
        dataOutputKeys.writeVLong(0);
        tableStatus.serialize(dataOutputKeys);
        dataOutputKeys.writeLong(oo.hash());
        dataOutputKeys.flush();
        file.sync();
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    try {
        Files.move(checkpointFileTemp, checkpointFile, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
    } catch (IOException err) {
        throw new DataStorageManagerException(err);
    }
    /* Checkpoint pinning */
    final Map<Long, Integer> pins = pinTableAndGetPages(tableSpace, tableName, tableStatus, pin);
    final Set<LogSequenceNumber> checkpoints = pinTableAndGetCheckpoints(tableSpace, tableName, tableStatus, pin);
    long maxPageId = tableStatus.activePages.keySet().stream().max(Comparator.naturalOrder()).orElse(Long.MAX_VALUE);
    List<PostCheckpointAction> result = new ArrayList<>();
    // we can drop old page files now
    List<Path> pageFiles = getTablePageFiles(tableSpace, tableName);
    for (Path p : pageFiles) {
        long pageId = getPageId(p);
        LOGGER.log(Level.FINEST, "checkpoint file {0} pageId {1}", new Object[] { p.toAbsolutePath(), pageId });
        if (pageId > 0 && !pins.containsKey(pageId) && !tableStatus.activePages.containsKey(pageId) && pageId < maxPageId) {
            LOGGER.log(Level.FINEST, "checkpoint file " + p.toAbsolutePath() + " pageId " + pageId + ". will be deleted after checkpoint end");
            result.add(new DeleteFileAction(tableName, "delete page " + pageId + " file " + p.toAbsolutePath(), p));
        }
    }
    try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
        for (Path p : stream) {
            if (isTableOrIndexCheckpointsFile(p) && !p.equals(checkpointFile)) {
                TableStatus status = readTableStatusFromFile(p);
                if (logPosition.after(status.sequenceNumber) && !checkpoints.contains(status.sequenceNumber)) {
                    LOGGER.log(Level.FINEST, "checkpoint metadata file " + p.toAbsolutePath() + ". will be deleted after checkpoint end");
                    result.add(new DeleteFileAction(tableName, "delete checkpoint metadata file " + p.toAbsolutePath(), p));
                }
            }
        }
    } catch (IOException err) {
        LOGGER.log(Level.SEVERE, "Could not list table dir " + dir, err);
    }
    return result;
}
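The write path above follows the usual crash-safe pattern: serialize into a .tmp sibling, hash and fsync it, then atomically rename it over the final name so a reader never observes a half-written checkpoint file. The following is a minimal standalone sketch of the same pattern using plain java.nio only; it is not HerdDB code (HerdDB additionally writes a version, flags and an XXHash64 trailer through its own stream wrappers), and the method name and parameters are illustrative.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;

// Illustrative sketch of the write-temp-then-atomic-move pattern used by tableCheckpoint().
static void atomicWrite(Path target, byte[] payload) throws IOException {
    Files.createDirectories(target.toAbsolutePath().getParent());
    Path tmp = target.resolveSibling(target.getFileName() + ".tmp");
    try (FileChannel channel = FileChannel.open(tmp,
            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
        ByteBuffer buf = ByteBuffer.wrap(payload);
        while (buf.hasRemaining()) {
            channel.write(buf);
        }
        channel.force(true); // fsync before the rename, like file.sync() above
    }
    // readers see either the old complete file or the new complete file, never a partial one
    Files.move(tmp, target, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
}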