use of herddb.model.Record in project herddb by diennea.
the class TableManager method buildImmutableDataPage.
/**
 * Builds an immutable data page out of a plain list of records.
 * <p>
 * The records are indexed by their key and the page size is estimated by summing
 * {@code DataPage.estimateEntrySize} over every record of the list; the actual page creation is
 * delegated to the map based overload.
 *
 * @param pageId id of the page to build
 * @param page records to store in the page
 * @return the page returned by the map based {@code buildImmutableDataPage} overload
 */
private DataPage buildImmutableDataPage(long pageId, List<Record> page) {
    final Map<Bytes, Record> recordsByKey = new HashMap<>();
    long totalEstimatedSize = 0;
    for (Record current : page) {
        /* The size is accumulated for every listed record, the map keeps one record per key */
        totalEstimatedSize += DataPage.estimateEntrySize(current);
        recordsByKey.put(current.key, current);
    }
    return buildImmutableDataPage(pageId, recordsByKey, totalEstimatedSize);
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method executeUpdate.
/**
 * Executes an UPDATE statement on this table.
 * <p>
 * The records matching the statement predicate are visited through {@code accessTableData}; for each of
 * them the new value is computed, written to the log and applied.
 *
 * @param update statement providing the value function, the predicate and the "return values" flag
 * @param transaction transaction to work in, may be {@code null} (transaction id 0 is reported then)
 * @param context evaluation context handed to the scan and to the value function
 * @return a {@code DMLStatementExecutionResult} with the number of updated records, the key of the last
 *         updated record and, only if the statement requests return values, the last written value
 * @throws StatementExecutionException propagated from the scan or from the per-record operation
 * @throws DataStorageManagerException propagated from the scan or from the per-record operation
 */
private StatementExecutionResult executeUpdate(UpdateStatement update, Transaction transaction, StatementEvaluationContext context) throws StatementExecutionException, DataStorageManagerException {
    final AtomicInteger updatedRecords = new AtomicInteger();
    final Holder<Bytes> lastUpdatedKey = new Holder<>();
    final Holder<byte[]> lastWrittenValue = new Holder<>();
    /*
     * An update can succeed only if the row is valid, that is the key is contained in the "keys" structure.
     * The update will simply override the value of the row, assigning a null page to the row.
     * The update can have a 'where' predicate which is to be evaluated against the decoded row: the update
     * will be executed only if the predicate returns boolean 'true' value (CAS operation).
     * Locks: the update uses a lock on the key.
     */
    final RecordFunction valueFunction = update.getFunction();
    long transactionId = 0;
    if (transaction != null) {
        transactionId = transaction.transactionId;
    }
    final Predicate where = update.getPredicate();
    final ScanStatement matchingRecords = new ScanStatement(table.tablespace, table, where);

    final ScanResultOperation updateOperation = new ScanResultOperation() {
        @Override
        public void accept(Record actual) throws StatementExecutionException, LogNotAvailableException, DataStorageManagerException {
            final byte[] updatedValue = valueFunction.computeNewValue(actual, context, tableContext);

            /* Refuse a new version of the record which would not fit in a page */
            final long size = DataPage.estimateEntrySize(actual.key, updatedValue);
            if (size > maxLogicalPageSize) {
                throw new RecordTooBigException("New version of record " + actual.key + " is to big to be update: new size " + size + ", actual size " + DataPage.estimateEntrySize(actual) + ", max size " + maxLogicalPageSize);
            }

            /* Log first, then apply */
            final LogEntry logEntry = LogEntryFactory.update(table, actual.key.data, updatedValue, transaction);
            final CommitLogResult logPosition = log.log(logEntry, logEntry.transactionId <= 0);
            apply(logPosition, logEntry, false);

            /* Remember the outcome for the statement result */
            lastUpdatedKey.value = actual.key;
            lastWrittenValue.value = updatedValue;
            updatedRecords.incrementAndGet();
        }
    };
    accessTableData(matchingRecords, context, updateOperation, transaction, true, true);

    /* The last written value is reported only if the statement asks for it */
    Bytes returnedValue = null;
    if (update.isReturnValues() && lastWrittenValue.value != null) {
        returnedValue = Bytes.from_array(lastWrittenValue.value);
    }
    return new DMLStatementExecutionResult(transactionId, updatedRecords.get(), lastUpdatedKey.value, returnedValue);
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method executeDelete.
/**
 * Executes a DELETE statement on this table.
 * <p>
 * The records matching the statement predicate are visited through {@code accessTableData}; for each of
 * them a delete entry is written to the log and applied.
 *
 * @param delete statement providing the predicate and the "return values" flag
 * @param transaction transaction to work in, may be {@code null} (transaction id 0 is reported then)
 * @param context evaluation context handed to the scan
 * @return a {@code DMLStatementExecutionResult} with the number of deleted records, the key of the last
 *         deleted record and, only if the statement requests return values, the value that record had
 * @throws StatementExecutionException propagated from the scan or from the per-record operation
 * @throws DataStorageManagerException propagated from the scan or from the per-record operation
 */
private StatementExecutionResult executeDelete(DeleteStatement delete, Transaction transaction, StatementEvaluationContext context) throws StatementExecutionException, DataStorageManagerException {
    final AtomicInteger deletedRecords = new AtomicInteger();
    final Holder<Bytes> lastDeletedKey = new Holder<>();
    final Holder<byte[]> lastDeletedValue = new Holder<>();
    long transactionId = 0;
    if (transaction != null) {
        transactionId = transaction.transactionId;
    }
    final Predicate where = delete.getPredicate();
    final ScanStatement matchingRecords = new ScanStatement(table.tablespace, table, where);

    final ScanResultOperation deleteOperation = new ScanResultOperation() {
        @Override
        public void accept(Record actual) throws StatementExecutionException, LogNotAvailableException, DataStorageManagerException {
            /* Log first, then apply */
            final LogEntry logEntry = LogEntryFactory.delete(table, actual.key.data, transaction);
            final CommitLogResult logPosition = log.log(logEntry, logEntry.transactionId <= 0);
            apply(logPosition, logEntry, false);

            /* Remember the outcome for the statement result */
            lastDeletedKey.value = actual.key;
            lastDeletedValue.value = actual.value.data;
            deletedRecords.incrementAndGet();
        }
    };
    accessTableData(matchingRecords, context, deleteOperation, transaction, true, true);

    /* The deleted value is reported only if the statement asks for it */
    Bytes returnedValue = null;
    if (delete.isReturnValues() && lastDeletedValue.value != null) {
        returnedValue = Bytes.from_array(lastDeletedValue.value);
    }
    return new DMLStatementExecutionResult(transactionId, deletedRecords.get(), lastDeletedKey.value, returnedValue);
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method scanNoStream.
/**
 * Executes a scan collecting the whole result in a {@code MaterializedRecordSet} before returning it.
 * <p>
 * Depending on the statement one of four strategies is used:
 * <ul>
 * <li>no positive row limit: every matching record is collected;</li>
 * <li>row limit and a comparator which follows the clustered primary key index: the scan is stopped as
 * soon as {@code offset + maxRows} records outside of the transaction block have been seen;</li>
 * <li>row limit and any other comparator: records go through an {@code InStreamTupleSorter} sized
 * {@code offset + maxRows};</li>
 * <li>row limit without comparator: the scan is stopped after {@code offset + maxRows} records.</li>
 * </ul>
 * Sorting (when still needed), limits and projection (when not already applied during the scan) are
 * finally applied to the record set.
 *
 * @param statement the scan to execute
 * @param context evaluation context of the statement
 * @param transaction transaction to work in, may be {@code null} (the scanner reports id 0 then)
 * @param lockRequired forwarded to {@code accessTableData}
 * @param forWrite forwarded to {@code accessTableData}
 * @return a {@code SimpleDataScanner} over the materialized result
 * @throws StatementExecutionException propagated from the scan
 */
private DataScanner scanNoStream(ScanStatement statement, StatementEvaluationContext context, Transaction transaction, boolean lockRequired, boolean forWrite) throws StatementExecutionException {
    final boolean hasComparator = statement.getComparator() != null;
    final boolean followsClusteredIndex = statement.getComparator() != null
            && statement.getComparator().isOnlyPrimaryKeyAndAscending()
            && keyToPage.isSortedAscending();
    final Projection projection = statement.getProjection();
    /* With a comparator the projection is postponed until the record set has been sorted */
    final boolean projectWhileScanning = projection != null && !hasComparator;

    final MaterializedRecordSet recordSet = projectWhileScanning
            ? tableSpaceManager.getDbmanager().getRecordSetFactory().createRecordSet(projection.getFieldNames(), projection.getColumns())
            : tableSpaceManager.getDbmanager().getRecordSetFactory().createRecordSet(table.columnNames, table.columns);

    final ScanLimits limits = statement.getLimits();
    int maxRows = 0;
    int offset = 0;
    if (limits != null) {
        maxRows = limits.computeMaxRows(context);
        offset = limits.computeOffset(context);
    }

    boolean sortDone = false;
    if (maxRows <= 0) {
        /* No limit to leverage: collect everything */
        accessTableData(statement, context, new ScanResultOperation() {
            @Override
            public void accept(Record record) throws StatementExecutionException {
                DataAccessor row = record.getDataAccessor(table);
                if (projectWhileScanning) {
                    row = projection.map(row, context);
                }
                recordSet.add(row);
            }
        }, transaction, lockRequired, forWrite);
    } else if (followsClusteredIndex) {
        /* Leverage the sorted nature of the clustered primary key index */
        final AtomicInteger remaining = new AtomicInteger(offset > 0 ? maxRows + offset : maxRows);
        accessTableData(statement, context, new ScanResultOperation() {
            private boolean inTransactionData;

            @Override
            public void beginNewRecordsInTransactionBlock() {
                inTransactionData = true;
            }

            @Override
            public void accept(Record record) throws StatementExecutionException {
                DataAccessor row = record.getDataAccessor(table);
                if (projectWhileScanning) {
                    row = projection.map(row, context);
                }
                recordSet.add(row);
                /*
                 * Only records outside of the transaction block come in the same order as the clustered
                 * index and are counted; we want to receive transaction data uncommitted records too.
                 */
                if (!inTransactionData && remaining.decrementAndGet() == 0) {
                    throw new ExitLoop(true);
                }
            }
        }, transaction, lockRequired, forWrite);
        /* With a transaction accessTableData returns partially sorted data: it must be sorted anyway */
        sortDone = transaction == null;
    } else if (hasComparator) {
        final InStreamTupleSorter sorter = new InStreamTupleSorter(offset + maxRows, statement.getComparator());
        accessTableData(statement, context, new ScanResultOperation() {
            @Override
            public void accept(Record record) throws StatementExecutionException {
                DataAccessor row = record.getDataAccessor(table);
                if (projectWhileScanning) {
                    row = projection.map(row, context);
                }
                sorter.collect(row);
            }
        }, transaction, lockRequired, forWrite);
        sorter.flushToRecordSet(recordSet);
        sortDone = true;
    } else {
        /* If no sort is present the limits can be applied during the scan, performing an early exit */
        final AtomicInteger remaining = new AtomicInteger(offset > 0 ? maxRows + offset : maxRows);
        accessTableData(statement, context, new ScanResultOperation() {
            @Override
            public void accept(Record record) throws StatementExecutionException {
                DataAccessor row = record.getDataAccessor(table);
                if (projectWhileScanning) {
                    row = projection.map(row, context);
                }
                recordSet.add(row);
                if (remaining.decrementAndGet() == 0) {
                    throw new ExitLoop(false);
                }
            }
        }, transaction, lockRequired, forWrite);
    }

    recordSet.writeFinished();
    if (!sortDone) {
        recordSet.sort(statement.getComparator());
    }
    recordSet.applyLimits(statement.getLimits(), context);
    if (!projectWhileScanning) {
        recordSet.applyProjection(statement.getProjection(), context);
    }
    return new SimpleDataScanner(transaction != null ? transaction.transactionId : 0, recordSet);
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method checkpoint.
/**
 * Executes a checkpoint of this table while holding the checkpoint write lock.
 * <p>
 * Steps, in order: selection of dirty and small pages, rebuild of the selected pages into new immutable
 * pages, flush of the non empty new pages, checkpoint of the key-to-page structure and of the secondary
 * indexes, write of the table status and removal of the flushed pages from memory.
 *
 * @param dirtyThreshold fraction of {@code maxLogicalPageSize}: a page with at least that much dirt is
 *        rebuilt
 * @param fillThreshold fraction of {@code maxLogicalPageSize}: a page whose size does not exceed it is
 *        treated as a small page
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin forwarded to the key-to-page, secondary indexes and table checkpoints
 * @return the checkpoint result, or {@code null} if the checkpoint has been skipped because the table
 *         has been created in a transaction which is not committed
 * @throws DataStorageManagerException if the checkpoint lock cannot be acquired (interruption or
 *         timeout), or propagated from the invoked storage operations
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
    if (createdInTransaction > 0) {
        LOGGER.log(Level.SEVERE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
        return null;
    }
    final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
    final long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);
    /* Timestamps of every phase, used to log the time breakdown of long checkpoints */
    long start = System.currentTimeMillis();
    long end;
    long getlock;
    long pageAnalysis;
    long dirtyPagesFlush;
    long smallPagesFlush;
    long newPagesFlush;
    long keytopagecheckpoint;
    long indexcheckpoint;
    long tablecheckpoint;
    final List<PostCheckpointAction> actions = new ArrayList<>();
    TableCheckpoint result;
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
    } catch (InterruptedException err) {
        /* Restore the interrupt flag: the exception is translated and callers could need to see it */
        Thread.currentThread().interrupt();
        throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
    }
    if (!lockAcquired) {
        /*
         * Only describe the lock here. NOTE(review): checkpointLock looks like a
         * java.util.concurrent.locks.StampedLock (it exposes asWriteLock()); on such a lock writeLock()
         * ACQUIRES the lock and blocks, so it must not be called to build the error message.
         */
        throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock);
    }
    try {
        LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
        getlock = System.currentTimeMillis();
        /*
         * NOTE(review): the flag is reset only on the success path below; if the checkpoint fails with an
         * exception it stays true. Confirm whether it should be cleared in the finally block.
         */
        checkPointRunning = true;
        final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
        final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
        /* Records waiting to be written in the next rebuilt page, with their estimated total size */
        Map<Bytes, Record> buffer = new HashMap<>();
        long bufferPageSize = 0;
        long flushedRecords = 0;
        final List<WeightedPage> flushingDirtyPages = new ArrayList<>();
        final List<WeightedPage> flushingSmallPages = new ArrayList<>();
        final List<Long> flushedPages = new ArrayList<>();
        int flushedDirtyPages = 0;
        int flushedSmallPages = 0;
        for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
            final Long pageId = ref.getKey();
            final DataPageMetaData metadata = ref.getValue();
            final long dirt = metadata.dirt.sum();
            /*
             * Check dirtiness (flush here even small pages if dirty. Small pages flush IGNORES dirty data
             * handling).
             */
            if (dirt > 0 && (dirt >= dirtyPageThreshold || metadata.size <= fillPageThreshold)) {
                flushingDirtyPages.add(new WeightedPage(pageId, dirt));
                continue;
            }
            /* Check emptiness (with a really rough check to avoid rewriting an unfillable page) */
            if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
                flushingSmallPages.add(new WeightedPage(pageId, metadata.size));
                continue;
            }
        }
        /* Clean dirtier first */
        flushingDirtyPages.sort(WeightedPage.DESCENDING_ORDER);
        /* Clean smaller first */
        flushingSmallPages.sort(WeightedPage.ASCENDING_ORDER);
        pageAnalysis = System.currentTimeMillis();
        /* Rebuild dirty pages with only records to be kept */
        for (WeightedPage weighted : flushingDirtyPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedDirtyPages;
            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                /* Page not in memory: read its records from the storage */
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded dirty page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }
            for (Record record : records) {
                /* Skip the record if it has been modified or deleted (the key does not point to this page) */
                final Long currentPageId = keyToPage.get(record.key);
                if (currentPageId == null || !weighted.pageId.equals(currentPageId)) {
                    continue;
                }
                /* Flush the page if it would exceed max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clean old buffer! It will be used in generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }
                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }
            /* Do not continue if we have used up all configured checkpoint time */
            if (checkpointLimitInstant <= System.currentTimeMillis()) {
                break;
            }
        }
        dirtyPagesFlush = System.currentTimeMillis();
        /*
         * If there is only one small page without additional data to add
         * rebuilding the page makes no sense: it is too probable to rebuild an identical page!
         */
        if (flushingSmallPages.size() == 1 && buffer.isEmpty()) {
            boolean hasNewPagesData = newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent();
            if (!hasNewPagesData) {
                flushingSmallPages.clear();
            }
        }
        final long compactionLimitInstant = sumOverflowWise(dirtyPagesFlush, compactionTargetTime);
        /* Rebuild too small pages */
        for (WeightedPage weighted : flushingSmallPages) {
            /* Page flushed */
            flushedPages.add(weighted.pageId);
            ++flushedSmallPages;
            final DataPage dataPage = pages.get(weighted.pageId);
            final Collection<Record> records;
            if (dataPage == null) {
                /* Page not in memory: read its records from the storage */
                records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
                LOGGER.log(Level.FINEST, "loaded small page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
            } else {
                records = dataPage.data.values();
            }
            for (Record record : records) {
                /* Flush the page if it would exceed max page size */
                if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
                    createImmutablePage(buffer, bufferPageSize);
                    flushedRecords += buffer.size();
                    bufferPageSize = 0;
                    /* Do not clean old buffer! It will be used in generated pages to avoid too many copies! */
                    buffer = new HashMap<>(buffer.size());
                }
                buffer.put(record.key, record);
                bufferPageSize += DataPage.estimateEntrySize(record);
            }
            final long now = System.currentTimeMillis();
            /*
             * Do not continue if we have used up all configured compaction or checkpoint time (but still
             * compact at least the smaller page, normally the leftover from last checkpoint)
             */
            if (compactionLimitInstant <= now || checkpointLimitInstant <= now) {
                break;
            }
        }
        flushingSmallPages.clear();
        smallPagesFlush = System.currentTimeMillis();
        /*
         * Flush dirty records (and remaining records from previous step).
         *
         * Any newpage remaining here is unflushed and is not set as dirty (if "dirty" were unloaded!).
         * Just write the pages as they are.
         *
         * New empty pages won't be written
         */
        long flushedNewPages = 0;
        for (DataPage dataPage : newPages.values()) {
            if (!dataPage.isEmpty()) {
                bufferPageSize -= flushNewPageForCheckpoint(dataPage, buffer);
                ++flushedNewPages;
                flushedRecords += dataPage.size();
            }
        }
        /* Flush remaining records */
        if (!buffer.isEmpty()) {
            createImmutablePage(buffer, bufferPageSize);
            flushedRecords += buffer.size();
            bufferPageSize = 0;
            /* Do not clean old buffer! It will be used in generated pages to avoid too many copies! */
        }
        newPagesFlush = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
        }
        /* Checkpoint the key to page too */
        actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
        keytopagecheckpoint = System.currentTimeMillis();
        /* Checkpoint secondary indexes too */
        final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
        if (indexes != null) {
            for (AbstractIndexManager indexManager : indexes.values()) {
                // Checkpoint at the same position of current TableManager
                actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
            }
        }
        indexcheckpoint = System.currentTimeMillis();
        pageSet.checkpointDone(flushedPages);
        TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.from_long(nextPrimaryKeyValue.get()).data, nextPageId, pageSet.getActivePages());
        actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
        tablecheckpoint = System.currentTimeMillis();
        /* Remove flushed pages handled */
        for (Long pageId : flushedPages) {
            final DataPage page = pages.remove(pageId);
            /* Current dirty record page isn't known to page replacement policy */
            if (page != null && currentDirtyRecordsPage.get() != page.pageId) {
                pageReplacementPolicy.remove(page);
            }
        }
        /*
         * Can happen when at checkpoint start all pages are set as dirty or immutable (immutable or
         * unloaded) due to a deletion: all pages will be removed and no page will remain alive.
         */
        if (newPages.isEmpty()) {
            /* Allocate live handles the correct policy load/unload of last dirty page */
            allocateLivePage(currentDirtyRecordsPage.get());
        }
        checkPointRunning = false;
        result = new TableCheckpoint(table.name, sequenceNumber, actions);
        end = System.currentTimeMillis();
        LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
        }
    } finally {
        checkpointLock.asWriteLock().unlock();
    }
    /* Log the time spent in every phase if the whole checkpoint took more than one second */
    long delta = end - start;
    if (delta > 1000) {
        long delta_lock = getlock - start;
        long delta_pageAnalysis = pageAnalysis - getlock;
        long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
        long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
        long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
        long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
        long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
        long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
        long delta_unload = end - tablecheckpoint;
        LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
    }
    return result;
}
Aggregations