Use of herddb.storage.DataStorageManagerException in project herddb by diennea.
The class TableSpaceManager, method downloadTableSpaceData.
private void downloadTableSpaceData() throws MetadataStorageManagerException, DataStorageManagerException, LogNotAvailableException {
TableSpace tableSpaceData = metadataStorageManager.describeTableSpace(tableSpaceName);
String leaderId = tableSpaceData.leaderId;
if (this.nodeId.equals(leaderId)) {
throw new DataStorageManagerException("cannot download data of tableSpace " + tableSpaceName + " from myself");
}
Optional<NodeMetadata> leaderAddress = metadataStorageManager.listNodes().stream().filter(n -> n.nodeId.equals(leaderId)).findAny();
if (!leaderAddress.isPresent()) {
throw new DataStorageManagerException("cannot download data of tableSpace " + tableSpaceName + " from leader " + leaderId + ", no metadata found");
}
NodeMetadata nodeData = leaderAddress.get();
ClientConfiguration clientConfiguration = new ClientConfiguration(dbmanager.getTmpDirectory());
clientConfiguration.set(ClientConfiguration.PROPERTY_CLIENT_USERNAME, dbmanager.getServerToServerUsername());
clientConfiguration.set(ClientConfiguration.PROPERTY_CLIENT_PASSWORD, dbmanager.getServerToServerPassword());
try (HDBClient client = new HDBClient(clientConfiguration)) {
client.setClientSideMetadataProvider(new ClientSideMetadataProvider() {
@Override
public String getTableSpaceLeader(String tableSpace) throws ClientSideMetadataProviderException {
return leaderId;
}
@Override
public ServerHostData getServerHostData(String nodeId) throws ClientSideMetadataProviderException {
return new ServerHostData(nodeData.host, nodeData.port, "?", nodeData.ssl, Collections.emptyMap());
}
});
try (HDBConnection con = client.openConnection()) {
ReplicaFullTableDataDumpReceiver receiver = new ReplicaFullTableDataDumpReceiver(this);
int fetchSize = 10000;
con.dumpTableSpace(tableSpaceName, receiver, fetchSize, false);
long _start = System.currentTimeMillis();
boolean ok = receiver.join(1000 * 60 * 60);
if (!ok) {
throw new DataStorageManagerException("Cannot receive dump within " + (System.currentTimeMillis() - _start) + " ms");
}
if (receiver.getError() != null) {
throw new DataStorageManagerException("Error while receiving dump: " + receiver.getError(), receiver.getError());
}
this.actualLogSequenceNumber = receiver.logSequenceNumber;
LOGGER.log(Level.SEVERE, "After download local actualLogSequenceNumber is " + actualLogSequenceNumber);
} catch (ClientSideMetadataProviderException | HDBException | InterruptedException networkError) {
throw new DataStorageManagerException(networkError);
}
}
}
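The follower waits on the dump receiver with a timeout and then re-checks it for an error, converting any failure into a DataStorageManagerException. Below is a minimal, self-contained sketch of that wait-then-check pattern; DumpReceiverSketch and its onFinished/onError callbacks are hypothetical stand-ins for ReplicaFullTableDataDumpReceiver, not HerdDB API.
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

// Hypothetical, simplified analogue of ReplicaFullTableDataDumpReceiver:
// an asynchronous receiver that callers wait on with a timeout, and whose
// failure is surfaced afterwards as an exception.
public class DumpReceiverSketch {

    private final CountDownLatch done = new CountDownLatch(1);
    private volatile Throwable error;

    // Called by the network layer when the dump finishes successfully.
    public void onFinished() {
        done.countDown();
    }

    // Called by the network layer when the dump fails.
    public void onError(Throwable t) {
        this.error = t;
        done.countDown();
    }

    // Same shape as receiver.join(timeoutMillis) in the snippet above.
    public boolean join(long timeoutMillis) throws InterruptedException {
        return done.await(timeoutMillis, TimeUnit.MILLISECONDS);
    }

    public Throwable getError() {
        return error;
    }

    public static void main(String[] args) throws Exception {
        DumpReceiverSketch receiver = new DumpReceiverSketch();
        // Simulate the dump completing on another thread.
        new Thread(receiver::onFinished).start();
        boolean ok = receiver.join(TimeUnit.HOURS.toMillis(1));
        if (!ok) {
            throw new IllegalStateException("Cannot receive dump within the configured timeout");
        }
        if (receiver.getError() != null) {
            throw new IllegalStateException("Error while receiving dump", receiver.getError());
        }
        System.out.println("dump received");
    }
}
This keeps the network callbacks non-blocking: the caller decides how long to wait and how failures are surfaced.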
Use of herddb.storage.DataStorageManagerException in project herddb by diennea.
The class TableManager, method executeUpdate.
private StatementExecutionResult executeUpdate(UpdateStatement update, Transaction transaction, StatementEvaluationContext context) throws StatementExecutionException, DataStorageManagerException {
AtomicInteger updateCount = new AtomicInteger();
Holder<Bytes> lastKey = new Holder<>();
Holder<byte[]> lastValue = new Holder<>();
/*
an update can succeed only if the row is valid and the key is contained in the "keys" structure
the update simply overrides the value of the row, assigning a null page to the row
the update can have a 'where' predicate which is evaluated against the decoded row; the update is executed only if the predicate returns boolean 'true' (CAS operation)
locks: the update takes a lock on the key
*/
RecordFunction function = update.getFunction();
long transactionId = transaction != null ? transaction.transactionId : 0;
Predicate predicate = update.getPredicate();
ScanStatement scan = new ScanStatement(table.tablespace, table, predicate);
accessTableData(scan, context, new ScanResultOperation() {
@Override
public void accept(Record actual) throws StatementExecutionException, LogNotAvailableException, DataStorageManagerException {
byte[] newValue = function.computeNewValue(actual, context, tableContext);
final long size = DataPage.estimateEntrySize(actual.key, newValue);
if (size > maxLogicalPageSize) {
throw new RecordTooBigException("New version of record " + actual.key + " is too big to be updated: new size " + size + ", actual size " + DataPage.estimateEntrySize(actual) + ", max size " + maxLogicalPageSize);
}
LogEntry entry = LogEntryFactory.update(table, actual.key.data, newValue, transaction);
CommitLogResult pos = log.log(entry, entry.transactionId <= 0);
apply(pos, entry, false);
lastKey.value = actual.key;
lastValue.value = newValue;
updateCount.incrementAndGet();
}
}, transaction, true, true);
return new DMLStatementExecutionResult(transactionId, updateCount.get(), lastKey.value, update.isReturnValues() ? (lastValue.value != null ? Bytes.from_array(lastValue.value) : null) : null);
}
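The comment in executeUpdate describes a CAS-like flow: evaluate the 'where' predicate against the current row, compute the new value, reject records that would exceed the maximum logical page size, then log and apply the mutation. The following is a simplified stand-alone sketch of that flow using plain Java types; CasUpdateSketch and its crude size estimate are hypothetical and do not use HerdDB's Record, LogEntry or DataPage classes.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;

// Hypothetical illustration of the predicate-guarded ("CAS") update described above.
public class CasUpdateSketch {

    static final long MAX_LOGICAL_PAGE_SIZE = 1024;

    private final Map<String, byte[]> rows = new ConcurrentHashMap<>();

    // Returns the number of rows actually updated (0 or 1 per key here).
    public int update(String key, Predicate<byte[]> where, UnaryOperator<byte[]> function) {
        byte[] actual = rows.get(key);
        if (actual == null || !where.test(actual)) {
            // Predicate failed: nothing is logged, nothing is applied.
            return 0;
        }
        byte[] newValue = function.apply(actual);
        long size = key.length() + newValue.length; // crude stand-in for DataPage.estimateEntrySize
        if (size > MAX_LOGICAL_PAGE_SIZE) {
            throw new IllegalArgumentException("new version of record " + key + " is too big: " + size);
        }
        // In the real code a LogEntry is written to the commit log before applying the change.
        rows.put(key, newValue);
        return 1;
    }

    public static void main(String[] args) {
        CasUpdateSketch table = new CasUpdateSketch();
        table.rows.put("k1", new byte[] { 1 });
        int count = table.update("k1", v -> v.length == 1, v -> new byte[] { 2 });
        System.out.println("updated " + count + " record(s)");
    }
}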
Use of herddb.storage.DataStorageManagerException in project herddb by diennea.
The class TableManager, method executeDelete.
private StatementExecutionResult executeDelete(DeleteStatement delete, Transaction transaction, StatementEvaluationContext context) throws StatementExecutionException, DataStorageManagerException {
AtomicInteger updateCount = new AtomicInteger();
Holder<Bytes> lastKey = new Holder<>();
Holder<byte[]> lastValue = new Holder<>();
long transactionId = transaction != null ? transaction.transactionId : 0;
Predicate predicate = delete.getPredicate();
ScanStatement scan = new ScanStatement(table.tablespace, table, predicate);
accessTableData(scan, context, new ScanResultOperation() {
@Override
public void accept(Record actual) throws StatementExecutionException, LogNotAvailableException, DataStorageManagerException {
LogEntry entry = LogEntryFactory.delete(table, actual.key.data, transaction);
CommitLogResult pos = log.log(entry, entry.transactionId <= 0);
apply(pos, entry, false);
lastKey.value = actual.key;
lastValue.value = actual.value.data;
updateCount.incrementAndGet();
}
}, transaction, true, true);
return new DMLStatementExecutionResult(transactionId, updateCount.get(), lastKey.value, delete.isReturnValues() ? (lastValue.value != null ? Bytes.from_array(lastValue.value) : null) : null);
}
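Both executeUpdate and executeDelete call log.log(entry, entry.transactionId <= 0). A plausible reading, assuming the second argument requests a synchronous flush, is that auto-commit mutations are made durable immediately while mutations inside a transaction can defer the flush to the commit record. The sketch below illustrates that decision with a hypothetical WriteAheadLog interface, not HerdDB's CommitLog API.
// Hypothetical write-ahead-log interface illustrating the sync-vs-deferred
// flush decision seen in executeUpdate/executeDelete above.
interface WriteAheadLog {
    // If sync is true the call returns only after the entry is durable.
    void append(byte[] entry, boolean sync);
}

public class LogFlushSketch {

    private final WriteAheadLog log;

    public LogFlushSketch(WriteAheadLog log) {
        this.log = log;
    }

    public void logMutation(byte[] entry, long transactionId) {
        // Auto-commit mutations (no enclosing transaction) are flushed immediately;
        // transactional mutations can rely on the commit record to force durability.
        boolean sync = transactionId <= 0;
        log.append(entry, sync);
    }

    public static void main(String[] args) {
        LogFlushSketch sketch = new LogFlushSketch(
                (entry, sync) -> System.out.println("append " + entry.length + " bytes, sync=" + sync));
        sketch.logMutation(new byte[] { 1, 2, 3 }, 0);   // auto-commit: sync=true
        sketch.logMutation(new byte[] { 4, 5, 6 }, 42);  // inside transaction 42: sync=false
    }
}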
Use of herddb.storage.DataStorageManagerException in project herddb by diennea.
The class TableManager, method checkpoint.
/**
 * @param dirtyThreshold dirty page rewrite threshold, as a fraction of the maximum logical page size
 * @param fillThreshold small page compaction threshold, as a fraction of the maximum logical page size
 * @param checkpointTargetTime checkpoint target max milliseconds
 * @param compactionTargetTime compaction target max milliseconds
 * @param pin whether to pin the checkpoint data
 * @return the table checkpoint, or null if the checkpoint was skipped
 * @throws DataStorageManagerException
 */
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
if (createdInTransaction > 0) {
LOGGER.log(Level.SEVERE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
return null;
}
final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
final long dirtyPageThreshold = (long) (dirtyThreshold * maxLogicalPageSize);
long start = System.currentTimeMillis();
long end;
long getlock;
long pageAnalysis;
long dirtyPagesFlush;
long smallPagesFlush;
long newPagesFlush;
long keytopagecheckpoint;
long indexcheckpoint;
long tablecheckpoint;
final List<PostCheckpointAction> actions = new ArrayList<>();
TableCheckpoint result;
boolean lockAcquired;
try {
lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
} catch (InterruptedException err) {
throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
}
if (!lockAcquired) {
throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
}
try {
LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
getlock = System.currentTimeMillis();
checkPointRunning = true;
final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
Map<Bytes, Record> buffer = new HashMap<>();
long bufferPageSize = 0;
long flushedRecords = 0;
final List<WeightedPage> flushingDirtyPages = new ArrayList<>();
final List<WeightedPage> flushingSmallPages = new ArrayList<>();
final List<Long> flushedPages = new ArrayList<>();
int flushedDirtyPages = 0;
int flushedSmallPages = 0;
for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
final Long pageId = ref.getKey();
final DataPageMetaData metadata = ref.getValue();
final long dirt = metadata.dirt.sum();
/*
* Check dirtiness (dirty pages are flushed here even if small: the small-page flush below
* ignores dirty data handling).
*/
if (dirt > 0 && (dirt >= dirtyPageThreshold || metadata.size <= fillPageThreshold)) {
flushingDirtyPages.add(new WeightedPage(pageId, dirt));
continue;
}
/* Check emptiness (with a rough check to avoid rewriting a page that cannot be filled further) */
if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
flushingSmallPages.add(new WeightedPage(pageId, metadata.size));
continue;
}
}
/* Clean dirtier first */
flushingDirtyPages.sort(WeightedPage.DESCENDING_ORDER);
/* Clean smaller first */
flushingSmallPages.sort(WeightedPage.ASCENDING_ORDER);
pageAnalysis = System.currentTimeMillis();
/* Rebuild dirty pages with only records to be kept */
for (WeightedPage weighted : flushingDirtyPages) {
/* Page flushed */
flushedPages.add(weighted.pageId);
++flushedDirtyPages;
final DataPage dataPage = pages.get(weighted.pageId);
final Collection<Record> records;
if (dataPage == null) {
records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
LOGGER.log(Level.FINEST, "loaded dirty page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
} else {
records = dataPage.data.values();
}
for (Record record : records) {
/* Skip the record if it has been modified or deleted */
final Long currentPageId = keyToPage.get(record.key);
if (currentPageId == null || !weighted.pageId.equals(currentPageId)) {
continue;
}
/* Flush the page if it would exceed max page size */
if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
createImmutablePage(buffer, bufferPageSize);
flushedRecords += buffer.size();
bufferPageSize = 0;
/* Do not clean the old buffer! It will be used in the generated pages to avoid too many copies! */
buffer = new HashMap<>(buffer.size());
}
buffer.put(record.key, record);
bufferPageSize += DataPage.estimateEntrySize(record);
}
/* Do not continue if we have used up all configured checkpoint time */
if (checkpointLimitInstant <= System.currentTimeMillis()) {
break;
}
}
dirtyPagesFlush = System.currentTimeMillis();
/*
* If there is only one small page and no additional data to add,
* rebuilding the page makes no sense: it would most likely produce an identical page!
*/
if (flushingSmallPages.size() == 1 && buffer.isEmpty()) {
boolean hasNewPagesData = newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent();
if (!hasNewPagesData) {
flushingSmallPages.clear();
}
}
final long compactionLimitInstant = sumOverflowWise(dirtyPagesFlush, compactionTargetTime);
/* Rebuild too small pages */
for (WeightedPage weighted : flushingSmallPages) {
/* Page flushed */
flushedPages.add(weighted.pageId);
++flushedSmallPages;
final DataPage dataPage = pages.get(weighted.pageId);
final Collection<Record> records;
if (dataPage == null) {
records = dataStorageManager.readPage(tableSpaceUUID, table.uuid, weighted.pageId);
LOGGER.log(Level.FINEST, "loaded small page {0} on tmp buffer: {1} records", new Object[] { weighted.pageId, records.size() });
} else {
records = dataPage.data.values();
}
for (Record record : records) {
/* Flush the page if it would exceed max page size */
if (bufferPageSize + DataPage.estimateEntrySize(record) > maxLogicalPageSize) {
createImmutablePage(buffer, bufferPageSize);
flushedRecords += buffer.size();
bufferPageSize = 0;
/* Do not clean the old buffer! It will be used in the generated pages to avoid too many copies! */
buffer = new HashMap<>(buffer.size());
}
buffer.put(record.key, record);
bufferPageSize += DataPage.estimateEntrySize(record);
}
final long now = System.currentTimeMillis();
/*
* Do not continue if we have used up all the configured compaction or checkpoint time
* (but still compact at least the smallest page, normally the leftover from the last checkpoint).
*/
if (compactionLimitInstant <= now || checkpointLimitInstant <= now) {
break;
}
}
flushingSmallPages.clear();
smallPagesFlush = System.currentTimeMillis();
/*
* Flush dirty records (and remaining records from the previous step).
*
* Any new page remaining here is unflushed and is not marked as dirty (it would count as "dirty" only if it had been unloaded).
* Just write the pages as they are.
*
* New empty pages won't be written.
*/
long flushedNewPages = 0;
for (DataPage dataPage : newPages.values()) {
if (!dataPage.isEmpty()) {
bufferPageSize -= flushNewPageForCheckpoint(dataPage, buffer);
// dataPage.makeImmutable();
++flushedNewPages;
flushedRecords += dataPage.size();
}
}
/* Flush remaining records */
if (!buffer.isEmpty()) {
createImmutablePage(buffer, bufferPageSize);
flushedRecords += buffer.size();
bufferPageSize = 0;
/* Do not clean the old buffer! It will be used in the generated pages to avoid too many copies! */
}
newPagesFlush = System.currentTimeMillis();
LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
}
/* Checkpoint the key to page too */
actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
keytopagecheckpoint = System.currentTimeMillis();
/* Checkpoint secondary indexes too */
final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
if (indexes != null) {
for (AbstractIndexManager indexManager : indexes.values()) {
// Checkpoint at the same position of current TableManager
actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
}
}
indexcheckpoint = System.currentTimeMillis();
pageSet.checkpointDone(flushedPages);
TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.from_long(nextPrimaryKeyValue.get()).data, nextPageId, pageSet.getActivePages());
actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
tablecheckpoint = System.currentTimeMillis();
/* Remove flushed pages handled */
for (Long pageId : flushedPages) {
final DataPage page = pages.remove(pageId);
/* The current dirty record page isn't known to the page replacement policy */
if (page != null && currentDirtyRecordsPage.get() != page.pageId) {
pageReplacementPolicy.remove(page);
}
}
/*
* Can happen when, at checkpoint start, all pages are marked as dirty or immutable (immutable or
* unloaded) due to a deletion: all pages will be removed and no page will remain alive.
*/
if (newPages.isEmpty()) {
/* allocateLivePage handles the correct policy load/unload of the last dirty page */
allocateLivePage(currentDirtyRecordsPage.get());
}
checkPointRunning = false;
result = new TableCheckpoint(table.name, sequenceNumber, actions);
end = System.currentTimeMillis();
LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
}
} finally {
checkpointLock.asWriteLock().unlock();
}
long delta = end - start;
if (delta > 1000) {
long delta_lock = getlock - start;
long delta_pageAnalysis = pageAnalysis - getlock;
long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
long delta_unload = end - tablecheckpoint;
LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
}
return result;
}
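The page-selection phase above turns the dirtyThreshold and fillThreshold fractions into byte thresholds against the maximum logical page size and then routes each active page into one of three outcomes: rewrite it because it is dirty, compact it because it is under-filled, or keep it. The following stand-alone sketch reproduces only that classification; the class, the constant and the sample numbers are hypothetical.
// Hypothetical sketch of the page classification performed at checkpoint time:
// pages with enough dirty bytes are rewritten, under-filled pages are compacted,
// everything else is kept as-is.
public class PageClassificationSketch {

    enum Action { REWRITE_DIRTY, COMPACT_SMALL, KEEP }

    static final long MAX_LOGICAL_PAGE_SIZE = 4 * 1024 * 1024;

    static Action classify(long dirtyBytes, long pageSize, long avgRecordSize,
                           double dirtyThreshold, double fillThreshold) {
        long dirtyPageThreshold = (long) (dirtyThreshold * MAX_LOGICAL_PAGE_SIZE);
        long fillPageThreshold = (long) (fillThreshold * MAX_LOGICAL_PAGE_SIZE);

        // Dirty pages are rewritten; a dirty page that is also small is handled here too.
        if (dirtyBytes > 0 && (dirtyBytes >= dirtyPageThreshold || pageSize <= fillPageThreshold)) {
            return Action.REWRITE_DIRTY;
        }
        // Under-filled pages are compacted only when the threshold is reachable given the
        // average record size (a rough guard against rewriting a page that cannot be filled further).
        if (pageSize <= fillPageThreshold
                && MAX_LOGICAL_PAGE_SIZE - avgRecordSize >= fillPageThreshold) {
            return Action.COMPACT_SMALL;
        }
        return Action.KEEP;
    }

    public static void main(String[] args) {
        System.out.println(classify(3_000_000, 3_500_000, 200, 0.5, 0.75)); // REWRITE_DIRTY
        System.out.println(classify(0, 1_000_000, 200, 0.5, 0.75));         // COMPACT_SMALL
        System.out.println(classify(0, 4_000_000, 200, 0.5, 0.75));         // KEEP
    }
}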
Use of herddb.storage.DataStorageManagerException in project herddb by diennea.
The class TableManager, method fetchRecord.
private Record fetchRecord(Bytes key, Long pageId, LocalScanPageCache localScanPageCache) throws StatementExecutionException, DataStorageManagerException {
int maxTrials = 2;
while (true) {
DataPage dataPage = fetchDataPage(pageId, localScanPageCache);
if (dataPage != null) {
Record record = dataPage.get(key);
if (record != null) {
return record;
}
}
Long relocatedPageId = keyToPage.get(key);
LOGGER.log(Level.SEVERE, table.name + " fetchRecord " + key + " failed, checkPointRunning:" + checkPointRunning + " pageId:" + pageId + " relocatedPageId:" + relocatedPageId);
if (relocatedPageId == null) {
// deleted
LOGGER.log(Level.SEVERE, "table " + table.name + ", activePages " + pageSet.getActivePages() + ", record " + key + " deleted during data access");
return null;
}
pageId = relocatedPageId;
if (maxTrials-- == 0) {
throw new DataStorageManagerException("inconsistency! table " + table.name + " no record in memory for " + key + " page " + pageId + ", activePages " + pageSet.getActivePages() + " after many trials");
}
}
}
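fetchRecord retries the lookup a bounded number of times because a concurrent checkpoint can relocate a record to another page between the keyToPage lookup and the page load; a missing keyToPage entry means the record was deleted in the meantime. Here is a simplified, hypothetical sketch of the same bounded-retry pattern using plain maps instead of HerdDB's page structures.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical sketch of the bounded-retry lookup in fetchRecord: if the record
// is not found on the expected page, re-check the key-to-page index in case a
// concurrent checkpoint relocated it, and give up after a few trials.
public class FetchRetrySketch {

    private final Map<String, Long> keyToPage = new ConcurrentHashMap<>();
    private final Map<Long, Map<String, byte[]>> pages = new ConcurrentHashMap<>();

    public byte[] fetchRecord(String key, Long pageId) {
        int maxTrials = 2;
        while (true) {
            Map<String, byte[]> dataPage = pages.get(pageId);
            if (dataPage != null) {
                byte[] record = dataPage.get(key);
                if (record != null) {
                    return record;
                }
            }
            Long relocatedPageId = keyToPage.get(key);
            if (relocatedPageId == null) {
                // The record was deleted while we were looking for it.
                return null;
            }
            pageId = relocatedPageId;
            if (maxTrials-- == 0) {
                throw new IllegalStateException("no record in memory for " + key + " after many trials");
            }
        }
    }

    public static void main(String[] args) {
        FetchRetrySketch sketch = new FetchRetrySketch();
        sketch.keyToPage.put("k1", 2L);
        sketch.pages.put(2L, Map.of("k1", new byte[] { 7 }));
        // The caller still holds the stale page id 1: the lookup self-heals via keyToPage.
        System.out.println(sketch.fetchRecord("k1", 1L).length);
    }
}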