use of herddb.model.Record in project herddb by diennea.
the class TableManager method start.
/**
 * Boots this table: initialises storage and the primary key index, restores
 * the counters ({@code nextPrimaryKeyValue}, {@code nextPageId},
 * {@code bootSequenceNumber}) and the set of pages active at boot, then
 * starts the index and marks the manager as started.
 *
 * <p>Three boot paths are handled:
 * <ul>
 * <li>the index does not require a load at startup: the latest table status
 * is read from the data storage manager;</li>
 * <li>the index requires a load and the table has just been created: a
 * brand new table status is built;</li>
 * <li>the index requires a load and the table already existed: a full table
 * scan repopulates the key-to-page index.</li>
 * </ul>
 *
 * @param created {@code true} when the table is being created right now
 * @throws DataStorageManagerException declared for failures of the underlying storage calls
 */
@Override
public void start(boolean created) throws DataStorageManagerException {
    final Map<Long, DataPageMetaData> pagesAtBoot = new HashMap<>();
    dataStorageManager.initTable(tableSpaceUUID, table.uuid);
    keyToPage.init();
    bootSequenceNumber = LogSequenceNumber.START_OF_TIME;

    if (!keyToPage.requireLoadAtStartup()) {
        // the index does not need to be rebuilt: only the table status is read
        LOGGER.log(Level.INFO, "loading table {0}, uuid {1}", new Object[]{table.name, table.uuid});
        final TableStatus latest = dataStorageManager.getLatestTableStatus(tableSpaceUUID, table.uuid);
        if (!latest.sequenceNumber.isStartOfTime()) {
            LOGGER.log(Level.INFO, "recovery table at {0}", latest.sequenceNumber);
        }
        restoreCountersFromStatus(latest, pagesAtBoot);
    } else if (created) {
        // fresh new table with an in-memory key-to-page index: nothing to scan
        restoreCountersFromStatus(TableStatus.buildTableStatusForNewCreatedTable(table.uuid), pagesAtBoot);
    } else {
        // non persistent primary key index on an existing table: rebuild it with a full scan
        LOGGER.log(Level.INFO, "loading in memory all the keys for table {0}", new Object[]{table.name});
        dataStorageManager.fullTableScan(tableSpaceUUID, table.uuid, new FullTableScanConsumer() {
            @Override
            public void acceptTableStatus(TableStatus recovered) {
                LOGGER.log(Level.INFO, "recovery table at {0}", recovered.sequenceNumber);
                restoreCountersFromStatus(recovered, pagesAtBoot);
            }

            @Override
            public void acceptPage(long pageId, List<Record> records) {
                // register every key of the page in the primary key index
                records.forEach(row -> keyToPage.put(row.key.nonShared(), pageId, null));
            }

            @Override
            public void endTable() {
            }
        });
    }

    keyToPage.start(bootSequenceNumber, created);
    dataStorageManager.cleanupAfterTableBoot(tableSpaceUUID, table.uuid, pagesAtBoot.keySet());
    pageSet.setActivePagesAtBoot(pagesAtBoot);
    initNewPages();
    if (!created) {
        LOGGER.log(Level.INFO, "loaded {0} keys for table {1}, newPageId {2}, nextPrimaryKeyValue {3}, activePages {4}", new Object[]{keyToPage.size(), table.name, nextPageId, nextPrimaryKeyValue.get(), pageSet.getActivePages() + ""});
    }
    tableSpaceManager.rebuildForeignKeyReferences(table);
    started = true;
}

/**
 * Copies the boot-time counters out of a table status into this manager and
 * adds the status' active pages to the given map.
 *
 * @param status the table status to read from
 * @param activePagesSink map receiving the active pages listed by the status
 */
private void restoreCountersFromStatus(TableStatus status, Map<Long, DataPageMetaData> activePagesSink) {
    nextPrimaryKeyValue.set(Bytes.toLong(status.nextPrimaryKeyValue, 0));
    nextPageId = status.nextPageId;
    bootSequenceNumber = status.sequenceNumber;
    activePagesSink.putAll(status.activePages);
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method accessTableData.
/**
 * Scans the table according to {@code statement} and hands every matching
 * record to {@code consumer}.
 *
 * <p>Records already stored in the table are visited first, either through a
 * single primary key lookup (when the predicate yields a
 * {@code PrimaryIndexSeek}) or through {@code keyToPage.scanner(...)} fed
 * into a {@code BatchOrderedExecutor}. Afterwards, when a transaction is
 * present and the consumer did not ask to stop completely, the records
 * created by that transaction are offered to the consumer as well.
 *
 * <p>Lock hand-off: when a record is passed to the consumer outside a
 * transaction, the lock handle taken here is passed along with it and is not
 * released by this method. Locks taken here on records that are not passed
 * to the consumer are released in the {@code finally} block of
 * {@code RecordProcessor.accept}.
 *
 * @param statement the scan to execute; its context is validated first
 * @param context evaluation context passed to predicate and index operations
 * @param consumer receives the matching records
 * @param transaction the current transaction, or {@code null}
 * @param lockRequired forces lock acquisition even without a transaction
 * @param forWrite when locking, take write locks instead of read locks
 * @throws StatementExecutionException on scan failure; other
 *         {@code HerdDBInternalException}s are wrapped into it by the outer catch
 */
private void accessTableData(ScanStatement statement, StatementEvaluationContext context, ScanResultOperation consumer, Transaction transaction, boolean lockRequired, boolean forWrite) throws StatementExecutionException {
statement.validateContext(context);
Predicate predicate = statement.getPredicate();
long _start = System.currentTimeMillis();
// locks are taken whenever a transaction is present or the caller asks for write access / explicit locking
boolean acquireLock = transaction != null || forWrite || lockRequired;
// the per-scan page cache is only created for scans that do not take locks
LocalScanPageCache lastPageRead = acquireLock ? null : new LocalScanPageCache();
// number of index entries processed so far within a transaction (see transaction.touch() below)
AtomicInteger count = new AtomicInteger();
try {
IndexOperation indexOperation = predicate != null ? predicate.getIndexOperation() : null;
boolean primaryIndexSeek = indexOperation instanceof PrimaryIndexSeek;
AbstractIndexManager useIndex = getIndexForTbleAccess(indexOperation);
// Processes index entries (key -> page id), either one at a time (Consumer) or in batches (BatchOrderedExecutor.Executor)
class RecordProcessor implements BatchOrderedExecutor.Executor<Entry<Bytes, Long>>, Consumer<Map.Entry<Bytes, Long>> {
@Override
public void execute(List<Map.Entry<Bytes, Long>> batch) throws HerdDBInternalException {
batch.forEach((entry) -> {
accept(entry);
});
}
@Override
public void accept(Entry<Bytes, Long> entry) throws DataStorageManagerException, StatementExecutionException, LogNotAvailableException {
// once every 1000 entries, touch the transaction
// NOTE(review): presumably this keeps a long scan from being treated as an idle transaction - confirm
if (transaction != null && count.incrementAndGet() % 1000 == 0) {
transaction.touch();
}
Bytes key = entry.getKey();
// already_locked: the transaction already held a lock on this key before this scan touched it
boolean already_locked = transaction != null && transaction.lookupLock(table.name, key) != null;
// record_discarded drives the lock release in the finally block; it starts as false for keys
// that were already locked, so a pre-existing lock is never released from here
boolean record_discarded = !already_locked;
LockHandle lock = acquireLock ? (forWrite ? lockForWrite(key, transaction) : lockForRead(key, transaction)) : null;
// LOGGER.log(Level.SEVERE, "CREATED LOCK " + lock + " for " + key);
try {
if (transaction != null) {
// the transaction-local view of the record wins over the stored one
if (transaction.recordDeleted(table.name, key)) {
// skip this record. inside current transaction it has been deleted
return;
}
Record record = transaction.recordUpdated(table.name, key);
if (record != null) {
// use current transaction version of the record
if (predicate == null || predicate.evaluate(record, context)) {
// now the consumer is the owner of the lock on the record
record_discarded = false;
consumer.accept(record, null);
}
return;
}
}
Long pageId = entry.getValue();
// entries without a page id are skipped (nothing is fetched)
if (pageId != null) {
boolean pkFilterCompleteMatch = false;
// outside a primary key seek, first try the predicate against the raw key:
// FAILED skips the record without fetching it, FULL_CONDITION_VERIFIED
// makes the later predicate.evaluate(...) call unnecessary
if (!primaryIndexSeek && predicate != null) {
Predicate.PrimaryKeyMatchOutcome outcome = predicate.matchesRawPrimaryKey(key, context);
if (outcome == Predicate.PrimaryKeyMatchOutcome.FAILED) {
return;
} else if (outcome == Predicate.PrimaryKeyMatchOutcome.FULL_CONDITION_VERIFIED) {
pkFilterCompleteMatch = true;
}
}
Record record = fetchRecord(key, pageId, lastPageRead);
if (record != null && (pkFilterCompleteMatch || predicate == null || predicate.evaluate(record, context))) {
// now the consumer is the owner of the lock on the record
record_discarded = false;
// the lock handle is handed over only outside a transaction; within a transaction null is passed
consumer.accept(record, transaction == null ? lock : null);
}
}
} finally {
// release the lock on the key if it did not match scan criteria
if (record_discarded) {
if (transaction == null) {
locksManager.releaseLock(lock);
} else if (!already_locked) {
transaction.releaseLockOnKey(table.name, key, locksManager);
}
}
}
}
}
RecordProcessor scanExecutor = new RecordProcessor();
// exit == true means: do not visit the records created by the transaction after the scan
boolean exit = false;
try {
if (primaryIndexSeek) {
// we are expecting at most one record, no need for BatchOrderedExecutor
// this is the most common case for UPDATE-BY-PK and SELECT-BY-PK
// no need to create and use Streams
PrimaryIndexSeek seek = (PrimaryIndexSeek) indexOperation;
Bytes value = Bytes.from_array(seek.value.computeNewValue(null, context, tableContext));
Long page = keyToPage.get(value);
if (page != null) {
Map.Entry<Bytes, Long> singleEntry = new AbstractMap.SimpleImmutableEntry<>(value, page);
scanExecutor.accept(singleEntry);
}
} else {
// generic scan: index entries are pushed through a BatchOrderedExecutor configured with
// SORTED_PAGE_ACCESS_WINDOW_SIZE and SORTED_PAGE_ACCESS_COMPARATOR, which calls scanExecutor.execute(batch)
// NOTE(review): presumably batches are ordered by page id to improve page access locality - confirm
Stream<Map.Entry<Bytes, Long>> scanner = keyToPage.scanner(indexOperation, context, tableContext, useIndex);
BatchOrderedExecutor<Map.Entry<Bytes, Long>> executor = new BatchOrderedExecutor<>(SORTED_PAGE_ACCESS_WINDOW_SIZE, scanExecutor, SORTED_PAGE_ACCESS_COMPARATOR);
scanner.forEach(executor);
executor.finish();
}
} catch (ExitLoop exitLoop) {
// the scan was stopped early; continueWithTransactionData tells whether
// the records created by the transaction must still be visited
exit = !exitLoop.continueWithTransactionData;
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "exit loop during scan {0}, started at {1}: {2}", new Object[] { statement, new java.sql.Timestamp(_start), exitLoop.toString() });
}
} catch (final HerdDBInternalException error) {
LOGGER.log(Level.SEVERE, "error during scan", error);
// prefer rethrowing a wrapped StatementExecutionException / DataStorageManagerException cause,
// then the error itself if it has one of those types, otherwise wrap it
if (error.getCause() instanceof StatementExecutionException) {
throw (StatementExecutionException) error.getCause();
} else if (error.getCause() instanceof DataStorageManagerException) {
throw (DataStorageManagerException) error.getCause();
} else if (error instanceof StatementExecutionException) {
throw error;
} else if (error instanceof DataStorageManagerException) {
throw error;
} else {
throw new StatementExecutionException(error);
}
}
// second phase: records inserted by the current transaction
if (!exit && transaction != null) {
consumer.beginNewRecordsInTransactionBlock();
Collection<Record> newRecordsForTable = transaction.getNewRecordsForTable(table.name);
if (newRecordsForTable != null) {
newRecordsForTable.forEach(record -> {
if (!transaction.recordDeleted(table.name, record.key) && (predicate == null || predicate.evaluate(record, context))) {
consumer.accept(record, null);
}
});
}
}
} catch (ExitLoop exitLoop) {
// an ExitLoop raised outside the inner try (e.g. while visiting the transaction's new records): just trace it
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "exit loop during scan {0}, started at {1}: {2}", new Object[] { statement, new java.sql.Timestamp(_start), exitLoop.toString() });
}
} catch (StatementExecutionException err) {
LOGGER.log(Level.SEVERE, "error during scan {0}, started at {1}: {2}", new Object[] { statement, new java.sql.Timestamp(_start), err.toString() });
throw err;
} catch (HerdDBInternalException err) {
// any other internal error is reported to the caller as a StatementExecutionException
LOGGER.log(Level.SEVERE, "error during scan {0}, started at {1}: {2}", new Object[] { statement, new java.sql.Timestamp(_start), err.toString() });
throw new StatementExecutionException(err);
}
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method streamTableData.
/**
 * Builds a lazy stream over the records of the table selected by
 * {@code statement}.
 *
 * <p>Index entries returned by {@code keyToPage.scanner(...)} are mapped
 * through {@code accessRecord(...)}; entries for which it returns
 * {@code null} are dropped from the stream.
 *
 * @param statement the scan to execute; its context is validated first
 * @param context evaluation context for predicate and index operations
 * @param transaction the current transaction, or {@code null}
 * @param lockRequired forces lock acquisition even without a transaction
 * @param forWrite forwarded to {@code accessRecord}
 * @return a stream of the non-null records produced by {@code accessRecord}
 * @throws StatementExecutionException declared for failures while preparing the scan
 */
private Stream<Record> streamTableData(ScanStatement statement, StatementEvaluationContext context, Transaction transaction, boolean lockRequired, boolean forWrite) throws StatementExecutionException {
    statement.validateContext(context);
    final Predicate filter = statement.getPredicate();

    // locking is needed for transactional, write, or explicitly locked scans
    final boolean mustLock = lockRequired || forWrite || transaction != null;
    final LocalScanPageCache pageCache;
    if (mustLock) {
        pageCache = null;
    } else {
        pageCache = new LocalScanPageCache();
    }

    final IndexOperation indexOp;
    if (filter == null) {
        indexOp = null;
    } else {
        indexOp = filter.getIndexOperation();
    }
    final boolean seekByPrimaryKey = indexOp instanceof PrimaryIndexSeek;
    final AbstractIndexManager chosenIndex = getIndexForTbleAccess(indexOp);

    return keyToPage.scanner(indexOp, context, tableContext, chosenIndex)
            .map(indexEntry -> accessRecord(indexEntry, filter, context, transaction, pageCache, seekByPrimaryKey, forWrite, mustLock))
            .filter(found -> found != null);
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method executeUpdateAsync.
/**
 * Executes an UPDATE statement asynchronously.
 *
 * <p>The table is scanned with the predicate of the update, taking write
 * locks. For every matching record the method:
 * <ol>
 * <li>runs the foreign key checks as parent table (when children tables exist);</li>
 * <li>computes the new value with the update function;</li>
 * <li>runs the foreign key checks as child table and, for each unique index,
 * takes a write lock on the new index key and verifies it is not already
 * mapped; non-unique indexes only validate the indexable value;</li>
 * <li>rejects new values whose estimated size exceeds {@code maxLogicalPageSize};</li>
 * <li>writes an update entry to the log and records the pending work.</li>
 * </ol>
 * When every log write has completed successfully the entries are applied to
 * the table; in all cases the locks of the pending works are released through
 * {@code releaseMultiplePendingLogEntryWorks}.
 *
 * <p>Per-record failures (validation errors, record too big) do not throw:
 * they are recorded as exceptionally-completed futures, and both the row lock
 * and any unique-index lock handle taken for that row are released.
 *
 * @param update the update statement (function, predicate, return-values flag)
 * @param transaction the current transaction, or {@code null}
 * @param context evaluation context
 * @return a future with the DML result (update count, last key, and the last
 *         new value when {@code update.isReturnValues()} is set); a
 *         {@code HerdDBInternalException} raised by the scan is returned as an
 *         exceptionally-completed future
 * @throws StatementExecutionException declared by the signature
 * @throws DataStorageManagerException declared by the signature
 */
private CompletableFuture<StatementExecutionResult> executeUpdateAsync(UpdateStatement update, Transaction transaction, StatementEvaluationContext context) throws StatementExecutionException, DataStorageManagerException {
    AtomicInteger updateCount = new AtomicInteger();
    Holder<Bytes> lastKey = new Holder<>();
    Holder<byte[]> lastValue = new Holder<>();
    /*
     an update can succeed only if the row is valid and the key is contained in the "keys" structure
     the update will simply override the value of the row, assigning a null page to the row
     the update can have a 'where' predicate which is to be evaluated against the decoded row,
     the update will be executed only if the predicate returns boolean 'true' value (CAS operation)
     locks: the update uses a lock on the key
     */
    RecordFunction function = update.getFunction();
    long transactionId = transaction != null ? transaction.transactionId : 0;
    Predicate predicate = update.getPredicate();
    Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
    ScanStatement scan = new ScanStatement(table.tablespace, table, predicate);
    // one future per matching record: either the pending log work or the per-record failure
    List<CompletableFuture<PendingLogEntryWork>> writes = new ArrayList<>();
    try {
        accessTableData(scan, context, new ScanResultOperation() {
            @Override
            public void accept(Record current, LockHandle lockHandle) throws StatementExecutionException, LogNotAvailableException, DataStorageManagerException {
                List<UniqueIndexLockReference> uniqueIndexes = null;
                byte[] newValue;
                try {
                    if (childrenTables != null) {
                        DataAccessor currentValues = current.getDataAccessor(table);
                        for (Table childTable : childrenTables) {
                            executeForeignKeyConstraintsAsParentTable(childTable, currentValues, context, transaction, false);
                        }
                    }
                    newValue = function.computeNewValue(current, context, tableContext);
                    if (indexes != null || table.foreignKeys != null) {
                        DataAccessor values = new Record(current.key, Bytes.from_array(newValue)).getDataAccessor(table);
                        if (table.foreignKeys != null) {
                            for (ForeignKeyDef fk : table.foreignKeys) {
                                checkForeignKeyConstraintsAsChildTable(fk, values, context, transaction);
                            }
                        }
                        if (indexes != null) {
                            for (AbstractIndexManager index : indexes.values()) {
                                if (index.isUnique()) {
                                    Bytes indexKey = RecordSerializer.serializeIndexKey(values, index.getIndex(), index.getColumnNames());
                                    if (uniqueIndexes == null) {
                                        uniqueIndexes = new ArrayList<>(1);
                                    }
                                    // the reference is registered before locking so that error paths can see it
                                    UniqueIndexLockReference uniqueIndexLock = new UniqueIndexLockReference(index, indexKey);
                                    uniqueIndexes.add(uniqueIndexLock);
                                    LockHandle lockForIndex = lockForWrite(uniqueIndexLock.key, transaction, index.getIndexName(), index.getLockManager());
                                    // the handle is tracked here only outside a transaction
                                    if (transaction == null) {
                                        uniqueIndexLock.lockHandle = lockForIndex;
                                    }
                                    if (index.valueAlreadyMapped(indexKey, current.key)) {
                                        throw new UniqueIndexContraintViolationException(index.getIndexName(), indexKey, "Value " + indexKey + " already present in index " + index.getIndexName());
                                    }
                                } else {
                                    RecordSerializer.validateIndexableValue(values, index.getIndex(), index.getColumnNames());
                                }
                            }
                        }
                    }
                } catch (IllegalArgumentException | herddb.utils.IllegalDataAccessException | StatementExecutionException err) {
                    locksManager.releaseLock(lockHandle);
                    StatementExecutionException finalError;
                    if (!(err instanceof StatementExecutionException)) {
                        finalError = new StatementExecutionException(err.getMessage(), err);
                    } else {
                        finalError = (StatementExecutionException) err;
                    }
                    CompletableFuture<PendingLogEntryWork> res = Futures.exception(finalError);
                    if (uniqueIndexes != null) {
                        for (UniqueIndexLockReference lock : uniqueIndexes) {
                            res = releaseWriteLock(res, lock.lockHandle, lock.indexManager.getLockManager());
                        }
                    }
                    writes.add(res);
                    return;
                }
                final long size = DataPage.estimateEntrySize(current.key, newValue);
                if (size > maxLogicalPageSize) {
                    locksManager.releaseLock(lockHandle);
                    CompletableFuture<PendingLogEntryWork> tooBig = Futures.exception(new RecordTooBigException("New version of record " + current.key + " is to big to be update: new size " + size + ", actual size " + DataPage.estimateEntrySize(current) + ", max size " + maxLogicalPageSize));
                    // unique-index locks may already have been taken for this row: release them
                    // exactly like the validation-error path above, otherwise the handles
                    // tracked outside a transaction would never be released
                    if (uniqueIndexes != null) {
                        for (UniqueIndexLockReference lock : uniqueIndexes) {
                            tooBig = releaseWriteLock(tooBig, lock.lockHandle, lock.indexManager.getLockManager());
                        }
                    }
                    writes.add(tooBig);
                    return;
                }
                LogEntry entry = LogEntryFactory.update(table, current.key, Bytes.from_array(newValue), transaction);
                // second argument is true when the entry carries no transaction id
                CommitLogResult pos = log.log(entry, entry.transactionId <= 0);
                final List<UniqueIndexLockReference> _uniqueIndexes = uniqueIndexes;
                writes.add(pos.logSequenceNumber.thenApply(lsn -> new PendingLogEntryWork(entry, pos, lockHandle, _uniqueIndexes)));
                lastKey.value = current.key;
                lastValue.value = newValue;
                updateCount.incrementAndGet();
            }
        }, transaction, true, true);
    } catch (HerdDBInternalException err) {
        LOGGER.log(Level.SEVERE, "bad error during an update", err);
        return Futures.exception(err);
    }
    if (writes.isEmpty()) {
        // no record matched the predicate
        return CompletableFuture.completedFuture(new DMLStatementExecutionResult(transactionId, 0, null, null));
    }
    if (writes.size() == 1) {
        // single record: no need to collect the futures
        return writes.get(0).whenCompleteAsync((pending, error) -> {
            try {
                // apply any of the DML operations
                if (error == null) {
                    apply(pending.pos, pending.entry, false);
                }
            } finally {
                releaseMultiplePendingLogEntryWorks(writes);
            }
        }, tableSpaceManager.getCallbacksExecutor()).thenApply((pending) -> {
            return new DMLStatementExecutionResult(transactionId, updateCount.get(), lastKey.value, update.isReturnValues() ? (lastValue.value != null ? Bytes.from_array(lastValue.value) : null) : null);
        });
    } else {
        return Futures.collect(writes).whenCompleteAsync((pendings, error) -> {
            try {
                // apply any of the DML operations
                if (error == null) {
                    for (PendingLogEntryWork pending : pendings) {
                        apply(pending.pos, pending.entry, false);
                    }
                }
            } finally {
                releaseMultiplePendingLogEntryWorks(writes);
            }
        }, tableSpaceManager.getCallbacksExecutor()).thenApply((pendings) -> {
            return new DMLStatementExecutionResult(transactionId, updateCount.get(), lastKey.value, update.isReturnValues() ? (lastValue.value != null ? Bytes.from_array(lastValue.value) : null) : null);
        });
    }
}
use of herddb.model.Record in project herddb by diennea.
the class TableManager method onTransactionCommit.
/**
 * Applies to this table the changes accumulated by a transaction that is
 * being committed.
 *
 * <p>Steps:
 * <ol>
 * <li>if the table was created inside a transaction, only that transaction
 * may commit on it; the table then becomes visible to everyone and a
 * checkpoint is forced at the end;</li>
 * <li>if the transaction's last sequence number is not after
 * {@code bootSequenceNumber}, the commit is ignored during recovery and
 * treated as a corrupted commit log otherwise;</li>
 * <li>under the checkpoint read lock, new records are inserted, changed
 * records are updated and deleted records are removed, in that order;</li>
 * <li>the locks held by the transaction on this table are released.</li>
 * </ol>
 *
 * @param transaction the committing transaction; must not be {@code null}
 * @param recovery {@code true} when the commit is replayed during recovery
 * @throws DataStorageManagerException if the transaction is {@code null}, is
 *         not the one that created the table, is older than the table data
 *         outside recovery, or the checkpoint lock cannot be acquired (timeout
 *         or interruption; on interruption the thread's interrupt flag is restored)
 */
@Override
public void onTransactionCommit(Transaction transaction, boolean recovery) throws DataStorageManagerException {
    if (transaction == null) {
        throw new DataStorageManagerException("transaction cannot be null");
    }
    boolean forceFlushTableData = false;
    if (createdInTransaction > 0) {
        if (transaction.transactionId != createdInTransaction) {
            throw new DataStorageManagerException("table " + table.tablespace + "." + table.name + " is available only on transaction " + createdInTransaction);
        }
        // the creating transaction is committing: the table is no longer bound to it
        createdInTransaction = 0;
        forceFlushTableData = true;
    }
    if (!transaction.lastSequenceNumber.after(bootSequenceNumber)) {
        if (recovery) {
            LOGGER.log(Level.FINER, "ignoring transaction {0} commit on recovery, {1}.{2} data is newer: transaction {3}, table {4}", new Object[]{transaction.transactionId, table.tablespace, table.name, transaction.lastSequenceNumber, bootSequenceNumber});
            return;
        } else {
            throw new DataStorageManagerException("corrupted commit log " + table.tablespace + "." + table.name + " data is newer than transaction " + transaction.transactionId + " transaction " + transaction.lastSequenceNumber + " table " + bootSequenceNumber);
        }
    }
    boolean lockAcquired;
    try {
        lockAcquired = checkpointLock.asReadLock().tryLock(CHECKPOINT_LOCK_READ_TIMEOUT, SECONDS);
    } catch (InterruptedException err) {
        // restore the interrupt flag: the exception is translated, but code further
        // up the stack must still be able to observe that the thread was interrupted
        Thread.currentThread().interrupt();
        throw new DataStorageManagerException("interrupted while acquiring checkpoint lock during a commit", err);
    }
    if (!lockAcquired) {
        throw new DataStorageManagerException("timed out while acquiring checkpoint lock during a commit");
    }
    try {
        Map<Bytes, Record> changedRecords = transaction.changedRecords.get(table.name);
        // transaction is still holding locks on each record, so we can change records
        Map<Bytes, Record> newRecords = transaction.newRecords.get(table.name);
        if (newRecords != null) {
            for (Record record : newRecords.values()) {
                applyInsert(record.key, record.value, true);
            }
        }
        if (changedRecords != null) {
            for (Record r : changedRecords.values()) {
                applyUpdate(r.key, r.value);
            }
        }
        Set<Bytes> deletedRecords = transaction.deletedRecords.get(table.name);
        if (deletedRecords != null) {
            for (Bytes key : deletedRecords) {
                applyDelete(key);
            }
        }
    } finally {
        checkpointLock.asReadLock().unlock();
    }
    transaction.releaseLocksOnTable(table.name, locksManager);
    if (forceFlushTableData) {
        LOGGER.log(Level.FINE, "forcing local checkpoint, table " + table.name + " will be visible to all transactions now");
        checkpoint(false);
    }
}
Aggregations