Use of herddb.model.Index in project herddb by diennea.
Class TableManager, method applyUpdate.
private void applyUpdate(Bytes key, Bytes value) throws DataStorageManagerException {
// do not want to retain shared buffers as keys
key = key.nonShared();
/*
* New record to be updated; it will always be updated if there are no errors, thus it is simpler to create
* the record now
*/
final Record record = new Record(key, value);
/* This could be a normal or a temporary modifiable page */
final Long prevPageId = keyToPage.get(key);
if (prevPageId == null) {
throw new IllegalStateException("corrupted transaction log: key " + key + " is not present in table " + table.tablespace + "." + table.name);
}
/*
* We'll try to replace the record if it is in a writable page, otherwise we'll simply set the old page
* as dirty and continue like a normal insertion
*/
final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
/*
* When indexes are enabled we need the old values to update them; we'll force the page load only
* if the old record is really needed.
*/
final DataPage prevPage;
final Record previous;
boolean insertedInSamePage = false;
if (indexes == null) {
/* We don't need the page if it isn't loaded or isn't a mutable new page */
prevPage = newPages.get(prevPageId);
if (prevPage != null) {
pageReplacementPolicy.pageHit(prevPage);
previous = prevPage.get(key);
if (previous == null) {
throw new IllegalStateException("corrupted PK: old page " + prevPageId + " for updated record at " + key + " was not found in table " + table.tablespace + "." + table.name);
}
} else {
previous = null;
}
} else {
/* We really need the page in order to update the old values in the indexes */
prevPage = loadPageToMemory(prevPageId, false);
previous = prevPage.get(key);
if (previous == null) {
throw new IllegalStateException("corrupted PK: old page " + prevPageId + " for updated record at " + key + " was not found in table" + table.tablespace + "." + table.name);
}
}
if (prevPage == null || prevPage.immutable) {
/* Unloaded or immutable, set it as dirty */
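/* Note: 'previous' may legitimately be null here: with no secondary indexes the old value is not needed, only the dirty-page bookkeeping */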
pageSet.setPageDirty(prevPageId, previous);
} else {
/* Mutable page, need to check if still modifiable or already unloaded */
final Lock lock = prevPage.pageLock.readLock();
lock.lock();
try {
if (prevPage.writable) {
/* We can try to modify the page directly */
insertedInSamePage = prevPage.put(record);
} else {
/* Unfortunately it is not writable (anymore), set it as dirty */
pageSet.setPageDirty(prevPageId, previous);
}
} finally {
lock.unlock();
}
}
/* Insertion page */
Long insertionPageId;
if (insertedInSamePage) {
/* Inserted in temporary mutable previous page, no need to alter keyToPage too: no record page change */
insertionPageId = prevPageId;
} else {
/* Do real insertion */
insertionPageId = currentDirtyRecordsPage.get();
while (true) {
final DataPage newPage = newPages.get(insertionPageId);
if (newPage != null) {
pageReplacementPolicy.pageHit(newPage);
/* The temporary memory page could have been unloaded and loaded again in the meantime */
if (!newPage.immutable) {
/* Mutable page, need to check if still modifiable or already unloaded */
final Lock lock = newPage.pageLock.readLock();
lock.lock();
try {
if (newPage.writable) {
/* We can try to modify the page directly */
if (newPage.put(record)) {
break;
}
}
} finally {
lock.unlock();
}
}
}
/* Try to allocate a new page if not already done */
insertionPageId = allocateLivePage(insertionPageId);
}
/* Update the value on keyToPage */
keyToPage.put(key, insertionPageId);
}
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "Updated key " + key + " from page " + prevPageId + " to page " + insertionPageId + " on table " + table.tablespace + "." + table.name);
}
if (indexes != null) {
/* If there are indexes we have already forced a page load and the previous record has been loaded */
DataAccessor prevValues = previous.getDataAccessor(table);
DataAccessor newValues = record.getDataAccessor(table);
for (AbstractIndexManager index : indexes.values()) {
Index indexDef = index.getIndex();
String[] indexColumnNames = index.getColumnNames();
Bytes indexKeyRemoved = RecordSerializer.serializeIndexKey(prevValues, indexDef, indexColumnNames);
Bytes indexKeyAdded = RecordSerializer.serializeIndexKey(newValues, indexDef, indexColumnNames);
index.recordUpdated(key, indexKeyRemoved, indexKeyAdded);
}
}
}
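The while (true) loop above implements HerdDB's put-or-allocate pattern: try the current mutable page and, if it is missing, frozen, or full, atomically advance to a fresh page and retry. Below is a self-contained sketch of the same idea; MutablePage and allocatePage are hypothetical stand-ins for HerdDB's DataPage and allocateLivePage, and the CAS-style advancement is an assumption about allocateLivePage's behaviour.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

// Minimal sketch of the put-or-allocate loop used by applyUpdate above.
class PutOrAllocate {
    static final class MutablePage {
        final Map<String, String> records = new ConcurrentHashMap<>();
        volatile boolean writable = true;
        boolean tryPut(String key, String value) {
            if (!writable) {
                return false; // page was frozen in the meantime
            }
            records.put(key, value);
            return true;
        }
    }

    final AtomicLong currentPageId = new AtomicLong(1);
    final Map<Long, MutablePage> pages = new ConcurrentHashMap<>();

    long put(String key, String value) {
        long pageId = currentPageId.get();
        while (true) {
            MutablePage page = pages.get(pageId);
            if (page != null && page.tryPut(key, value)) {
                return pageId; // record landed in the current mutable page
            }
            // page missing, frozen or full: move to (or create) the next page
            pageId = allocatePage(pageId);
        }
    }

    private long allocatePage(long failedPageId) {
        // only one thread advances the counter past failedPageId; the others
        // simply observe the new current page and retry
        long next = currentPageId.updateAndGet(cur -> cur == failedPageId ? cur + 1 : cur);
        pages.computeIfAbsent(next, id -> new MutablePage());
        return next;
    }
}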
Use of herddb.model.Index in project herddb by diennea.
Class TableManager, method executeDeleteAsync.
private CompletableFuture<StatementExecutionResult> executeDeleteAsync(DeleteStatement delete, Transaction transaction, StatementEvaluationContext context) {
AtomicInteger updateCount = new AtomicInteger();
Holder<Bytes> lastKey = new Holder<>();
Holder<Bytes> lastValue = new Holder<>();
long transactionId = transaction != null ? transaction.transactionId : 0;
Predicate predicate = delete.getPredicate();
List<CompletableFuture<PendingLogEntryWork>> writes = new ArrayList<>();
Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
ScanStatement scan = new ScanStatement(table.tablespace, table, predicate);
try {
accessTableData(scan, context, new ScanResultOperation() {
@Override
public void accept(Record current, LockHandle lockHandle) throws StatementExecutionException, LogNotAvailableException, DataStorageManagerException {
// ensure we are holding the write locks on every unique index
List<UniqueIndexLockReference> uniqueIndexes = null;
try {
if (indexes != null || childrenTables != null) {
DataAccessor dataAccessor = current.getDataAccessor(table);
if (childrenTables != null) {
for (Table childTable : childrenTables) {
executeForeignKeyConstraintsAsParentTable(childTable, dataAccessor, context, transaction, true);
}
}
if (indexes != null) {
for (AbstractIndexManager index : indexes.values()) {
if (index.isUnique()) {
Bytes indexKey = RecordSerializer.serializeIndexKey(dataAccessor, index.getIndex(), index.getColumnNames());
if (uniqueIndexes == null) {
uniqueIndexes = new ArrayList<>(1);
}
UniqueIndexLockReference uniqueIndexLock = new UniqueIndexLockReference(index, indexKey);
uniqueIndexes.add(uniqueIndexLock);
LockHandle lockForIndex = lockForWrite(uniqueIndexLock.key, transaction, index.getIndexName(), index.getLockManager());
if (transaction == null) {
uniqueIndexLock.lockHandle = lockForIndex;
}
}
}
}
}
} catch (IllegalArgumentException | herddb.utils.IllegalDataAccessException | StatementExecutionException err) {
locksManager.releaseLock(lockHandle);
StatementExecutionException finalError;
if (!(err instanceof StatementExecutionException)) {
finalError = new StatementExecutionException(err.getMessage(), err);
} else {
finalError = (StatementExecutionException) err;
}
CompletableFuture<PendingLogEntryWork> res = Futures.exception(finalError);
if (uniqueIndexes != null) {
for (UniqueIndexLockReference lock : uniqueIndexes) {
res = releaseWriteLock(res, lockHandle, lock.indexManager.getLockManager());
}
}
writes.add(res);
return;
}
LogEntry entry = LogEntryFactory.delete(table, current.key, transaction);
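// the second argument requests a synchronous flush only for auto-commit entries (transactionId <= 0); transactional entries are made durable at commit time (assumption based on this flag)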
CommitLogResult pos = log.log(entry, entry.transactionId <= 0);
final List<UniqueIndexLockReference> _uniqueIndexes = uniqueIndexes;
writes.add(pos.logSequenceNumber.thenApply(lsn -> new PendingLogEntryWork(entry, pos, lockHandle, _uniqueIndexes)));
lastKey.value = current.key;
lastValue.value = current.value;
updateCount.incrementAndGet();
}
}, transaction, true, true);
} catch (HerdDBInternalException err) {
LOGGER.log(Level.SEVERE, "bad error during a delete", err);
return Futures.exception(err);
}
if (writes.isEmpty()) {
return CompletableFuture.completedFuture(new DMLStatementExecutionResult(transactionId, 0, null, null));
}
if (writes.size() == 1) {
return writes.get(0).whenCompleteAsync((pending, error) -> {
try {
// apply the DML operation
if (error == null) {
apply(pending.pos, pending.entry, false);
}
} finally {
releaseMultiplePendingLogEntryWorks(writes);
}
}, tableSpaceManager.getCallbacksExecutor()).thenApply((pending) -> {
return new DMLStatementExecutionResult(transactionId, updateCount.get(), lastKey.value, delete.isReturnValues() ? lastValue.value : null);
});
} else {
return Futures.collect(writes).whenCompleteAsync((pendings, error) -> {
try {
// apply every DML operation
if (error == null) {
for (PendingLogEntryWork pending : pendings) {
apply(pending.pos, pending.entry, false);
}
}
} finally {
releaseMultiplePendingLogEntryWorks(writes);
}
}, tableSpaceManager.getCallbacksExecutor()).thenApply((pendings) -> {
return new DMLStatementExecutionResult(transactionId, updateCount.get(), lastKey.value, delete.isReturnValues() ? lastValue.value : null);
});
}
}
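executeDeleteAsync accumulates one future per deleted record and then uses Futures.collect to turn them into a single future holding the list of results. A minimal sketch of such a helper built only on java.util.concurrent follows; HerdDB's actual herddb.utils.Futures implementation may differ in details.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;

// Sketch of a Futures.collect-style helper: completes with the list of all
// results once every input future has completed, and completes exceptionally
// if any input failed.
public class CollectSketch {
    public static <T> CompletableFuture<List<T>> collect(List<CompletableFuture<T>> futures) {
        return CompletableFuture
                .allOf(futures.toArray(new CompletableFuture[0]))
                .thenApply(ignored -> {
                    List<T> results = new ArrayList<>(futures.size());
                    for (CompletableFuture<T> future : futures) {
                        results.add(future.join()); // safe: allOf guarantees completion
                    }
                    return results;
                });
    }
}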
Use of herddb.model.Index in project herddb by diennea.
Class TableSpaceManager, method apply.
void apply(CommitLogResult position, LogEntry entry, boolean recovery) throws DataStorageManagerException, DDLException {
if (!position.deferred || position.sync) {
// this will wait for the write to be acknowledged by the log
// it can throw LogNotAvailableException
this.actualLogSequenceNumber = position.getLogSequenceNumber();
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "apply {0} {1}", new Object[] { position.getLogSequenceNumber(), entry });
}
} else {
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "apply {0} {1}", new Object[] { position, entry });
}
}
switch(entry.type) {
case LogEntryType.NOOP:
{
// NOOP
}
break;
case LogEntryType.BEGINTRANSACTION:
{
long id = entry.transactionId;
Transaction transaction = new Transaction(id, tableSpaceName, position);
transactions.put(id, transaction);
}
break;
case LogEntryType.ROLLBACKTRANSACTION:
{
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
if (transaction == null) {
throw new DataStorageManagerException("invalid transaction id " + id + ", only " + transactions.keySet());
}
List<AbstractIndexManager> indexManagers = new ArrayList<>(indexes.values());
for (AbstractIndexManager indexManager : indexManagers) {
if (indexManager.getCreatedInTransaction() == 0 || indexManager.getCreatedInTransaction() == id) {
indexManager.onTransactionRollback(transaction);
}
}
List<AbstractTableManager> managers = new ArrayList<>(tables.values());
for (AbstractTableManager manager : managers) {
if (manager.getCreatedInTransaction() == 0 || manager.getCreatedInTransaction() == id) {
Table table = manager.getTable();
if (transaction.isNewTable(table.name)) {
LOGGER.log(Level.INFO, "rollback CREATE TABLE " + table.tablespace + "." + table.name);
disposeTable(manager);
Map<String, AbstractIndexManager> indexes = indexesByTable.remove(manager.getTable().name);
if (indexes != null) {
for (AbstractIndexManager indexManager : indexes.values()) {
disposeIndexManager(indexManager);
}
}
} else {
manager.onTransactionRollback(transaction);
}
}
}
transactions.remove(transaction.transactionId);
}
break;
case LogEntryType.COMMITTRANSACTION:
{
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
if (transaction == null) {
throw new DataStorageManagerException("invalid transaction id " + id);
}
LogSequenceNumber commit = position.getLogSequenceNumber();
transaction.sync(commit);
List<AbstractTableManager> managers = new ArrayList<>(tables.values());
for (AbstractTableManager manager : managers) {
if (manager.getCreatedInTransaction() == 0 || manager.getCreatedInTransaction() == id) {
manager.onTransactionCommit(transaction, recovery);
}
}
List<AbstractIndexManager> indexManagers = new ArrayList<>(indexes.values());
for (AbstractIndexManager indexManager : indexManagers) {
if (indexManager.getCreatedInTransaction() == 0 || indexManager.getCreatedInTransaction() == id) {
indexManager.onTransactionCommit(transaction, recovery);
}
}
if ((transaction.droppedTables != null && !transaction.droppedTables.isEmpty()) || (transaction.droppedIndexes != null && !transaction.droppedIndexes.isEmpty())) {
if (transaction.droppedTables != null) {
for (String dropped : transaction.droppedTables) {
for (AbstractTableManager manager : managers) {
if (manager.getTable().name.equals(dropped)) {
disposeTable(manager);
}
}
}
}
if (transaction.droppedIndexes != null) {
for (String dropped : transaction.droppedIndexes) {
for (AbstractIndexManager manager : indexManagers) {
if (manager.getIndex().name.equals(dropped)) {
disposeIndexManager(manager);
}
}
}
}
}
if ((transaction.newTables != null && !transaction.newTables.isEmpty()) || (transaction.droppedTables != null && !transaction.droppedTables.isEmpty()) || (transaction.newIndexes != null && !transaction.newIndexes.isEmpty()) || (transaction.droppedIndexes != null && !transaction.droppedIndexes.isEmpty())) {
writeTablesOnDataStorageManager(position, false);
dbmanager.getPlanner().clearCache();
}
transactions.remove(transaction.transactionId);
}
break;
case LogEntryType.CREATE_TABLE:
{
Table table = Table.deserialize(entry.value.to_array());
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerNewTable(table, position);
}
bootTable(table, entry.transactionId, null, true);
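// outside a transaction the updated table list is persisted immediately below; inside a transaction it is persisted when the COMMITTRANSACTION entry is applied (see above)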
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
}
}
break;
case LogEntryType.CREATE_INDEX:
{
Index index = Index.deserialize(entry.value.to_array());
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerNewIndex(index, position);
}
AbstractTableManager tableManager = tables.get(index.table);
if (tableManager == null) {
throw new RuntimeException("table " + index.table + " does not exists");
}
bootIndex(index, tableManager, true, entry.transactionId, true, false);
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
}
}
break;
case LogEntryType.DROP_TABLE:
{
String tableName = entry.tableName;
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerDropTable(tableName, position);
} else {
AbstractTableManager manager = tables.get(tableName);
if (manager != null) {
disposeTable(manager);
Map<String, AbstractIndexManager> indexes = indexesByTable.get(tableName);
if (indexes != null && !indexes.isEmpty()) {
LOGGER.log(Level.SEVERE, "It looks like we are dropping a table " + tableName + " with these indexes " + indexes);
}
}
}
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
}
}
break;
case LogEntryType.DROP_INDEX:
{
String indexName = entry.value.to_string();
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerDropIndex(indexName, position);
} else {
AbstractIndexManager manager = indexes.get(indexName);
if (manager != null) {
disposeIndexManager(manager);
}
}
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
dbmanager.getPlanner().clearCache();
}
}
break;
case LogEntryType.ALTER_TABLE:
{
Table table = Table.deserialize(entry.value.to_array());
alterTable(table, null);
writeTablesOnDataStorageManager(position, false);
}
break;
case LogEntryType.TABLE_CONSISTENCY_CHECK:
{
/*
In recovery mode, we need to skip the consistency check.
The tablespace may not be available yet, and therefore Calcite will not be able to perform the SELECT query.
*/
if (recovery) {
LOGGER.log(Level.INFO, "skip {0} consistency check LogEntry {1}", new Object[] { tableSpaceName, entry });
break;
}
try {
TableChecksum check = MAPPER.readValue(entry.value.to_array(), TableChecksum.class);
String tableSpace = check.getTableSpaceName();
String query = check.getQuery();
String tableName = entry.tableName;
// for this entry type (14) a follower must re-run the query while replaying the transaction log
if (!isLeader()) {
AbstractTableManager tablemanager = this.getTableManager(tableName);
DBManager manager = this.getDbmanager();
if (tablemanager == null || tablemanager.getCreatedInTransaction() > 0) {
throw new TableDoesNotExistException(String.format("Table %s does not exist.", tableName));
}
/*
scan = true
allowCache = false
returnValues = false
maxRows = -1
*/
TranslatedQuery translated = manager.getPlanner().translate(tableSpace, query, Collections.emptyList(), true, false, false, -1);
TableChecksum scanResult = TableDataChecksum.createChecksum(manager, translated, this, tableSpace, tableName);
long followerDigest = scanResult.getDigest();
long leaderDigest = check.getDigest();
long leaderNumRecords = check.getNumRecords();
long followerNumRecords = scanResult.getNumRecords();
// to pass the check, the digest and the number of processed records must both match exactly
if (followerDigest == leaderDigest && leaderNumRecords == followerNumRecords) {
LOGGER.log(Level.INFO, "Data consistency check PASS for table {0} tablespace {1} with Checksum {2}", new Object[] { tableName, tableSpace, followerDigest });
} else {
LOGGER.log(Level.SEVERE, "Data consistency check FAILED for table {0} in tablespace {1} with Checksum {2}", new Object[] { tableName, tableSpace, followerDigest });
}
} else {
long digest = check.getDigest();
LOGGER.log(Level.INFO, "Created checksum {0} for table {1} in tablespace {2} on node {3}", new Object[] { digest, entry.tableName, tableSpace, this.getDbmanager().getNodeId() });
}
} catch (IOException | DataScannerException ex) {
LOGGER.log(Level.SEVERE, "Error during table consistency check ", ex);
}
}
break;
default:
// other entry types are not relevant for the TableSpaceManager
break;
}
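// finally, route record-level entries (those carrying a table name and not handled as DDL above) to the owning TableManager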
if (entry.tableName != null && entry.type != LogEntryType.CREATE_TABLE && entry.type != LogEntryType.CREATE_INDEX && entry.type != LogEntryType.ALTER_TABLE && entry.type != LogEntryType.DROP_TABLE && entry.type != LogEntryType.TABLE_CONSISTENCY_CHECK) {
AbstractTableManager tableManager = tables.get(entry.tableName);
tableManager.apply(position, entry, recovery);
}
}
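For context, a CREATE_INDEX entry carries a serialized Index definition; below is roughly how one is built before it reaches the log. The builder calls mirror HerdDB's test code, so treat the exact signatures as assumptions; the table and index names are hypothetical.

import herddb.model.ColumnTypes;
import herddb.model.Index;
import herddb.model.Table;

public class CreateIndexExample {
    public static void main(String[] args) {
        // hypothetical schema; builder API follows HerdDB test usage (assumption)
        Table table = Table.builder()
                .tablespace("default")
                .name("customers")
                .column("id", ColumnTypes.STRING)
                .column("city", ColumnTypes.STRING)
                .primaryKey("id")
                .build();
        Index index = Index.builder()
                .onTable(table)
                .name("ix_customers_city")
                .type(Index.TYPE_BTREE)
                .column("city", ColumnTypes.STRING)
                .build();
        // on replay, apply() above reconstructs the definition with
        // Index.deserialize(entry.value.to_array())
        System.out.println(index.name + " on table " + index.table);
    }
}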
Use of herddb.model.Index in project herddb by diennea.
Class TableSpaceManager, method recover.
void recover(TableSpace tableSpaceInfo) throws DataStorageManagerException, LogNotAvailableException, MetadataStorageManagerException {
if (recoveryInProgress) {
throw new HerdDBInternalException("Cannot run recovery twice");
}
recoveryInProgress = true;
LogSequenceNumber logSequenceNumber = dataStorageManager.getLastcheckpointSequenceNumber(tableSpaceUUID);
actualLogSequenceNumber = logSequenceNumber;
LOGGER.log(Level.INFO, "{0} recover {1}, logSequenceNumber from DataStorage: {2}", new Object[] { nodeId, tableSpaceName, logSequenceNumber });
List<Table> tablesAtBoot = dataStorageManager.loadTables(logSequenceNumber, tableSpaceUUID);
List<Index> indexesAtBoot = dataStorageManager.loadIndexes(logSequenceNumber, tableSpaceUUID);
String tableNames = tablesAtBoot.stream().map(t -> t.name).collect(Collectors.joining(","));
String indexNames = indexesAtBoot.stream().map(t -> t.name + " on table " + t.table).collect(Collectors.joining(","));
if (!tableNames.isEmpty()) {
LOGGER.log(Level.INFO, "{0} {1} tablesAtBoot: {2}, indexesAtBoot: {3}", new Object[] { nodeId, tableSpaceName, tableNames, indexNames });
}
for (Table table : tablesAtBoot) {
TableManager tableManager = bootTable(table, 0, null, false);
for (Index index : indexesAtBoot) {
if (index.table.equals(table.name)) {
bootIndex(index, tableManager, false, 0, false, false);
}
}
}
dataStorageManager.loadTransactions(logSequenceNumber, tableSpaceUUID, t -> {
transactions.put(t.transactionId, t);
LOGGER.log(Level.FINER, "{0} {1} tx {2} at boot lsn {3}", new Object[] { nodeId, tableSpaceName, t.transactionId, t.lastSequenceNumber });
try {
if (t.newTables != null) {
for (Table table : t.newTables.values()) {
if (!tables.containsKey(table.name)) {
bootTable(table, t.transactionId, null, false);
}
}
}
if (t.newIndexes != null) {
for (Index index : t.newIndexes.values()) {
if (!indexes.containsKey(index.name)) {
AbstractTableManager tableManager = tables.get(index.table);
bootIndex(index, tableManager, false, t.transactionId, false, false);
}
}
}
} catch (Exception err) {
LOGGER.log(Level.SEVERE, "error while booting tmp tables " + err, err);
throw new RuntimeException(err);
}
});
if (LogSequenceNumber.START_OF_TIME.equals(logSequenceNumber) && dbmanager.getServerConfiguration().getBoolean(ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT, ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT_DEFAULT)) {
LOGGER.log(Level.SEVERE, nodeId + " full recovery of data is forced (" + ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT + "=true) for tableSpace " + tableSpaceName);
downloadTableSpaceData();
log.recovery(actualLogSequenceNumber, new ApplyEntryOnRecovery(), false);
} else {
try {
log.recovery(logSequenceNumber, new ApplyEntryOnRecovery(), false);
} catch (FullRecoveryNeededException fullRecoveryNeeded) {
LOGGER.log(Level.SEVERE, nodeId + " full recovery of data is needed for tableSpace " + tableSpaceName, fullRecoveryNeeded);
downloadTableSpaceData();
log.recovery(actualLogSequenceNumber, new ApplyEntryOnRecovery(), false);
}
}
recoveryInProgress = false;
if (!LogSequenceNumber.START_OF_TIME.equals(actualLogSequenceNumber)) {
LOGGER.log(Level.INFO, "Recovery finished for {0} seqNum {1}", new Object[] { tableSpaceName, actualLogSequenceNumber });
checkpoint(false, false, false);
}
}
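The forced full recovery branch above is driven by a server property. Here is a hedged sketch of enabling it, assuming the generic set(key, value) accessor that HerdDB's ServerConfiguration exposes in its tests.

import herddb.server.ServerConfiguration;

public class ForceDownloadExample {
    public static void main(String[] args) {
        // force a full snapshot download at boot instead of replaying the local log;
        // the property name comes from recover() above, set(String, Object) is an
        // assumption about the configuration API
        ServerConfiguration configuration = new ServerConfiguration();
        configuration.set(ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT, true);
    }
}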
Use of herddb.model.Index in project herddb by diennea.
Class ReplicaFullTableDataDumpReceiver, method beginTable.
@Override
public void beginTable(DumpedTableMetadata dumpedTable, Map<String, Object> stats) throws DataStorageManagerException {
Table table = dumpedTable.table;
LOGGER.log(Level.INFO, "dumpReceiver " + tableSpaceName + ", beginTable " + table.name + ", stats:" + stats + ", dumped at " + dumpedTable.logSequenceNumber + " (general dump at " + logSequenceNumber + ")");
currentTable = tableSpaceManager.bootTable(table, 0, dumpedTable.logSequenceNumber, false);
for (Index index : dumpedTable.indexes) {
tableSpaceManager.bootIndex(index, currentTable, false, 0, false, true);
}
}