use of herddb.log.LogSequenceNumber in project herddb by diennea.
the class TableManager method checkpoint.
/**
* @param dirtyThreshold dirty page threshold, as a fraction of the maximum logical page size
* @param fillThreshold fill page threshold, as a fraction of the maximum logical page size
* @param checkpointTargetTime checkpoint target max milliseconds
* @param cleanupTargetTime cleanup target max milliseconds
* @param compactionTargetTime compaction target max milliseconds
* @param pin whether to pin the produced checkpoint data
* @return the checkpoint result, or null if the table was created in a transaction which is not committed yet
* @throws DataStorageManagerException
*/
private TableCheckpoint checkpoint(double dirtyThreshold, double fillThreshold, long checkpointTargetTime, long cleanupTargetTime, long compactionTargetTime, boolean pin) throws DataStorageManagerException {
LOGGER.log(Level.FINE, "tableCheckpoint dirtyThreshold: " + dirtyThreshold + ", {0}.{1} (pin: {2})", new Object[] { tableSpaceUUID, table.name, pin });
if (createdInTransaction > 0) {
LOGGER.log(Level.FINE, "checkpoint for table " + table.name + " skipped," + "this table is created on transaction " + createdInTransaction + " which is not committed");
return null;
}
final long fillPageThreshold = (long) (fillThreshold * maxLogicalPageSize);
final long dirtyPageThreshold = dirtyThreshold > 0 ? (long) (dirtyThreshold * maxLogicalPageSize) : -1;
long start = System.currentTimeMillis();
long end;
long getlock;
long pageAnalysis;
long dirtyPagesFlush;
long smallPagesFlush;
long newPagesFlush;
long keytopagecheckpoint;
long indexcheckpoint;
long tablecheckpoint;
final List<PostCheckpointAction> actions = new ArrayList<>();
TableCheckpoint result;
boolean lockAcquired;
try {
lockAcquired = checkpointLock.asWriteLock().tryLock(CHECKPOINT_LOCK_WRITE_TIMEOUT, TimeUnit.SECONDS);
} catch (InterruptedException err) {
throw new DataStorageManagerException("interrupted while waiting for checkpoint lock", err);
}
if (!lockAcquired) {
throw new DataStorageManagerException("timed out while waiting for checkpoint lock, write lock " + checkpointLock.writeLock());
}
try {
LogSequenceNumber sequenceNumber = log.getLastSequenceNumber();
getlock = System.currentTimeMillis();
checkPointRunning = true;
final long checkpointLimitInstant = sumOverflowWise(getlock, checkpointTargetTime);
final Map<Long, DataPageMetaData> activePages = pageSet.getActivePages();
long flushedRecords = 0;
List<CheckpointingPage> flushingDirtyPages = new ArrayList<>();
List<CheckpointingPage> flushingSmallPages = new ArrayList<>();
final Set<Long> flushedPages = new HashSet<>();
int flushedDirtyPages = 0;
int flushedSmallPages = 0;
for (Entry<Long, DataPageMetaData> ref : activePages.entrySet()) {
final Long pageId = ref.getKey();
final DataPageMetaData metadata = ref.getValue();
final long dirt = metadata.dirt.sum();
/* Check dirtiness (flush even small pages here if they are dirty enough) */
if (dirt > 0 && dirt >= dirtyPageThreshold) {
flushingDirtyPages.add(new CheckpointingPage(pageId, dirt, dirt > 0));
continue;
}
/* Check emptiness (with a rough check to avoid rewriting a page that cannot be filled any further) */
if (metadata.size <= fillPageThreshold && maxLogicalPageSize - metadata.avgRecordSize >= fillPageThreshold) {
flushingSmallPages.add(new CheckpointingPage(pageId, metadata.size, dirt > 0));
continue;
}
}
/* Clean dirtier first */
flushingDirtyPages.sort(CheckpointingPage.DESCENDING_ORDER);
/* Clean smaller first */
flushingSmallPages.sort(CheckpointingPage.ASCENDING_ORDER);
pageAnalysis = System.currentTimeMillis();
/* Should the page currently being rebuilt be kept in memory or discarded? */
boolean keepFlushedPageInMemory = false;
/* The new page actually being rebuilt */
DataPage buildingPage = createMutablePage(nextPageId++, 0, 0);
if (!flushingDirtyPages.isEmpty()) {
final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(pageAnalysis, cleanupTargetTime));
/*
* Do not continue if we have used up all of the configured cleanup or checkpoint time, but still
* compact at least the smallest page (normally the leftover from the last checkpoint)
*/
CleanAndCompactResult dirtyResult = cleanAndCompactPages(flushingDirtyPages, buildingPage, keepFlushedPageInMemory, timeLimit);
flushedDirtyPages = dirtyResult.flushedPages.size();
flushedPages.addAll(dirtyResult.flushedPages);
flushedRecords += dirtyResult.flushedRecords;
keepFlushedPageInMemory = dirtyResult.keepFlushedPageInMemory;
buildingPage = dirtyResult.buildingPage;
}
dirtyPagesFlush = System.currentTimeMillis();
/* **************************** */
/* *** Small pages handling *** */
/* **************************** */
/*
* Small pages can be dirty pages too, so we need to check whether each page has already been handled
* during the dirty pages cleanup. Small pages should be a really small set (normally just the last
* flushed page), so this filter is not critical or heavy enough to require any optimization
*/
/* Filter out dirty pages flushed from flushing small pages (a page could be "small" and "dirty") */
flushingSmallPages = flushingSmallPages.stream().filter(wp -> !flushedPages.contains(wp.pageId)).collect(Collectors.toList());
/*
* If there is only one clean small page and no additional data to add, rebuilding the page makes no
* sense: we would most probably rebuild an identical page!
*/
if (/* Just one small page */
flushingSmallPages.size() == 1 && /* Not dirty */
!flushingSmallPages.get(0).dirty && /* No spare data remaining */
buildingPage.isEmpty() && /* No new data */
!newPages.values().stream().filter(p -> !p.isEmpty()).findAny().isPresent()) {
/* Avoid small page compaction */
flushingSmallPages.clear();
}
if (!flushingSmallPages.isEmpty()) {
final long timeLimit = Math.min(checkpointLimitInstant, sumOverflowWise(dirtyPagesFlush, compactionTargetTime));
/*
* Do not continue if we have used up all of the configured compaction or checkpoint time, but still
* compact at least the smallest page (normally the leftover from the last checkpoint)
*/
CleanAndCompactResult smallResult = cleanAndCompactPages(flushingSmallPages, buildingPage, keepFlushedPageInMemory, timeLimit);
flushedSmallPages = smallResult.flushedPages.size();
flushedPages.addAll(smallResult.flushedPages);
flushedRecords += smallResult.flushedRecords;
keepFlushedPageInMemory = smallResult.keepFlushedPageInMemory;
buildingPage = smallResult.buildingPage;
}
smallPagesFlush = System.currentTimeMillis();
/* ************************** */
/* *** New pages handling *** */
/* ************************** */
/*
* Retrieve the "current" new page. It can be held in memory because no writes are executed during
* a checkpoint and thus the page cannot change (nor be flushed due to an unload because it isn't
* known to page replacement policy)
*/
final long lastKnownPageId = currentDirtyRecordsPage.get();
/*
* Flush dirty records (and remaining records from previous step).
*
* Any new page remaining here is unflushed and is not marked as dirty (had it been "dirty" it would have been unloaded).
* Just write the pages as they are.
*
* New empty pages won't be written
*/
long flushedNewPages = 0;
for (DataPage dataPage : newPages.values()) {
/* Flush every dirty page (but not the "current" dirty page if empty) */
if (lastKnownPageId != dataPage.pageId || !dataPage.isEmpty()) {
flushNewPageForCheckpoint(dataPage, buildingPage);
++flushedNewPages;
flushedRecords += dataPage.size();
}
}
/*
* Flush remaining records.
*
* Whether to keep the flushed page in memory is a "best guess" here: we don't know whether the records
* that needed to be kept in memory have already been flushed during newPage filling (see
* flushNewPageForCheckpoint). So we still use keepFlushedPageInMemory (possibly true) even if the
* remaining records came from an old unused page.
*/
if (!buildingPage.isEmpty()) {
flushMutablePage(buildingPage, keepFlushedPageInMemory);
} else {
/* Remove unused empty building page from memory */
pages.remove(buildingPage.pageId);
}
/*
* Never, never, never revert an unused nextPageId! Even if we didn't use the booked nextPageId it is
* better to throw it away: reverting a generated id would merely look "strange" for now but would be
* simply wrong in the future (if checkpoint were to permit concurrent page creation, for example..)
*/
newPagesFlush = System.currentTimeMillis();
if (flushedDirtyPages > 0 || flushedSmallPages > 0 || flushedNewPages > 0 || flushedRecords > 0) {
LOGGER.log(Level.INFO, "checkpoint {0}, logpos {1}, flushed: {2} dirty pages, {3} small pages, {4} new pages, {5} records", new Object[] { table.name, sequenceNumber, flushedDirtyPages, flushedSmallPages, flushedNewPages, flushedRecords });
}
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "checkpoint {0}, logpos {1}, flushed pages: {2}", new Object[] { table.name, sequenceNumber, flushedPages.toString() });
}
/* Checkpoint the key to page too */
actions.addAll(keyToPage.checkpoint(sequenceNumber, pin));
keytopagecheckpoint = System.currentTimeMillis();
/* Checkpoint secondary indexes too */
final Map<String, AbstractIndexManager> indexes = tableSpaceManager.getIndexesOnTable(table.name);
if (indexes != null) {
for (AbstractIndexManager indexManager : indexes.values()) {
// Checkpoint at the same position of current TableManager
actions.addAll(indexManager.checkpoint(sequenceNumber, pin));
}
}
indexcheckpoint = System.currentTimeMillis();
pageSet.checkpointDone(flushedPages);
TableStatus tableStatus = new TableStatus(table.name, sequenceNumber, Bytes.longToByteArray(nextPrimaryKeyValue.get()), nextPageId, pageSet.getActivePages());
actions.addAll(dataStorageManager.tableCheckpoint(tableSpaceUUID, table.uuid, tableStatus, pin));
tablecheckpoint = System.currentTimeMillis();
/*
* This can happen when, at checkpoint start, all pages are marked as dirty or immutable (immutable or
* unloaded) due to a deletion: all pages will be removed and no page will remain alive.
*/
if (newPages.isEmpty()) {
/* allocateLivePage handles the correct load/unload policy for the last dirty page */
allocateLivePage(lastKnownPageId);
}
checkPointRunning = false;
result = new TableCheckpoint(table.name, sequenceNumber, actions);
end = System.currentTimeMillis();
if (flushedRecords > 0) {
LOGGER.log(Level.INFO, "checkpoint {0} finished, logpos {1}, {2} active pages, {3} dirty pages, " + "flushed {4} records, total time {5} ms", new Object[] { table.name, sequenceNumber, pageSet.getActivePagesCount(), pageSet.getDirtyPagesCount(), flushedRecords, Long.toString(end - start) });
}
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "checkpoint {0} finished, logpos {1}, pageSet: {2}", new Object[] { table.name, sequenceNumber, pageSet.toString() });
}
} finally {
checkpointLock.asWriteLock().unlock();
}
long delta = end - start;
if (delta > 1000) {
long delta_lock = getlock - start;
long delta_pageAnalysis = pageAnalysis - getlock;
long delta_dirtyPagesFlush = dirtyPagesFlush - pageAnalysis;
long delta_smallPagesFlush = smallPagesFlush - dirtyPagesFlush;
long delta_newPagesFlush = newPagesFlush - smallPagesFlush;
long delta_keytopagecheckpoint = keytopagecheckpoint - newPagesFlush;
long delta_indexcheckpoint = indexcheckpoint - keytopagecheckpoint;
long delta_tablecheckpoint = tablecheckpoint - indexcheckpoint;
long delta_unload = end - tablecheckpoint;
LOGGER.log(Level.INFO, "long checkpoint for {0}, time {1}", new Object[] { table.name, delta + " ms (" + delta_lock + "+" + delta_pageAnalysis + "+" + delta_dirtyPagesFlush + "+" + delta_smallPagesFlush + "+" + delta_newPagesFlush + "+" + delta_keytopagecheckpoint + "+" + delta_indexcheckpoint + "+" + delta_tablecheckpoint + "+" + delta_unload + ")" });
}
return result;
}
use of herddb.log.LogSequenceNumber in project herddb by diennea.
the class TableManager method apply.
@Override
public void apply(CommitLogResult writeResult, LogEntry entry, boolean recovery) throws DataStorageManagerException, LogNotAvailableException {
if (recovery) {
if (writeResult.deferred) {
throw new DataStorageManagerException("impossibile to have a deferred CommitLogResult during recovery");
}
LogSequenceNumber position = writeResult.getLogSequenceNumber();
if (dumpLogSequenceNumber != null && !position.after(dumpLogSequenceNumber)) {
// in "restore mode" the 'position" parameter is from the 'old' transaction log
Transaction transaction = null;
if (entry.transactionId > 0) {
transaction = tableSpaceManager.getTransaction(entry.transactionId);
}
if (transaction != null) {
transaction.touch();
LOGGER.log(Level.FINER, "{0}.{1} keep {2} at {3}, table restored from position {4}, it belongs to transaction {5} which was in progress during the dump of the table", new Object[] { table.tablespace, table.name, entry, position, dumpLogSequenceNumber, entry.transactionId });
} else {
LOGGER.log(Level.FINER, "{0}.{1} skip {2} at {3}, table restored from position {4}", new Object[] { table.tablespace, table.name, entry, position, dumpLogSequenceNumber });
return;
}
} else if (!position.after(bootSequenceNumber)) {
// recovery mode
Transaction transaction = null;
if (entry.transactionId > 0) {
transaction = tableSpaceManager.getTransaction(entry.transactionId);
}
if (transaction != null) {
transaction.touch();
LOGGER.log(Level.FINER, "{0}.{1} keep {2} at {3}, table booted at {4}, it belongs to transaction {5} which was in progress during the flush of the table", new Object[] { table.tablespace, table.name, entry, position, bootSequenceNumber, entry.transactionId });
} else {
LOGGER.log(Level.FINER, "{0}.{1} skip {2} at {3}, table booted at {4}", new Object[] { table.tablespace, table.name, entry, position, bootSequenceNumber });
return;
}
}
}
if (writeResult.sync) {
// wait for data to be stored to log
writeResult.getLogSequenceNumber();
}
switch(entry.type) {
case LogEntryType.DELETE:
{
// remove the record from the set of existing records
Bytes key = entry.key;
if (entry.transactionId > 0) {
Transaction transaction = tableSpaceManager.getTransaction(entry.transactionId);
if (transaction == null) {
/* Ignore missing transaction only if during recovery and ignore property is active */
if (recovery && ignoreMissingTransactionsOnRecovery) {
LOGGER.log(Level.WARNING, "Ignoring delete of {0} due to missing transaction {1}", new Object[] { entry.key, entry.transactionId });
} else {
throw new DataStorageManagerException("no such transaction " + entry.transactionId);
}
} else {
transaction.registerDeleteOnTable(this.table.name, key, writeResult);
}
} else {
applyDelete(key);
}
break;
}
case LogEntryType.UPDATE:
{
Bytes key = entry.key;
Bytes value = entry.value;
if (entry.transactionId > 0) {
Transaction transaction = tableSpaceManager.getTransaction(entry.transactionId);
if (transaction == null) {
/* Ignore missing transaction only if during recovery and ignore property is active */
if (recovery && ignoreMissingTransactionsOnRecovery) {
LOGGER.log(Level.WARNING, "Ignoring update of {0} due to missing transaction {1}", new Object[] { entry.key, entry.transactionId });
} else {
throw new DataStorageManagerException("no such transaction " + entry.transactionId);
}
} else {
transaction.registerRecordUpdate(this.table.name, key, value, writeResult);
}
} else {
applyUpdate(key, value);
}
break;
}
case LogEntryType.INSERT:
{
Bytes key = entry.key;
Bytes value = entry.value;
if (entry.transactionId > 0) {
Transaction transaction = tableSpaceManager.getTransaction(entry.transactionId);
if (transaction == null) {
/* Ignore missing transaction only if during recovery and ignore property is active */
if (recovery && ignoreMissingTransactionsOnRecovery) {
LOGGER.log(Level.WARNING, "Ignoring insert of {0} due to missing transaction {1}", new Object[] { entry.key, entry.transactionId });
} else {
throw new DataStorageManagerException("no such transaction " + entry.transactionId);
}
} else {
transaction.registerInsertOnTable(table.name, key, value, writeResult);
}
} else {
applyInsert(key, value, false);
}
break;
}
case LogEntryType.TRUNCATE_TABLE:
{
applyTruncate();
}
break;
default:
throw new IllegalArgumentException("unhandled entry type " + entry.type);
}
}
use of herddb.log.LogSequenceNumber in project herddb by diennea.
the class TableSpaceManager method apply.
void apply(CommitLogResult position, LogEntry entry, boolean recovery) throws DataStorageManagerException, DDLException {
if (!position.deferred || position.sync) {
// this will wait for the write to be acknowledged by the log
// it can throw LogNotAvailableException
this.actualLogSequenceNumber = position.getLogSequenceNumber();
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "apply {0} {1}", new Object[] { position.getLogSequenceNumber(), entry });
}
} else {
if (LOGGER.isLoggable(Level.FINEST)) {
LOGGER.log(Level.FINEST, "apply {0} {1}", new Object[] { position, entry });
}
}
switch(entry.type) {
case LogEntryType.NOOP:
{
// NOOP
}
break;
case LogEntryType.BEGINTRANSACTION:
{
long id = entry.transactionId;
Transaction transaction = new Transaction(id, tableSpaceName, position);
transactions.put(id, transaction);
}
break;
case LogEntryType.ROLLBACKTRANSACTION:
{
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
if (transaction == null) {
throw new DataStorageManagerException("invalid transaction id " + id + ", only " + transactions.keySet());
}
List<AbstractIndexManager> indexManagers = new ArrayList<>(indexes.values());
for (AbstractIndexManager indexManager : indexManagers) {
if (indexManager.getCreatedInTransaction() == 0 || indexManager.getCreatedInTransaction() == id) {
indexManager.onTransactionRollback(transaction);
}
}
List<AbstractTableManager> managers = new ArrayList<>(tables.values());
for (AbstractTableManager manager : managers) {
if (manager.getCreatedInTransaction() == 0 || manager.getCreatedInTransaction() == id) {
Table table = manager.getTable();
if (transaction.isNewTable(table.name)) {
LOGGER.log(Level.INFO, "rollback CREATE TABLE " + table.tablespace + "." + table.name);
disposeTable(manager);
Map<String, AbstractIndexManager> indexes = indexesByTable.remove(manager.getTable().name);
if (indexes != null) {
for (AbstractIndexManager indexManager : indexes.values()) {
disposeIndexManager(indexManager);
}
}
} else {
manager.onTransactionRollback(transaction);
}
}
}
transactions.remove(transaction.transactionId);
}
break;
case LogEntryType.COMMITTRANSACTION:
{
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
if (transaction == null) {
throw new DataStorageManagerException("invalid transaction id " + id);
}
LogSequenceNumber commit = position.getLogSequenceNumber();
transaction.sync(commit);
List<AbstractTableManager> managers = new ArrayList<>(tables.values());
for (AbstractTableManager manager : managers) {
if (manager.getCreatedInTransaction() == 0 || manager.getCreatedInTransaction() == id) {
manager.onTransactionCommit(transaction, recovery);
}
}
List<AbstractIndexManager> indexManagers = new ArrayList<>(indexes.values());
for (AbstractIndexManager indexManager : indexManagers) {
if (indexManager.getCreatedInTransaction() == 0 || indexManager.getCreatedInTransaction() == id) {
indexManager.onTransactionCommit(transaction, recovery);
}
}
if ((transaction.droppedTables != null && !transaction.droppedTables.isEmpty()) || (transaction.droppedIndexes != null && !transaction.droppedIndexes.isEmpty())) {
if (transaction.droppedTables != null) {
for (String dropped : transaction.droppedTables) {
for (AbstractTableManager manager : managers) {
if (manager.getTable().name.equals(dropped)) {
disposeTable(manager);
}
}
}
}
if (transaction.droppedIndexes != null) {
for (String dropped : transaction.droppedIndexes) {
for (AbstractIndexManager manager : indexManagers) {
if (manager.getIndex().name.equals(dropped)) {
disposeIndexManager(manager);
}
}
}
}
}
if ((transaction.newTables != null && !transaction.newTables.isEmpty()) || (transaction.droppedTables != null && !transaction.droppedTables.isEmpty()) || (transaction.newIndexes != null && !transaction.newIndexes.isEmpty()) || (transaction.droppedIndexes != null && !transaction.droppedIndexes.isEmpty())) {
writeTablesOnDataStorageManager(position, false);
dbmanager.getPlanner().clearCache();
}
transactions.remove(transaction.transactionId);
}
break;
case LogEntryType.CREATE_TABLE:
{
Table table = Table.deserialize(entry.value.to_array());
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerNewTable(table, position);
}
bootTable(table, entry.transactionId, null, true);
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
}
}
break;
case LogEntryType.CREATE_INDEX:
{
Index index = Index.deserialize(entry.value.to_array());
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerNewIndex(index, position);
}
AbstractTableManager tableManager = tables.get(index.table);
if (tableManager == null) {
throw new RuntimeException("table " + index.table + " does not exists");
}
bootIndex(index, tableManager, true, entry.transactionId, true, false);
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
}
}
break;
case LogEntryType.DROP_TABLE:
{
String tableName = entry.tableName;
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerDropTable(tableName, position);
} else {
AbstractTableManager manager = tables.get(tableName);
if (manager != null) {
disposeTable(manager);
Map<String, AbstractIndexManager> indexes = indexesByTable.get(tableName);
if (indexes != null && !indexes.isEmpty()) {
LOGGER.log(Level.SEVERE, "It looks like we are dropping a table " + tableName + " with these indexes " + indexes);
}
}
}
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
}
}
break;
case LogEntryType.DROP_INDEX:
{
String indexName = entry.value.to_string();
if (entry.transactionId > 0) {
long id = entry.transactionId;
Transaction transaction = transactions.get(id);
transaction.registerDropIndex(indexName, position);
} else {
AbstractIndexManager manager = indexes.get(indexName);
if (manager != null) {
disposeIndexManager(manager);
}
}
if (entry.transactionId <= 0) {
writeTablesOnDataStorageManager(position, false);
dbmanager.getPlanner().clearCache();
}
}
break;
case LogEntryType.ALTER_TABLE:
{
Table table = Table.deserialize(entry.value.to_array());
alterTable(table, null);
writeTablesOnDataStorageManager(position, false);
}
break;
case LogEntryType.TABLE_CONSISTENCY_CHECK:
{
/*
In recovery mode we need to skip the consistency check:
the tablespace may not be available yet, and therefore Calcite would not be able to perform the SELECT query.
*/
if (recovery) {
LOGGER.log(Level.INFO, "skip {0} consistency check LogEntry {1}", new Object[] { tableSpaceName, entry });
break;
}
try {
TableChecksum check = MAPPER.readValue(entry.value.to_array(), TableChecksum.class);
String tableSpace = check.getTableSpaceName();
String query = check.getQuery();
String tableName = entry.tableName;
// For this entry type (type = 14), the follower has to re-run the query recorded in the transaction log
if (!isLeader()) {
AbstractTableManager tablemanager = this.getTableManager(tableName);
DBManager manager = this.getDbmanager();
if (tablemanager == null || tablemanager.getCreatedInTransaction() > 0) {
throw new TableDoesNotExistException(String.format("Table %s does not exist.", tableName));
}
/*
scan = true
allowCache = false
returnValues = false
maxRows = -1
*/
TranslatedQuery translated = manager.getPlanner().translate(tableSpace, query, Collections.emptyList(), true, false, false, -1);
TableChecksum scanResult = TableDataChecksum.createChecksum(manager, translated, this, tableSpace, tableName);
long followerDigest = scanResult.getDigest();
long leaderDigest = check.getDigest();
long leaderNumRecords = check.getNumRecords();
long followerNumRecords = scanResult.getNumRecords();
// the check passes only if the digest and the number of processed records match exactly
if (followerDigest == leaderDigest && leaderNumRecords == followerNumRecords) {
LOGGER.log(Level.INFO, "Data consistency check PASS for table {0} tablespace {1} with Checksum {2}", new Object[] { tableName, tableSpace, followerDigest });
} else {
LOGGER.log(Level.SEVERE, "Data consistency check FAILED for table {0} in tablespace {1} with Checksum {2}", new Object[] { tableName, tableSpace, followerDigest });
}
} else {
long digest = check.getDigest();
LOGGER.log(Level.INFO, "Created checksum {0} for table {1} in tablespace {2} on node {3}", new Object[] { digest, entry.tableName, tableSpace, this.getDbmanager().getNodeId() });
}
} catch (IOException | DataScannerException ex) {
LOGGER.log(Level.SEVERE, "Error during table consistency check ", ex);
}
}
break;
default:
// other entry types are not important for the tablespacemanager
break;
}
if (entry.tableName != null && entry.type != LogEntryType.CREATE_TABLE && entry.type != LogEntryType.CREATE_INDEX && entry.type != LogEntryType.ALTER_TABLE && entry.type != LogEntryType.DROP_TABLE && entry.type != LogEntryType.TABLE_CONSISTENCY_CHECK) {
AbstractTableManager tableManager = tables.get(entry.tableName);
tableManager.apply(position, entry, recovery);
}
}
use of herddb.log.LogSequenceNumber in project herddb by diennea.
the class TableSpaceManager method recover.
void recover(TableSpace tableSpaceInfo) throws DataStorageManagerException, LogNotAvailableException, MetadataStorageManagerException {
if (recoveryInProgress) {
throw new HerdDBInternalException("Cannot run recovery twice");
}
recoveryInProgress = true;
LogSequenceNumber logSequenceNumber = dataStorageManager.getLastcheckpointSequenceNumber(tableSpaceUUID);
actualLogSequenceNumber = logSequenceNumber;
LOGGER.log(Level.INFO, "{0} recover {1}, logSequenceNumber from DataStorage: {2}", new Object[] { nodeId, tableSpaceName, logSequenceNumber });
List<Table> tablesAtBoot = dataStorageManager.loadTables(logSequenceNumber, tableSpaceUUID);
List<Index> indexesAtBoot = dataStorageManager.loadIndexes(logSequenceNumber, tableSpaceUUID);
String tableNames = tablesAtBoot.stream().map(t -> {
return t.name;
}).collect(Collectors.joining(","));
String indexNames = indexesAtBoot.stream().map(t -> {
return t.name + " on table " + t.table;
}).collect(Collectors.joining(","));
if (!tableNames.isEmpty()) {
LOGGER.log(Level.INFO, "{0} {1} tablesAtBoot: {2}, indexesAtBoot: {3}", new Object[] { nodeId, tableSpaceName, tableNames, indexNames });
}
for (Table table : tablesAtBoot) {
TableManager tableManager = bootTable(table, 0, null, false);
for (Index index : indexesAtBoot) {
if (index.table.equals(table.name)) {
bootIndex(index, tableManager, false, 0, false, false);
}
}
}
dataStorageManager.loadTransactions(logSequenceNumber, tableSpaceUUID, t -> {
transactions.put(t.transactionId, t);
LOGGER.log(Level.FINER, "{0} {1} tx {2} at boot lsn {3}", new Object[] { nodeId, tableSpaceName, t.transactionId, t.lastSequenceNumber });
try {
if (t.newTables != null) {
for (Table table : t.newTables.values()) {
if (!tables.containsKey(table.name)) {
bootTable(table, t.transactionId, null, false);
}
}
}
if (t.newIndexes != null) {
for (Index index : t.newIndexes.values()) {
if (!indexes.containsKey(index.name)) {
AbstractTableManager tableManager = tables.get(index.table);
bootIndex(index, tableManager, false, t.transactionId, false, false);
}
}
}
} catch (Exception err) {
LOGGER.log(Level.SEVERE, "error while booting tmp tables " + err, err);
throw new RuntimeException(err);
}
});
if (LogSequenceNumber.START_OF_TIME.equals(logSequenceNumber) && dbmanager.getServerConfiguration().getBoolean(ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT, ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT_DEFAULT)) {
LOGGER.log(Level.SEVERE, nodeId + " full recovery of data is forced (" + ServerConfiguration.PROPERTY_BOOT_FORCE_DOWNLOAD_SNAPSHOT + "=true) for tableSpace " + tableSpaceName);
downloadTableSpaceData();
log.recovery(actualLogSequenceNumber, new ApplyEntryOnRecovery(), false);
} else {
try {
log.recovery(logSequenceNumber, new ApplyEntryOnRecovery(), false);
} catch (FullRecoveryNeededException fullRecoveryNeeded) {
LOGGER.log(Level.SEVERE, nodeId + " full recovery of data is needed for tableSpace " + tableSpaceName, fullRecoveryNeeded);
downloadTableSpaceData();
log.recovery(actualLogSequenceNumber, new ApplyEntryOnRecovery(), false);
}
}
recoveryInProgress = false;
if (!LogSequenceNumber.START_OF_TIME.equals(actualLogSequenceNumber)) {
LOGGER.log(Level.INFO, "Recovery finished for {0} seqNum {1}", new Object[] { tableSpaceName, actualLogSequenceNumber });
checkpoint(false, false, false);
}
}
use of herddb.log.LogSequenceNumber in project herddb by diennea.
the class BookKeeperDataStorageManager method writeTransactionsAtCheckpoint.
@Override
public Collection<PostCheckpointAction> writeTransactionsAtCheckpoint(String tableSpace, LogSequenceNumber sequenceNumber, Collection<Transaction> transactions) throws DataStorageManagerException {
if (sequenceNumber.isStartOfTime() && !transactions.isEmpty()) {
throw new DataStorageManagerException("impossible to write a non empty transactions list at start-of-time");
}
String checkPointFile = getTablespaceTransactionsFile(tableSpace, sequenceNumber);
LOGGER.log(Level.FINE, "writeTransactionsAtCheckpoint for tableSpace {0} sequenceNumber {1} to {2}, active transactions {3}", new Object[] { tableSpace, sequenceNumber, checkPointFile, transactions.size() });
try (VisibleByteArrayOutputStream buffer = new VisibleByteArrayOutputStream();
ExtendedDataOutputStream dout = new ExtendedDataOutputStream(buffer)) {
// version
dout.writeVLong(1);
// flags for future implementations
dout.writeVLong(0);
dout.writeUTF(tableSpace);
dout.writeZLong(sequenceNumber.ledgerId);
dout.writeZLong(sequenceNumber.offset);
dout.writeInt(transactions.size());
for (Transaction t : transactions) {
t.serialize(dout);
}
dout.flush();
writeZNodeEnforceOwnership(tableSpace, checkPointFile, buffer.toByteArray(), null);
} catch (IOException err) {
throw new DataStorageManagerException(err);
}
Collection<PostCheckpointAction> result = new ArrayList<>();
String tableSpaceDirectory = getTableSpaceZNode(tableSpace);
List<String> stream = zkGetChildren(tableSpaceDirectory);
for (String p : stream) {
if (isTransactionsFile(p)) {
try {
byte[] content = readZNode(checkPointFile, new Stat());
if (content != null) {
LogSequenceNumber logPositionInFile = readLogSequenceNumberFromTransactionsFile(tableSpace, content, p);
if (sequenceNumber.after(logPositionInFile)) {
LOGGER.log(Level.FINEST, "transactions metadata file " + p + ". will be deleted after checkpoint end");
result.add(new DeleteZNodeAction(tableSpace, "transactions", "delete transactions file " + p, p));
}
}
} catch (DataStorageManagerException ignore) {
LOGGER.log(Level.SEVERE, "Unparsable transactions file " + p, ignore);
result.add(new DeleteZNodeAction(tableSpace, "transactions", "delete unparsable transactions file " + p, p));
}
}
}
return result;
}