Use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointStatus in project ignite by apache.
The class GridCacheDatabaseSharedManager, method finishRecovery.
/**
* Restores last valid WAL pointer and resumes logging from that pointer.
* Re-creates metastorage if needed.
*
* @throws IgniteCheckedException If failed.
*/
private void finishRecovery() throws IgniteCheckedException {
assert !cctx.kernalContext().clientNode();
long time = System.currentTimeMillis();
CHECKPOINT_LOCK_HOLD_COUNT.set(CHECKPOINT_LOCK_HOLD_COUNT.get() + 1);
try {
for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
    lsnr.beforeResumeWalLogging(this);
// Try to resume logging since last finished checkpoint if possible.
if (walTail == null) {
CheckpointStatus status = readCheckpointStatus();
walTail = CheckpointStatus.NULL_PTR.equals(status.endPtr) ? null : status.endPtr;
}
resumeWalLogging();
walTail = null;
// Recreate metastorage to refresh page memory state after deactivation.
if (metaStorage == null)
metaStorage = createMetastorage(false);
notifyMetastorageReadyForReadWrite();
U.log(log, "Finish recovery performed in " + (System.currentTimeMillis() - time) + " ms.");
} catch (IgniteCheckedException e) {
if (X.hasCause(e, StorageException.class, IOException.class))
cctx.kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, e));
throw e;
} finally {
CHECKPOINT_LOCK_HOLD_COUNT.set(CHECKPOINT_LOCK_HOLD_COUNT.get() - 1);
}
}
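The CHECKPOINT_LOCK_HOLD_COUNT bookkeeping above follows a reentrant hold-count pattern: a thread-local counter is incremented before the guarded section and decremented in a finally block, so it stays balanced even if recovery fails. A minimal standalone sketch of that pattern, assuming illustrative names (HoldCountSketch, enter, exit) that are not Ignite API:

final class HoldCountSketch {
    // Per-thread counter of how many times the current thread has entered the guarded section.
    private static final ThreadLocal<Integer> HOLD_CNT = ThreadLocal.withInitial(() -> 0);

    static void enter() {
        HOLD_CNT.set(HOLD_CNT.get() + 1);
    }

    static void exit() {
        HOLD_CNT.set(HOLD_CNT.get() - 1);
    }

    static boolean heldByCurrentThread() {
        return HOLD_CNT.get() > 0;
    }

    static void runGuarded(Runnable section) {
        enter();
        try {
            section.run();
        }
        finally {
            // Always decrement, mirroring the finally block in finishRecovery().
            exit();
        }
    }
}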
Use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointStatus in project ignite by apache.
The class GridCacheDatabaseSharedManager, method performBinaryMemoryRestore.
/**
* @param status Checkpoint status.
* @param cacheGroupsPredicate Cache groups to restore.
* @param recordTypePredicate Filter records by type.
* @param finalizeState Whether to finalize the restored state (finish an interrupted checkpoint if needed).
* @return Binary restore state, or {@code null} if {@code finalizeState} is {@code false}.
* @throws IgniteCheckedException If failed.
* @throws StorageException In case I/O error occurred during operations with storage.
*/
private RestoreBinaryState performBinaryMemoryRestore(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean finalizeState) throws IgniteCheckedException {
if (log.isInfoEnabled())
log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
WALPointer recPtr = status.endPtr;
boolean apply = status.needRestoreMemory();
try {
WALRecord startRec = !CheckpointStatus.NULL_PTR.equals(status.startPtr) || apply ? cctx.wal().read(status.startPtr) : null;
if (apply) {
if (finalizeState)
U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");
cctx.cache().cacheGroupDescriptors().forEach((grpId, desc) -> {
if (!cacheGroupsPredicate.apply(grpId))
return;
try {
DataRegion region = cctx.database().dataRegion(desc.config().getDataRegionName());
if (region == null || !cctx.isLazyMemoryAllocation(region))
return;
region.pageMemory().start();
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
});
cctx.pageStore().beginRecover();
if (!(startRec instanceof CheckpointRecord))
throw new StorageException("Checkpoint marker doesn't point to checkpoint record " + "[ptr=" + status.startPtr + ", rec=" + startRec + "]");
WALPointer cpMark = ((CheckpointRecord) startRec).checkpointMark();
if (cpMark != null) {
if (log.isInfoEnabled())
log.info("Restoring checkpoint after logical recovery, will start physical recovery from " + "back pointer: " + cpMark);
recPtr = cpMark;
}
} else
cctx.wal().notchLastCheckpointPtr(status.startPtr);
} catch (NoSuchElementException e) {
throw new StorageException("Failed to read checkpoint record from WAL, persistence consistency " + "cannot be guaranteed. Make sure configuration points to correct WAL folders and WAL folder is " + "properly mounted [ptr=" + status.startPtr + ", walPath=" + persistenceCfg.getWalPath() + ", walArchive=" + persistenceCfg.getWalArchivePath() + "]");
}
AtomicReference<Throwable> applyError = new AtomicReference<>();
StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
long start = U.currentTimeMillis();
long lastArchivedSegment = cctx.wal().lastArchivedSegment();
WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);
RestoreBinaryState restoreBinaryState = new RestoreBinaryState(status, it, lastArchivedSegment, cacheGroupsPredicate);
AtomicLong applied = new AtomicLong();
try {
while (restoreBinaryState.hasNext()) {
if (applyError.get() != null)
break;
WALRecord rec = restoreBinaryState.next();
if (rec == null)
break;
switch (rec.type()) {
case PAGE_RECORD:
if (restoreBinaryState.needApplyBinaryUpdate()) {
PageSnapshot pageSnapshot = (PageSnapshot) rec;
// Here we do not require tag check because we may be applying memory changes after
// several repetitive restarts and the same pages may have changed several times.
int groupId = pageSnapshot.fullPageId().groupId();
int partId = partId(pageSnapshot.fullPageId().pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageSnapshot(pageMem, pageSnapshot);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page snapshot. rec=[" + pageSnapshot + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page snapshot", t));
}
}, groupId, partId, exec, semaphore);
}
break;
case PART_META_UPDATE_STATE:
PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
{
int groupId = metaStateRecord.groupId();
int partId = metaStateRecord.partitionId();
stripedApplyPage((pageMem) -> {
GridDhtPartitionState state = fromOrdinal(metaStateRecord.state());
if (state == null || state == GridDhtPartitionState.EVICTED)
schedulePartitionDestroy(groupId, partId);
else {
try {
cancelOrWaitPartitionDestroy(groupId, partId);
} catch (Throwable t) {
U.error(log, "Failed to cancel or wait partition destroy. rec=[" + metaStateRecord + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to cancel or wait partition destroy", t));
}
}
}, groupId, partId, exec, semaphore);
}
break;
case PARTITION_DESTROY:
PartitionDestroyRecord destroyRecord = (PartitionDestroyRecord) rec;
{
int groupId = destroyRecord.groupId();
int partId = destroyRecord.partitionId();
stripedApplyPage((pageMem) -> {
pageMem.invalidate(groupId, partId);
schedulePartitionDestroy(groupId, partId);
}, groupId, partId, exec, semaphore);
}
break;
default:
if (restoreBinaryState.needApplyBinaryUpdate() && rec instanceof PageDeltaRecord) {
PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
int groupId = pageDelta.groupId();
int partId = partId(pageDelta.pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageDelta(pageMem, pageDelta, true);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page delta. rec=[" + pageDelta + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page delta", t));
}
}, groupId, partId, exec, semaphore);
}
}
}
} finally {
it.close();
awaitApplyComplete(exec, applyError);
}
if (!finalizeState)
return null;
WALPointer lastReadPtr = restoreBinaryState.lastReadRecordPointer();
if (status.needRestoreMemory()) {
if (restoreBinaryState.needApplyBinaryUpdate())
throw new StorageException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastReadPtr + "]");
if (log.isInfoEnabled())
log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr, exec);
}
return restoreBinaryState;
}
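The apply loop above offloads all page work through stripedApplyPage(...): closures for the same (groupId, partitionId) pair always run on the same stripe of the striped executor, a semaphore bounds the number of queued tasks, and the first failure is captured in an AtomicReference that awaitApplyComplete(...) later surfaces. A simplified, self-contained sketch of that dispatch pattern, using illustrative names (StripedApplySketch, apply, awaitAndCheck) that are not Ignite API:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicReference;

final class StripedApplySketch {
    private final ExecutorService[] stripes;
    private final Semaphore permits;
    private final int maxInFlight;
    private final AtomicReference<Throwable> err = new AtomicReference<>();

    StripedApplySketch(int stripeCnt, int maxInFlight) {
        stripes = new ExecutorService[stripeCnt];

        for (int i = 0; i < stripeCnt; i++)
            stripes[i] = Executors.newSingleThreadExecutor();

        this.maxInFlight = maxInFlight;
        permits = new Semaphore(maxInFlight);
    }

    /** Routes the task to a stripe derived from (grpId, partId) so updates to one partition stay ordered. */
    void apply(int grpId, int partId, Runnable task) throws InterruptedException {
        if (err.get() != null)
            return; // Fail fast once any task has failed, like the applyError check in the loop above.

        permits.acquire(); // Bound the number of in-flight tasks.

        int stripe = Math.floorMod(grpId * 31 + partId, stripes.length);

        stripes[stripe].execute(() -> {
            try {
                task.run();
            }
            catch (Throwable t) {
                err.compareAndSet(null, t); // Remember only the first failure.
            }
            finally {
                permits.release();
            }
        });
    }

    /** Waits for all submitted tasks and rethrows the first captured failure, similar in spirit to awaitApplyComplete(). */
    void awaitAndCheck() throws Exception {
        permits.acquire(maxInFlight); // All permits reclaimed means no task is still running.
        permits.release(maxInFlight);

        if (err.get() != null)
            throw new Exception("Failed to apply WAL records", err.get());
    }
}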
Use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointStatus in project ignite by apache.
The class GridCacheDatabaseSharedManager, method startMemoryRestore.
/**
* {@inheritDoc}
*/
@Override
public void startMemoryRestore(GridKernalContext kctx, TimeBag startTimer) throws IgniteCheckedException {
if (kctx.clientNode())
return;
MaintenanceRegistry mntcRegistry = kctx.maintenanceRegistry();
MaintenanceTask mntcTask = mntcRegistry.activeMaintenanceTask(CORRUPTED_DATA_FILES_MNTC_TASK_NAME);
if (mntcTask != null) {
log.warning("Maintenance task found, stop restoring memory");
File workDir = ((FilePageStoreManager) cctx.pageStore()).workDir();
mntcRegistry.registerWorkflowCallback(CORRUPTED_DATA_FILES_MNTC_TASK_NAME, new CorruptedPdsMaintenanceCallback(workDir, Arrays.asList(mntcTask.parameters().split(Pattern.quote(File.separator)))));
return;
}
checkpointReadLock();
RestoreLogicalState logicalState;
try {
// Perform early regions startup before restoring state.
initAndStartRegions(kctx.config().getDataStorageConfiguration());
startTimer.finishGlobalStage("Init and start regions");
// Restore binary memory for all cache groups that do not have WAL disabled.
restoreBinaryMemory(groupsWithEnabledWal(), physicalRecords());
if (recoveryVerboseLogging && log.isInfoEnabled()) {
log.info("Partition states information after BINARY RECOVERY phase:");
dumpPartitionsInfo(cctx, log);
}
startTimer.finishGlobalStage("Restore binary memory");
CheckpointStatus status = readCheckpointStatus();
logicalState = applyLogicalUpdates(status, groupsWithEnabledWal(), logicalRecords(), false);
cctx.tm().clearUncommitedStates();
cctx.wal().startAutoReleaseSegments();
if (recoveryVerboseLogging && log.isInfoEnabled()) {
log.info("Partition states information after LOGICAL RECOVERY phase:");
dumpPartitionsInfo(cctx, log);
}
startTimer.finishGlobalStage("Restore logical state");
} catch (IgniteCheckedException e) {
releaseFileLock();
throw e;
} finally {
checkpointReadUnlock();
}
walTail = tailPointer(logicalState);
cctx.wal().onDeActivate(kctx);
}
Use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointStatus in project ignite by apache.
The class GridCacheDatabaseSharedManager, method readMetastore.
/**
* Temporarily restores and reads the metastorage (read-only mode), then closes it and stops its data region.
* @throws IgniteCheckedException If failed.
*/
private void readMetastore() throws IgniteCheckedException {
try {
CheckpointStatus status = readCheckpointStatus();
checkpointReadLock();
try {
dataRegion(METASTORE_DATA_REGION_NAME).pageMemory().start();
performBinaryMemoryRestore(status, onlyMetastorageGroup(), physicalRecords(), false);
metaStorage = createMetastorage(true);
applyLogicalUpdates(status, onlyMetastorageGroup(), onlyMetastorageAndEncryptionRecords(), true);
fillWalDisabledGroups();
checkpointManager.initializeStorage();
registerSystemView();
notifyMetastorageReadyForRead();
cctx.kernalContext().maintenanceRegistry().registerWorkflowCallbackIfTaskExists(DEFRAGMENTATION_MNTC_TASK_NAME, task -> {
prepareCacheDefragmentation(fromStore(task).cacheNames());
return new DefragmentationWorkflowCallback(cctx.kernalContext()::log, defrgMgr, cctx.kernalContext().failure());
});
} finally {
if (metaStorage != null)
metaStorage.close();
metaStorage = null;
dataRegion(METASTORE_DATA_REGION_NAME).pageMemory().stop(false);
cctx.pageStore().cleanupPageStoreIfMatch(new Predicate<Integer>() {
@Override
public boolean test(Integer grpId) {
return MetaStorage.METASTORAGE_CACHE_ID == grpId;
}
}, false);
checkpointReadUnlock();
}
} catch (StorageException e) {
cctx.kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, e));
throw new IgniteCheckedException(e);
}
}
Use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointStatus in project ignite by apache.
The class GridCacheDatabaseSharedManager, method restoreBinaryMemory.
/**
* @param cacheGroupsPredicate Cache groups to restore.
* @param recordTypePredicate Filter records by type.
* @return Last seen WAL pointer during binary memory recovery.
* @throws IgniteCheckedException If failed.
*/
private RestoreBinaryState restoreBinaryMemory(IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate) throws IgniteCheckedException {
long time = System.currentTimeMillis();
try {
if (log.isInfoEnabled())
log.info("Starting binary memory restore for: " + cctx.cache().cacheGroupDescriptors().keySet());
for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
    lsnr.beforeBinaryMemoryRestore(this);
CheckpointStatus status = readCheckpointStatus();
// First, bring memory to the last consistent checkpoint state if needed.
// This method should return a pointer to the last valid record in the WAL.
RestoreBinaryState binaryState = performBinaryMemoryRestore(status, cacheGroupsPredicate, recordTypePredicate, true);
WALPointer restored = binaryState.lastReadRecordPointer();
if (restored.equals(CheckpointStatus.NULL_PTR))
// This is the first record in the WAL.
restored = null;
else
restored = restored.next();
if (restored == null && !status.endPtr.equals(CheckpointStatus.NULL_PTR)) {
throw new StorageException("The memory cannot be restored. The critical part of WAL archive is missing " + "[tailWalPtr=" + restored + ", endPtr=" + status.endPtr + ']');
} else if (restored != null)
U.log(log, "Binary memory state restored at node startup [restoredPtr=" + restored + ']');
// WAL logging is now available.
cctx.wal().resumeLogging(restored);
// Log MemoryRecoveryRecord to make sure that old physical records are not replayed during
// next physical recovery.
checkpointManager.memoryRecoveryRecordPtr(cctx.wal().log(new MemoryRecoveryRecord(U.currentTimeMillis())));
for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
    lsnr.afterBinaryMemoryRestore(this, binaryState);
if (log.isInfoEnabled())
log.info("Binary recovery performed in " + (System.currentTimeMillis() - time) + " ms.");
return binaryState;
} catch (IgniteCheckedException e) {
if (X.hasCause(e, StorageException.class, IOException.class))
cctx.kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, e));
throw e;
}
}
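restoreBinaryMemory() above derives the pointer from which WAL logging resumes: the NULL_PTR sentinel means nothing was read, so logging starts from scratch; otherwise logging resumes right after the last record that was successfully read. A hedged one-method sketch of that rule, where resumePointer is an illustrative helper rather than Ignite API:

// Illustrative helper only; mirrors the pointer arithmetic in restoreBinaryMemory().
static WALPointer resumePointer(WALPointer lastRead) {
    if (CheckpointStatus.NULL_PTR.equals(lastRead))
        return null; // Nothing was read: resume logging from the very beginning.

    return lastRead.next(); // Resume right after the last successfully read record.
}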