Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in project ignite by apache.
The class GridCacheDatabaseSharedManager, method performBinaryMemoryRestore.
/**
* @param status Checkpoint status.
* @param cacheGroupsPredicate Cache groups to restore.
* @param recordTypePredicate Filter for WAL record types and pointers to replay.
* @param finalizeState Whether to finalize the checkpoint state after binary recovery.
* @return Binary restore state, or {@code null} if {@code finalizeState} is {@code false}.
* @throws IgniteCheckedException If failed.
* @throws StorageException If an I/O error occurred during operations with storage.
*/
private RestoreBinaryState performBinaryMemoryRestore(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean finalizeState) throws IgniteCheckedException {
if (log.isInfoEnabled())
log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
WALPointer recPtr = status.endPtr;
boolean apply = status.needRestoreMemory();
try {
WALRecord startRec = !CheckpointStatus.NULL_PTR.equals(status.startPtr) || apply ? cctx.wal().read(status.startPtr) : null;
if (apply) {
if (finalizeState)
U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");
cctx.cache().cacheGroupDescriptors().forEach((grpId, desc) -> {
if (!cacheGroupsPredicate.apply(grpId))
return;
try {
DataRegion region = cctx.database().dataRegion(desc.config().getDataRegionName());
if (region == null || !cctx.isLazyMemoryAllocation(region))
return;
region.pageMemory().start();
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
});
cctx.pageStore().beginRecover();
if (!(startRec instanceof CheckpointRecord))
throw new StorageException("Checkpoint marker doesn't point to checkpoint record " + "[ptr=" + status.startPtr + ", rec=" + startRec + "]");
WALPointer cpMark = ((CheckpointRecord) startRec).checkpointMark();
if (cpMark != null) {
if (log.isInfoEnabled())
log.info("Restoring checkpoint after logical recovery, will start physical recovery from " + "back pointer: " + cpMark);
recPtr = cpMark;
}
} else
cctx.wal().notchLastCheckpointPtr(status.startPtr);
} catch (NoSuchElementException e) {
throw new StorageException("Failed to read checkpoint record from WAL, persistence consistency " + "cannot be guaranteed. Make sure configuration points to correct WAL folders and WAL folder is " + "properly mounted [ptr=" + status.startPtr + ", walPath=" + persistenceCfg.getWalPath() + ", walArchive=" + persistenceCfg.getWalArchivePath() + "]");
}
AtomicReference<Throwable> applyError = new AtomicReference<>();
StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
long start = U.currentTimeMillis();
long lastArchivedSegment = cctx.wal().lastArchivedSegment();
WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);
RestoreBinaryState restoreBinaryState = new RestoreBinaryState(status, it, lastArchivedSegment, cacheGroupsPredicate);
AtomicLong applied = new AtomicLong();
try {
while (restoreBinaryState.hasNext()) {
if (applyError.get() != null)
break;
WALRecord rec = restoreBinaryState.next();
if (rec == null)
break;
switch(rec.type()) {
case PAGE_RECORD:
if (restoreBinaryState.needApplyBinaryUpdate()) {
PageSnapshot pageSnapshot = (PageSnapshot) rec;
// A tag check is not required here because we may be applying memory changes after
// several repeated restarts, and the same pages may have changed several times.
int groupId = pageSnapshot.fullPageId().groupId();
int partId = partId(pageSnapshot.fullPageId().pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageSnapshot(pageMem, pageSnapshot);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page snapshot. rec=[" + pageSnapshot + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page snapshot", t));
}
}, groupId, partId, exec, semaphore);
}
break;
case PART_META_UPDATE_STATE:
PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
{
int groupId = metaStateRecord.groupId();
int partId = metaStateRecord.partitionId();
stripedApplyPage((pageMem) -> {
GridDhtPartitionState state = fromOrdinal(metaStateRecord.state());
if (state == null || state == GridDhtPartitionState.EVICTED)
schedulePartitionDestroy(groupId, partId);
else {
try {
cancelOrWaitPartitionDestroy(groupId, partId);
} catch (Throwable t) {
U.error(log, "Failed to cancel or wait partition destroy. rec=[" + metaStateRecord + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to cancel or wait partition destroy", t));
}
}
}, groupId, partId, exec, semaphore);
}
break;
case PARTITION_DESTROY:
PartitionDestroyRecord destroyRecord = (PartitionDestroyRecord) rec;
{
int groupId = destroyRecord.groupId();
int partId = destroyRecord.partitionId();
stripedApplyPage((pageMem) -> {
pageMem.invalidate(groupId, partId);
schedulePartitionDestroy(groupId, partId);
}, groupId, partId, exec, semaphore);
}
break;
default:
if (restoreBinaryState.needApplyBinaryUpdate() && rec instanceof PageDeltaRecord) {
PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
int groupId = pageDelta.groupId();
int partId = partId(pageDelta.pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageDelta(pageMem, pageDelta, true);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page delta. rec=[" + pageDelta + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page delta", t));
}
}, groupId, partId, exec, semaphore);
}
}
}
} finally {
it.close();
awaitApplyComplete(exec, applyError);
}
if (!finalizeState)
return null;
WALPointer lastReadPtr = restoreBinaryState.lastReadRecordPointer();
if (status.needRestoreMemory()) {
if (restoreBinaryState.needApplyBinaryUpdate())
throw new StorageException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastReadPtr + "]");
if (log.isInfoEnabled())
log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr, exec);
}
return restoreBinaryState;
}
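The CheckpointRecord handling above reduces to a small core: read the WAL record referenced by the checkpoint start marker, verify it is actually a CheckpointRecord, and, if the record carries a back pointer, start physical recovery from that pointer instead of the last valid position. A minimal sketch of that core, distilled from the method above (error handling and the replay loop omitted):
// Sketch only: the CheckpointRecord-specific part of performBinaryMemoryRestore, distilled.
WALPointer recPtr = status.endPtr;                      // by default, replay from the last valid position
WALRecord startRec = cctx.wal().read(status.startPtr);  // record referenced by the checkpoint start marker
if (!(startRec instanceof CheckpointRecord))
    throw new StorageException("Checkpoint marker doesn't point to checkpoint record [ptr=" + status.startPtr + ']');
WALPointer cpMark = ((CheckpointRecord)startRec).checkpointMark();
if (cpMark != null)
    recPtr = cpMark;                                    // physical recovery starts from the back pointer
WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);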
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in project ignite by apache.
The class CheckpointWorkflow, method markCheckpointBegin.
/**
* First stage of a checkpoint, which collects the required information (mostly dirty pages).
*
* @param cpTs Checkpoint start timestamp.
* @param curr Current checkpoint event info.
* @param tracker Checkpoint metrics tracker.
* @param workProgressDispatcher Work progress dispatcher.
* @return Checkpoint collected info.
* @throws IgniteCheckedException If failed.
*/
public Checkpoint markCheckpointBegin(long cpTs, CheckpointProgressImpl curr, CheckpointMetricsTracker tracker, WorkProgressDispatcher workProgressDispatcher) throws IgniteCheckedException {
Collection<DataRegion> checkpointedRegions = dataRegions.get();
List<CheckpointListener> dbLsnrs = getRelevantCheckpointListeners(checkpointedRegions);
CheckpointRecord cpRec = new CheckpointRecord(memoryRecoveryRecordPtr);
memoryRecoveryRecordPtr = null;
IgniteFuture snapFut = null;
CheckpointPagesInfoHolder cpPagesHolder;
int dirtyPagesCount;
boolean hasPartitionsToDestroy;
WALPointer cpPtr = null;
CheckpointContextImpl ctx0 = new CheckpointContextImpl(curr, new PartitionAllocationMap(), checkpointCollectPagesInfoPool, workProgressDispatcher);
checkpointReadWriteLock.readLock();
try {
for (CheckpointListener lsnr : dbLsnrs) lsnr.beforeCheckpointBegin(ctx0);
ctx0.awaitPendingTasksFinished();
} finally {
checkpointReadWriteLock.readUnlock();
}
tracker.onLockWaitStart();
checkpointReadWriteLock.writeLock();
try {
curr.transitTo(LOCK_TAKEN);
tracker.onMarkStart();
// Listeners must be invoked before we write checkpoint record to WAL.
for (CheckpointListener lsnr : dbLsnrs) lsnr.onMarkCheckpointBegin(ctx0);
ctx0.awaitPendingTasksFinished();
tracker.onListenersExecuteEnd();
if (curr.nextSnapshot())
snapFut = snapshotMgr.onMarkCheckPointBegin(curr.snapshotOperation(), ctx0.partitionStatMap());
fillCacheGroupState(cpRec);
// Pages may be replaced only after the checkpoint entry has been stored to disk.
cpPagesHolder = beginAllCheckpoints(checkpointedRegions, curr.futureFor(MARKER_STORED_TO_DISK));
curr.currentCheckpointPagesCount(cpPagesHolder.pagesNum());
dirtyPagesCount = cpPagesHolder.pagesNum();
hasPartitionsToDestroy = !curr.getDestroyQueue().pendingReqs().isEmpty();
if (dirtyPagesCount > 0 || curr.nextSnapshot() || hasPartitionsToDestroy) {
// No page updates for this checkpoint are allowed from now on.
if (wal != null)
cpPtr = wal.log(cpRec);
if (cpPtr == null)
cpPtr = CheckpointStatus.NULL_PTR;
}
curr.transitTo(PAGE_SNAPSHOT_TAKEN);
} finally {
checkpointReadWriteLock.writeUnlock();
tracker.onLockRelease();
}
curr.transitTo(LOCK_RELEASED);
for (CheckpointListener lsnr : dbLsnrs) lsnr.onCheckpointBegin(ctx0);
if (snapFut != null) {
try {
snapFut.get();
} catch (IgniteException e) {
U.error(log, "Failed to wait for snapshot operation initialization: " + curr.snapshotOperation(), e);
}
}
if (dirtyPagesCount > 0 || hasPartitionsToDestroy) {
tracker.onWalCpRecordFsyncStart();
// Sync log outside the checkpoint write lock.
if (wal != null)
wal.flush(cpPtr, true);
tracker.onWalCpRecordFsyncEnd();
CheckpointEntry checkpointEntry = null;
if (checkpointMarkersStorage != null)
checkpointEntry = checkpointMarkersStorage.writeCheckpointEntry(cpTs, cpRec.checkpointId(), cpPtr, cpRec, CheckpointEntryType.START, skipSync);
curr.transitTo(MARKER_STORED_TO_DISK);
tracker.onSplitAndSortCpPagesStart();
GridConcurrentMultiPairQueue<PageMemoryEx, FullPageId> cpPages = splitAndSortCpPagesIfNeeded(cpPagesHolder);
tracker.onSplitAndSortCpPagesEnd();
return new Checkpoint(checkpointEntry, cpPages, curr);
} else {
if (curr.nextSnapshot() && wal != null)
wal.flush(null, true);
return new Checkpoint(null, GridConcurrentMultiPairQueue.EMPTY, curr);
}
}
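Seen purely from the CheckpointRecord side, the method follows one pattern: the record is created from the memory-recovery pointer, logged to the WAL while the checkpoint write lock is held (so no page update for this checkpoint can be logged after it), and fsynced outside the lock. A minimal sketch of that pattern, using the same fields as the method above:
// Sketch only: CheckpointRecord logging pattern from markCheckpointBegin, distilled.
CheckpointRecord cpRec = new CheckpointRecord(memoryRecoveryRecordPtr);
WALPointer cpPtr;
checkpointReadWriteLock.writeLock();
try {
    cpPtr = wal.log(cpRec); // no page updates for this checkpoint are allowed after this record
}
finally {
    checkpointReadWriteLock.writeUnlock();
}
wal.flush(cpPtr, true);     // fsync the checkpoint record outside the write lock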
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in project ignite by apache.
The class IgnitePdsRecoveryAfterFileCorruptionTest, method generateWal.
/**
* @param mem Page memory.
* @param storeMgr Page store manager.
* @param wal Write-ahead log manager.
* @param cacheId Cache ID.
* @param pages Pages to fill and write.
* @throws Exception If failed.
*/
private void generateWal(final PageMemoryImpl mem, final IgnitePageStoreManager storeMgr, final IgniteWriteAheadLogManager wal, final int cacheId, FullPageId[] pages) throws Exception {
// Mark the start position.
CheckpointRecord cpRec = new CheckpointRecord(null);
WALPointer start = wal.log(cpRec);
wal.flush(start, false);
for (FullPageId fullId : pages) {
long page = mem.acquirePage(fullId.groupId(), fullId.pageId());
try {
long pageAddr = mem.writeLock(fullId.groupId(), fullId.pageId(), page);
PageIO.setPageId(pageAddr, fullId.pageId());
try {
for (int j = PageIO.COMMON_HEADER_END; j < mem.realPageSize(fullId.groupId()); j += 4) PageUtils.putInt(pageAddr, j, j + (int) fullId.pageId());
} finally {
mem.writeUnlock(fullId.groupId(), fullId.pageId(), page, null, true);
}
} finally {
mem.releasePage(fullId.groupId(), fullId.pageId(), page);
}
}
Collection<FullPageId> pageIds = mem.beginCheckpoint(new GridFinishedFuture());
info("Acquired pages for checkpoint: " + pageIds.size());
try {
long begin = System.currentTimeMillis();
long cp = 0;
AtomicLong write = new AtomicLong();
PageStoreWriter pageStoreWriter = (fullPageId, buf, tag) -> {
int groupId = fullPageId.groupId();
long pageId = fullPageId.pageId();
for (int j = PageIO.COMMON_HEADER_END; j < mem.realPageSize(groupId); j += 4) assertEquals(j + (int) pageId, buf.getInt(j));
buf.rewind();
long writeStart = System.nanoTime();
storeMgr.write(cacheId, pageId, buf, tag, true);
long writeEnd = System.nanoTime();
write.getAndAdd(writeEnd - writeStart);
};
ByteBuffer tmpBuf = ByteBuffer.allocate(mem.pageSize());
tmpBuf.order(ByteOrder.nativeOrder());
for (FullPageId fullId : pages) {
if (pageIds.contains(fullId)) {
long cpStart = System.nanoTime();
mem.checkpointWritePage(fullId, tmpBuf, pageStoreWriter, null);
long cpEnd = System.nanoTime();
cp += cpEnd - cpStart;
}
}
long syncStart = System.currentTimeMillis();
storeMgr.sync(cacheId, 0);
long end = System.currentTimeMillis();
info("Written pages in " + (end - begin) + "ms, copy took " + (cp / 1_000_000) + "ms, " + "write took " + (write.get() / 1_000_000) + "ms, sync took " + (end - syncStart) + "ms");
} finally {
info("Finishing checkpoint...");
mem.finishCheckpoint();
info("Finished checkpoint");
}
wal.flush(wal.log(new CheckpointRecord(null)), false);
for (FullPageId fullId : pages) {
long page = mem.acquirePage(fullId.groupId(), fullId.pageId());
try {
assertFalse("Page has a temp heap copy after the last checkpoint: [cacheId=" + fullId.groupId() + ", pageId=" + fullId.pageId() + "]", mem.hasTempCopy(page));
assertFalse("Page is dirty after the last checkpoint: [cacheId=" + fullId.groupId() + ", pageId=" + fullId.pageId() + "]", mem.isDirty(fullId.groupId(), fullId.pageId(), page));
} finally {
mem.releasePage(fullId.groupId(), fullId.pageId(), page);
}
}
}
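In essence the test brackets the generated page updates between two checkpoint records: one is logged and flushed before the pages are dirtied, giving recovery a known start marker, and another is logged and flushed after the checkpoint completes. Distilled from the method above, the bracketing looks like this:
// Sketch only: bracketing generated WAL data between two checkpoint records, as in generateWal.
WALPointer start = wal.log(new CheckpointRecord(null)); // start marker
wal.flush(start, false);

// ... dirty the pages under write lock, then run beginCheckpoint / checkpointWritePage / finishCheckpoint ...

wal.flush(wal.log(new CheckpointRecord(null)), false);  // end marker, flushed to the WAL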
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in project ignite by apache.
The class IgnitePdsCorruptedStoreTest, method testWalFsyncWriteHeaderFailure.
/**
* Tests node invalidation due to an error when writing the WAL header.
*/
@Test
public void testWalFsyncWriteHeaderFailure() throws Exception {
IgniteEx ignite = startGrid(0);
ignite.cluster().active(true);
ignite.cache(CACHE_NAME1).put(0, 0);
failingFileIOFactory.createClosure((file, options) -> {
FileIO delegate = failingFileIOFactory.delegateFactory().create(file, options);
if (file.getName().endsWith(".wal")) {
return new FileIODecorator(delegate) {
@Override
public int write(ByteBuffer srcBuf) throws IOException {
throw new IOException("No space left on device");
}
};
}
return delegate;
});
ignite.context().cache().context().database().checkpointReadLock();
try {
ignite.context().cache().context().wal().log(new CheckpointRecord(null), RolloverType.NEXT_SEGMENT);
} catch (StorageException expected) {
// No-op.
} finally {
ignite.context().cache().context().database().checkpointReadUnlock();
}
waitFailure(StorageException.class);
}
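The CheckpointRecord here is only a trigger: logging it with RolloverType.NEXT_SEGMENT forces the WAL to roll over to a new segment, and writing the new segment's header goes through the decorated FileIO, which rejects every write to a *.wal file, hence the expected StorageException and node invalidation. The injected decorator itself is a small, reusable shape; a sketch of just that part, with delegateFactory standing in for whatever factory normally produces the FileIO:
// Sketch only: failure-injecting FileIO decorator, as wired up by the closure in the test above.
FileIO delegate = delegateFactory.create(file, options);
if (file.getName().endsWith(".wal")) {
    return new FileIODecorator(delegate) {
        @Override public int write(ByteBuffer srcBuf) throws IOException {
            throw new IOException("No space left on device"); // simulate a full disk on every WAL write
        }
    };
}
return delegate;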
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in project ignite by apache.
The class WalScannerTest, method shouldDumpToFileAndLogFoundRecord.
/**
* @throws Exception If failed.
*/
@Test
public void shouldDumpToFileAndLogFoundRecord() throws Exception {
// given: File for dumping records and test logger for interception of records.
File targetFile = Paths.get(U.defaultWorkDirectory(), TEST_DUMP_FILE).toFile();
long expPageId = 984;
int grpId = 123;
IgniteLogger log = mock(IgniteLogger.class);
when(log.isInfoEnabled()).thenReturn(true);
ArgumentCaptor<String> valCapture = ArgumentCaptor.forClass(String.class);
doNothing().when(log).info(valCapture.capture());
WALIterator mockedIter = mockWalIterator(new IgniteBiTuple<>(NULL_PTR, new PageSnapshot(new FullPageId(expPageId, grpId), dummyPage(1024, expPageId), 1024)), new IgniteBiTuple<>(NULL_PTR, new CheckpointRecord(new WALPointer(5738, 0, 0))), new IgniteBiTuple<>(NULL_PTR, new FixCountRecord(grpId, expPageId, 4)));
IgniteWalIteratorFactory factory = mock(IgniteWalIteratorFactory.class);
when(factory.iterator(any(IteratorParametersBuilder.class))).thenReturn(mockedIter);
Set<T2<Integer, Long>> groupAndPageIds = new HashSet<>();
groupAndPageIds.add(new T2<>(grpId, expPageId));
List<String> actualFileRecords = null;
try {
// when: Scanning the WAL to search for the expected page.
buildWalScanner(withIteratorParameters(), factory).findAllRecordsFor(groupAndPageIds).forEach(printToLog(log).andThen(printToFile(targetFile)));
actualFileRecords = Files.readAllLines(targetFile.toPath());
} finally {
targetFile.delete();
}
actualFileRecords = actualFileRecords.stream().filter(it -> it.startsWith("Next WAL record ::")).collect(Collectors.toList());
// then: Only the expected records should be found in the file.
assertEquals(actualFileRecords.size(), 3);
assertTrue(actualFileRecords.get(0), actualFileRecords.get(0).contains("PageSnapshot ["));
assertTrue(actualFileRecords.get(1), actualFileRecords.get(1).contains("CheckpointRecord ["));
assertTrue(actualFileRecords.get(2), actualFileRecords.get(2).contains("FixCountRecord ["));
// then: Only the expected records should be found in the log.
List<String> actualLogRecords = valCapture.getAllValues();
assertEquals(actualLogRecords.size(), 1);
assertTrue(actualLogRecords.get(0), actualLogRecords.get(0).contains("PageSnapshot ["));
assertTrue(actualLogRecords.get(0), actualLogRecords.get(0).contains("CheckpointRecord ["));
assertTrue(actualLogRecords.get(0), actualLogRecords.get(0).contains("FixCountRecord ["));
}
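Outside a unit test, the same handler chain can be pointed at real WAL segments instead of a mocked iterator. A hedged sketch of that usage follows; filesOrDirs(...) on the parameters builder and the logger-only IgniteWalIteratorFactory constructor are assumptions here, not something the test above exercises:
// Sketch only: running the scanner over on-disk WAL segments (API details assumed, see note above).
Set<T2<Integer, Long>> groupAndPageIds = new HashSet<>();
groupAndPageIds.add(new T2<>(grpId, expPageId));
IgniteWalIteratorFactory factory = new IgniteWalIteratorFactory(log);                  // assumption: logger-only constructor
buildWalScanner(withIteratorParameters().filesOrDirs(walDir, walArchiveDir), factory)  // assumption: filesOrDirs(...)
    .findAllRecordsFor(groupAndPageIds)
    .forEach(printToLog(log).andThen(printToFile(targetFile)));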