Use of org.apache.ignite.internal.processors.cache.persistence.CheckpointState.FINISHED in project ignite by apache.
The class CachePartitionDefragmentationManager, method defragmentOnePartition.
/**
 * Defragments one given partition.
 *
 * @return {@code false} if the partition had already been defragmented and was skipped, {@code true} otherwise.
 */
private boolean defragmentOnePartition(CacheGroupContext oldGrpCtx, int grpId, File workDir, GridCacheOffheapManager offheap, FileVersionCheckingFactory pageStoreFactory, GridCompoundFuture<Object, Object> cmpFut, PageMemoryEx oldPageMem, CacheGroupContext newGrpCtx, CacheDataStore oldCacheDataStore) throws IgniteCheckedException {
    TreeIterator treeIter = new TreeIterator(pageSize);

    checkCancellation();

    int partId = oldCacheDataStore.partId();

    PartitionContext partCtx = new PartitionContext(workDir, grpId, partId, partDataRegion, mappingDataRegion, oldGrpCtx, newGrpCtx, oldCacheDataStore, pageStoreFactory);

    if (skipAlreadyDefragmentedPartition(workDir, grpId, partId, log)) {
        partCtx.createPageStore(() -> defragmentedPartMappingFile(workDir, partId).toPath(), partCtx.mappingPagesAllocated, partCtx.mappingPageMemory);

        linkMapByPart.put(partId, partCtx.createLinkMapTree(false));

        return false;
    }

    partCtx.createPageStore(() -> defragmentedPartMappingFile(workDir, partId).toPath(), partCtx.mappingPagesAllocated, partCtx.mappingPageMemory);

    linkMapByPart.put(partId, partCtx.createLinkMapTree(true));

    checkCancellation();

    partCtx.createPageStore(() -> defragmentedPartTmpFile(workDir, partId).toPath(), partCtx.partPagesAllocated, partCtx.partPageMemory);

    partCtx.createNewCacheDataStore(offheap);

    copyPartitionData(partCtx, treeIter);

    DefragmentationPageReadWriteManager pageMgr = (DefragmentationPageReadWriteManager)partCtx.partPageMemory.pageManager();

    PageStore oldPageStore = filePageStoreMgr.getStore(grpId, partId);

    status.onPartitionDefragmented(
        oldGrpCtx,
        oldPageStore.size(),
        pageSize + partCtx.partPagesAllocated.get() * pageSize // + file header.
    );

    // TODO Move inside of defragmentSinglePartition.
    IgniteInClosure<IgniteInternalFuture<?>> cpLsnr = fut -> {
        if (fut.error() == null) {
            if (log.isDebugEnabled()) {
                log.debug(S.toString("Partition defragmented",
                    "grpId", grpId, false,
                    "partId", partId, false,
                    "oldPages", oldPageStore.pages(), false,
                    "newPages", partCtx.partPagesAllocated.get() + 1, false,
                    "mappingPages", partCtx.mappingPagesAllocated.get() + 1, false,
                    "pageSize", pageSize, false,
                    "partFile", defragmentedPartFile(workDir, partId).getName(), false,
                    "workDir", workDir, false
                ));
            }

            oldPageMem.invalidate(grpId, partId);

            partCtx.partPageMemory.invalidate(grpId, partId);

            // Yes, it'll be invalid in a second.
            pageMgr.pageStoreMap().removePageStore(grpId, partId);

            renameTempPartitionFile(workDir, partId);
        }
    };

    // The listener above runs only once the forced checkpoint reaches the FINISHED state,
    // i.e. after the defragmented pages have been made durable on disk.
    GridFutureAdapter<?> cpFut = defragmentationCheckpoint.forceCheckpoint("partition defragmented", null).futureFor(FINISHED);

    cpFut.listen(cpLsnr);

    cmpFut.add((IgniteInternalFuture<Object>)cpFut);

    return true;
}
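What this usage illustrates is the ordering guarantee around FINISHED: the old page memory is invalidated and the temporary partition file renamed only inside a listener attached to futureFor(FINISHED), i.e. only after the forced checkpoint has flushed the defragmented pages to disk. A minimal sketch of that pattern, using only the forceCheckpoint/futureFor/listen calls shown above (the checkpointer variable and the swapPartitionFiles helper are hypothetical stand-ins):

// Sketch only: wait for CheckpointState.FINISHED before touching files on disk.
GridFutureAdapter<?> cpFut = checkpointer
    .forceCheckpoint("swap defragmented partition", null)
    .futureFor(CheckpointState.FINISHED);

cpFut.listen(fut -> {
    if (fut.error() == null) {
        // Safe point: the checkpoint is durable, so the temporary file
        // can now replace the old partition file.
        swapPartitionFiles(); // Hypothetical cleanup hook.
    }
});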
Use of org.apache.ignite.internal.processors.cache.persistence.CheckpointState.FINISHED in project ignite by apache.
The class GridCacheDatabaseSharedManager, method performBinaryMemoryRestore.
/**
 * @param status Checkpoint status.
 * @param cacheGroupsPredicate Cache groups to restore.
 * @param recordTypePredicate Filter of WAL record types to replay.
 * @param finalizeState Whether to finalize the checkpoint state after restore.
 * @return Binary restore state, or {@code null} if {@code finalizeState} is {@code false}.
 * @throws IgniteCheckedException If failed.
 * @throws StorageException In case I/O error occurred during operations with storage.
 */
private RestoreBinaryState performBinaryMemoryRestore(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean finalizeState) throws IgniteCheckedException {
    if (log.isInfoEnabled())
        log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');

    WALPointer recPtr = status.endPtr;

    boolean apply = status.needRestoreMemory();

    try {
        WALRecord startRec = !CheckpointStatus.NULL_PTR.equals(status.startPtr) || apply ? cctx.wal().read(status.startPtr) : null;

        if (apply) {
            if (finalizeState)
                U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and finish checkpoint on node start.");

            cctx.cache().cacheGroupDescriptors().forEach((grpId, desc) -> {
                if (!cacheGroupsPredicate.apply(grpId))
                    return;

                try {
                    DataRegion region = cctx.database().dataRegion(desc.config().getDataRegionName());

                    if (region == null || !cctx.isLazyMemoryAllocation(region))
                        return;

                    region.pageMemory().start();
                }
                catch (IgniteCheckedException e) {
                    throw new IgniteException(e);
                }
            });

            cctx.pageStore().beginRecover();

            if (!(startRec instanceof CheckpointRecord))
                throw new StorageException("Checkpoint marker doesn't point to checkpoint record [ptr=" + status.startPtr + ", rec=" + startRec + "]");

            WALPointer cpMark = ((CheckpointRecord)startRec).checkpointMark();

            if (cpMark != null) {
                if (log.isInfoEnabled())
                    log.info("Restoring checkpoint after logical recovery, will start physical recovery from back pointer: " + cpMark);

                recPtr = cpMark;
            }
        }
        else
            cctx.wal().notchLastCheckpointPtr(status.startPtr);
    }
    catch (NoSuchElementException e) {
        throw new StorageException("Failed to read checkpoint record from WAL, persistence consistency cannot be guaranteed. Make sure the configuration points to the correct WAL folders and the WAL folder is properly mounted [ptr=" + status.startPtr + ", walPath=" + persistenceCfg.getWalPath() + ", walArchive=" + persistenceCfg.getWalArchivePath() + "]");
    }

    AtomicReference<Throwable> applyError = new AtomicReference<>();

    StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();

    Semaphore semaphore = new Semaphore(semaphorePertmits(exec));

    long start = U.currentTimeMillis();

    long lastArchivedSegment = cctx.wal().lastArchivedSegment();

    WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);

    RestoreBinaryState restoreBinaryState = new RestoreBinaryState(status, it, lastArchivedSegment, cacheGroupsPredicate);

    AtomicLong applied = new AtomicLong();

    try {
        while (restoreBinaryState.hasNext()) {
            if (applyError.get() != null)
                break;

            WALRecord rec = restoreBinaryState.next();

            if (rec == null)
                break;

            switch (rec.type()) {
                case PAGE_RECORD:
                    if (restoreBinaryState.needApplyBinaryUpdate()) {
                        PageSnapshot pageSnapshot = (PageSnapshot)rec;

                        // Here we do not require a tag check because we may be applying memory changes after
                        // several repetitive restarts, and the same pages may have changed several times.
                        int groupId = pageSnapshot.fullPageId().groupId();
                        int partId = partId(pageSnapshot.fullPageId().pageId());

                        if (skipRemovedIndexUpdates(groupId, partId))
                            break;

                        stripedApplyPage((pageMem) -> {
                            try {
                                applyPageSnapshot(pageMem, pageSnapshot);

                                applied.incrementAndGet();
                            }
                            catch (Throwable t) {
                                U.error(log, "Failed to apply page snapshot. rec=[" + pageSnapshot + ']');

                                applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException)t : new IgniteCheckedException("Failed to apply page snapshot", t));
                            }
                        }, groupId, partId, exec, semaphore);
                    }

                    break;

                case PART_META_UPDATE_STATE:
                    PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord)rec;

                    {
                        int groupId = metaStateRecord.groupId();
                        int partId = metaStateRecord.partitionId();

                        stripedApplyPage((pageMem) -> {
                            GridDhtPartitionState state = fromOrdinal(metaStateRecord.state());

                            if (state == null || state == GridDhtPartitionState.EVICTED)
                                schedulePartitionDestroy(groupId, partId);
                            else {
                                try {
                                    cancelOrWaitPartitionDestroy(groupId, partId);
                                }
                                catch (Throwable t) {
                                    U.error(log, "Failed to cancel or wait partition destroy. rec=[" + metaStateRecord + ']');

                                    applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException)t : new IgniteCheckedException("Failed to cancel or wait partition destroy", t));
                                }
                            }
                        }, groupId, partId, exec, semaphore);
                    }

                    break;

                case PARTITION_DESTROY:
                    PartitionDestroyRecord destroyRecord = (PartitionDestroyRecord)rec;

                    {
                        int groupId = destroyRecord.groupId();
                        int partId = destroyRecord.partitionId();

                        stripedApplyPage((pageMem) -> {
                            pageMem.invalidate(groupId, partId);

                            schedulePartitionDestroy(groupId, partId);
                        }, groupId, partId, exec, semaphore);
                    }

                    break;

                default:
                    if (restoreBinaryState.needApplyBinaryUpdate() && rec instanceof PageDeltaRecord) {
                        PageDeltaRecord pageDelta = (PageDeltaRecord)rec;

                        int groupId = pageDelta.groupId();
                        int partId = partId(pageDelta.pageId());

                        if (skipRemovedIndexUpdates(groupId, partId))
                            break;

                        stripedApplyPage((pageMem) -> {
                            try {
                                applyPageDelta(pageMem, pageDelta, true);

                                applied.incrementAndGet();
                            }
                            catch (Throwable t) {
                                U.error(log, "Failed to apply page delta. rec=[" + pageDelta + ']');

                                applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException)t : new IgniteCheckedException("Failed to apply page delta", t));
                            }
                        }, groupId, partId, exec, semaphore);
                    }
            }
        }
    }
    finally {
        it.close();

        awaitApplyComplete(exec, applyError);
    }

    if (!finalizeState)
        return null;

    WALPointer lastReadPtr = restoreBinaryState.lastReadRecordPointer();

    if (status.needRestoreMemory()) {
        if (restoreBinaryState.needApplyBinaryUpdate())
            throw new StorageException("Failed to restore memory state (checkpoint marker is present on disk, but checkpoint record is missed in WAL) [cpStatus=" + status + ", lastRead=" + lastReadPtr + "]");

        if (log.isInfoEnabled())
            log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");

        finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr, exec);
    }

    return restoreBinaryState;
}
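Note how the replay loop never applies a record inline: every update goes through stripedApplyPage keyed by (groupId, partId), so updates to one partition are serialized on a single stripe while different partitions replay in parallel, and the semaphore bounds how much work may be queued at once; awaitApplyComplete then drains the executor. The following self-contained sketch reproduces that striping idea with plain java.util.concurrent; all names in it are illustrative, not Ignite API:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

public class StripedApplySketch {
    private final ExecutorService[] stripes;
    private final Semaphore permits;

    public StripedApplySketch(int stripeCnt, int maxQueued) {
        stripes = new ExecutorService[stripeCnt];

        // One single-threaded stripe per slot: tasks mapped to the same stripe run in order.
        for (int i = 0; i < stripeCnt; i++)
            stripes[i] = Executors.newSingleThreadExecutor();

        permits = new Semaphore(maxQueued);
    }

    /** Runs {@code task} on the stripe that owns the given partition, preserving per-partition order. */
    public void stripedApply(int groupId, int partId, Runnable task) throws InterruptedException {
        permits.acquire(); // Backpressure: bound the number of in-flight tasks.

        // floorMod keeps the index non-negative even if the hash overflows.
        int stripe = Math.floorMod(31 * groupId + partId, stripes.length);

        stripes[stripe].execute(() -> {
            try {
                task.run();
            }
            finally {
                permits.release();
            }
        });
    }

    /** Waits until every submitted task has completed, like awaitApplyComplete above. */
    public void awaitApplyComplete() throws InterruptedException {
        for (ExecutorService stripe : stripes) {
            stripe.shutdown();
            stripe.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
        }
    }
}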
Use of org.apache.ignite.internal.processors.cache.persistence.CheckpointState.FINISHED in project ignite by apache.
The class CachePartitionDefragmentationManager, method executeDefragmentation.
/**
 * Executes defragmentation for all cache groups scheduled for it.
 */
public void executeDefragmentation() throws IgniteCheckedException {
    Map<Integer, List<CacheDataStore>> oldStores = new HashMap<>();

    for (CacheGroupContext oldGrpCtx : cacheGrpCtxsForDefragmentation) {
        int grpId = oldGrpCtx.groupId();

        final IgniteCacheOffheapManager offheap = oldGrpCtx.offheap();

        List<CacheDataStore> oldCacheDataStores = stream(offheap.cacheDataStores().spliterator(), false).filter(store -> {
            try {
                return filePageStoreMgr.exists(grpId, store.partId());
            }
            catch (IgniteCheckedException e) {
                throw new IgniteException(e);
            }
        }).collect(Collectors.toList());

        oldStores.put(grpId, oldCacheDataStores);
    }

    int partitionCount = oldStores.values().stream().mapToInt(List::size).sum();

    status.onStart(cacheGrpCtxsForDefragmentation, partitionCount);

    try {
        // Now the actual process starts.
        IgniteInternalFuture<?> idxDfrgFut = null;
        DataPageEvictionMode prevPageEvictionMode = null;

        for (CacheGroupContext oldGrpCtx : cacheGrpCtxsForDefragmentation) {
            int grpId = oldGrpCtx.groupId();

            File workDir = filePageStoreMgr.cacheWorkDir(oldGrpCtx.sharedGroup(), oldGrpCtx.cacheOrGroupName());

            List<CacheDataStore> oldCacheDataStores = oldStores.get(grpId);

            if (skipAlreadyDefragmentedCacheGroup(workDir, grpId, log)) {
                status.onCacheGroupSkipped(oldGrpCtx, oldCacheDataStores.size());

                continue;
            }

            try {
                GridCacheOffheapManager offheap = (GridCacheOffheapManager)oldGrpCtx.offheap();

                status.onCacheGroupStart(oldGrpCtx, oldCacheDataStores.size());

                if (workDir == null || oldCacheDataStores.isEmpty()) {
                    status.onCacheGroupFinish(oldGrpCtx);

                    continue;
                }

                // We can't start defragmentation of a new group on a region that has the wrong eviction mode,
                // so waiting for the previous cache group's defragmentation to finish is inevitable.
                DataPageEvictionMode curPageEvictionMode = oldGrpCtx.dataRegion().config().getPageEvictionMode();

                if (prevPageEvictionMode == null || prevPageEvictionMode != curPageEvictionMode) {
                    prevPageEvictionMode = curPageEvictionMode;

                    partDataRegion.config().setPageEvictionMode(curPageEvictionMode);

                    if (idxDfrgFut != null)
                        idxDfrgFut.get();
                }

                IntMap<CacheDataStore> cacheDataStores = new IntHashMap<>();

                for (CacheDataStore store : offheap.cacheDataStores()) {
                    // A null tree would mean that these partitions are empty.
                    assert store.tree() == null || store.tree().groupId() == grpId;

                    if (store.tree() != null)
                        cacheDataStores.put(store.partId(), store);
                }

                dbMgr.checkpointedDataRegions().remove(oldGrpCtx.dataRegion());

                // Another cheat: the TTL cleanup manager holds on to cache state of its own,
                // so caches have to be unregistered from it explicitly.
                oldGrpCtx.caches().stream().filter(cacheCtx -> cacheCtx.groupId() == grpId).forEach(cacheCtx -> cacheCtx.ttl().unregister());

                // Technically WAL is already disabled, but "PageHandler.isWalDeltaRecordNeeded" doesn't care,
                // and WAL records will be allocated anyway just to be ignored later if we don't disable WAL
                // for the cache group explicitly.
                oldGrpCtx.localWalEnabled(false, false);

                boolean encrypted = oldGrpCtx.config().isEncryptionEnabled();

                FileVersionCheckingFactory pageStoreFactory = filePageStoreMgr.getPageStoreFactory(grpId, encrypted);

                AtomicLong idxAllocationTracker = new GridAtomicLong();

                createIndexPageStore(grpId, workDir, pageStoreFactory, partDataRegion, idxAllocationTracker::addAndGet);

                checkCancellation();

                GridCompoundFuture<Object, Object> cmpFut = new GridCompoundFuture<>();

                PageMemoryEx oldPageMem = (PageMemoryEx)oldGrpCtx.dataRegion().pageMemory();

                CacheGroupContext newGrpCtx = new CacheGroupContext(sharedCtx, grpId, oldGrpCtx.receivedFrom(), CacheType.USER, oldGrpCtx.config(), oldGrpCtx.affinityNode(), partDataRegion, oldGrpCtx.cacheObjectContext(), null, null, oldGrpCtx.localStartVersion(), true, false, true);

                defragmentationCheckpoint.checkpointTimeoutLock().checkpointReadLock();

                try {
                    // This will initialize partition meta in the index partition: the meta tree and the reuse list.
                    newGrpCtx.start();
                }
                finally {
                    defragmentationCheckpoint.checkpointTimeoutLock().checkpointReadUnlock();
                }

                IgniteUtils.doInParallel(defragmentationThreadPool, oldCacheDataStores, oldCacheDataStore -> defragmentOnePartition(oldGrpCtx, grpId, workDir, offheap, pageStoreFactory, cmpFut, oldPageMem, newGrpCtx, oldCacheDataStore));

                // A bit too general for now, but I like it more than saving only the last checkpoint future.
                cmpFut.markInitialized().get();

                idxDfrgFut = new GridFinishedFuture<>();

                if (filePageStoreMgr.hasIndexStore(grpId)) {
                    defragmentIndexPartition(oldGrpCtx, newGrpCtx);

                    idxDfrgFut = defragmentationCheckpoint.forceCheckpoint("index defragmented", null).futureFor(FINISHED);
                }

                PageStore oldIdxPageStore = filePageStoreMgr.getStore(grpId, INDEX_PARTITION);

                idxDfrgFut = idxDfrgFut.chain(fut -> {
                    if (log.isDebugEnabled()) {
                        log.debug(S.toString("Index partition defragmented",
                            "grpId", grpId, false,
                            "oldPages", oldIdxPageStore.pages(), false,
                            "newPages", idxAllocationTracker.get() + 1, false,
                            "pageSize", pageSize, false,
                            "partFile", defragmentedIndexFile(workDir).getName(), false,
                            "workDir", workDir, false
                        ));
                    }

                    oldPageMem.invalidate(grpId, INDEX_PARTITION);

                    PageMemoryEx partPageMem = (PageMemoryEx)partDataRegion.pageMemory();

                    partPageMem.invalidate(grpId, INDEX_PARTITION);

                    DefragmentationPageReadWriteManager pageMgr = (DefragmentationPageReadWriteManager)partPageMem.pageManager();

                    pageMgr.pageStoreMap().removePageStore(grpId, INDEX_PARTITION);

                    PageMemoryEx mappingPageMem = (PageMemoryEx)mappingDataRegion.pageMemory();

                    pageMgr = (DefragmentationPageReadWriteManager)mappingPageMem.pageManager();

                    pageMgr.pageStoreMap().clear(grpId);

                    renameTempIndexFile(workDir);

                    writeDefragmentationCompletionMarker(filePageStoreMgr.getPageStoreFileIoFactory(), workDir, log);

                    batchRenameDefragmentedCacheGroupPartitions(workDir, log);

                    return null;
                });

                status.onIndexDefragmented(
                    oldGrpCtx,
                    oldIdxPageStore.size(),
                    pageSize + idxAllocationTracker.get() * pageSize // + file header.
                );
            }
            catch (DefragmentationCancelledException e) {
                DefragmentationFileUtils.deleteLeftovers(workDir);

                throw e;
            }

            status.onCacheGroupFinish(oldGrpCtx);
        }

        if (idxDfrgFut != null)
            idxDfrgFut.get();

        mntcReg.unregisterMaintenanceTask(DEFRAGMENTATION_MNTC_TASK_NAME);

        status.onFinish();

        completionFut.onDone();
    }
    catch (DefragmentationCancelledException e) {
        mntcReg.unregisterMaintenanceTask(DEFRAGMENTATION_MNTC_TASK_NAME);

        log.info("Defragmentation process has been cancelled.");

        status.onFinish();

        completionFut.onDone();
    }
    catch (Throwable t) {
        log.error("Defragmentation process failed.", t);

        status.onFinish();

        completionFut.onDone(t);

        throw t;
    }
    finally {
        defragmentationCheckpoint.stop(true);
    }
}
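Two layers of futures coordinate this method: a GridCompoundFuture aggregates the per-partition checkpoint futures (cmpFut.markInitialized().get() blocks until every partition is done), and only afterwards is the index partition defragmented and its own FINISHED checkpoint future chained with the final renames. As a hedged sketch of the aggregation step only, here is a rough equivalent using plain CompletableFuture instead of GridCompoundFuture (defragmentPartition is a hypothetical stand-in for defragmentOnePartition):

import java.util.List;
import java.util.concurrent.CompletableFuture;

public class CompoundFutureSketch {
    /** Blocks until every per-partition task completes, mirroring cmpFut.markInitialized().get(). */
    static void defragmentAll(List<Integer> partIds) {
        CompletableFuture<?>[] futs = partIds.stream()
            .map(partId -> CompletableFuture.runAsync(() -> defragmentPartition(partId)))
            .toArray(CompletableFuture[]::new);

        // join() rethrows the first failure, much like GridCompoundFuture.get() would.
        CompletableFuture.allOf(futs).join();

        // Only now is it safe to move on to the index partition.
    }

    /** Hypothetical stand-in for defragmentOnePartition. */
    static void defragmentPartition(int partId) {
        // ... copy partition data, force a checkpoint, wait for FINISHED ...
    }
}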