use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
the class GridCacheDatabaseSharedManager method reserveHistoryForPreloading.
/**
 * {@inheritDoc}
 * <p>
 * Looks up the checkpoint entries matching {@code reservationMap}, picks the smallest checkpoint mark
 * among them and asks the WAL manager to reserve it. On success the reserved pointer is stored in
 * {@code reservedForPreloading}.
 *
 * @param reservationMap Map passed as-is to the checkpoint history search.
 * @return {@code true} if the WAL reservation succeeded; {@code false} if no entries were found,
 *      some entry has no checkpoint mark, or the WAL manager refused the reservation.
 */
@Override
public boolean reserveHistoryForPreloading(Map<T2<Integer, Integer>, Long> reservationMap) {
    Map<GroupPartitionId, CheckpointEntry> found = checkpointHistory().searchCheckpointEntry(reservationMap);

    if (F.isEmpty(found))
        return false;

    WALPointer earliest = null;

    for (CheckpointEntry entry : found.values()) {
        WALPointer mark = entry.checkpointMark();

        // An entry without a checkpoint mark makes the whole reservation impossible.
        if (mark == null)
            return false;

        boolean olderThanCurrent = earliest == null || mark.compareTo(earliest) < 0;

        if (olderThanCurrent)
            earliest = mark;
    }

    boolean reserved = cctx.wal().reserve(earliest);

    // Remember the pointer only when the WAL manager actually reserved it.
    if (reserved)
        reservedForPreloading.set(earliest);

    return reserved;
}
use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
the class GridCacheOffheapManager method historicalIterator.
/**
 * {@inheritDoc}
 * <p>
 * For non-MVCC groups builds a WAL-backed historical iterator: the initial update counters of the
 * requested partitions are collected, the earliest WAL pointer for them is looked up in the
 * checkpoint history and the WAL is replayed from that pointer. Partitions the created iterator
 * reports as missing are added to {@code missing}.
 *
 * @param partCntrs Partition counters map; {@code null} or empty yields {@code null}.
 * @param missing Set that receives the partitions reported as missing by the created iterator.
 * @return Historical iterator or {@code null} when there is nothing to iterate.
 * @throws IgniteCheckedException If failed; failures inside the WAL lookup/replay section that are
 *      not already caused by {@code IgniteHistoricalIteratorException} are wrapped into one.
 */
@Override
@Nullable
protected IgniteHistoricalIterator historicalIterator(CachePartitionPartialCountersMap partCntrs, Set<Integer> missing) throws IgniteCheckedException {
    if (partCntrs == null || partCntrs.isEmpty())
        return null;

    // TODO IGNITE-7384
    if (grp.mvccEnabled())
        return super.historicalIterator(partCntrs, missing);

    GridCacheDatabaseSharedManager database = (GridCacheDatabaseSharedManager) grp.shared().database();

    // Partition id -> initial update counter.
    Map<Integer, Long> partsCounters = new HashMap<>();

    for (int idx = 0; idx < partCntrs.size(); idx++)
        partsCounters.put(partCntrs.partitionAt(idx), partCntrs.initialUpdateCounterAt(idx));

    try {
        WALPointer minPtr = database.checkpointHistory().searchEarliestWalPointer(
            grp.groupId(),
            partsCounters,
            grp.hasAtomicCaches() ? walAtomicCacheMargin : 0L
        );

        WALPointer latestReservedPointer = database.latestWalPointerReservedForPreloading();

        if (latestReservedPointer == null)
            log.warning("History for the preloading has not reserved yet.");
        else {
            // The iteration start must not precede the reserved pointer.
            assert latestReservedPointer.compareTo(minPtr) <= 0 : "Historical iterator tries to iterate WAL out of reservation [cache=" + grp.cacheOrGroupName() + ", reservedPointer=" + latestReservedPointer + ", historicalPointer=" + minPtr + ']';
        }

        WALIterator walIt = grp.shared().wal().replay(minPtr);

        WALHistoricalIterator histIt = new WALHistoricalIterator(log, grp, partCntrs, partsCounters, walIt);

        // Add historical partitions which are unable to be reserved to the missing set.
        missing.addAll(histIt.missingParts);

        return histIt;
    }
    catch (Exception ex) {
        // Already carries the historical iterator exception somewhere in the cause chain: pass through.
        if (X.hasCause(ex, IgniteHistoricalIteratorException.class))
            throw ex;

        throw new IgniteHistoricalIteratorException(ex);
    }
}
use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
the class GridCacheDatabaseSharedManager method performBinaryMemoryRestore.
/**
* Replays WAL records and applies page snapshots, page deltas and partition state/destroy records
* through the striped executor, optionally finalizing the interrupted checkpoint afterwards.
* <p>
* Replay starts from {@code status.endPtr}; when {@code status.needRestoreMemory()} is {@code true}
* and the checkpoint record carries a checkpoint mark, replay starts from that mark instead.
*
* @param status Checkpoint status.
* @param cacheGroupsPredicate Cache groups to restore.
* @param recordTypePredicate Predicate handed to the WAL replay call together with the start pointer.
* @param finalizeState If {@code false}, the method returns {@code null} right after the records are
*      applied; if {@code true}, the restore result is validated, the checkpoint is finalized when a
*      memory restore was needed, and the restore state is returned.
* @return Restore state, or {@code null} when {@code finalizeState} is {@code false}.
* @throws IgniteCheckedException If failed.
* @throws StorageException In case I/O error occurred during operations with storage.
*/
private RestoreBinaryState performBinaryMemoryRestore(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean finalizeState) throws IgniteCheckedException {
if (log.isInfoEnabled())
log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
// Default replay start; may be replaced below by the checkpoint mark of the checkpoint record.
WALPointer recPtr = status.endPtr;
boolean apply = status.needRestoreMemory();
try {
// The checkpoint record is read when the start pointer is not NULL_PTR or when a restore is needed.
WALRecord startRec = !CheckpointStatus.NULL_PTR.equals(status.startPtr) || apply ? cctx.wal().read(status.startPtr) : null;
if (apply) {
if (finalizeState)
U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");
// Start page memory of lazily allocated data regions for the cache groups accepted by the predicate.
cctx.cache().cacheGroupDescriptors().forEach((grpId, desc) -> {
if (!cacheGroupsPredicate.apply(grpId))
return;
try {
DataRegion region = cctx.database().dataRegion(desc.config().getDataRegionName());
if (region == null || !cctx.isLazyMemoryAllocation(region))
return;
region.pageMemory().start();
} catch (IgniteCheckedException e) {
// Checked exceptions cannot leave the lambda, so rethrow as unchecked.
throw new IgniteException(e);
}
});
cctx.pageStore().beginRecover();
// The start pointer must resolve to a checkpoint record, otherwise recovery cannot proceed.
if (!(startRec instanceof CheckpointRecord))
throw new StorageException("Checkpoint marker doesn't point to checkpoint record " + "[ptr=" + status.startPtr + ", rec=" + startRec + "]");
WALPointer cpMark = ((CheckpointRecord) startRec).checkpointMark();
if (cpMark != null) {
if (log.isInfoEnabled())
log.info("Restoring checkpoint after logical recovery, will start physical recovery from " + "back pointer: " + cpMark);
// Replay from the checkpoint mark instead of status.endPtr.
recPtr = cpMark;
}
} else
cctx.wal().notchLastCheckpointPtr(status.startPtr);
} catch (NoSuchElementException e) {
// Reported as a failure to read the checkpoint record from the WAL.
throw new StorageException("Failed to read checkpoint record from WAL, persistence consistency " + "cannot be guaranteed. Make sure configuration points to correct WAL folders and WAL folder is " + "properly mounted [ptr=" + status.startPtr + ", walPath=" + persistenceCfg.getWalPath() + ", walArchive=" + persistenceCfg.getWalArchivePath() + "]");
}
// Holds the first error raised by an apply task; checked by the read loop below.
AtomicReference<Throwable> applyError = new AtomicReference<>();
StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
long start = U.currentTimeMillis();
long lastArchivedSegment = cctx.wal().lastArchivedSegment();
WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);
RestoreBinaryState restoreBinaryState = new RestoreBinaryState(status, it, lastArchivedSegment, cacheGroupsPredicate);
// Counts successfully applied page snapshots and page deltas (used only for the final log message).
AtomicLong applied = new AtomicLong();
try {
while (restoreBinaryState.hasNext()) {
// Stop reading further records as soon as any apply task has reported an error.
if (applyError.get() != null)
break;
WALRecord rec = restoreBinaryState.next();
if (rec == null)
break;
switch(rec.type()) {
case PAGE_RECORD:
if (restoreBinaryState.needApplyBinaryUpdate()) {
PageSnapshot pageSnapshot = (PageSnapshot) rec;
// Here we do not require tag check because we may be applying memory changes after
// several repetitive restarts and the same pages may have changed several times.
int groupId = pageSnapshot.fullPageId().groupId();
int partId = partId(pageSnapshot.fullPageId().pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageSnapshot(pageMem, pageSnapshot);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page snapshot. rec=[" + pageSnapshot + ']');
// Only the first error is kept.
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page snapshot", t));
}
}, groupId, partId, exec, semaphore);
}
break;
case PART_META_UPDATE_STATE:
// Handled regardless of needApplyBinaryUpdate().
PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
{
int groupId = metaStateRecord.groupId();
int partId = metaStateRecord.partitionId();
stripedApplyPage((pageMem) -> {
GridDhtPartitionState state = fromOrdinal(metaStateRecord.state());
// Unknown or EVICTED state: schedule the partition for destroy; otherwise cancel/wait a pending destroy.
if (state == null || state == GridDhtPartitionState.EVICTED)
schedulePartitionDestroy(groupId, partId);
else {
try {
cancelOrWaitPartitionDestroy(groupId, partId);
} catch (Throwable t) {
U.error(log, "Failed to cancel or wait partition destroy. rec=[" + metaStateRecord + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to cancel or wait partition destroy", t));
}
}
}, groupId, partId, exec, semaphore);
}
break;
case PARTITION_DESTROY:
// Handled regardless of needApplyBinaryUpdate().
PartitionDestroyRecord destroyRecord = (PartitionDestroyRecord) rec;
{
int groupId = destroyRecord.groupId();
int partId = destroyRecord.partitionId();
stripedApplyPage((pageMem) -> {
pageMem.invalidate(groupId, partId);
schedulePartitionDestroy(groupId, partId);
}, groupId, partId, exec, semaphore);
}
break;
default:
// Any other record is applied only if it is a page delta and binary updates are still required.
if (restoreBinaryState.needApplyBinaryUpdate() && rec instanceof PageDeltaRecord) {
PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
int groupId = pageDelta.groupId();
int partId = partId(pageDelta.pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageDelta(pageMem, pageDelta, true);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page delta. rec=[" + pageDelta + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page delta", t));
}
}, groupId, partId, exec, semaphore);
}
}
}
} finally {
// Close the WAL iterator first, then wait for the submitted apply tasks (passing the collected error).
it.close();
awaitApplyComplete(exec, applyError);
}
// Callers that do not want finalization get no restore state back.
if (!finalizeState)
return null;
WALPointer lastReadPtr = restoreBinaryState.lastReadRecordPointer();
if (status.needRestoreMemory()) {
// Binary updates still being required after the replay is reported as a missing checkpoint record.
if (restoreBinaryState.needApplyBinaryUpdate())
throw new StorageException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastReadPtr + "]");
if (log.isInfoEnabled())
log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr, exec);
}
return restoreBinaryState;
}
use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
the class GridCacheDatabaseSharedManager method applyUpdatesOnRecovery.
/**
 * Applies updates read from the given WAL iterator, filtered by the record and entry predicates.
 * The whole iteration is executed through {@code cctx.walState().runWithOutWAL(...)}.
 * <p>
 * Iteration stops at the first record rejected by {@code recPredicate}. Data records have their
 * accepted entries applied to the corresponding cache context; MVCC tx records update the
 * coordinator state. All other record types are ignored.
 *
 * @param it WalIterator; {@code null} makes the method a no-op.
 * @param recPredicate Wal record filter.
 * @param entryPredicate Entry filter.
 * @throws IgniteCheckedException If failed.
 */
public void applyUpdatesOnRecovery(@Nullable WALIterator it, IgniteBiPredicate<WALPointer, WALRecord> recPredicate, IgnitePredicate<DataEntry> entryPredicate) throws IgniteCheckedException {
    if (it == null)
        return;

    cctx.walState().runWithOutWAL(() -> {
        while (it.hasNext()) {
            IgniteBiTuple<WALPointer, WALRecord> tuple = it.next();

            WALRecord walRec = tuple.get2();

            // The first rejected record ends the whole replay.
            if (!recPredicate.apply(tuple.get1(), walRec))
                break;

            switch (walRec.type()) {
                case MVCC_DATA_RECORD:
                case DATA_RECORD:
                case DATA_RECORD_V2:
                    checkpointReadLock();

                    try {
                        DataRecord dataRec = (DataRecord) walRec;

                        int total = dataRec.entryCount();

                        for (int idx = 0; idx < total; idx++) {
                            DataEntry entry = dataRec.get(idx);

                            if (!entryPredicate.apply(entry))
                                continue;

                            // The read lock is taken again around every single entry update.
                            checkpointReadLock();

                            try {
                                int cacheId = entry.cacheId();

                                GridCacheContext cacheCtx = cctx.cacheContext(cacheId);

                                if (cacheCtx == null) {
                                    if (log != null)
                                        log.warning("Cache is not started. Updates cannot be applied " + "[cacheId=" + cacheId + ']');
                                }
                                else
                                    applyUpdate(cacheCtx, entry);
                            }
                            finally {
                                checkpointReadUnlock();
                            }
                        }
                    }
                    catch (IgniteCheckedException e) {
                        // Checked exceptions cannot leave the closure, rethrow as unchecked.
                        throw new IgniteException(e);
                    }
                    finally {
                        checkpointReadUnlock();
                    }

                    break;

                case MVCC_TX_RECORD:
                    checkpointReadLock();

                    try {
                        MvccTxRecord txRec = (MvccTxRecord) walRec;

                        byte state = convertToTxState(txRec.state());

                        cctx.coordinators().updateState(txRec.mvccVersion(), state, true);
                    }
                    finally {
                        checkpointReadUnlock();
                    }

                    break;

                default:
                    // Other record types are skipped.
            }
        }
    });
}
use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
the class GridCacheDatabaseSharedManager method restoreBinaryMemory.
/**
 * Performs binary memory recovery: notifies lifecycle listeners, restores memory from the WAL
 * according to the stored checkpoint status, resumes WAL logging from the pointer following the
 * last read record and logs a {@code MemoryRecoveryRecord}.
 *
 * @param cacheGroupsPredicate Cache groups to restore.
 * @param recordTypePredicate Filter records by type.
 * @return State of the binary memory restore (exposes the last read WAL record pointer).
 * @throws IgniteCheckedException If failed. When the failure is caused by {@code StorageException}
 *      or {@code IOException}, it is also reported to the failure processor as a critical error.
 */
private RestoreBinaryState restoreBinaryMemory(IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate) throws IgniteCheckedException {
    final long startedAt = System.currentTimeMillis();

    try {
        if (log.isInfoEnabled())
            log.info("Starting binary memory restore for: " + cctx.cache().cacheGroupDescriptors().keySet());

        for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
            lsnr.beforeBinaryMemoryRestore(this);

        CheckpointStatus cpStatus = readCheckpointStatus();

        // First, bring memory to the last consistent checkpoint state if needed.
        // The returned state exposes the pointer to the last valid record read from the WAL.
        RestoreBinaryState state = performBinaryMemoryRestore(cpStatus, cacheGroupsPredicate, recordTypePredicate, true);

        WALPointer lastRead = state.lastReadRecordPointer();

        // NULL_PTR means no record was read, i.e. the next record to be written is the first one.
        WALPointer restored = lastRead.equals(CheckpointStatus.NULL_PTR) ? null : lastRead.next();

        if (restored != null)
            U.log(log, "Binary memory state restored at node startup [restoredPtr=" + restored + ']');
        else if (!cpStatus.endPtr.equals(CheckpointStatus.NULL_PTR)) {
            // Nothing was read although the checkpoint status points to an existing WAL position.
            throw new StorageException("The memory cannot be restored. The critical part of WAL archive is missing " + "[tailWalPtr=" + restored + ", endPtr=" + cpStatus.endPtr + ']');
        }

        // Wal logging is now available.
        cctx.wal().resumeLogging(restored);

        // Log MemoryRecoveryRecord to make sure that old physical records are not replayed during
        // next physical recovery.
        WALPointer recoveryRecPtr = cctx.wal().log(new MemoryRecoveryRecord(U.currentTimeMillis()));

        checkpointManager.memoryRecoveryRecordPtr(recoveryRecPtr);

        for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
            lsnr.afterBinaryMemoryRestore(this, state);

        if (log.isInfoEnabled())
            log.info("Binary recovery performed in " + (System.currentTimeMillis() - startedAt) + " ms.");

        return state;
    }
    catch (IgniteCheckedException e) {
        // Storage-level failures are escalated to the failure processor before being rethrown.
        if (X.hasCause(e, StorageException.class, IOException.class))
            cctx.kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, e));

        throw e;
    }
}
Aggregations