Use of org.apache.ignite.internal.pagemem.wal.record.PageSnapshot in project ignite by apache.
The class WalCompactionTest, method testApplyingUpdatesFromCompactedWal.
/**
* @throws Exception If failed.
*/
public void testApplyingUpdatesFromCompactedWal() throws Exception {
IgniteEx ig = (IgniteEx) startGrids(3);
ig.active(true);
IgniteCache<Integer, byte[]> cache = ig.cache(CACHE_NAME);
for (int i = 0; i < ENTRIES; i++) {
// At least 20MB of raw data in total.
final byte[] val = new byte[20000];
val[i] = 1;
cache.put(i, val);
}
// Spam WAL to move all data records to compressible WAL zone.
for (int i = 0; i < WAL_SEGMENT_SIZE / DFLT_PAGE_SIZE * 2; i++)
    ig.context().cache().context().wal().log(new PageSnapshot(new FullPageId(-1, -1), new byte[DFLT_PAGE_SIZE]));
// WAL archive segment is allowed to be compressed when it's at least one checkpoint away from current WAL head.
ig.context().cache().context().database().wakeupForCheckpoint("Forced checkpoint").get();
ig.context().cache().context().database().wakeupForCheckpoint("Forced checkpoint").get();
// Allow compressor to archive WAL segments.
Thread.sleep(15_000);
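// Resolve the node-specific WAL archive folder: <workDir>/db/wal/archive/<nodeFolderName>.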
String nodeFolderName = ig.context().pdsFolderResolver().resolveFolders().folderName();
File dbDir = U.resolveWorkDirectory(U.defaultWorkDirectory(), "db", false);
File walDir = new File(dbDir, "wal");
File archiveDir = new File(walDir, "archive");
File nodeArchiveDir = new File(archiveDir, nodeFolderName);
File walSegment = new File(nodeArchiveDir, FileWriteAheadLogManager.FileDescriptor.fileName(0) + ".zip");
assertTrue(walSegment.exists());
// Should be compressed at least in half.
assertTrue(walSegment.length() < WAL_SEGMENT_SIZE / 2);
stopAllGrids();
File nodeLfsDir = new File(dbDir, nodeFolderName);
File cpMarkersDir = new File(nodeLfsDir, "cp");
File[] cpMarkers = cpMarkersDir.listFiles();
assertNotNull(cpMarkers);
assertTrue(cpMarkers.length > 0);
File cacheDir = new File(nodeLfsDir, "cache-" + CACHE_NAME);
File[] lfsFiles = cacheDir.listFiles();
assertNotNull(lfsFiles);
assertTrue(lfsFiles.length > 0);
// Enforce reading WAL from the very beginning at the next start.
for (File f : cpMarkers) f.delete();
for (File f : lfsFiles) f.delete();
ig = (IgniteEx) startGrids(3);
ig.active(true);
cache = ig.cache(CACHE_NAME);
boolean fail = false;
// Check that all data is recovered from compacted WAL.
for (int i = 0; i < ENTRIES; i++) {
byte[] arr = cache.get(i);
if (arr == null) {
System.out.println(">>> Missing: " + i);
fail = true;
} else if (arr[i] != 1) {
System.out.println(">>> Corrupted: " + i);
fail = true;
}
}
assertFalse(fail);
}
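The fixed Thread.sleep(15_000) above is a timing assumption and can flake on slow machines. A minimal sketch of a polling alternative, assuming Ignite's GridTestUtils test helper and the same archive path the test computes afterwards:

// Sketch: wait for the compacted segment to appear instead of sleeping a fixed interval.
File walZip = new File(nodeArchiveDir, FileWriteAheadLogManager.FileDescriptor.fileName(0) + ".zip");

assertTrue("WAL segment was not compacted in time",
    GridTestUtils.waitForCondition(walZip::exists, 15_000));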
Use of org.apache.ignite.internal.pagemem.wal.record.PageSnapshot in project ignite by apache.
The class WalStat, method registerRecord.
/**
* Handles WAL record.
*
* @param record Record to handle.
* @param walPointer Pointer, used to extract the segment index.
* @param workDir {@code true} for the work folder, {@code false} for the archive folder.
*/
void registerRecord(WALRecord record, WALPointer walPointer, boolean workDir) {
WALRecord.RecordType type = record.type();
if (type == WALRecord.RecordType.PAGE_RECORD)
registerPageSnapshot((PageSnapshot) record);
else if (type == WALRecord.RecordType.DATA_RECORD)
registerDataRecord((DataRecord) record);
else if (type == WALRecord.RecordType.TX_RECORD)
registerTxRecord((TxRecord) record);
incrementStat(type.toString(), record, recTypeSizes);
if (walPointer instanceof FileWALPointer) {
final FileWALPointer fPtr = (FileWALPointer) walPointer;
incrementStat(Long.toString(fPtr.index()), record, segmentsIndexes);
incrementStat(workDir ? "work" : "archive", record, segmentsFolder);
}
}
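For context, a hedged usage sketch: records can be pumped into registerRecord from a standalone WAL iterator. The exact IgniteWalIteratorFactory.iterator(...) overload and WalStat wiring vary by Ignite version; walArchiveDir and log are assumed to be provided by the caller.

WalStat stat = new WalStat();
IgniteWalIteratorFactory factory = new IgniteWalIteratorFactory(log);

// Iterate archived segments and aggregate per-record statistics.
try (WALIterator it = factory.iterator(walArchiveDir)) {
    while (it.hasNext()) {
        IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

        stat.registerRecord(tup.get2(), tup.get1(), /*workDir*/ false);
    }
}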
Use of org.apache.ignite.internal.pagemem.wal.record.PageSnapshot in project ignite by apache.
The class RecordDataV2Serializer, method readPlainRecord.
/**
* {@inheritDoc}
*/
@Override
WALRecord readPlainRecord(RecordType type, ByteBufferBackedDataInput in, boolean encrypted, int recordSize) throws IOException, IgniteCheckedException {
switch (type) {
case PAGE_RECORD:
int cacheId = in.readInt();
long pageId = in.readLong();
byte[] arr = new byte[recordSize - 4 /* cacheId */ - 8 /* pageId */];
in.readFully(arr);
return new PageSnapshot(new FullPageId(pageId, cacheId), arr, encrypted ? realPageSize : pageSize);
case CHECKPOINT_RECORD:
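// Layout: checkpoint ID as two longs (UUID), optional back-pointer (index, offset, length),
// per-group partition states, then an end-of-checkpoint flag.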
long msb = in.readLong();
long lsb = in.readLong();
boolean hasPtr = in.readByte() != 0;
long idx0 = hasPtr ? in.readLong() : 0;
int off = hasPtr ? in.readInt() : 0;
int len = hasPtr ? in.readInt() : 0;
Map<Integer, CacheState> states = readPartitionStates(in);
boolean end = in.readByte() != 0;
WALPointer walPtr = hasPtr ? new WALPointer(idx0, off, len) : null;
CheckpointRecord cpRec = new CheckpointRecord(new UUID(msb, lsb), walPtr, end);
cpRec.cacheGroupStates(states);
return cpRec;
case DATA_RECORD:
case DATA_RECORD_V2:
int entryCnt = in.readInt();
long timeStamp = in.readLong();
if (entryCnt == 1)
return new DataRecord(readPlainDataEntry(in, type), timeStamp);
else {
List<DataEntry> entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++)
    entries.add(readPlainDataEntry(in, type));
return new DataRecord(entries, timeStamp);
}
case MVCC_DATA_RECORD:
entryCnt = in.readInt();
timeStamp = in.readLong();
List<DataEntry> entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++)
    entries.add(readMvccDataEntry(in));
return new MvccDataRecord(entries, timeStamp);
case ENCRYPTED_DATA_RECORD:
case ENCRYPTED_DATA_RECORD_V2:
case ENCRYPTED_DATA_RECORD_V3:
entryCnt = in.readInt();
timeStamp = in.readLong();
if (entryCnt == 1)
return new DataRecord(readEncryptedDataEntry(in, type), timeStamp);
else {
entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++)
    entries.add(readEncryptedDataEntry(in, type));
return new DataRecord(entries, timeStamp);
}
case SNAPSHOT:
long snpId = in.readLong();
byte full = in.readByte();
return new SnapshotRecord(snpId, full == 1);
case EXCHANGE:
int idx = in.readInt();
short constId = in.readShort();
long ts = in.readLong();
return new ExchangeRecord(constId, ExchangeRecord.Type.values()[idx], ts);
case TX_RECORD:
return txRecordSerializer.readTx(in);
case MVCC_TX_RECORD:
return txRecordSerializer.readMvccTx(in);
case ROLLBACK_TX_RECORD:
int grpId = in.readInt();
int partId = in.readInt();
long start = in.readLong();
long range = in.readLong();
return new RollbackRecord(grpId, partId, start, range);
case TRACKING_PAGE_REPAIR_DELTA:
cacheId = in.readInt();
pageId = in.readLong();
return new TrackingPageRepairDeltaRecord(cacheId, pageId);
default:
return super.readPlainRecord(type, in, encrypted, recordSize);
}
}
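The PAGE_RECORD branch above implies a simple wire layout: a 4-byte cache ID, an 8-byte page ID, and the raw page bytes filling the remainder of the record. A hypothetical write-side sketch mirroring that layout (illustrative only, not the project's actual serializer code):

void writePageSnapshot(PageSnapshot snap, ByteBuffer buf) {
    buf.putInt(snap.fullPageId().groupId()); // 4 bytes: cache/group ID.
    buf.putLong(snap.fullPageId().pageId()); // 8 bytes: page ID.
    buf.put(snap.pageData());                // Remaining recordSize - 12 bytes: page contents.
}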
Use of org.apache.ignite.internal.pagemem.wal.record.PageSnapshot in project ignite by apache.
The class GridCacheDatabaseSharedManager, method performBinaryMemoryRestore.
/**
 * @param status Checkpoint status.
 * @param cacheGroupsPredicate Cache groups to restore.
 * @param recordTypePredicate Predicate filtering WAL record types (and pointers) to replay.
 * @param finalizeState Whether to finalize the checkpoint state after restore.
 * @return Binary restore state, or {@code null} if {@code finalizeState} is {@code false}.
 * @throws IgniteCheckedException If failed.
 * @throws StorageException In case I/O error occurred during operations with storage.
 */
private RestoreBinaryState performBinaryMemoryRestore(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean finalizeState) throws IgniteCheckedException {
if (log.isInfoEnabled())
log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
WALPointer recPtr = status.endPtr;
boolean apply = status.needRestoreMemory();
try {
WALRecord startRec = !CheckpointStatus.NULL_PTR.equals(status.startPtr) || apply ? cctx.wal().read(status.startPtr) : null;
if (apply) {
if (finalizeState)
U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");
cctx.cache().cacheGroupDescriptors().forEach((grpId, desc) -> {
if (!cacheGroupsPredicate.apply(grpId))
return;
try {
DataRegion region = cctx.database().dataRegion(desc.config().getDataRegionName());
if (region == null || !cctx.isLazyMemoryAllocation(region))
return;
region.pageMemory().start();
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
});
cctx.pageStore().beginRecover();
if (!(startRec instanceof CheckpointRecord))
throw new StorageException("Checkpoint marker doesn't point to checkpoint record " + "[ptr=" + status.startPtr + ", rec=" + startRec + "]");
WALPointer cpMark = ((CheckpointRecord) startRec).checkpointMark();
if (cpMark != null) {
if (log.isInfoEnabled())
log.info("Restoring checkpoint after logical recovery, will start physical recovery from " + "back pointer: " + cpMark);
recPtr = cpMark;
}
} else
cctx.wal().notchLastCheckpointPtr(status.startPtr);
} catch (NoSuchElementException e) {
throw new StorageException("Failed to read checkpoint record from WAL, persistence consistency " + "cannot be guaranteed. Make sure configuration points to correct WAL folders and WAL folder is " + "properly mounted [ptr=" + status.startPtr + ", walPath=" + persistenceCfg.getWalPath() + ", walArchive=" + persistenceCfg.getWalArchivePath() + "]");
}
AtomicReference<Throwable> applyError = new AtomicReference<>();
StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
long start = U.currentTimeMillis();
long lastArchivedSegment = cctx.wal().lastArchivedSegment();
WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);
RestoreBinaryState restoreBinaryState = new RestoreBinaryState(status, it, lastArchivedSegment, cacheGroupsPredicate);
AtomicLong applied = new AtomicLong();
try {
while (restoreBinaryState.hasNext()) {
if (applyError.get() != null)
break;
WALRecord rec = restoreBinaryState.next();
if (rec == null)
break;
switch (rec.type()) {
case PAGE_RECORD:
if (restoreBinaryState.needApplyBinaryUpdate()) {
PageSnapshot pageSnapshot = (PageSnapshot) rec;
// Here we do not require tag check because we may be applying memory changes after
// several repetitive restarts and the same pages may have changed several times.
int groupId = pageSnapshot.fullPageId().groupId();
int partId = partId(pageSnapshot.fullPageId().pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageSnapshot(pageMem, pageSnapshot);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page snapshot. rec=[" + pageSnapshot + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page snapshot", t));
}
}, groupId, partId, exec, semaphore);
}
break;
case PART_META_UPDATE_STATE:
PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
{
int groupId = metaStateRecord.groupId();
int partId = metaStateRecord.partitionId();
stripedApplyPage((pageMem) -> {
GridDhtPartitionState state = fromOrdinal(metaStateRecord.state());
if (state == null || state == GridDhtPartitionState.EVICTED)
schedulePartitionDestroy(groupId, partId);
else {
try {
cancelOrWaitPartitionDestroy(groupId, partId);
} catch (Throwable t) {
U.error(log, "Failed to cancel or wait partition destroy. rec=[" + metaStateRecord + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to cancel or wait partition destroy", t));
}
}
}, groupId, partId, exec, semaphore);
}
break;
case PARTITION_DESTROY:
PartitionDestroyRecord destroyRecord = (PartitionDestroyRecord) rec;
{
int groupId = destroyRecord.groupId();
int partId = destroyRecord.partitionId();
stripedApplyPage((pageMem) -> {
pageMem.invalidate(groupId, partId);
schedulePartitionDestroy(groupId, partId);
}, groupId, partId, exec, semaphore);
}
break;
default:
if (restoreBinaryState.needApplyBinaryUpdate() && rec instanceof PageDeltaRecord) {
PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
int groupId = pageDelta.groupId();
int partId = partId(pageDelta.pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageDelta(pageMem, pageDelta, true);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page delta. rec=[" + pageDelta + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page delta", t));
}
}, groupId, partId, exec, semaphore);
}
}
}
} finally {
it.close();
awaitApplyComplete(exec, applyError);
}
if (!finalizeState)
return null;
WALPointer lastReadPtr = restoreBinaryState.lastReadRecordPointer();
if (status.needRestoreMemory()) {
if (restoreBinaryState.needApplyBinaryUpdate())
throw new StorageException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastReadPtr + "]");
if (log.isInfoEnabled())
log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr, exec);
}
return restoreBinaryState;
}
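stripedApplyPage (used throughout the loop above) routes each closure to an executor stripe derived from the partition, so all updates to one partition apply in order while different partitions proceed in parallel, and awaitApplyComplete drains the stripes afterwards. A minimal sketch of that routing, assuming the stripe is keyed by (groupId, partId) and the semaphore bounds in-flight tasks (the real method also handles rejections and error propagation):

private void stripedApplySketch(Runnable task, int grpId, int partId, StripedExecutor exec, Semaphore sem) {
    // Key the stripe by (group, partition) so one partition is always applied by one thread.
    int stripe = U.safeAbs(grpId * 31 + partId) % exec.stripesCount();

    sem.acquireUninterruptibly(); // Bound the number of queued page-apply tasks.

    exec.execute(stripe, () -> {
        try {
            task.run();
        } finally {
            sem.release();
        }
    });
}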
Use of org.apache.ignite.internal.pagemem.wal.record.PageSnapshot in project ignite by apache.
The class GridCacheOffheapManager, method getOrAllocateCacheMetas.
/**
* @return Meta root pages info.
* @throws IgniteCheckedException If failed.
*/
private Metas getOrAllocateCacheMetas() throws IgniteCheckedException {
PageMemoryEx pageMem = (PageMemoryEx) grp.dataRegion().pageMemory();
IgniteWriteAheadLogManager wal = ctx.wal();
int grpId = grp.groupId();
long metaId = PageMemory.META_PAGE_ID;
long metaPage = pageMem.acquirePage(grpId, metaId);
try {
long pageAddr = pageMem.writeLock(grpId, metaId, metaPage);
boolean allocated = false;
boolean markDirty = false;
try {
long metastoreRoot, reuseListRoot;
PageMetaIOV2 io = (PageMetaIOV2) PageMetaIO.VERSIONS.latest();
if (PageIO.getType(pageAddr) != PageIO.T_META) {
PageMetrics metrics = pageMem.metrics().cacheGrpPageMetrics(grpId);
io.initNewPage(pageAddr, metaId, pageMem.realPageSize(grpId), metrics);
metastoreRoot = pageMem.allocatePage(grpId, PageIdAllocator.INDEX_PARTITION, PageMemory.FLAG_IDX);
reuseListRoot = pageMem.allocatePage(grpId, PageIdAllocator.INDEX_PARTITION, PageMemory.FLAG_IDX);
io.setTreeRoot(pageAddr, metastoreRoot);
io.setReuseListRoot(pageAddr, reuseListRoot);
if (isWalDeltaRecordNeeded(pageMem, grpId, metaId, metaPage, wal, null)) {
assert io.getType() == PageIO.T_META;
wal.log(new MetaPageInitRecord(grpId, metaId, io.getType(), io.getVersion(), metastoreRoot, reuseListRoot));
}
allocated = true;
} else {
if (io != PageIO.getPageIO(pageAddr)) {
if (log.isDebugEnabled()) {
log.debug("Upgrade index partition meta page version: [grpId=" + grpId + ", oldVer=" + PagePartitionMetaIO.getVersion(pageAddr) + ", newVer=" + io.getVersion() + ']');
}
io.upgradePage(pageAddr);
markDirty = true;
}
metastoreRoot = io.getTreeRoot(pageAddr);
reuseListRoot = io.getReuseListRoot(pageAddr);
int encrPageCnt = io.getEncryptedPageCount(pageAddr);
if (encrPageCnt > 0) {
ctx.kernalContext().encryption().setEncryptionState(grp, PageIdAllocator.INDEX_PARTITION, io.getEncryptedPageIndex(pageAddr), encrPageCnt);
markDirty = true;
}
assert reuseListRoot != 0L;
if (markDirty && isWalDeltaRecordNeeded(pageMem, grpId, metaId, metaPage, wal, null)) {
wal.log(new PageSnapshot(new FullPageId(PageIdAllocator.INDEX_PARTITION, grpId), pageAddr, pageMem.pageSize(), pageMem.realPageSize(grpId)));
}
}
return new Metas(new RootPage(new FullPageId(metastoreRoot, grpId), allocated), new RootPage(new FullPageId(reuseListRoot, grpId), allocated), null, null);
} finally {
pageMem.writeUnlock(grpId, metaId, metaPage, null, allocated || markDirty);
}
} finally {
pageMem.releasePage(grpId, metaId, metaPage);
}
}
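Note the two WAL paths above: fresh page initialization logs a compact MetaPageInitRecord delta, while the in-place version upgrade logs a full PageSnapshot, because no dedicated delta record describes an arbitrary meta-page upgrade. The underlying convention in Ignite's physical WAL is that a page's full image is logged the first time it is dirtied after a checkpoint, after which deltas suffice. A conceptual sketch of that decision (assumed logic, not the actual isWalDeltaRecordNeeded implementation):

boolean walDeltaRecordNeededSketch(PageMemoryEx pageMem, int grpId, long pageId, long page, IgniteWriteAheadLogManager wal) {
    // No WAL configured: nothing to log.
    if (wal == null)
        return false;

    // A page already marked dirty has had its full image captured for the current
    // checkpoint, so subsequent changes can be logged as compact delta records.
    return pageMem.isDirty(grpId, pageId, page);
}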