use of org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord in project ignite by apache.
the class PageMemoryImpl method tryToRestorePage.
/**
 * Rebuilds the content of one page from WAL page snapshots and delta records and copies it into {@code buf}.
 * <p>
 * The WAL is replayed via {@code walMgr.replay(null)}. While iterating:
 * <ul>
 * <li>a page snapshot for {@code fullId} (re)fills a temporary off-heap page and makes it the current page;</li>
 * <li>a checkpoint record moves the current page (if any) into the "last valid" slot;</li>
 * <li>a memory recovery record discards the current page;</li>
 * <li>a delta record for the same page and group is applied only while a current page exists.</li>
 * </ul>
 * After the scan the current page is used if present, otherwise the last valid one.
 * <p>
 * NOTE(review): the current and the last valid buffers both wrap the same temporary address, so a snapshot that
 * arrives after a checkpoint record overwrites the bytes the last valid buffer points to - confirm this is intended.
 *
 * @param fullId Full page ID.
 * @param buf Destination byte buffer. Note: synchronization that makes the buffer safe to use must be done by
 * the caller.
 *
 * @throws IgniteCheckedException If failed to start WAL iteration, if incorrect page type observed in data, etc.
 * @throws IllegalStateException If neither a current nor a last valid page could be assembled from WAL.
 */
private void tryToRestorePage(FullPageId fullId, ByteBuffer buf) throws IgniteCheckedException {
    // Boxed on purpose: null means "temporary page memory has not been allocated yet".
    Long tmpAddr = null;

    try {
        ByteBuffer curPage = null;
        ByteBuffer lastValidPage = null;

        try (WALIterator walIt = walMgr.replay(null)) {
            for (IgniteBiTuple<WALPointer, WALRecord> entry : walIt) {
                WALRecord walRec = entry.getValue();

                switch (walRec.type()) {
                    case PAGE_RECORD:
                        PageSnapshot pageSnapshot = (PageSnapshot) walRec;

                        // Snapshots of other pages are irrelevant.
                        if (!pageSnapshot.fullPageId().equals(fullId))
                            break;

                        if (tmpAddr == null) {
                            assert pageSnapshot.pageData().length <= pageSize() : pageSnapshot.pageData().length;

                            tmpAddr = GridUnsafe.allocateMemory(pageSize());
                        }

                        if (curPage == null)
                            curPage = wrapPointer(tmpAddr, pageSize());

                        PageUtils.putBytes(tmpAddr, 0, pageSnapshot.pageData());

                        break;

                    case CHECKPOINT_RECORD:
                        CheckpointRecord cpRec = (CheckpointRecord) walRec;

                        assert !cpRec.end();

                        // Whatever has been assembled so far becomes the fallback result.
                        if (curPage != null) {
                            lastValidPage = curPage;
                            curPage = null;
                        }

                        break;

                    case MEMORY_RECOVERY:
                        // It means that previous checkpoint was broken.
                        curPage = null;

                        break;

                    default:
                        if (!(walRec instanceof PageDeltaRecord))
                            break;

                        PageDeltaRecord delta = (PageDeltaRecord) walRec;

                        // Deltas are applied only on top of an already seen snapshot of the very same page.
                        if (curPage == null)
                            break;

                        if (delta.pageId() != fullId.pageId())
                            break;

                        if (delta.groupId() != fullId.groupId())
                            break;

                        assert tmpAddr != null;

                        delta.applyDelta(this, tmpAddr);
                }
            }
        }

        ByteBuffer restored = curPage;

        if (restored == null)
            restored = lastValidPage;

        if (restored == null)
            throw new IllegalStateException(String.format("Page is broken. Can't restore it from WAL. (grpId = %d, pageId = %X).", fullId.groupId(), fullId.pageId()));

        buf.put(restored);
    }
    finally {
        // Temporary off-heap page is released on every exit path.
        if (tmpAddr != null)
            GridUnsafe.freeMemory(tmpAddr);
    }
}
use of org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord in project ignite by apache.
the class GridCacheDatabaseSharedManager method applyLastUpdates.
/**
 * Replays WAL starting from {@code status.startPtr} and re-applies the logical updates found there.
 * <p>
 * Metastore data records are always applied. Data records, partition state records and the listed meta page
 * snapshot-id delta records are applied only when {@code metastoreOnly} is {@code false}; in that mode records of
 * groups contained in {@code initiallyWalDisabledGrps} are skipped and the collected partition states are passed
 * to {@code restorePartitionState} after the replay.
 *
 * @param status Last registered checkpoint status.
 * @param metastoreOnly If {@code true}, only metastore data records are applied.
 * @throws IgniteCheckedException If failed to apply updates.
 * @throws StorageException If IO exception occurred while reading write-ahead log.
 */
private void applyLastUpdates(CheckpointStatus status, boolean metastoreOnly) throws IgniteCheckedException {
    if (log.isInfoEnabled()) {
        log.info("Applying lost cache updates since last checkpoint record [lastMarked=" + status.startPtr
            + ", lastCheckpointId=" + status.cpStartId + ']');
    }

    final boolean applyCacheData = !metastoreOnly;

    if (applyCacheData)
        cctx.kernalContext().query().skipFieldLookup(true);

    long startTs = U.currentTimeMillis();
    int appliedCnt = 0;

    Collection<Integer> skippedGrps;

    if (metastoreOnly)
        skippedGrps = Collections.emptySet();
    else
        skippedGrps = initiallyWalDisabledGrps;

    try (WALIterator walIt = cctx.wal().replay(status.startPtr)) {
        // (group ID, partition ID) -> (partition state, update counter).
        Map<T2<Integer, Integer>, T2<Integer, Long>> partStates = new HashMap<>();

        while (walIt.hasNextX()) {
            WALRecord walRec = walIt.nextX().get2();

            switch (walRec.type()) {
                case DATA_RECORD:
                    if (applyCacheData) {
                        for (DataEntry entry : ((DataRecord) walRec).writeEntries()) {
                            int cacheId = entry.cacheId();
                            int grpId = cctx.cache().cacheDescriptor(cacheId).groupId();

                            if (skippedGrps.contains(grpId))
                                continue;

                            GridCacheContext cacheContext = cctx.cacheContext(cacheId);

                            applyUpdate(cacheContext, entry);

                            appliedCnt++;
                        }
                    }

                    break;

                case PART_META_UPDATE_STATE:
                    if (applyCacheData) {
                        PartitionMetaStateRecord stateRec = (PartitionMetaStateRecord) walRec;

                        // A later record for the same partition replaces the earlier one.
                        if (!skippedGrps.contains(stateRec.groupId())) {
                            T2<Integer, Integer> partKey = new T2<>(stateRec.groupId(), stateRec.partitionId());
                            T2<Integer, Long> partVal = new T2<>((int) stateRec.state(), stateRec.updateCounter());

                            partStates.put(partKey, partVal);
                        }
                    }

                    break;

                case METASTORE_DATA_RECORD:
                    MetastoreDataRecord storeRec = (MetastoreDataRecord) walRec;

                    metaStorage.applyUpdate(storeRec.key(), storeRec.value());

                    break;

                case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
                    if (applyCacheData) {
                        PageDeltaRecord delta = (PageDeltaRecord) walRec;

                        int deltaGrpId = delta.groupId();
                        long deltaPageId = delta.pageId();

                        PageMemoryEx grpPageMem = getPageMemoryForCacheGroup(deltaGrpId);

                        long pagePtr = grpPageMem.acquirePage(deltaGrpId, deltaPageId, true);

                        try {
                            long pageAddr = grpPageMem.writeLock(deltaGrpId, deltaPageId, pagePtr, true);

                            try {
                                delta.applyDelta(grpPageMem, pageAddr);
                            }
                            finally {
                                grpPageMem.writeUnlock(deltaGrpId, deltaPageId, pagePtr, null, true, true);
                            }
                        }
                        finally {
                            grpPageMem.releasePage(deltaGrpId, deltaPageId, pagePtr);
                        }
                    }

                    break;

                default:
                    // Other record types are ignored here.
            }
        }

        if (applyCacheData)
            restorePartitionState(partStates, skippedGrps);
    }
    finally {
        // Undo the switch made before the replay, even if the replay failed.
        if (applyCacheData)
            cctx.kernalContext().query().skipFieldLookup(false);
    }

    if (log.isInfoEnabled()) {
        log.info("Finished applying WAL changes [updatesApplied=" + appliedCnt
            + ", time=" + (U.currentTimeMillis() - startTs) + "ms]");
    }
}
use of org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord in project ignite by apache.
the class GridCacheDatabaseSharedManager method restoreMemory.
/**
 * Replays WAL from {@code status.endPtr} and, if {@code status.needRestoreMemory()} reports that it is needed,
 * re-applies page snapshots and page delta records to page memory until the checkpoint record whose ID equals
 * {@code status.cpStartId} is met.
 * <p>
 * Partition destroy records are processed regardless of the "apply" flag. Records of groups contained in
 * {@code initiallyWalDisabledGrps} are skipped unless {@code storeOnly} is set, in which case only records of
 * {@code METASTORAGE_CACHE_ID} are processed.
 *
 * @param status Checkpoint status.
 * @param storeOnly If {@code True} restores Metastorage only.
 * @param storePageMem Page memory used for {@code METASTORAGE_CACHE_ID} pages. Must not be {@code null} when
 * {@code storeOnly} is {@code true}.
 * @return {@code null} if {@code storeOnly} is set or no record was read, otherwise the pointer following the last
 * read WAL record.
 * @throws IgniteCheckedException If memory had to be restored but the expected checkpoint record was not found in
 * WAL, or if WAL iteration or page access failed.
 */
private WALPointer restoreMemory(CheckpointStatus status, boolean storeOnly, PageMemoryEx storePageMem) throws IgniteCheckedException {
    assert !storeOnly || storePageMem != null;

    if (log.isInfoEnabled())
        log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');

    boolean apply = status.needRestoreMemory();

    if (apply) {
        U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");

        cctx.pageStore().beginRecover();
    }
    else
        cctx.wal().allowCompressionUntil(status.startPtr);

    long start = U.currentTimeMillis();
    int applied = 0;
    WALPointer lastRead = null;

    Collection<Integer> ignoreGrps = storeOnly ? Collections.emptySet() : initiallyWalDisabledGrps;

    try (WALIterator it = cctx.wal().replay(status.endPtr)) {
        while (it.hasNextX()) {
            IgniteBiTuple<WALPointer, WALRecord> tup = it.nextX();
            WALRecord rec = tup.get2();

            lastRead = tup.get1();

            switch (rec.type()) {
                case CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord) rec;

                    // We roll memory up until we find a checkpoint start record registered in the status.
                    if (F.eq(cpRec.checkpointId(), status.cpStartId)) {
                        // Guarded like the other info messages of this method.
                        if (log.isInfoEnabled())
                            log.info("Found last checkpoint marker [cpId=" + cpRec.checkpointId() + ", pos=" + tup.get1() + ']');

                        apply = false;
                    }
                    else if (!F.eq(cpRec.checkpointId(), status.cpEndId))
                        U.warn(log, "Found unexpected checkpoint marker, skipping [cpId=" + cpRec.checkpointId() + ", expCpId=" + status.cpStartId + ", pos=" + tup.get1() + ']');

                    break;

                case PAGE_RECORD:
                    if (apply) {
                        PageSnapshot pageRec = (PageSnapshot) rec;

                        // Here we do not require tag check because we may be applying memory changes after
                        // several repetitive restarts and the same pages may have changed several times.
                        int grpId = pageRec.fullPageId().groupId();

                        if (storeOnly && grpId != METASTORAGE_CACHE_ID)
                            continue;

                        if (!ignoreGrps.contains(grpId)) {
                            long pageId = pageRec.fullPageId().pageId();

                            PageMemoryEx pageMem = grpId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(grpId);

                            long page = pageMem.acquirePage(grpId, pageId, true);

                            try {
                                long pageAddr = pageMem.writeLock(grpId, pageId, page);

                                try {
                                    // Overwrite the page with the logged snapshot.
                                    PageUtils.putBytes(pageAddr, 0, pageRec.pageData());
                                }
                                finally {
                                    pageMem.writeUnlock(grpId, pageId, page, null, true, true);
                                }
                            }
                            finally {
                                pageMem.releasePage(grpId, pageId, page);
                            }

                            applied++;
                        }
                    }

                    break;

                case PARTITION_DESTROY:
                    PartitionDestroyRecord destroyRec = (PartitionDestroyRecord) rec;

                    final int gId = destroyRec.groupId();

                    if (storeOnly && gId != METASTORAGE_CACHE_ID)
                        continue;

                    if (!ignoreGrps.contains(gId)) {
                        final int pId = destroyRec.partitionId();

                        PageMemoryEx pageMem = gId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(gId);

                        // Clear pages of the destroyed partition and wait for completion.
                        pageMem.clearAsync((grpId, pageId) -> grpId == gId && PageIdUtils.partId(pageId) == pId, true).get();
                    }

                    break;

                default:
                    if (apply && rec instanceof PageDeltaRecord) {
                        PageDeltaRecord r = (PageDeltaRecord) rec;

                        int grpId = r.groupId();

                        if (storeOnly && grpId != METASTORAGE_CACHE_ID)
                            continue;

                        if (!ignoreGrps.contains(grpId)) {
                            long pageId = r.pageId();

                            PageMemoryEx pageMem = grpId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(grpId);

                            // Here we do not require tag check because we may be applying memory changes after
                            // several repetitive restarts and the same pages may have changed several times.
                            long page = pageMem.acquirePage(grpId, pageId, true);

                            try {
                                long pageAddr = pageMem.writeLock(grpId, pageId, page);

                                try {
                                    r.applyDelta(pageMem, pageAddr);
                                }
                                finally {
                                    pageMem.writeUnlock(grpId, pageId, page, null, true, true);
                                }
                            }
                            finally {
                                pageMem.releasePage(grpId, pageId, page);
                            }

                            applied++;
                        }
                    }
            }
        }
    }

    if (storeOnly)
        return null;

    if (status.needRestoreMemory()) {
        // "apply" is reset only when the checkpoint record with ID status.cpStartId has been met.
        if (apply)
            throw new IgniteCheckedException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastRead + "]");

        if (log.isInfoEnabled())
            log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + "ms]");

        if (applied > 0)
            finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr);
    }

    checkpointHist.loadHistory(cpDir);

    return lastRead == null ? null : lastRead.next();
}
use of org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord in project ignite by apache.
the class IgniteWalRecoveryTest method testApplyDeltaRecords.
/**
 * Checks that page delta records read back from WAL can be applied on top of page snapshots and that the
 * resulting bytes match the content of the corresponding pages in page memory.
 * <p>
 * Checkpoints are disabled after an initial load, a {@code MemoryRecoveryRecord} is logged as the replay start
 * marker, then puts and removes are performed, WAL is replayed from the marker and the rolled pages are compared
 * byte by byte with page memory (tracking pages are not compared).
 *
 * @throws Exception if failed.
 */
public void testApplyDeltaRecords() throws Exception {
    try {
        IgniteEx ignite0 = (IgniteEx) startGrid("node0");

        ignite0.active(true);

        IgniteCache<Object, Object> cache0 = ignite0.cache(cacheName);

        for (int i = 0; i < 1000; i++)
            cache0.put(i, new IndexedObject(i));

        GridCacheSharedContext<Object, Object> sharedCtx = ignite0.context().cache().context();
        GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) sharedCtx.database();

        db.waitForCheckpoint("test");
        db.enableCheckpoints(false).get();

        // Log something to know where to start.
        WALPointer ptr = sharedCtx.wal().log(new MemoryRecoveryRecord(U.currentTimeMillis()));

        info("Replay marker: " + ptr);

        for (int i = 1000; i < 5000; i++)
            cache0.put(i, new IndexedObject(i));

        info("Done puts...");

        for (int i = 2_000; i < 3_000; i++)
            cache0.remove(i);

        info("Done removes...");

        for (int i = 5000; i < 6000; i++)
            cache0.put(i, new IndexedObject(i));

        info("Done puts...");

        Map<FullPageId, byte[]> rolledPages = new HashMap<>();

        int pageSize = sharedCtx.database().pageSize();

        ByteBuffer buf = ByteBuffer.allocateDirect(pageSize);

        // Byte order does not change between records, so it is set once instead of on every delta.
        buf.order(ByteOrder.nativeOrder());

        // Now check that deltas can be correctly applied.
        try (WALIterator it = sharedCtx.wal().replay(ptr)) {
            while (it.hasNext()) {
                IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

                WALRecord rec = tup.get2();

                if (rec instanceof PageSnapshot) {
                    PageSnapshot page = (PageSnapshot) rec;

                    rolledPages.put(page.fullPageId(), page.pageData());
                }
                else if (rec instanceof PageDeltaRecord) {
                    PageDeltaRecord delta = (PageDeltaRecord) rec;

                    FullPageId fullId = new FullPageId(delta.pageId(), delta.groupId());

                    // A delta without a preceding snapshot is applied to a zero-filled page.
                    byte[] pageData = rolledPages.computeIfAbsent(fullId, id -> new byte[pageSize]);

                    // Round-trip through the direct buffer: applyDelta works on a native address.
                    buf.position(0);
                    buf.put(pageData);
                    buf.position(0);

                    delta.applyDelta(sharedCtx.database().dataRegion(null).pageMemory(), GridUnsafe.bufferAddress(buf));

                    buf.position(0);
                    buf.get(pageData);
                }
            }
        }

        info("Done apply...");

        PageMemoryEx pageMem = (PageMemoryEx) db.dataRegion(null).pageMemory();

        for (Map.Entry<FullPageId, byte[]> entry : rolledPages.entrySet()) {
            FullPageId fullId = entry.getKey();

            ignite0.context().cache().context().database().checkpointReadLock();

            try {
                long page = pageMem.acquirePage(fullId.groupId(), fullId.pageId(), true);

                try {
                    long bufPtr = pageMem.writeLock(fullId.groupId(), fullId.pageId(), page, true);

                    try {
                        byte[] data = entry.getValue();

                        // The check does not depend on the byte index, so it is evaluated once per page.
                        boolean trackingPage = fullId.pageId() ==
                            TrackingPageIO.VERSIONS.latest().trackingPageFor(fullId.pageId(), db.pageSize());

                        // Skip tracking pages.
                        if (!trackingPage) {
                            for (int i = 0; i < data.length; i++)
                                assertEquals("page=" + fullId + ", pos=" + i, PageUtils.getByte(bufPtr, i), data[i]);
                        }
                    }
                    finally {
                        pageMem.writeUnlock(fullId.groupId(), fullId.pageId(), page, null, false, true);
                    }
                }
                finally {
                    pageMem.releasePage(fullId.groupId(), fullId.pageId(), page);
                }
            }
            finally {
                ignite0.context().cache().context().database().checkpointReadUnlock();
            }
        }

        ignite0.close();
    }
    finally {
        stopAllGrids();
    }
}
Aggregations