Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in the Apache Ignite project.
Class GridCacheDatabaseSharedManager, method restoreMemory.
/**
 * Replays WAL records starting from {@code status.endPtr}. While the restore flag is set (the checkpoint
 * status reports that memory restore is needed and the checkpoint record with {@code status.cpStartId}
 * has not been met yet), page snapshots and page delta records are re-applied to page memory.
 * Partition destroy records are processed regardless of that flag.
 *
 * @param status Checkpoint status.
 * @param storeOnly If {@code true} restores Metastorage only.
 * @param storePageMem Page memory used for pages of the Metastorage group; must not be {@code null}
 *      when {@code storeOnly} is {@code true}.
 * @return {@code null} if {@code storeOnly} is set or no WAL record was read, otherwise the pointer
 *      that follows the last read record.
 * @throws IgniteCheckedException If memory restore was required, but the checkpoint record with
 *      {@code status.cpStartId} was not found in WAL; also declared by the WAL and page memory calls
 *      made during the replay.
 */
private WALPointer restoreMemory(CheckpointStatus status, boolean storeOnly, PageMemoryEx storePageMem) throws IgniteCheckedException {
    assert !storeOnly || storePageMem != null;

    if (log.isInfoEnabled())
        log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');

    // Becomes false as soon as the checkpoint record registered in the status is found in WAL.
    boolean apply = status.needRestoreMemory();

    if (apply) {
        U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");

        cctx.pageStore().beginRecover();
    }
    else
        cctx.wal().allowCompressionUntil(status.startPtr);

    long start = U.currentTimeMillis();

    int applied = 0;

    WALPointer lastRead = null;

    // In store-only mode no group is ignored: non-Metastorage records are skipped explicitly below.
    Collection<Integer> ignoreGrps = storeOnly ? Collections.emptySet() : initiallyWalDisabledGrps;

    try (WALIterator it = cctx.wal().replay(status.endPtr)) {
        while (it.hasNextX()) {
            IgniteBiTuple<WALPointer, WALRecord> tup = it.nextX();

            WALRecord rec = tup.get2();

            lastRead = tup.get1();

            switch (rec.type()) {
                case CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord)rec;

                    // We roll memory up until we find a checkpoint start record registered in the status.
                    if (F.eq(cpRec.checkpointId(), status.cpStartId)) {
                        // Guarded like the first INFO message of this method: Ignite loggers expect
                        // the level to be checked before logging at INFO.
                        if (log.isInfoEnabled())
                            log.info("Found last checkpoint marker [cpId=" + cpRec.checkpointId() + ", pos=" + tup.get1() + ']');

                        apply = false;
                    }
                    else if (!F.eq(cpRec.checkpointId(), status.cpEndId))
                        U.warn(log, "Found unexpected checkpoint marker, skipping [cpId=" + cpRec.checkpointId() + ", expCpId=" + status.cpStartId + ", pos=" + tup.get1() + ']');

                    break;

                case PAGE_RECORD:
                    if (apply) {
                        PageSnapshot pageRec = (PageSnapshot)rec;

                        // Here we do not require tag check because we may be applying memory changes after
                        // several repetitive restarts and the same pages may have changed several times.
                        int grpId = pageRec.fullPageId().groupId();

                        // Proceeds to the next WAL record (continues the enclosing while loop).
                        if (storeOnly && grpId != METASTORAGE_CACHE_ID)
                            continue;

                        if (!ignoreGrps.contains(grpId)) {
                            long pageId = pageRec.fullPageId().pageId();

                            PageMemoryEx pageMem = grpId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(grpId);

                            long page = pageMem.acquirePage(grpId, pageId, true);

                            try {
                                long pageAddr = pageMem.writeLock(grpId, pageId, page);

                                try {
                                    // Overwrite the page content with the snapshot from WAL.
                                    PageUtils.putBytes(pageAddr, 0, pageRec.pageData());
                                }
                                finally {
                                    pageMem.writeUnlock(grpId, pageId, page, null, true, true);
                                }
                            }
                            finally {
                                pageMem.releasePage(grpId, pageId, page);
                            }

                            applied++;
                        }
                    }

                    break;

                case PARTITION_DESTROY:
                    // Note: handled independently of the 'apply' flag.
                    PartitionDestroyRecord destroyRec = (PartitionDestroyRecord)rec;

                    final int gId = destroyRec.groupId();

                    if (storeOnly && gId != METASTORAGE_CACHE_ID)
                        continue;

                    if (!ignoreGrps.contains(gId)) {
                        final int pId = destroyRec.partitionId();

                        PageMemoryEx pageMem = gId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(gId);

                        // Clear all pages of the destroyed partition and wait for completion.
                        pageMem.clearAsync((grpId, pageId) -> grpId == gId && PageIdUtils.partId(pageId) == pId, true).get();
                    }

                    break;

                default:
                    if (apply && rec instanceof PageDeltaRecord) {
                        PageDeltaRecord r = (PageDeltaRecord)rec;

                        int grpId = r.groupId();

                        if (storeOnly && grpId != METASTORAGE_CACHE_ID)
                            continue;

                        if (!ignoreGrps.contains(grpId)) {
                            long pageId = r.pageId();

                            PageMemoryEx pageMem = grpId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(grpId);

                            // Here we do not require tag check because we may be applying memory changes after
                            // several repetitive restarts and the same pages may have changed several times.
                            long page = pageMem.acquirePage(grpId, pageId, true);

                            try {
                                long pageAddr = pageMem.writeLock(grpId, pageId, page);

                                try {
                                    r.applyDelta(pageMem, pageAddr);
                                }
                                finally {
                                    pageMem.writeUnlock(grpId, pageId, page, null, true, true);
                                }
                            }
                            finally {
                                pageMem.releasePage(grpId, pageId, page);
                            }

                            applied++;
                        }
                    }
            }
        }
    }

    if (storeOnly)
        return null;

    if (status.needRestoreMemory()) {
        // The flag is still set only if the expected checkpoint record was never met during the replay.
        if (apply)
            throw new IgniteCheckedException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastRead + "]");

        if (log.isInfoEnabled())
            log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + "ms]");

        if (applied > 0)
            finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr);
    }

    checkpointHist.loadHistory(cpDir);

    return lastRead == null ? null : lastRead.next();
}
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in the Apache Ignite project.
Class IgnitePdsCheckpointSimulationWithRealCpDisabledTest, method verifyReads.
/**
 * Replays WAL from the given pointer, collecting page snapshots until the end mark of the first
 * checkpoint is met, and then checks for every written page that each byte after the common page header
 * equals the expected state byte both in page memory and in the collected WAL snapshot.
 *
 * @param res Result map to verify (a state of {@code -1} marks a page that was never written).
 * @param mem Memory.
 * @param start WAL pointer to replay from; the first replayed record must be a checkpoint record.
 * @param wal WAL manager used for the replay.
 * @throws Exception If failed.
 */
private void verifyReads(Map<FullPageId, Integer> res, PageMemory mem, WALPointer start, IgniteWriteAheadLogManager wal) throws Exception {
    Map<FullPageId, byte[]> snapshots = new HashMap<>();

    try (PartitionMetaStateRecordExcludeIterator walIt = new PartitionMetaStateRecordExcludeIterator(wal.replay(start))) {
        IgniteBiTuple<WALPointer, WALRecord> first = walIt.next();

        assertTrue("Invalid record: " + first, first.get2() instanceof CheckpointRecord);

        CheckpointRecord startRec = (CheckpointRecord)first.get2();

        while (walIt.hasNext()) {
            WALRecord cur = walIt.next().get2();

            if (cur instanceof CheckpointRecord) {
                CheckpointRecord cp = (CheckpointRecord)cur;

                // Found the finish mark.
                if (cp.checkpointId().equals(startRec.checkpointId()) && cp.end())
                    break;

                continue;
            }

            if (cur instanceof PageSnapshot) {
                PageSnapshot snapshot = (PageSnapshot)cur;

                // A later snapshot of the same page replaces the earlier one.
                snapshots.put(snapshot.fullPageId(), snapshot.pageData());
            }
        }
    }

    // Check read-through from the file store.
    for (Map.Entry<FullPageId, Integer> e : res.entrySet()) {
        FullPageId fullId = e.getKey();

        int state = e.getValue();

        if (state == -1) {
            info("Page was never written: " + fullId);

            continue;
        }

        byte[] walData = snapshots.get(fullId);

        assertNotNull("Missing WAL record for a written page: " + fullId, walData);

        int grpId = fullId.groupId();
        long pageId = fullId.pageId();

        // Same expected byte for every position of the page.
        int expState = state & 0xFF;

        long page = mem.acquirePage(grpId, pageId);

        try {
            long pageAddr = mem.readLock(grpId, pageId, page);

            try {
                for (int pos = PageIO.COMMON_HEADER_END; pos < mem.pageSize(); pos++) {
                    int pageState = PageUtils.getByte(pageAddr, pos) & 0xFF;
                    int walState = walData[pos] & 0xFF;

                    // Failure messages are built only on an actual mismatch.
                    if (pageState != expState)
                        assertEquals("Invalid state [pageId=" + fullId + ", pos=" + pos + ']', expState, pageState);

                    if (walState != expState)
                        assertEquals("Invalid WAL state [pageId=" + fullId + ", pos=" + pos + ']', expState, walState);
                }
            }
            finally {
                mem.readUnlock(grpId, pageId, page);
            }
        }
        finally {
            mem.releasePage(grpId, pageId, page);
        }
    }
}
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in the Apache Ignite project.
Class IgnitePdsCheckpointSimulationWithRealCpDisabledTest, method testDataWalEntries.
/**
 * Logs a checkpoint record followed by one data record per generated entry directly into WAL,
 * restarts the node and verifies that WAL replay from the logged checkpoint pointer returns the same
 * checkpoint record and data entries.
 *
 * @throws Exception if failed.
 */
public void testDataWalEntries() throws Exception {
    IgniteEx ig = startGrid(0);

    ig.active(true);

    GridCacheSharedContext<Object, Object> sharedCtx = ig.context().cache().context();
    GridCacheContext<Object, Object> cctx = sharedCtx.cache().cache(cacheName).context();

    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager)sharedCtx.database();
    IgniteWriteAheadLogManager wal = sharedCtx.wal();

    assertTrue(wal.isAlwaysWriteFullPages());

    db.enableCheckpoints(false).get();

    final int cnt = 10;

    List<DataEntry> entries = new ArrayList<>(cnt);

    // Even keys are updates with a value, odd keys are deletes with a null value.
    for (int i = 0; i < cnt; i++) {
        GridCacheOperation op = i % 2 == 0 ? GridCacheOperation.UPDATE : GridCacheOperation.DELETE;

        KeyCacheObject key = cctx.toCacheKeyObject(i);

        CacheObject val = null;

        if (op != GridCacheOperation.DELETE)
            val = cctx.toCacheObject("value-" + i);

        entries.add(new DataEntry(cctx.cacheId(), key, val, op, null, cctx.versions().next(), 0L, cctx.affinity().partition(i), i));
    }

    UUID cpId = UUID.randomUUID();

    WALPointer start = wal.log(new CheckpointRecord(cpId, null));

    wal.fsync(start);

    for (DataEntry entry : entries)
        wal.log(new DataRecord(entry));

    // Data will not be written to the page store.
    stopAllGrids();

    ig = startGrid(0);

    ig.active(true);

    sharedCtx = ig.context().cache().context();
    cctx = sharedCtx.cache().cache(cacheName).context();

    db = (GridCacheDatabaseSharedManager)sharedCtx.database();
    wal = sharedCtx.wal();

    db.enableCheckpoints(false).get();

    try (PartitionMetaStateRecordExcludeIterator it = new PartitionMetaStateRecordExcludeIterator(wal.replay(start))) {
        IgniteBiTuple<WALPointer, WALRecord> cpRecordTup = it.next();

        // JUnit assertion instead of a bare 'assert', so the check does not depend on the -ea JVM flag.
        assertTrue("Invalid record: " + cpRecordTup, cpRecordTup.get2() instanceof CheckpointRecord);

        assertEquals(start, cpRecordTup.get1());

        CheckpointRecord cpRec = (CheckpointRecord)cpRecordTup.get2();

        assertEquals(cpId, cpRec.checkpointId());
        assertNull(cpRec.checkpointMark());
        assertFalse(cpRec.end());

        int idx = 0;

        CacheObjectContext coctx = cctx.cacheObjectContext();

        while (idx < entries.size()) {
            IgniteBiTuple<WALPointer, WALRecord> dataRecTup = it.next();

            assertTrue("Invalid record: " + dataRecTup, dataRecTup.get2() instanceof DataRecord);

            DataRecord dataRec = (DataRecord)dataRecTup.get2();

            DataEntry entry = entries.get(idx);

            assertEquals(1, dataRec.writeEntries().size());

            DataEntry readEntry = dataRec.writeEntries().get(0);

            assertEquals(entry.cacheId(), readEntry.cacheId());
            assertEquals(entry.key().<Integer>value(coctx, true), readEntry.key().<Integer>value(coctx, true));
            assertEquals(entry.op(), readEntry.op());

            if (entry.op() == GridCacheOperation.UPDATE)
                assertEquals(entry.value().value(coctx, true), readEntry.value().value(coctx, true));
            else {
                assertNull(entry.value());

                // The logged entry is null by construction; what matters is that the entry
                // read back from WAL carries no value either.
                assertNull(readEntry.value());
            }

            assertEquals(entry.writeVersion(), readEntry.writeVersion());
            assertEquals(entry.nearXidVersion(), readEntry.nearXidVersion());
            assertEquals(entry.partitionCounter(), readEntry.partitionCounter());

            idx++;
        }
    }
}
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in the Apache Ignite project.
Class WalPageCompressionIntegrationTest, method doTestPageCompression.
/**
 * Starts one node with the current compression settings and a second one after switching compression
 * off, applies the same put/remove workload to a cache on each node and checks that the WAL pointer
 * obtained on the first node is less than the one obtained on the second node.
 *
 * @throws Exception If failed.
 */
@Override
protected void doTestPageCompression() throws Exception {
    // Ignite instance with compressed WAL page records.
    IgniteEx compressedNode = startGrid(0);

    // Settings below take effect for the node started next.
    compression = DiskPageCompression.DISABLED;
    compressionLevel = null;

    // Reference ignite instance with uncompressed WAL page records.
    IgniteEx plainNode = startGrid(1);

    compressedNode.cluster().active(true);
    plainNode.cluster().active(true);

    String testCacheName = "test";

    CacheConfiguration<Integer, TestVal> ccfg = new CacheConfiguration<>();

    ccfg.setName(testCacheName);
    ccfg.setBackups(0);
    ccfg.setAtomicityMode(ATOMIC);
    ccfg.setIndexedTypes(Integer.class, TestVal.class);

    IgniteCache<Integer, TestVal> compressedCache = compressedNode.getOrCreateCache(ccfg);
    IgniteCache<Integer, TestVal> plainCache = plainNode.getOrCreateCache(ccfg);

    int total = 20_000;

    // Fill both caches with identical data.
    for (int key = 0; key < total; key++) {
        assertTrue(compressedCache.putIfAbsent(key, new TestVal(key)));
        assertTrue(plainCache.putIfAbsent(key, new TestVal(key)));
    }

    // Remove every even key from both caches.
    for (int key = 0; key < total; key += 2) {
        assertEquals(new TestVal(key), compressedCache.getAndRemove(key));
        assertEquals(new TestVal(key), plainCache.getAndRemove(key));
    }

    // Write any WAL record to get current WAL pointers.
    WALPointer compressedPtr = compressedNode.context().cache().context().wal().log(new CheckpointRecord(null));
    WALPointer plainPtr = plainNode.context().cache().context().wal().log(new CheckpointRecord(null));

    log.info("Compressed WAL pointer: " + compressedPtr);
    log.info("Uncompressed WAL pointer: " + plainPtr);

    String failMsg = "Compressed WAL must be smaller than uncompressed [ptr0=" + compressedPtr + ", ptr1=" + plainPtr + ']';

    assertTrue(failMsg, compressedPtr.compareTo(plainPtr) < 0);
}
Use of org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord in the Apache Ignite project.
Class RecordDataV2Serializer, method readPlainRecord.
/**
 * {@inheritDoc}
 * <p>
 * Record types not listed in the switch below are delegated to the parent implementation.
 */
@Override
WALRecord readPlainRecord(RecordType type, ByteBufferBackedDataInput in, boolean encrypted, int recordSize) throws IOException, IgniteCheckedException {
    switch (type) {
        case PAGE_RECORD: {
            int grpId = in.readInt();
            long pageId = in.readLong();

            // The page payload is whatever remains of the record after the two identifiers.
            byte[] pageData = new byte[recordSize - 4 /* cacheId */ - 8 /* pageId */];

            in.readFully(pageData);

            return new PageSnapshot(new FullPageId(pageId, grpId), pageData, encrypted ? realPageSize : pageSize);
        }

        case CHECKPOINT_RECORD: {
            long msb = in.readLong();
            long lsb = in.readLong();

            boolean hasPtr = in.readByte() != 0;

            long ptrIdx = 0;
            int ptrOff = 0;
            int ptrLen = 0;

            // Pointer fields are present in the stream only when the flag byte is non-zero.
            if (hasPtr) {
                ptrIdx = in.readLong();
                ptrOff = in.readInt();
                ptrLen = in.readInt();
            }

            Map<Integer, CacheState> grpStates = readPartitionStates(in);

            boolean end = in.readByte() != 0;

            WALPointer cpMark = hasPtr ? new WALPointer(ptrIdx, ptrOff, ptrLen) : null;

            CheckpointRecord cpRec = new CheckpointRecord(new UUID(msb, lsb), cpMark, end);

            cpRec.cacheGroupStates(grpStates);

            return cpRec;
        }

        case DATA_RECORD:
        case DATA_RECORD_V2: {
            int entryCnt = in.readInt();
            long timeStamp = in.readLong();

            if (entryCnt == 1)
                return new DataRecord(readPlainDataEntry(in, type), timeStamp);

            List<DataEntry> entries = new ArrayList<>(entryCnt);

            for (int i = 0; i < entryCnt; i++)
                entries.add(readPlainDataEntry(in, type));

            return new DataRecord(entries, timeStamp);
        }

        case MVCC_DATA_RECORD: {
            int entryCnt = in.readInt();
            long timeStamp = in.readLong();

            List<DataEntry> entries = new ArrayList<>(entryCnt);

            for (int i = 0; i < entryCnt; i++)
                entries.add(readMvccDataEntry(in));

            return new MvccDataRecord(entries, timeStamp);
        }

        case ENCRYPTED_DATA_RECORD:
        case ENCRYPTED_DATA_RECORD_V2:
        case ENCRYPTED_DATA_RECORD_V3: {
            int entryCnt = in.readInt();
            long timeStamp = in.readLong();

            if (entryCnt == 1)
                return new DataRecord(readEncryptedDataEntry(in, type), timeStamp);

            List<DataEntry> entries = new ArrayList<>(entryCnt);

            for (int i = 0; i < entryCnt; i++)
                entries.add(readEncryptedDataEntry(in, type));

            return new DataRecord(entries, timeStamp);
        }

        case SNAPSHOT: {
            long snpId = in.readLong();
            byte full = in.readByte();

            return new SnapshotRecord(snpId, full == 1);
        }

        case EXCHANGE: {
            int typeIdx = in.readInt();
            short constId = in.readShort();
            long ts = in.readLong();

            return new ExchangeRecord(constId, ExchangeRecord.Type.values()[typeIdx], ts);
        }

        case TX_RECORD:
            return txRecordSerializer.readTx(in);

        case MVCC_TX_RECORD:
            return txRecordSerializer.readMvccTx(in);

        case ROLLBACK_TX_RECORD: {
            int grpId = in.readInt();
            int partId = in.readInt();
            long start = in.readLong();
            long range = in.readLong();

            return new RollbackRecord(grpId, partId, start, range);
        }

        case TRACKING_PAGE_REPAIR_DELTA: {
            int grpId = in.readInt();
            long pageId = in.readLong();

            return new TrackingPageRepairDeltaRecord(grpId, pageId);
        }

        default:
            return super.readPlainRecord(type, in, encrypted, recordSize);
    }
}
Aggregations