use of org.apache.ignite.internal.pagemem.wal.WALIterator in project ignite by apache.
the class GridCacheDatabaseSharedManager method applyLastUpdates.
/**
 * Replays the write-ahead log from the start pointer of the given checkpoint status and re-applies
 * the logical records found there: data entries, partition state updates, metastore updates and
 * snapshot-id meta page deltas.
 *
 * @param status Last registered checkpoint status.
 * @param metastoreOnly If {@code true}, only metastore data records are applied; every other
 *      record type is skipped and partition states are not restored.
 * @throws IgniteCheckedException If failed to apply updates.
 * @throws StorageException If IO exception occurred while reading write-ahead log.
 */
private void applyLastUpdates(CheckpointStatus status, boolean metastoreOnly) throws IgniteCheckedException {
    if (log.isInfoEnabled()) {
        log.info("Applying lost cache updates since last checkpoint record [lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
    }

    final boolean restoreCaches = !metastoreOnly;

    if (restoreCaches)
        cctx.kernalContext().query().skipFieldLookup(true);

    long startTs = U.currentTimeMillis();
    int appliedCnt = 0;

    // In metastore-only mode no group is filtered out, since cache records are skipped anyway.
    Collection<Integer> skippedGrps = metastoreOnly ? Collections.emptySet() : initiallyWalDisabledGrps;

    try (WALIterator walIt = cctx.wal().replay(status.startPtr)) {
        // (group id, partition id) -> (partition state, update counter), applied after the replay.
        Map<T2<Integer, Integer>, T2<Integer, Long>> collectedStates = new HashMap<>();

        while (walIt.hasNextX()) {
            WALRecord rec = walIt.nextX().get2();

            switch (rec.type()) {
                case DATA_RECORD: {
                    if (metastoreOnly)
                        break;

                    for (DataEntry entry : ((DataRecord) rec).writeEntries()) {
                        int cacheId = entry.cacheId();
                        int grpId = cctx.cache().cacheDescriptor(cacheId).groupId();

                        if (skippedGrps.contains(grpId))
                            continue;

                        GridCacheContext target = cctx.cacheContext(cacheId);

                        applyUpdate(target, entry);

                        appliedCnt++;
                    }

                    break;
                }

                case PART_META_UPDATE_STATE: {
                    if (metastoreOnly)
                        break;

                    PartitionMetaStateRecord stateRec = (PartitionMetaStateRecord) rec;

                    if (!skippedGrps.contains(stateRec.groupId())) {
                        T2<Integer, Integer> partKey = new T2<>(stateRec.groupId(), stateRec.partitionId());
                        T2<Integer, Long> partVal = new T2<>((int) stateRec.state(), stateRec.updateCounter());

                        collectedStates.put(partKey, partVal);
                    }

                    break;
                }

                case METASTORE_DATA_RECORD: {
                    // Metastore updates are applied in both modes.
                    MetastoreDataRecord metaRec = (MetastoreDataRecord) rec;

                    metaStorage.applyUpdate(metaRec.key(), metaRec.value());

                    break;
                }

                case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID: {
                    if (metastoreOnly)
                        break;

                    PageDeltaRecord delta = (PageDeltaRecord) rec;

                    int deltaGrpId = delta.groupId();
                    long deltaPageId = delta.pageId();

                    PageMemoryEx mem = getPageMemoryForCacheGroup(deltaGrpId);

                    long pagePtr = mem.acquirePage(deltaGrpId, deltaPageId, true);

                    try {
                        long pageAddr = mem.writeLock(deltaGrpId, deltaPageId, pagePtr, true);

                        try {
                            delta.applyDelta(mem, pageAddr);
                        }
                        finally {
                            mem.writeUnlock(deltaGrpId, deltaPageId, pagePtr, null, true, true);
                        }
                    }
                    finally {
                        mem.releasePage(deltaGrpId, deltaPageId, pagePtr);
                    }

                    break;
                }

                default:
                    // Other record types are not handled by this method.
            }
        }

        if (restoreCaches)
            restorePartitionState(collectedStates, skippedGrps);
    }
    finally {
        // Undo the flag set before the replay, whether or not the replay succeeded.
        if (restoreCaches)
            cctx.kernalContext().query().skipFieldLookup(false);
    }

    if (log.isInfoEnabled()) {
        log.info("Finished applying WAL changes [updatesApplied=" + appliedCnt + ", time=" + (U.currentTimeMillis() - startTs) + "ms]");
    }
}
use of org.apache.ignite.internal.pagemem.wal.WALIterator in project ignite by apache.
the class GridCacheDatabaseSharedManager method restoreMemory.
/**
 * Replays the write-ahead log from the end pointer of the given checkpoint status and applies page
 * snapshots, page delta records and partition destroy records to page memory.
 * <p>
 * Page snapshots and deltas are applied only while the {@code apply} flag is set: it starts as
 * {@code status.needRestoreMemory()} and is cleared once the checkpoint record whose id equals
 * {@code status.cpStartId} is read. Partition destroy records are processed regardless of that flag.
 *
 * @param status Checkpoint status.
 * @param storeOnly If {@code True} restores Metastorage only.
 * @param storePageMem Page memory used for pages of the metastorage group. Must not be {@code null}
 *      when {@code storeOnly} is {@code true}.
 * @return {@code null} if {@code storeOnly} is {@code true} or no WAL record was read, otherwise the
 *      pointer following the last read record.
 * @throws IgniteCheckedException If memory restore was required but the expected checkpoint record
 *      was not found in WAL, or if WAL iteration or page operations fail.
 */
private WALPointer restoreMemory(CheckpointStatus status, boolean storeOnly, PageMemoryEx storePageMem) throws IgniteCheckedException {
    assert !storeOnly || storePageMem != null;

    if (log.isInfoEnabled())
        log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');

    boolean apply = status.needRestoreMemory();

    if (apply) {
        U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");

        cctx.pageStore().beginRecover();
    }
    else
        cctx.wal().allowCompressionUntil(status.startPtr);

    long start = U.currentTimeMillis();
    int applied = 0;
    WALPointer lastRead = null;

    // In metastorage-only mode no group is filtered out by this set; filtering is done by group id below.
    Collection<Integer> ignoreGrps = storeOnly ? Collections.emptySet() : initiallyWalDisabledGrps;

    try (WALIterator it = cctx.wal().replay(status.endPtr)) {
        while (it.hasNextX()) {
            IgniteBiTuple<WALPointer, WALRecord> tup = it.nextX();
            WALRecord rec = tup.get2();

            lastRead = tup.get1();

            switch (rec.type()) {
                case CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord) rec;

                    // We roll memory up until we find a checkpoint start record registered in the status.
                    if (F.eq(cpRec.checkpointId(), status.cpStartId)) {
                        // Guarded like the other INFO messages in this method.
                        if (log.isInfoEnabled())
                            log.info("Found last checkpoint marker [cpId=" + cpRec.checkpointId() + ", pos=" + tup.get1() + ']');

                        apply = false;
                    }
                    else if (!F.eq(cpRec.checkpointId(), status.cpEndId))
                        U.warn(log, "Found unexpected checkpoint marker, skipping [cpId=" + cpRec.checkpointId() + ", expCpId=" + status.cpStartId + ", pos=" + tup.get1() + ']');

                    break;

                case PAGE_RECORD:
                    if (apply) {
                        PageSnapshot pageRec = (PageSnapshot) rec;

                        // Here we do not require tag check because we may be applying memory changes after
                        // several repetitive restarts and the same pages may have changed several times.
                        int grpId = pageRec.fullPageId().groupId();

                        // In metastorage-only mode pages of all other groups are skipped.
                        if (storeOnly && grpId != METASTORAGE_CACHE_ID)
                            continue;

                        if (!ignoreGrps.contains(grpId)) {
                            long pageId = pageRec.fullPageId().pageId();

                            PageMemoryEx pageMem = grpId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(grpId);

                            long page = pageMem.acquirePage(grpId, pageId, true);

                            try {
                                long pageAddr = pageMem.writeLock(grpId, pageId, page);

                                try {
                                    // Overwrite the page with the snapshot bytes starting at offset 0.
                                    PageUtils.putBytes(pageAddr, 0, pageRec.pageData());
                                }
                                finally {
                                    pageMem.writeUnlock(grpId, pageId, page, null, true, true);
                                }
                            }
                            finally {
                                pageMem.releasePage(grpId, pageId, page);
                            }

                            applied++;
                        }
                    }

                    break;

                case PARTITION_DESTROY:
                    PartitionDestroyRecord destroyRec = (PartitionDestroyRecord) rec;

                    final int gId = destroyRec.groupId();

                    if (storeOnly && gId != METASTORAGE_CACHE_ID)
                        continue;

                    if (!ignoreGrps.contains(gId)) {
                        final int pId = destroyRec.partitionId();

                        PageMemoryEx pageMem = gId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(gId);

                        // Clear the pages of the destroyed partition and wait for completion.
                        pageMem.clearAsync((grpId, pageId) -> grpId == gId && PageIdUtils.partId(pageId) == pId, true).get();
                    }

                    break;

                default:
                    if (apply && rec instanceof PageDeltaRecord) {
                        PageDeltaRecord r = (PageDeltaRecord) rec;

                        int grpId = r.groupId();

                        if (storeOnly && grpId != METASTORAGE_CACHE_ID)
                            continue;

                        if (!ignoreGrps.contains(grpId)) {
                            long pageId = r.pageId();

                            PageMemoryEx pageMem = grpId == METASTORAGE_CACHE_ID ? storePageMem : getPageMemoryForCacheGroup(grpId);

                            // Here we do not require tag check because we may be applying memory changes after
                            // several repetitive restarts and the same pages may have changed several times.
                            long page = pageMem.acquirePage(grpId, pageId, true);

                            try {
                                long pageAddr = pageMem.writeLock(grpId, pageId, page);

                                try {
                                    r.applyDelta(pageMem, pageAddr);
                                }
                                finally {
                                    pageMem.writeUnlock(grpId, pageId, page, null, true, true);
                                }
                            }
                            finally {
                                pageMem.releasePage(grpId, pageId, page);
                            }

                            applied++;
                        }
                    }
            }
        }
    }

    if (storeOnly)
        return null;

    if (status.needRestoreMemory()) {
        // The flag is still set only if the expected checkpoint record was never read.
        if (apply)
            throw new IgniteCheckedException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastRead + "]");

        if (log.isInfoEnabled())
            log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + "ms]");

        if (applied > 0)
            finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr);
    }

    checkpointHist.loadHistory(cpDir);

    return lastRead == null ? null : lastRead.next();
}
use of org.apache.ignite.internal.pagemem.wal.WALIterator in project ignite by apache.
the class IgniteWalRecoveryTest method testTxRecordsConsistency.
/**
 * Test that all DataRecord WAL records are within transaction boundaries - PREPARED and COMMITTED markers.
 * <p>
 * Runs a series of randomly committed or rolled back transactions with checkpoints disabled, then
 * replays the WAL from a marker record and verifies that every data entry belongs to a transaction
 * for which a PREPARED record was seen and not yet finished.
 *
 * @throws Exception If any fail.
 */
public void testTxRecordsConsistency() throws Exception {
    System.setProperty(IgniteSystemProperties.IGNITE_WAL_LOG_TX_RECORDS, "true");

    try {
        // Grids are started inside the try block so that the system property is cleared and the
        // grids are stopped even if startup or activation fails.
        IgniteEx ignite = (IgniteEx) startGrids(3);

        ignite.active(true);

        final String cacheName = "transactional";

        CacheConfiguration<Object, Object> cacheConfiguration = new CacheConfiguration<>(cacheName)
            .setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL)
            .setAffinity(new RendezvousAffinityFunction(false, 32))
            .setCacheMode(CacheMode.PARTITIONED)
            .setRebalanceMode(CacheRebalanceMode.SYNC)
            .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC)
            .setBackups(0);

        ignite.createCache(cacheConfiguration);

        IgniteCache<Object, Object> cache = ignite.cache(cacheName);

        GridCacheSharedContext<Object, Object> sharedCtx = ignite.context().cache().context();

        GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) sharedCtx.database();

        db.waitForCheckpoint("test");
        db.enableCheckpoints(false).get();

        // Log something to know where to start.
        WALPointer startPtr = sharedCtx.wal().log(new MemoryRecoveryRecord(U.currentTimeMillis()));

        final int transactions = 100;
        final int operationsPerTransaction = 40;

        Random random = new Random();

        for (int t = 1; t <= transactions; t++) {
            // try-with-resources closes the transaction even if one of the puts throws.
            try (Transaction tx = ignite.transactions().txStart(TransactionConcurrency.OPTIMISTIC, TransactionIsolation.READ_COMMITTED)) {
                for (int op = 0; op < operationsPerTransaction; op++) {
                    int key = random.nextInt(1000) + 1;

                    Object value;

                    if (random.nextBoolean())
                        value = randomString(random) + key;
                    else
                        value = new BigObject(key);

                    cache.put(key, value);
                }

                if (random.nextBoolean())
                    tx.commit();
                else
                    tx.rollback();
            }

            if (t % 50 == 0)
                log.info("Finished transaction " + t);
        }

        Set<GridCacheVersion> activeTransactions = new HashSet<>();

        // Check that all DataRecords are within PREPARED and COMMITTED tx records.
        // JUnit fail() is used instead of the assert keyword so the checks run even without -ea.
        try (WALIterator it = sharedCtx.wal().replay(startPtr)) {
            while (it.hasNext()) {
                IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

                WALRecord rec = tup.get2();

                if (rec instanceof TxRecord) {
                    TxRecord txRecord = (TxRecord) rec;
                    GridCacheVersion txId = txRecord.nearXidVersion();

                    switch (txRecord.state()) {
                        case PREPARED:
                            // add() returns false when the id is already tracked.
                            if (!activeTransactions.add(txId))
                                fail("Transaction is already present " + txRecord);

                            break;

                        case COMMITTED:
                            // remove() returns false when no PREPARED record was seen for the id.
                            if (!activeTransactions.remove(txId))
                                fail("No PREPARE marker for transaction " + txRecord);

                            break;

                        case ROLLED_BACK:
                            activeTransactions.remove(txId);

                            break;

                        default:
                            throw new IllegalStateException("Unknown Tx state of record " + txRecord);
                    }
                }
                else if (rec instanceof DataRecord) {
                    DataRecord dataRecord = (DataRecord) rec;

                    for (DataEntry entry : dataRecord.writeEntries()) {
                        GridCacheVersion txId = entry.nearXidVersion();

                        if (!activeTransactions.contains(txId))
                            fail("No transaction for entry " + entry);
                    }
                }
            }
        }
    }
    finally {
        System.clearProperty(IgniteSystemProperties.IGNITE_WAL_LOG_TX_RECORDS);

        stopAllGrids();
    }
}
use of org.apache.ignite.internal.pagemem.wal.WALIterator in project ignite by apache.
the class IgniteWalRecoveryTest method testApplyDeltaRecords.
/**
 * Checks that page delta records read from WAL can be applied on top of page snapshots.
 * <p>
 * After a checkpoint, checkpoints are disabled and a marker record is logged. The test then performs
 * puts and removes, replays the WAL from the marker while rebuilding every touched page in a heap
 * buffer (snapshot bytes plus subsequent deltas), and finally compares each rebuilt page byte by byte
 * with the content of the page in page memory. Tracking pages are excluded from the comparison.
 *
 * @throws Exception if failed.
 */
public void testApplyDeltaRecords() throws Exception {
    try {
        IgniteEx ignite0 = (IgniteEx) startGrid("node0");

        ignite0.active(true);

        IgniteCache<Object, Object> cache0 = ignite0.cache(cacheName);

        for (int i = 0; i < 1000; i++)
            cache0.put(i, new IndexedObject(i));

        GridCacheSharedContext<Object, Object> sharedCtx = ignite0.context().cache().context();

        GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) sharedCtx.database();

        db.waitForCheckpoint("test");
        db.enableCheckpoints(false).get();

        // Log something to know where to start.
        WALPointer ptr = sharedCtx.wal().log(new MemoryRecoveryRecord(U.currentTimeMillis()));

        info("Replay marker: " + ptr);

        for (int i = 1000; i < 5000; i++)
            cache0.put(i, new IndexedObject(i));

        info("Done puts...");

        for (int i = 2_000; i < 3_000; i++)
            cache0.remove(i);

        info("Done removes...");

        for (int i = 5000; i < 6000; i++)
            cache0.put(i, new IndexedObject(i));

        info("Done puts...");

        Map<FullPageId, byte[]> rolledPages = new HashMap<>();

        int pageSize = sharedCtx.database().pageSize();

        // Looked up once; used both for applying deltas and for the final comparison.
        PageMemoryEx pageMem = (PageMemoryEx) db.dataRegion(null).pageMemory();

        // Scratch off-heap buffer deltas are applied to; byte order is set once up front.
        ByteBuffer buf = ByteBuffer.allocateDirect(pageSize);

        buf.order(ByteOrder.nativeOrder());

        // Now check that deltas can be correctly applied.
        try (WALIterator it = sharedCtx.wal().replay(ptr)) {
            while (it.hasNext()) {
                IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

                WALRecord rec = tup.get2();

                if (rec instanceof PageSnapshot) {
                    PageSnapshot page = (PageSnapshot) rec;

                    rolledPages.put(page.fullPageId(), page.pageData());
                }
                else if (rec instanceof PageDeltaRecord) {
                    PageDeltaRecord delta = (PageDeltaRecord) rec;

                    FullPageId fullId = new FullPageId(delta.pageId(), delta.groupId());

                    // A delta without a preceding snapshot is applied to a zero-filled page.
                    byte[] pageData = rolledPages.computeIfAbsent(fullId, id -> new byte[pageSize]);

                    // Copy the page into the scratch buffer, apply the delta in place, copy it back.
                    buf.position(0);
                    buf.put(pageData);
                    buf.position(0);

                    delta.applyDelta(pageMem, GridUnsafe.bufferAddress(buf));

                    buf.position(0);
                    buf.get(pageData);
                }
            }
        }

        info("Done apply...");

        for (Map.Entry<FullPageId, byte[]> entry : rolledPages.entrySet()) {
            FullPageId fullId = entry.getKey();

            db.checkpointReadLock();

            try {
                long page = pageMem.acquirePage(fullId.groupId(), fullId.pageId(), true);

                try {
                    long bufPtr = pageMem.writeLock(fullId.groupId(), fullId.pageId(), page, true);

                    try {
                        // The check does not depend on the byte position, so it is done once per page.
                        boolean trackingPage = fullId.pageId() ==
                            TrackingPageIO.VERSIONS.latest().trackingPageFor(fullId.pageId(), db.pageSize());

                        // Skip tracking pages.
                        if (!trackingPage) {
                            byte[] data = entry.getValue();

                            for (int i = 0; i < data.length; i++) {
                                byte actual = PageUtils.getByte(bufPtr, i);

                                // Build the failure message only when bytes actually differ.
                                if (actual != data[i])
                                    assertEquals("page=" + fullId + ", pos=" + i, actual, data[i]);
                            }
                        }
                    }
                    finally {
                        pageMem.writeUnlock(fullId.groupId(), fullId.pageId(), page, null, false, true);
                    }
                }
                finally {
                    pageMem.releasePage(fullId.groupId(), fullId.pageId(), page);
                }
            }
            finally {
                db.checkpointReadUnlock();
            }
        }

        ignite0.close();
    }
    finally {
        stopAllGrids();
    }
}
use of org.apache.ignite.internal.pagemem.wal.WALIterator in project ignite by apache.
the class IgniteWalReaderTest method iterateAndCount.
/**
 * Walks the given WAL iterator to its end, counting every record, and closes the iterator afterwards.
 * <p>
 * When {@code touchEntries} is set, the key and value of each entry of every data record are read.
 * When the {@code dumpRecords} flag is set, entries and records are additionally printed to the log.
 *
 * @param walIter iterator to count, will be closed.
 * @param touchEntries access data within entries.
 * @return count of records.
 * @throws IgniteCheckedException if failed to iterate.
 */
private int iterateAndCount(WALIterator walIter, boolean touchEntries) throws IgniteCheckedException {
    int total = 0;

    try (WALIterator records = walIter) {
        for (; records.hasNextX(); total++) {
            final WALRecord current = records.nextX().get2();

            final boolean inspect = touchEntries && current.type() == WALRecord.RecordType.DATA_RECORD;

            if (inspect) {
                for (DataEntry dataEntry : ((DataRecord) current).writeEntries()) {
                    // Key and value are read even when nothing is logged.
                    final KeyCacheObject entryKey = dataEntry.key();
                    final CacheObject entryVal = dataEntry.value();

                    if (dumpRecords)
                        log.info("Op: " + dataEntry.op() + ", Key: " + entryKey + ", Value: " + entryVal);
                }
            }

            if (dumpRecords)
                log.info("Record: " + current);
        }
    }

    return total;
}
Aggregations