Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class IgniteWalRebalanceTest, method testSwitchHistoricalRebalanceToFullWhileIteratingOverWAL.
/**
 * Tests that the demander switches to full rebalance if the previously chosen supplier for a group has failed
 * to perform historical rebalance due to an unexpected error while iterating over the reserved WAL.
 *
 * @throws Exception If failed.
 */
@Test
public void testSwitchHistoricalRebalanceToFullWhileIteratingOverWAL() throws Exception {
    testSwitchHistoricalRebalanceToFull(supplier1 -> {
        try {
            // Corrupt a WAL record in order to fail historical rebalance from the supplier1 node.
            IgniteWriteAheadLogManager walMgr = supplier1.context().cache().context().wal();

            WALPointer ptr = walMgr.log(new DataRecord(new DataEntry(
                CU.cacheId("test-cache-1"),
                new KeyCacheObjectImpl(0, null, 0),
                null,
                GridCacheOperation.DELETE,
                new GridCacheVersion(0, 1, 1, 0),
                new GridCacheVersion(0, 1, 1, 0),
                0, 0, 0, DataEntry.EMPTY_FLAGS)));

            File walDir = U.field(walMgr, "walWorkDir");

            List<FileDescriptor> walFiles = new IgniteWalIteratorFactory().resolveWalFiles(
                new IgniteWalIteratorFactory.IteratorParametersBuilder().filesOrDirs(walDir));

            FileDescriptor lastWalFile = walFiles.get(walFiles.size() - 1);

            WalTestUtils.corruptWalSegmentFile(lastWalFile, ptr);

            IgniteCache<Integer, IndexedObject> c1 = supplier1.cache("test-cache-1");

            for (int i = 0; i < PARTS_CNT * 100; i++)
                c1.put(i, new IndexedObject(i + PARTS_CNT));
        }
        catch (IgniteCheckedException | IOException e) {
            throw new RuntimeException(e);
        }
    }, () -> true);
}
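The WALPointer pattern worth isolating from this test: logging any record returns a pointer to its exact position in the current work segment, and WalTestUtils.corruptWalSegmentFile can then break the segment at precisely that position. A minimal sketch of just that pattern, assuming an IgniteEx node (supplier1 above) and using only calls that appear in these examples; the marker record is illustrative:

// Sketch: corrupt the newest work segment at the position of a freshly logged record.
IgniteWriteAheadLogManager walMgr = node.context().cache().context().wal();

// log() returns the WALPointer of the record just appended.
WALPointer ptr = walMgr.log(new MetastoreDataRecord("marker", new byte[8]));

File walDir = U.field(walMgr, "walWorkDir");

List<FileDescriptor> walFiles = new IgniteWalIteratorFactory().resolveWalFiles(
    new IgniteWalIteratorFactory.IteratorParametersBuilder().filesOrDirs(walDir));

// The last descriptor is the segment currently being written; corrupting it at ptr
// makes any later iteration over the reserved WAL fail at that record.
WalTestUtils.corruptWalSegmentFile(walFiles.get(walFiles.size() - 1), ptr);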
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class IgniteWalIteratorSwitchSegmentTest, method checkSwitchReadingSegmentDuringIteration.
/**
 * @param serVer WAL serializer version.
 * @throws Exception If something failed.
 */
private void checkSwitchReadingSegmentDuringIteration(int serVer) throws Exception {
    String workDir = U.defaultWorkDirectory();

    T2<IgniteWriteAheadLogManager, RecordSerializer> initTup = initiate(serVer, workDir);

    IgniteWriteAheadLogManager walMgr = initTup.get1();
    RecordSerializer recordSerializer = initTup.get2();

    MetastoreDataRecord rec = new MetastoreDataRecord("0", new byte[100]);

    int recSize = recordSerializer.size(rec);

    // Add more records so that the WAL rolls over to the next segment.
    int recordsToWrite = SEGMENT_SIZE / recSize + 100;

    SegmentAware segmentAware = GridTestUtils.getFieldValue(walMgr, "segmentAware");

    // Guard against archiving before the iterator is created.
    assertTrue(segmentAware.lock(0));

    for (int i = 0; i < recordsToWrite; i++)
        walMgr.log(new MetastoreDataRecord(rec.key(), rec.value()));

    walMgr.flush(null, true);

    AtomicInteger actualRecords = new AtomicInteger(0);

    AtomicReference<String> startedSegmentPath = new AtomicReference<>();
    AtomicReference<String> finishedSegmentPath = new AtomicReference<>();

    CountDownLatch startedIterLatch = new CountDownLatch(1);
    CountDownLatch finishedArchivedLatch = new CountDownLatch(1);

    IgniteInternalFuture<?> fut = GridTestUtils.runAsync(() -> {
        // Check that segment switching works as expected and that all records are reachable.
        try (WALIterator it = walMgr.replay(null)) {
            Object handle = getFieldValueHierarchy(it, "currWalSegment");
            FileInput in = getFieldValueHierarchy(handle, "in");
            Object delegate = getFieldValueHierarchy(in.io(), "delegate");
            Channel ch = getFieldValueHierarchy(delegate, "ch");
            String path = getFieldValueHierarchy(ch, "path");

            startedSegmentPath.set(path);

            startedIterLatch.countDown();

            while (it.hasNext()) {
                IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

                WALRecord rec0 = tup.get2();

                if (rec0.type() == METASTORE_DATA_RECORD)
                    actualRecords.incrementAndGet();

                finishedArchivedLatch.await();
            }

            in = getFieldValueHierarchy(handle, "in");
            delegate = getFieldValueHierarchy(in.io(), "delegate");
            ch = getFieldValueHierarchy(delegate, "ch");
            path = getFieldValueHierarchy(ch, "path");

            finishedSegmentPath.set(path);
        }

        return null;
    });

    startedIterLatch.await();

    segmentAware.unlock(0);

    waitForCondition(() -> segmentAware.lastArchivedAbsoluteIndex() == 0, 5000);

    finishedArchivedLatch.countDown();

    fut.get();

    // Iteration should start in the work directory but finish in the archive directory.
    assertEquals(workDir + WORK_SUB_DIR + File.separator + "0000000000000000.wal", startedSegmentPath.get());
    assertEquals(workDir + ARCHIVE_SUB_DIR + File.separator + "0000000000000000.wal", finishedSegmentPath.get());

    Assert.assertEquals("Not all records read during iteration.", recordsToWrite, actualRecords.get());
}
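Stripped of the reflection-based path checks, the synchronization in this test reduces to a lock/iterate/unlock/await sequence. A condensed sketch, assuming the same statically imported test helpers (assertTrue, waitForCondition) and the walMgr and segmentAware objects set up as above:

// Pin segment 0 in the WAL work directory so the archiver cannot move it yet.
assertTrue(segmentAware.lock(0));

try (WALIterator it = walMgr.replay(null)) {
    assertTrue(it.hasNext()); // The iterator starts on the work-directory copy.

    // Allow archiving; once lastArchivedAbsoluteIndex() reports segment 0 archived,
    // the remaining records are served from the archive copy of the same segment.
    segmentAware.unlock(0);

    waitForCondition(() -> segmentAware.lastArchivedAbsoluteIndex() == 0, 5000);

    while (it.hasNext())
        it.next();
}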
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class IgniteWalIteratorSwitchSegmentTest, method checkInvariantSwitchSegment.
/**
 * @param serVer WAL serializer version.
 * @throws Exception If something failed.
 */
private void checkInvariantSwitchSegment(int serVer) throws Exception {
    String workDir = U.defaultWorkDirectory();

    T2<IgniteWriteAheadLogManager, RecordSerializer> initTup = initiate(serVer, workDir);

    IgniteWriteAheadLogManager walMgr = initTup.get1();
    RecordSerializer recordSerializer = initTup.get2();

    int switchSegmentRecordSize = recordSerializer.size(new SwitchSegmentRecord());

    log.info("switchSegmentRecordSize: " + switchSegmentRecordSize);

    int tailSize = 0;

    /* Initial record payload size. */
    int payloadSize = 1024;

    int recSize = 0;
    MetastoreDataRecord rec = null;

    /* Record type size. */
    int recordTypeSize = 1;

    /* Record pointer size. */
    int recordPointerSize = 8 + 4 + 4;

    int lowBound = recordTypeSize + recordPointerSize;
    int highBound = lowBound + /* CRC */ 4;

    int attempt = 1000;

    // Try to find how many records are needed for the specific tail size.
    while (true) {
        if (attempt < 0)
            throw new IgniteCheckedException("Cannot find any payload size for test, lowBound=" + lowBound + ", highBound=" + highBound);

        if (tailSize >= lowBound && tailSize < highBound)
            break;

        payloadSize++;

        byte[] payload = new byte[payloadSize];

        // Fake record for payload.
        rec = new MetastoreDataRecord("0", payload);

        recSize = recordSerializer.size(rec);

        tailSize = (SEGMENT_SIZE - HEADER_RECORD_SIZE) % recSize;

        attempt--;
    }

    Assert.assertNotNull(rec);

    int recordsToWrite = SEGMENT_SIZE / recSize;

    log.info("Records to write: " + recordsToWrite + ", tail size: " + (SEGMENT_SIZE - HEADER_RECORD_SIZE) % recSize);

    // Add more records so that the WAL rolls over to the next segment.
    recordsToWrite += 100;

    for (int i = 0; i < recordsToWrite; i++)
        walMgr.log(new MetastoreDataRecord(rec.key(), rec.value()));

    walMgr.flush(null, true);

    SegmentAware segmentAware = GridTestUtils.getFieldValue(walMgr, "segmentAware");

    // Wait for the archiver to move the segment to the WAL archive.
    waitForCondition(() -> segmentAware.lastArchivedAbsoluteIndex() == 0, 5_000);

    // Fill the tail with garbage to simulate leftover bytes after segment rotation in the WAL work directory.
    if (switchSegmentRecordSize > 1) {
        File seg = new File(workDir + ARCHIVE_SUB_DIR + "/0000000000000000.wal");

        FileIOFactory ioFactory = new RandomAccessFileIOFactory();

        FileIO seg0 = ioFactory.create(seg);

        byte[] bytes = new byte[tailSize];

        Random rnd = new Random();

        rnd.nextBytes(bytes);

        // Some record type.
        bytes[0] = (byte) (METASTORE_DATA_RECORD.ordinal() + 1);

        seg0.position((int) (seg0.size() - tailSize));
        seg0.write(bytes, 0, tailSize);
        seg0.force(true);
        seg0.close();
    }

    int expRecords = recordsToWrite;
    int actualRecords = 0;

    // Check that segment switching works as expected and that all records are reachable.
    try (WALIterator it = walMgr.replay(null)) {
        while (it.hasNext()) {
            IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

            WALRecord rec0 = tup.get2();

            if (rec0.type() == METASTORE_DATA_RECORD)
                actualRecords++;
        }
    }

    Assert.assertEquals("Not all records read during iteration.", expRecords, actualRecords);
}
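For reference, the bounds in the payload-size search above work out to a narrow window. One byte of record type plus a pointer serialized as 8 + 4 + 4 bytes gives lowBound = 17; adding the 4-byte CRC gives highBound = 21. (Reading the 16 pointer bytes as segment index, file offset, and record length is an assumption about the serialized layout, not something the test itself states.) The loop therefore grows the payload until 17 <= tailSize < 21, i.e. until the segment tail is long enough to look like the start of a record but too short to contain its CRC, which is the trickiest leftover the replay iterator must reject:

// Worked numbers for the search bounds (pointer layout is an assumption).
int recordTypeSize = 1;                            // Record type byte.
int recordPointerSize = 8 + 4 + 4;                 // 16 bytes.
int lowBound = recordTypeSize + recordPointerSize; // 17.
int highBound = lowBound + 4;                      // 21, adding the 4-byte CRC.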
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class IgnitePdsCheckpointSimulationWithRealCpDisabledTest, method testPageWalEntries.
/**
 * @throws Exception If failed.
 */
@Test
public void testPageWalEntries() throws Exception {
    IgniteEx ig = startGrid(0);

    ig.cluster().active(true);

    GridCacheSharedContext<Object, Object> sharedCtx = ig.context().cache().context();

    int cacheId = sharedCtx.cache().cache(CACHE_NAME).context().cacheId();

    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) sharedCtx.database();
    PageMemory pageMem = sharedCtx.database().dataRegion(null).pageMemory();
    IgniteWriteAheadLogManager wal = sharedCtx.wal();

    db.enableCheckpoints(false).get();

    int pageCnt = 100;

    List<FullPageId> pageIds = new ArrayList<>();

    for (int i = 0; i < pageCnt; i++) {
        db.checkpointReadLock();

        try {
            pageIds.add(new FullPageId(
                pageMem.allocatePage(cacheId, PageIdAllocator.INDEX_PARTITION, PageIdAllocator.FLAG_IDX), cacheId));
        }
        finally {
            db.checkpointReadUnlock();
        }
    }

    UUID cpId = UUID.randomUUID();

    WALPointer start = wal.log(new CheckpointRecord(cpId, null));

    wal.flush(start, false);

    ig.context().cache().context().database().checkpointReadLock();

    try {
        for (FullPageId pageId : pageIds)
            writePageData(pageId, pageMem);
    }
    finally {
        ig.context().cache().context().database().checkpointReadUnlock();
    }

    // Data will not be written to the page store.
    stopAllGrids();

    ig = startGrid(0);

    ig.cluster().active(true);

    sharedCtx = ig.context().cache().context();

    db = (GridCacheDatabaseSharedManager) sharedCtx.database();
    wal = sharedCtx.wal();

    db.enableCheckpoints(false);

    try (PartitionMetaStateRecordExcludeIterator it = new PartitionMetaStateRecordExcludeIterator(wal.replay(start))) {
        IgniteBiTuple<WALPointer, WALRecord> tup = it.next();

        assert tup.get2() instanceof CheckpointRecord : tup.get2();

        assertEquals(start, tup.get1());

        CheckpointRecord cpRec = (CheckpointRecord) tup.get2();

        assertEquals(cpId, cpRec.checkpointId());
        assertNull(cpRec.checkpointMark());
        assertFalse(cpRec.end());

        int idx = 0;

        while (idx < pageIds.size()) {
            tup = it.next();

            assert tup.get2() instanceof PageSnapshot : tup.get2().getClass();

            PageSnapshot snap = (PageSnapshot) tup.get2();

            // There are extra tracking pages, skip them.
            long trackingPageFor = TrackingPageIO.VERSIONS.latest().trackingPageFor(snap.fullPageId().pageId(), pageMem.pageSize());

            if (trackingPageFor == snap.fullPageId().pageId()) {
                tup = it.next();

                assertTrue(tup.get2() instanceof PageSnapshot);

                snap = (PageSnapshot) tup.get2();
            }

            assertEquals(pageIds.get(idx), snap.fullPageId());

            idx++;
        }
    }
}
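The WALPointer round trip at the core of this test is worth stating on its own: log() returns the pointer of the record just written, flush(ptr, false) makes the WAL durable up to that pointer, and replay(ptr) starts iteration exactly there. A minimal sketch using only calls from the test above:

// Sketch: log a checkpoint marker, force it to disk, and replay from its pointer.
UUID cpId = UUID.randomUUID();

WALPointer start = wal.log(new CheckpointRecord(cpId, null));

wal.flush(start, false);

try (WALIterator it = wal.replay(start)) {
    IgniteBiTuple<WALPointer, WALRecord> first = it.next();

    assertEquals(start, first.get1()); // Iteration begins at the logged record.
}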
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class IgnitePdsCheckpointSimulationWithRealCpDisabledTest, method runCheckpointing.
/**
 * @param ig Ignite instance.
 * @param mem Memory to use.
 * @param storeMgr Store manager.
 * @param wal Write-ahead log manager.
 * @param cacheId Cache ID.
 * @return Result map of random operations.
 * @throws Exception If failure occurred.
 */
private IgniteBiTuple<Map<FullPageId, Integer>, WALPointer> runCheckpointing(final IgniteEx ig, final PageMemoryImpl mem, final IgnitePageStoreManager storeMgr, final IgniteWriteAheadLogManager wal, final int cacheId) throws Exception {
    final ConcurrentMap<FullPageId, Integer> resMap = new ConcurrentHashMap<>();

    final FullPageId[] pages = new FullPageId[TOTAL_PAGES];

    Set<FullPageId> allocated = new HashSet<>();

    IgniteCacheDatabaseSharedManager db = ig.context().cache().context().database();

    PageIO pageIO = new DummyPageIO();

    for (int i = 0; i < TOTAL_PAGES; i++) {
        FullPageId fullId;

        db.checkpointReadLock();

        try {
            fullId = new FullPageId(mem.allocatePage(cacheId, 0, PageIdAllocator.FLAG_DATA), cacheId);

            initPage(mem, pageIO, fullId);
        }
        finally {
            db.checkpointReadUnlock();
        }

        resMap.put(fullId, -1);

        pages[i] = fullId;

        allocated.add(fullId);
    }

    final AtomicBoolean run = new AtomicBoolean(true);

    // Simulate transaction lock.
    final ReadWriteLock updLock = new ReentrantReadWriteLock();

    // Mark the start position.
    CheckpointRecord cpRec = new CheckpointRecord(null);

    WALPointer start = wal.log(cpRec);

    wal.flush(start, false);

    IgniteInternalFuture<Long> updFut = GridTestUtils.runMultiThreadedAsync(new Callable<Object>() {
        @Override public Object call() throws Exception {
            while (true) {
                FullPageId fullId = pages[ThreadLocalRandom.current().nextInt(TOTAL_PAGES)];

                updLock.readLock().lock();

                try {
                    if (!run.get())
                        return null;

                    ig.context().cache().context().database().checkpointReadLock();

                    try {
                        long page = mem.acquirePage(fullId.groupId(), fullId.pageId());

                        try {
                            long pageAddr = mem.writeLock(fullId.groupId(), fullId.pageId(), page);

                            PageIO.setPageId(pageAddr, fullId.pageId());

                            try {
                                int state = resMap.get(fullId);

                                if (state != -1) {
                                    if (VERBOSE)
                                        info("Verify page [fullId=" + fullId + ", state=" + state + ", buf=" + pageAddr + ", bhc=" + U.hexLong(System.identityHashCode(pageAddr)) + ", page=" + U.hexLong(System.identityHashCode(page)) + ']');

                                    for (int i = PageIO.COMMON_HEADER_END; i < mem.realPageSize(fullId.groupId()); i++) {
                                        assertEquals("Verify page failed [fullId=" + fullId + ", i=" + i + ", state=" + state + ", buf=" + pageAddr + ", bhc=" + U.hexLong(System.identityHashCode(pageAddr)) + ", page=" + U.hexLong(System.identityHashCode(page)) + ']', state & 0xFF, PageUtils.getByte(pageAddr, i) & 0xFF);
                                    }
                                }

                                state = (state + 1) & 0xFF;

                                if (VERBOSE)
                                    info("Write page [fullId=" + fullId + ", state=" + state + ", buf=" + pageAddr + ", bhc=" + U.hexLong(System.identityHashCode(pageAddr)) + ", page=" + U.hexLong(System.identityHashCode(page)) + ']');

                                for (int i = PageIO.COMMON_HEADER_END; i < mem.realPageSize(fullId.groupId()); i++)
                                    PageUtils.putByte(pageAddr, i, (byte) state);

                                resMap.put(fullId, state);
                            }
                            finally {
                                mem.writeUnlock(fullId.groupId(), fullId.pageId(), page, null, true);
                            }
                        }
                        finally {
                            mem.releasePage(fullId.groupId(), fullId.pageId(), page);
                        }
                    }
                    finally {
                        ig.context().cache().context().database().checkpointReadUnlock();
                    }
                }
                finally {
                    updLock.readLock().unlock();
                }
            }
        }
    }, 8, "update-thread");

    int checkpoints = 20;

    while (checkpoints > 0) {
        Map<FullPageId, Integer> snapshot = null;

        Collection<FullPageId> pageIds;

        updLock.writeLock().lock();

        try {
            snapshot = new HashMap<>(resMap);

            pageIds = mem.beginCheckpoint(new GridFinishedFuture());

            checkpoints--;

            if (checkpoints == 0)
                // No more writes should be done at this point.
                run.set(false);

            info("Acquired pages for checkpoint: " + pageIds.size());
        }
        finally {
            updLock.writeLock().unlock();
        }

        boolean ok = false;

        try {
            ByteBuffer tmpBuf = ByteBuffer.allocate(mem.pageSize());

            tmpBuf.order(ByteOrder.nativeOrder());

            long begin = System.currentTimeMillis();

            long cp = 0;
            long write = 0;

            for (FullPageId fullId : pageIds) {
                long cpStart = System.nanoTime();

                Integer tag;

                AtomicReference<Integer> tag0 = new AtomicReference<>();

                PageStoreWriter pageStoreWriter = (fullPageId, buf, tagx) -> {
                    tag0.set(tagx);
                };

                while (true) {
                    mem.checkpointWritePage(fullId, tmpBuf, pageStoreWriter, null);

                    tag = tag0.get();

                    if (tag != null && tag == PageMemoryImpl.TRY_AGAIN_TAG)
                        continue;

                    break;
                }

                if (tag == null)
                    continue;

                long cpEnd = System.nanoTime();

                cp += cpEnd - cpStart;

                Integer state = snapshot.get(fullId);

                if (allocated.contains(fullId) && state != -1) {
                    tmpBuf.rewind();

                    Integer first = null;

                    for (int i = PageIO.COMMON_HEADER_END; i < mem.realPageSize(fullId.groupId()); i++) {
                        int val = tmpBuf.get(i) & 0xFF;

                        if (first == null)
                            first = val;

                        // Avoid string concat.
                        if (first != val)
                            assertEquals("Corrupted buffer at position [pageId=" + fullId + ", pos=" + i + ']', (int) first, val);

                        // Avoid string concat.
                        if (state != val)
                            assertEquals("Invalid value at position [pageId=" + fullId + ", pos=" + i + ']', (int) state, val);
                    }
                }

                tmpBuf.rewind();

                long writeStart = System.nanoTime();

                storeMgr.write(cacheId, fullId.pageId(), tmpBuf, tag, true);

                long writeEnd = System.nanoTime();

                write += writeEnd - writeStart;

                tmpBuf.rewind();
            }

            long syncStart = System.currentTimeMillis();

            storeMgr.sync(cacheId, 0);

            long end = System.currentTimeMillis();

            info("Written pages in " + (end - begin) + "ms, copy took " + (cp / 1_000_000) + "ms, write took " + (write / 1_000_000) + "ms, sync took " + (end - syncStart) + "ms");

            ok = true;
        }
        finally {
            info("Finishing checkpoint...");

            mem.finishCheckpoint();

            info("Finished checkpoint");

            if (!ok) {
                info("Cancelling updates...");

                run.set(false);

                updFut.get();
            }
        }

        if (checkpoints != 0)
            Thread.sleep(2_000);
    }

    info("checkpoints=" + checkpoints + ", done=" + updFut.isDone());

    updFut.get();

    assertEquals(0, mem.activePagesCount());

    for (FullPageId fullId : pages) {
        long page = mem.acquirePage(fullId.groupId(), fullId.pageId());

        try {
            assertFalse("Page has a temp heap copy after the last checkpoint: [cacheId=" + fullId.groupId() + ", pageId=" + fullId.pageId() + "]", mem.hasTempCopy(page));

            assertFalse("Page is dirty after the last checkpoint: [cacheId=" + fullId.groupId() + ", pageId=" + fullId.pageId() + "]", mem.isDirty(fullId.groupId(), fullId.pageId(), page));
        }
        finally {
            mem.releasePage(fullId.groupId(), fullId.pageId(), page);
        }
    }

    return F.t((Map<FullPageId, Integer>) resMap, start);
}
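The retry loop around checkpointWritePage is the most reusable fragment of this method: the page copy is delivered through a PageStoreWriter callback, a PageMemoryImpl.TRY_AGAIN_TAG tag means the page was busy and the call must be repeated, and a tag that was never set means the page needed no write. A condensed sketch of that loop, extracted from the code above (mem, storeMgr, cacheId and fullId as in the surrounding method):

// Copy one page under checkpoint and write it to the page store.
ByteBuffer tmpBuf = ByteBuffer.allocate(mem.pageSize());
tmpBuf.order(ByteOrder.nativeOrder());

AtomicReference<Integer> tagRef = new AtomicReference<>();

PageStoreWriter pageStoreWriter = (fullPageId, buf, tag) -> tagRef.set(tag);

Integer tag;

do {
    mem.checkpointWritePage(fullId, tmpBuf, pageStoreWriter, null);

    tag = tagRef.get();
} while (tag != null && tag == PageMemoryImpl.TRY_AGAIN_TAG);

if (tag != null) { // A null tag means the callback never ran, so there is nothing to store.
    tmpBuf.rewind();

    storeMgr.write(cacheId, fullId.pageId(), tmpBuf, tag, true);
}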