use of org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager in project ignite by apache.
the class CheckpointBufferDeadlockTest, method runDeadlockScenario.
/**
*/
private void runDeadlockScenario() throws Exception {
LogListener lsnr = LogListener.matches(s -> s.contains("AssertionError")).build();
log.registerListener(lsnr);
IgniteEx ig = startGrid(0);
ig.cluster().active(true);
GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) ig.context().cache().context().database();
FilePageStoreManager pageStoreMgr = (FilePageStoreManager) ig.context().cache().context().pageStore();
final String cacheName = "single-part";
CacheConfiguration<Object, Object> cacheCfg = new CacheConfiguration<>().setName(cacheName).setAffinity(new RendezvousAffinityFunction(false, 1));
IgniteCache<Object, Object> singlePartCache = ig.getOrCreateCache(cacheCfg);
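// Disable checkpoints while the cache is populated so that all written pages stay dirty in the data region.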
db.enableCheckpoints(false).get();
Thread.sleep(1_000);
try (IgniteDataStreamer<Object, Object> streamer = ig.dataStreamer(singlePartCache.getName())) {
int entries = MAX_SIZE / ENTRY_BYTE_CHUNK_SIZE / 4;
for (int i = 0; i < entries; i++) streamer.addData(i, new byte[ENTRY_BYTE_CHUNK_SIZE]);
streamer.flush();
}
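// Re-enable checkpoints with artificially slowed checkpoint page writes so that the checkpoint buffer fills up and write throttling engages in the loader threads.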
slowCheckpointEnabled.set(true);
log.info(">>> Slow checkpoints enabled");
db.enableCheckpoints(true).get();
AtomicBoolean fail = new AtomicBoolean(false);
IgniteInternalFuture<Long> fut = GridTestUtils.runMultiThreadedAsync(new Runnable() {
@Override
public void run() {
int loops = 0;
while (!stop.get()) {
if (loops % 10 == 0 && loops > 0 && loops < 500 || loops % 500 == 0 && loops >= 500)
log.info("Successfully completed " + loops + " loops");
db.checkpointReadLock();
try {
Set<FullPageId> pickedPagesSet = new HashSet<>();
PageStore store = pageStoreMgr.getStore(CU.cacheId(cacheName), 0);
int pages = store.pages();
DataRegion region = db.dataRegion(DataStorageConfiguration.DFLT_DATA_REG_DEFAULT_NAME);
PageMemoryImpl pageMem = (PageMemoryImpl) region.pageMemory();
while (pickedPagesSet.size() < PAGES_TOUCHED_UNDER_CP_LOCK) {
int pageIdx = ThreadLocalRandom.current().nextInt(PAGES_TOUCHED_UNDER_CP_LOCK, pages - PAGES_TOUCHED_UNDER_CP_LOCK);
long pageId = PageIdUtils.pageId(0, PageIdAllocator.FLAG_DATA, pageIdx);
long page = pageMem.acquirePage(CU.cacheId(cacheName), pageId);
try {
// We do not know the correct flag (FLAG_DATA or FLAG_AUX). Skip the page if we guessed wrong.
if (pageId != PageIO.getPageId(page + PageMemoryImpl.PAGE_OVERHEAD))
continue;
} finally {
pageMem.releasePage(CU.cacheId(cacheName), pageId, page);
}
pickedPagesSet.add(new FullPageId(pageId, CU.cacheId(cacheName)));
}
List<FullPageId> pickedPages = new ArrayList<>(pickedPagesSet);
assertEquals(PAGES_TOUCHED_UNDER_CP_LOCK, pickedPages.size());
// Sort to avoid deadlocks on pages rw-locks.
pickedPages.sort(new Comparator<FullPageId>() {
@Override
public int compare(FullPageId o1, FullPageId o2) {
int cmp = Long.compare(o1.groupId(), o2.groupId());
if (cmp != 0)
return cmp;
return Long.compare(o1.effectivePageId(), o2.effectivePageId());
}
});
List<Long> readLockedPages = new ArrayList<>();
// Read lock many pages at once intentionally.
for (int i = 0; i < PAGES_TOUCHED_UNDER_CP_LOCK / 2; i++) {
FullPageId fpid = pickedPages.get(i);
long page = pageMem.acquirePage(fpid.groupId(), fpid.pageId());
long abs = pageMem.readLock(fpid.groupId(), fpid.pageId(), page);
assertFalse(fpid.toString(), abs == 0);
readLockedPages.add(page);
}
// Emulate writes to trigger throttling.
for (int i = PAGES_TOUCHED_UNDER_CP_LOCK / 2; i < PAGES_TOUCHED_UNDER_CP_LOCK && !stop.get(); i++) {
FullPageId fpid = pickedPages.get(i);
long page = pageMem.acquirePage(fpid.groupId(), fpid.pageId());
long abs = pageMem.writeLock(fpid.groupId(), fpid.pageId(), page);
assertFalse(fpid.toString(), abs == 0);
pageMem.writeUnlock(fpid.groupId(), fpid.pageId(), page, null, true);
pageMem.releasePage(fpid.groupId(), fpid.pageId(), page);
}
for (int i = 0; i < PAGES_TOUCHED_UNDER_CP_LOCK / 2; i++) {
FullPageId fpid = pickedPages.get(i);
pageMem.readUnlock(fpid.groupId(), fpid.pageId(), readLockedPages.get(i));
pageMem.releasePage(fpid.groupId(), fpid.pageId(), readLockedPages.get(i));
}
} catch (Throwable e) {
log.error("Error in loader thread", e);
fail.set(true);
} finally {
db.checkpointReadUnlock();
}
loops++;
}
}
}, 10, "load-runner");
// Wait for throttling to start.
Thread.sleep(10_000);
slowCheckpointEnabled.set(false);
log.info(">>> Slow checkpoints disabled");
assertFalse(fail.get());
// Previous checkpoint should eventually finish.
forceCheckpoint();
stop.set(true);
fut.get();
db.enableCheckpoints(true).get();
// Check that there is no problem with pinned pages.
ig.destroyCache(cacheName);
assertFalse(lsnr.check());
log.unregisterListener(lsnr);
}
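The core pattern the loader threads above rely on is holding the database manager's checkpoint read lock around all page-memory work, so that a concurrent checkpoint cannot begin while pages are being touched. A minimal stand-alone sketch of that pattern (not part of the test above; `ignite` is assumed to be an already started and activated IgniteEx):
private void doPageWorkUnderCheckpointLock(IgniteEx ignite) {
    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) ignite.context().cache().context().database();
    // Block checkpoint marking while pages are being touched.
    db.checkpointReadLock();
    try {
        // Acquire, lock, modify and release pages here, as the loader threads above do.
    }
    finally {
        // Always release the lock, otherwise the next checkpoint can never start.
        db.checkpointReadUnlock();
    }
}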
use of org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager in project ignite by apache.
the class IgniteSequentialNodeCrashRecoveryTest, method disableCheckpoints.
/**
*/
private void disableCheckpoints(IgniteEx g) throws Exception {
GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) g.context().cache().context().database();
dbMgr.enableCheckpoints(false).get();
}
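For context, a hedged sketch of how such a helper is typically combined with a node stop so that recovery on restart has to go through the WAL. This is not the actual IgniteSequentialNodeCrashRecoveryTest scenario; the cache name and entry count are illustrative only:
private void updateAndStopWithoutCheckpoint() throws Exception {
    IgniteEx g = startGrid(0);
    g.cluster().state(ClusterState.ACTIVE);
    // From this point on, updates are persisted only in the WAL, not in the page store.
    disableCheckpoints(g);
    IgniteCache<Integer, Integer> cache = g.getOrCreateCache("recovery-cache");
    for (int i = 0; i < 1_000; i++)
        cache.put(i, i);
    // Stop the node; with checkpoints disabled, restart recovery is expected to replay the WAL.
    stopGrid(0, true);
}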
use of org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager in project ignite by apache.
the class IgniteWalRebalanceTest, method testRebalanceReassignAndOwnPartitions.
/**
* Tests that owning partitions (triggered by a rebalance future) cannot be mapped to a new rebalance future
* that was created by RebalanceReassignExchangeTask.
*
* @throws Exception If failed.
*/
@Test
public void testRebalanceReassignAndOwnPartitions() throws Exception {
backups = 3;
IgniteEx supplier1 = startGrid(0);
IgniteEx supplier2 = startGrid(1);
IgniteEx demander = startGrid(2);
supplier1.cluster().state(ACTIVE);
String cacheName1 = "test-cache-1";
String cacheName2 = "test-cache-2";
IgniteCache<Integer, IndexedObject> c1 = supplier1.getOrCreateCache(new CacheConfiguration<Integer, IndexedObject>(cacheName1).setBackups(backups).setAffinity(new RendezvousAffinityFunction(false, PARTS_CNT)).setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC).setRebalanceOrder(10));
IgniteCache<Integer, IndexedObject> c2 = supplier1.getOrCreateCache(new CacheConfiguration<Integer, IndexedObject>(cacheName2).setBackups(backups).setAffinity(new RendezvousAffinityFunction(false, PARTS_CNT)).setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC).setRebalanceOrder(20));
// Fill initial data.
final int entryCnt = PARTS_CNT * 200;
final int preloadEntryCnt = PARTS_CNT * 400;
int val = 0;
for (int k = 0; k < preloadEntryCnt; k++) {
c1.put(k, new IndexedObject(val++));
c2.put(k, new IndexedObject(val++));
}
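// Persist the preloaded data and stop the demander so that it falls behind and has to rebalance when restarted.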
forceCheckpoint();
stopGrid(2);
// This is an easy way to emulate missing partitions on the first rebalance.
for (int i = 0; i < entryCnt; i++) c1.put(i, new IndexedObject(val++));
// Full rebalance for cacheName2.
for (int i = 0; i < preloadEntryCnt; i++) c2.put(i, new IndexedObject(val++));
// Delay rebalance process for specified groups.
blockMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
return msg0.groupId() == CU.cacheId(cacheName1) || msg0.groupId() == CU.cacheId(cacheName2);
}
return false;
};
// Emulate missing partitions and trigger RebalanceReassignExchangeTask, which should trigger a new rebalance.
FailingIOFactory ioFactory = injectFailingIOFactory(supplier1);
demander = startGrid(2);
TestRecordingCommunicationSpi demanderSpi = TestRecordingCommunicationSpi.spi(grid(2));
// Wait until the demander starts rebalancing.
demanderSpi.waitForBlocked();
// A client node needs to be started in order to block RebalanceReassignExchangeTask (without changing the affinity)
// until cacheName2 triggers a checkpoint after rebalancing.
CountDownLatch blockClientJoin = new CountDownLatch(1);
CountDownLatch unblockClientJoin = new CountDownLatch(1);
demander.context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() {
@Override
public void onInitBeforeTopologyLock(GridDhtPartitionsExchangeFuture fut) {
blockClientJoin.countDown();
try {
if (!unblockClientJoin.await(getTestTimeout(), MILLISECONDS))
throw new IgniteException("Failed to wait for client node joinning the cluster.");
} catch (InterruptedException e) {
throw new IgniteException("Unexpected exception.", e);
}
}
});
startClientGrid(4);
// Wait for a checkpoint after rebalancing cacheName2.
CountDownLatch blockCheckpoint = new CountDownLatch(1);
CountDownLatch unblockCheckpoint = new CountDownLatch(1);
((GridCacheDatabaseSharedManager) demander.context().cache().context().database()).addCheckpointListener(new CheckpointListener() {
/**
* {@inheritDoc}
*/
@Override
public void onCheckpointBegin(Context ctx) throws IgniteCheckedException {
if (!ctx.progress().reason().contains(String.valueOf(CU.cacheId(cacheName2))))
return;
blockCheckpoint.countDown();
try {
if (!unblockCheckpoint.await(getTestTimeout(), MILLISECONDS))
throw new IgniteCheckedException("Failed to wait for unblocking checkpointer.");
} catch (InterruptedException e) {
throw new IgniteCheckedException("Unexpected exception", e);
}
}
/**
* {@inheritDoc}
*/
@Override
public void beforeCheckpointBegin(Context ctx) throws IgniteCheckedException {
}
/**
* {@inheritDoc}
*/
@Override
public void onMarkCheckpointBegin(Context ctx) throws IgniteCheckedException {
}
});
// Unblock the first rebalance.
demanderSpi.stopBlock();
// Wait for the start of the checkpoint after rebalancing cacheName2.
assertTrue("Failed to wait for checkpoint.", blockCheckpoint.await(getTestTimeout(), MILLISECONDS));
// Block the second rebalancing.
demanderSpi.blockMessages((node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
return msg0.groupId() == CU.cacheId(cacheName1);
}
return false;
});
ioFactory.reset();
// Unblock the client exchange and, therefore, the handling of RebalanceReassignExchangeTask,
// which is already scheduled.
unblockClientJoin.countDown();
// Wait for the second rebalance to start (a new chain of rebalance futures should be created at this point).
demanderSpi.waitForBlocked();
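// Obtain the finish future of the checkpoint that is currently blocked in the listener above.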
GridFutureAdapter checkpointFut = ((GridCacheDatabaseSharedManager) demander.context().cache().context().database()).getCheckpointer().currentProgress().futureFor(FINISHED);
// Unblock checkpointer.
unblockCheckpoint.countDown();
assertTrue("Failed to wait for a checkpoint.", GridTestUtils.waitForCondition(() -> checkpointFut.isDone(), getTestTimeout()));
// There is a race between unblocking the rebalance and the current checkpoint executing all of its listeners.
demanderSpi.stopBlock();
awaitPartitionMapExchange(false, true, null);
}
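The latch-based CheckpointListener used above is a reusable building block on its own: it lets a test detect that a checkpoint has started and hold it until the test is ready to let it finish. A trimmed-down sketch, assuming `db` is the GridCacheDatabaseSharedManager obtained as in the test (latch names are illustrative):
CountDownLatch cpStarted = new CountDownLatch(1);
CountDownLatch cpUnblocked = new CountDownLatch(1);
db.addCheckpointListener(new CheckpointListener() {
    @Override public void onCheckpointBegin(Context ctx) throws IgniteCheckedException {
        // Signal that the checkpoint reached this phase, then park until the test releases it.
        cpStarted.countDown();
        try {
            cpUnblocked.await();
        }
        catch (InterruptedException e) {
            throw new IgniteCheckedException(e);
        }
    }
    @Override public void beforeCheckpointBegin(Context ctx) {
        // No-op.
    }
    @Override public void onMarkCheckpointBegin(Context ctx) {
        // No-op.
    }
});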
use of org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager in project ignite by apache.
the class IgniteSnapshotManagerSelfTest, method testSnapshotAlwaysStartsNewCheckpoint.
/**
* @throws Exception If failed.
*/
@Test
public void testSnapshotAlwaysStartsNewCheckpoint() throws Exception {
long testTimeout = 30_000;
listenLog = new ListeningTestLogger(log);
LogListener lsnr = LogListener.matches("Snapshot operation is scheduled on local node").times(1).build();
listenLog.registerListener(lsnr);
IgniteEx ignite = startGridsWithCache(1, 4096, key -> new Account(key, key), new CacheConfiguration<>(DEFAULT_CACHE_NAME));
assertTrue("Test requires that only forced checkpoints were allowed.", ignite.configuration().getDataStorageConfiguration().getCheckpointFrequency() >= TimeUnit.DAYS.toMillis(365));
GridCacheDatabaseSharedManager dbMgr = ((GridCacheDatabaseSharedManager) ignite.context().cache().context().database());
// Ensure that previous checkpoint finished.
dbMgr.getCheckpointer().currentProgress().futureFor(CheckpointState.FINISHED).get(testTimeout);
CountDownLatch beforeCpEnter = new CountDownLatch(1);
CountDownLatch beforeCpExit = new CountDownLatch(1);
// Block checkpointer on start.
dbMgr.addCheckpointListener(new CheckpointListener() {
@Override
public void beforeCheckpointBegin(CheckpointListener.Context ctx) throws IgniteCheckedException {
beforeCpEnter.countDown();
U.await(beforeCpExit, testTimeout, TimeUnit.MILLISECONDS);
}
@Override
public void onMarkCheckpointBegin(CheckpointListener.Context ctx) {
// No-op.
}
@Override
public void onCheckpointBegin(CheckpointListener.Context ctx) {
// No-op.
}
});
dbMgr.forceCheckpoint("snapshot-task-hang-test");
beforeCpEnter.await(testTimeout, TimeUnit.MILLISECONDS);
IgniteFuture<Void> snpFut = ignite.snapshot().createSnapshot(SNAPSHOT_NAME);
// Wait until the snapshot task checkpoint listener is registered.
assertTrue(GridTestUtils.waitForCondition(lsnr::check, testTimeout));
// Unblock checkpointer.
beforeCpExit.countDown();
// Make sure the snapshot has been taken.
snpFut.get(testTimeout);
}
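Two pieces of the test above are useful on their own: forcing a checkpoint with an explicit reason and waiting for the current checkpoint to reach a particular state. A minimal sketch, assuming `ignite` is a started IgniteEx with persistence enabled; the reason string and the 30 second timeout are illustrative:
GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) ignite.context().cache().context().database();
// The reason string shows up in the checkpoint logs and in CheckpointProgress#reason().
dbMgr.forceCheckpoint("my-test-reason");
// Wait until the checkpointer reports the current checkpoint as finished.
dbMgr.getCheckpointer().currentProgress().futureFor(CheckpointState.FINISHED).get(30_000);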
use of org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager in project ignite by apache.
the class MockWalIteratorFactory, method iterator.
/**
* Creates a WAL iterator over the given WAL and WAL archive directories.
* @param wal WAL directory without node consistent ID.
* @param walArchive WAL archive directory without node consistent ID.
* @return WAL iterator.
* @throws IgniteCheckedException If IO failed.
*/
@SuppressWarnings("unchecked")
public WALIterator iterator(File wal, File walArchive) throws IgniteCheckedException {
final DataStorageConfiguration persistentCfg1 = Mockito.mock(DataStorageConfiguration.class);
when(persistentCfg1.getWalPath()).thenReturn(wal.getAbsolutePath());
when(persistentCfg1.getWalArchivePath()).thenReturn(walArchive.getAbsolutePath());
when(persistentCfg1.getWalSegments()).thenReturn(segments);
when(persistentCfg1.getWalBufferSize()).thenReturn(DataStorageConfiguration.DFLT_WAL_BUFF_SIZE);
when(persistentCfg1.getWalRecordIteratorBufferSize()).thenReturn(DataStorageConfiguration.DFLT_WAL_RECORD_ITERATOR_BUFFER_SIZE);
when(persistentCfg1.getWalSegmentSize()).thenReturn(DataStorageConfiguration.DFLT_WAL_SEGMENT_SIZE);
final FileIOFactory fileIOFactory = new DataStorageConfiguration().getFileIOFactory();
when(persistentCfg1.getFileIOFactory()).thenReturn(fileIOFactory);
final IgniteConfiguration cfg = Mockito.mock(IgniteConfiguration.class);
when(cfg.getDataStorageConfiguration()).thenReturn(persistentCfg1);
final GridKernalContext ctx = Mockito.mock(GridKernalContext.class);
when(ctx.config()).thenReturn(cfg);
when(ctx.clientNode()).thenReturn(false);
when(ctx.pdsFolderResolver()).thenReturn(new PdsFoldersResolver() {
@Override
public PdsFolderSettings resolveFolders() {
return new PdsFolderSettings(new File("."), subfolderName, consistentId, null, false);
}
});
final GridDiscoveryManager disco = Mockito.mock(GridDiscoveryManager.class);
when(ctx.discovery()).thenReturn(disco);
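// Build a real FileWriteAheadLogManager on top of the mocked kernal context; only the pieces required for replay are mocked.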
final IgniteWriteAheadLogManager mgr = new FileWriteAheadLogManager(ctx);
final GridCacheSharedContext sctx = Mockito.mock(GridCacheSharedContext.class);
when(sctx.kernalContext()).thenReturn(ctx);
when(sctx.discovery()).thenReturn(disco);
when(sctx.gridConfig()).thenReturn(cfg);
final GridCacheDatabaseSharedManager db = Mockito.mock(GridCacheDatabaseSharedManager.class);
when(db.pageSize()).thenReturn(pageSize);
when(sctx.database()).thenReturn(db);
when(sctx.logger(any(Class.class))).thenReturn(log);
mgr.start(sctx);
return mgr.replay(null);
}
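A hedged usage sketch for the iterator produced above, assuming (as in other Ignite WAL tests) that WALIterator is auto-closeable and yields IgniteBiTuple&lt;WALPointer, WALRecord&gt; pairs; `walDir` and `walArchiveDir` stand in for the directories that would normally be passed:
try (WALIterator it = iterator(walDir, walArchiveDir)) {
    while (it.hasNext()) {
        IgniteBiTuple<WALPointer, WALRecord> tup = it.next();
        // tup.get1() is the position in the WAL, tup.get2() the deserialized record.
    }
}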