use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.
the class CheckpointReadLockFailureTest method testReentrance.
/**
 * Checks re-entrant read locking on the tracked checkpoint lock.
 * <p>
 * One thread takes the read lock twice and releases it once, so a single hold remains. A second thread
 * verifies that the write lock cannot be obtained while that hold is outstanding and that it can be obtained
 * (within {@code timeout}) once the first thread releases the remaining hold.
 *
 * @throws Exception If failed.
 */
@Test
@WithSystemProperty(key = IGNITE_PDS_LOG_CP_READ_LOCK_HOLDERS, value = "true")
public void testReentrance() throws Exception {
    IgniteEx ig = startGrid(0);

    ig.cluster().state(ClusterState.ACTIVE);

    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) ig.context().cache().context().database();

    // The underlying lock is not exposed through public API, so it is extracted reflectively.
    CheckpointReadWriteLock checkpointReadWriteLock = U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");

    CountDownLatch waitFirstRLock = new CountDownLatch(1);
    CountDownLatch waitSecondRLock = new CountDownLatch(1);

    long timeout = 500L;

    IgniteInternalFuture f0 = GridTestUtils.runAsync(() -> {
        // noinspection LockAcquiredButNotSafelyReleased
        rwLock.readLock().lock();

        // noinspection LockAcquiredButNotSafelyReleased
        rwLock.readLock().lock();

        // Two acquisitions, one release: exactly one read hold is still outstanding.
        rwLock.readLock().unlock();

        waitFirstRLock.countDown();

        try {
            waitSecondRLock.await();
        } catch (InterruptedException e) {
            fail(e.toString());
        }

        rwLock.readLock().unlock();
    }, "async-runnable-runner-1");

    IgniteInternalFuture f1 = GridTestUtils.runAsync(() -> {
        try {
            waitFirstRLock.await();
        } catch (InterruptedException e) {
            fail(e.toString());
        }

        try {
            // The reader still owns one hold, so the write lock must stay unavailable for the whole timeout.
            rwLock.writeLock().tryLock();

            assertFalse(GridTestUtils.waitForCondition(rwLock.writeLock()::isHeldByCurrentThread, timeout));
        } catch (IgniteInterruptedCheckedException e) {
            // An interrupted wait means the check above was not performed; do not let the test pass silently.
            fail(e.toString());
        } finally {
            // Always let the reader thread finish, even when the assertion above failed.
            waitSecondRLock.countDown();
        }

        try {
            rwLock.writeLock().tryLock(timeout, TimeUnit.MILLISECONDS);

            assertTrue(rwLock.writeLock().isHeldByCurrentThread());
        } catch (InterruptedException e) {
            fail(e.toString());
        } finally {
            // Unlocking a lock that is not held throws IllegalMonitorStateException and would mask
            // the real assertion failure, so release only when the acquisition actually succeeded.
            if (rwLock.writeLock().isHeldByCurrentThread())
                rwLock.writeLock().unlock();
        }
    }, "async-runnable-runner-2");

    f1.get(4 * timeout);
    f0.get(4 * timeout);

    stopGrid(0);
}
use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.
the class CheckpointReadLockFailureTest method testPrintCpRLockHolder.
/**
 * Holds the checkpoint read lock in a background thread for longer than the lock wait threshold and expects
 * a log record matching {@code LOCK_HOLD_MESSAGE} to be captured by the listening logger.
 *
 * @throws Exception If failed.
 */
@Test
@WithSystemProperty(key = IGNITE_PDS_LOG_CP_READ_LOCK_HOLDERS, value = "true")
public void testPrintCpRLockHolder() throws Exception {
    // Never counted down in this test: awaiting it simply keeps the read lock held for the full timeout.
    CountDownLatch releaseLatch = new CountDownLatch(1);

    testLog = new ListeningTestLogger(log);

    LogListener holdMsgLsnr = LogListener.matches(LOCK_HOLD_MESSAGE).build();

    testLog.registerListener(holdMsgLsnr);

    IgniteEx node = startGrid(0);

    node.cluster().state(ClusterState.ACTIVE);

    GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) node.context().cache().context().database();

    // Both lock objects live in private fields, hence the reflective access.
    CheckpointReadWriteLock cpLock = U.field(dbMgr.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking trackedLock = U.field(cpLock, "checkpointLock");

    GridTestUtils.runAsync(() -> {
        cpLock.readLock();

        try {
            // Stay under the read lock for half a second past the threshold.
            canReleaseAwait:
            releaseLatch.await(trackedLock.lockWaitThreshold() + 500, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            e.printStackTrace();
        } finally {
            cpLock.readUnlock();
        }
    }, "async-runnable-runner-1");

    // Allow another half second on top of the hold time for the message to reach the listener.
    boolean msgLogged = GridTestUtils.waitForCondition(holdMsgLsnr::check, trackedLock.lockWaitThreshold() + 1000);

    assertTrue(msgLogged);

    stopGrid(0);
}
use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.
the class CheckpointReadLockFailureTest method testFailureTypeOnTimeout.
/**
 * Takes the checkpoint write lock in one background thread, requests the checkpoint read lock from another
 * and expects {@code hndLatch} to be released within five seconds.
 * <p>
 * NOTE(review): {@code hndLatch} is presumably counted down by a failure handler configured elsewhere in
 * this test class - confirm against the class setup.
 *
 * @throws Exception If failed.
 */
@Test
public void testFailureTypeOnTimeout() throws Exception {
    hndLatch = new CountDownLatch(1);

    IgniteEx node = startGrid(0);

    node.cluster().state(ClusterState.ACTIVE);

    GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) node.context().cache().context().database();

    // The lock object lives in a private field, hence the reflective access.
    CheckpointReadWriteLock cpLock = U.field(dbMgr.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");

    // Writer: grabs the write lock and sleeps until the future is cancelled.
    IgniteInternalFuture writerFut = GridTestUtils.runAsync(() -> {
        cpLock.writeLock();

        try {
            doSleep(Long.MAX_VALUE);
        } finally {
            cpLock.writeUnlock();
        }
    });

    // Reader: goes through the database manager API to take and release the read lock.
    IgniteInternalFuture readerFut = GridTestUtils.runAsync(() -> {
        dbMgr.checkpointReadLock();
        dbMgr.checkpointReadUnlock();
    });

    boolean latchReleased = hndLatch.await(5, TimeUnit.SECONDS);

    assertTrue(latchReleased);

    writerFut.cancel();

    readerFut.get(5, TimeUnit.SECONDS);

    // Best-effort wait for the writer to be cancelled before the node stops; the result is not asserted.
    GridTestUtils.waitForCondition(writerFut::isCancelled, 5000);

    stopGrid(0);
}
use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.
the class IgnitePdsCacheEntriesExpirationTest method testDeadlockBetweenCachePutAndEntryExpiration.
/**
 * Verifies scenario of a deadlock between thread, modifying a cache entry (acquires cp read lock and entry lock),
 * ttl thread, expiring the entry (acquires cp read lock and entry lock) and checkpoint thread (acquires cp write
 * lock).
 *
 * Checkpoint thread is not used but emulated by the test to avoid test hang (interruptible API for acquiring write
 * lock is used).
 *
 * For more details see <a href="https://ggsystems.atlassian.net/browse/GG-23135">GG-23135</a>.
 *
 * <p> <strong>Important note</strong> Implementation of this test relies heavily on structure of existing code in
 * {@link GridCacheOffheapManager.GridCacheDataStore#purgeExpiredInternal(GridCacheContext, IgniteInClosure2X, int)}
 * and {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} methods.
 *
 * Any changes to those methods could break logic inside the test, so if new failures of the test occur the test
 * code itself may require refactoring. </p>
 *
 * @throws Exception If failed.
 */
@Test
public void testDeadlockBetweenCachePutAndEntryExpiration() throws Exception {
    AtomicBoolean timeoutReached = new AtomicBoolean(false);
    AtomicBoolean cpWriteLocked = new AtomicBoolean(false);
    AtomicInteger partId = new AtomicInteger();

    // Two parties: the ttl-cleanup-worker (in reserve()) and the updater thread (in dataStore()).
    CountDownLatch ttlLatch = new CountDownLatch(2);

    IgniteEx srv0 = startGrids(2);

    srv0.cluster().active(true);

    awaitPartitionMapExchange();

    srv0.getOrCreateCache(DEFAULT_CACHE_NAME);

    GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) srv0.cachex(DEFAULT_CACHE_NAME).context().topology();

    top.partitionFactory((ctx, grp, id, recovery) -> {
        // Remember the id of the most recently created partition; a key for it is picked below.
        partId.set(id);

        return new GridDhtLocalPartition(ctx, grp, id, recovery) {
            /**
             * This method is modified to bring threads in deadlock situation.
             * Idea is the following: updater thread (see code below) on its way to
             * {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} call stops here
             * (already having entry lock acquired) and waits until checkpoint write lock is acquired
             * by another special thread emulating checkpointer thread (cp-write-lock-holder, see code below).
             * After that it enables ttl-cleanup-worker thread to proceed
             * (by counting down ttlLatch, see next overridden method) and reproduce deadlock scenario.
             */
            @Override
            public IgniteCacheOffheapManager.CacheDataStore dataStore() {
                Thread t = Thread.currentThread();
                String tName = t.getName();

                if (tName == null || !tName.contains("updater"))
                    return super.dataStore();

                // Only intercept calls that arrive through an "unswap" frame.
                boolean unswapFoundInST = false;

                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("unswap")) {
                        unswapFoundInST = true;

                        break;
                    }
                }

                if (!unswapFoundInST)
                    return super.dataStore();

                while (!cpWriteLocked.get()) {
                    try {
                        Thread.sleep(10);
                    } catch (InterruptedException ignored) {
                        log.warning(">>> Thread caught InterruptedException while waiting " + "for cp write lock to be locked");
                    }
                }

                ttlLatch.countDown();

                return super.dataStore();
            }

            /**
             * This method is modified to bring threads in deadlock situation.
             * Idea is the following: internal ttl-cleanup-worker thread wakes up to cleanup expired entries,
             * reaches this method after calling purgeExpiredInternal (thus having checkpoint readlock acquired)
             * and stops on ttlLatch until updater thread comes in, acquires entry lock and gets stuck
             * on acquiring cp read lock
             * (because of special cp-write-lock-holder thread already holding cp write lock).
             *
             * So situation of three threads stuck in deadlock is reproduced.
             */
            @Override
            public boolean reserve() {
                Thread t = Thread.currentThread();
                String tName = t.getName();

                if (tName == null || !tName.contains("ttl-cleanup-worker"))
                    return super.reserve();

                // Only intercept calls that arrive through a "purgeExpiredInternal" frame.
                boolean purgeExpiredFoundInST = false;

                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("purgeExpiredInternal")) {
                        purgeExpiredFoundInST = true;

                        break;
                    }
                }

                if (!purgeExpiredFoundInST)
                    return super.reserve();

                ttlLatch.countDown();

                try {
                    ttlLatch.await();
                } catch (InterruptedException ignored) {
                    log.warning(">>> Thread caught InterruptedException while waiting for ttl latch" + " to be released by updater thread");
                }

                return super.reserve();
            }
        };
    });

    stopGrid(1);

    // change BLT to force new partition creation with modified GridDhtLocalPartition class
    srv0.cluster().setBaselineTopology(srv0.cluster().topologyVersion());

    Thread.sleep(500);

    IgniteCache<Object, Object> cache = srv0.getOrCreateCache(DEFAULT_CACHE_NAME);

    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) srv0.context().cache().context().database();

    // The underlying lock is not exposed through public API, so it is extracted reflectively.
    CheckpointReadWriteLock checkpointReadWriteLock = U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");

    // Find the first non-negative key that maps to the partition recorded by the factory.
    int key = 0;

    while (true) {
        if (srv0.affinity(DEFAULT_CACHE_NAME).partition(key) != partId.get())
            key++;
        else
            break;
    }

    cache.put(key, 1);

    int finalKey = key;

    IgniteInternalFuture updateFut = GridTestUtils.runAsync(() -> {
        log.info(">>> Updater thread has started, updating key " + finalKey);

        int i = 10;

        while (!timeoutReached.get()) {
            cache.put(finalKey, i++);

            try {
                Thread.sleep(300);
            } catch (InterruptedException e) {
                log.warning(">>> Updater thread sleep was interrupted");
            }
        }
    }, "updater-thread");

    IgniteInternalFuture writeLockHolderFut = GridTestUtils.runAsync(() -> {
        // Wait until exactly one party (the ttl-cleanup-worker) has arrived at the latch.
        while (ttlLatch.getCount() != 1) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException e) {
                log.warning(">>> Write lock holder thread sleep was interrupted");

                break;
            }
        }

        try {
            // The flag is raised before the (blocking) acquisition so the updater thread can move on.
            cpWriteLocked.set(true);

            rwLock.writeLock().lockInterruptibly();

            ttlLatch.await();
        } catch (InterruptedException e) {
            log.warning(">>> Write lock holder thread was interrupted while obtaining write lock.");
        } finally {
            // If lockInterruptibly() was interrupted the lock was never taken; unlocking it then would
            // throw IllegalMonitorStateException, so release only when this thread really owns it.
            if (rwLock.writeLock().isHeldByCurrentThread())
                rwLock.writeLock().unlock();
        }
    }, "cp-write-lock-holder");

    // Watchdog: tells the updater thread to stop once TIMEOUT has elapsed.
    GridTestUtils.runAsync(() -> {
        long start = System.currentTimeMillis();

        while (System.currentTimeMillis() - start < TIMEOUT)
            doSleep(1_000);

        timeoutReached.set(true);
    });

    try {
        updateFut.get(TIMEOUT * 2);
    } catch (IgniteFutureTimeoutCheckedException ignored) {
        fail("Failed to wait for futures for doubled timeout");
    } finally {
        // Release every thread that may still be parked on the latch before cancelling the futures.
        while (ttlLatch.getCount() > 0)
            ttlLatch.countDown();

        writeLockHolderFut.cancel();
        updateFut.cancel();
    }
}
Aggregations