Search in sources :

Example 1 with CheckpointReadWriteLock

use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.

the class CheckpointReadLockFailureTest method testReentrance.

/**
 * Checks that a read lock acquired reentrantly keeps writers out until its last hold is released.
 *
 * <p>The reader thread ({@code async-runnable-runner-1}) takes the read lock twice, releases one hold and
 * then parks on {@code waitSecondRLock} while still owning the remaining hold. The writer thread
 * ({@code async-runnable-runner-2}) first verifies that it cannot obtain the write lock during that period,
 * then lets the reader release its last hold and expects a timed {@code tryLock} to succeed.
 *
 * @throws Exception If failed.
 */
@Test
@WithSystemProperty(key = IGNITE_PDS_LOG_CP_READ_LOCK_HOLDERS, value = "true")
public void testReentrance() throws Exception {
    IgniteEx ig = startGrid(0);
    ig.cluster().state(ClusterState.ACTIVE);
    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) ig.context().cache().context().database();
    // The underlying lock objects are private, so they are pulled out reflectively by field name.
    CheckpointReadWriteLock checkpointReadWriteLock = U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");
    // Released by the reader once it is down to a single read hold.
    CountDownLatch waitFirstRLock = new CountDownLatch(1);
    // Released by the writer to let the reader drop its last read hold.
    CountDownLatch waitSecondRLock = new CountDownLatch(1);
    long timeout = 500L;
    IgniteInternalFuture<?> f0 = GridTestUtils.runAsync(() -> {
        // noinspection LockAcquiredButNotSafelyReleased
        rwLock.readLock().lock();
        // noinspection LockAcquiredButNotSafelyReleased
        rwLock.readLock().lock();
        // Drop one of the two holds; the thread still owns the read lock after this.
        rwLock.readLock().unlock();
        waitFirstRLock.countDown();
        try {
            waitSecondRLock.await();
        } catch (InterruptedException e) {
            fail(e.toString());
        }
        rwLock.readLock().unlock();
    }, "async-runnable-runner-1");
    IgniteInternalFuture<?> f1 = GridTestUtils.runAsync(() -> {
        try {
            waitFirstRLock.await();
        } catch (InterruptedException e) {
            fail(e.toString());
        }
        try {
            // The reader still owns one hold, so this attempt is expected to fail.
            rwLock.writeLock().tryLock();
            assertFalse(GridTestUtils.waitForCondition(rwLock.writeLock()::isHeldByCurrentThread, timeout));
        } catch (IgniteInterruptedCheckedException e) {
            // An interrupted wait means the check above did not run to completion.
            fail(e.toString());
        } finally {
            // Always release the reader, otherwise it stays parked holding the read lock
            // when the assertion above fails.
            waitSecondRLock.countDown();
        }
        try {
            rwLock.writeLock().tryLock(timeout, TimeUnit.MILLISECONDS);
            assertTrue(rwLock.writeLock().isHeldByCurrentThread());
        } catch (InterruptedException e) {
            fail(e.toString());
        } finally {
            // Unlock only when the lock was really obtained: unlocking a write lock that is not held
            // throws IllegalMonitorStateException, which would hide the original assertion failure.
            if (rwLock.writeLock().isHeldByCurrentThread())
                rwLock.writeLock().unlock();
        }
    }, "async-runnable-runner-2");
    f1.get(4 * timeout);
    f0.get(4 * timeout);
    stopGrid(0);
}
Also used : IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) IgniteEx(org.apache.ignite.internal.IgniteEx) CheckpointReadWriteLock(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock) ReentrantReadWriteLockWithTracking(org.apache.ignite.internal.util.ReentrantReadWriteLockWithTracking) CountDownLatch(java.util.concurrent.CountDownLatch) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test) WithSystemProperty(org.apache.ignite.testframework.junits.WithSystemProperty)

Example 2 with CheckpointReadWriteLock

use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.

the class CheckpointReadLockFailureTest method testPrintCpRLockHolder.

/**
 * Holds the checkpoint read lock for longer than the tracking lock's wait threshold and expects a log
 * message matching {@code LOCK_HOLD_MESSAGE} to show up.
 *
 * @throws Exception If failed.
 */
@Test
@WithSystemProperty(key = IGNITE_PDS_LOG_CP_READ_LOCK_HOLDERS, value = "true")
public void testPrintCpRLockHolder() throws Exception {
    // The listening logger is installed before the node is started.
    // NOTE(review): presumably the node configuration picks up testLog - confirm against getConfiguration().
    testLog = new ListeningTestLogger(log);

    LogListener holdMsgLsnr = LogListener.matches(LOCK_HOLD_MESSAGE).build();

    testLog.registerListener(holdMsgLsnr);

    IgniteEx node = startGrid(0);

    node.cluster().state(ClusterState.ACTIVE);

    GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) node.context().cache().context().database();

    // Both lock objects are private fields and are therefore read reflectively.
    CheckpointReadWriteLock cpLock = U.field(dbMgr.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking trackingLock = U.field(cpLock, "checkpointLock");

    // This latch is never counted down in this method: the timed await below only serves as a pause.
    CountDownLatch releaseLatch = new CountDownLatch(1);

    GridTestUtils.runAsync(() -> {
        cpLock.readLock();

        try {
            // Keep the read lock for half a second longer than the threshold.
            releaseLatch.await(trackingLock.lockWaitThreshold() + 500, TimeUnit.MILLISECONDS);
        }
        catch (InterruptedException e) {
            e.printStackTrace();
        }
        finally {
            cpLock.readUnlock();
        }
    }, "async-runnable-runner-1");

    // Allow an extra half second on top of the hold time for the message to be logged.
    boolean msgLogged = GridTestUtils.waitForCondition(holdMsgLsnr::check, trackingLock.lockWaitThreshold() + 1000);

    assertTrue(msgLogged);

    stopGrid(0);
}
Also used : LogListener(org.apache.ignite.testframework.LogListener) IgniteEx(org.apache.ignite.internal.IgniteEx) CheckpointReadWriteLock(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock) ListeningTestLogger(org.apache.ignite.testframework.ListeningTestLogger) CountDownLatch(java.util.concurrent.CountDownLatch) ReentrantReadWriteLockWithTracking(org.apache.ignite.internal.util.ReentrantReadWriteLockWithTracking) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test) WithSystemProperty(org.apache.ignite.testframework.junits.WithSystemProperty)

Example 3 with CheckpointReadWriteLock

use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.

the class CheckpointReadLockFailureTest method testFailureTypeOnTimeout.

/**
 * Blocks the checkpoint write lock in one thread, attempts to take the checkpoint read lock in another and
 * expects {@code hndLatch} to be released within five seconds.
 *
 * <p>NOTE(review): {@code hndLatch} is presumably counted down by the test's failure handler - confirm
 * against the rest of the class.
 *
 * @throws Exception If failed.
 */
@Test
public void testFailureTypeOnTimeout() throws Exception {
    hndLatch = new CountDownLatch(1);

    IgniteEx node = startGrid(0);

    node.cluster().state(ClusterState.ACTIVE);

    GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) node.context().cache().context().database();

    // The private lock instance is obtained reflectively by field name.
    CheckpointReadWriteLock cpLock = U.field(dbMgr.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");

    // Writer: grabs the write lock and sleeps until the future is cancelled.
    IgniteInternalFuture writerFut = GridTestUtils.runAsync(() -> {
        cpLock.writeLock();

        try {
            doSleep(Long.MAX_VALUE);
        }
        finally {
            cpLock.writeUnlock();
        }
    });

    // Reader: goes through the database manager to acquire and immediately release the read lock.
    IgniteInternalFuture readerFut = GridTestUtils.runAsync(() -> {
        dbMgr.checkpointReadLock();
        dbMgr.checkpointReadUnlock();
    });

    boolean hndInvoked = hndLatch.await(5, TimeUnit.SECONDS);

    assertTrue(hndInvoked);

    // Cancel the writer, after which the reader is expected to finish within five seconds.
    writerFut.cancel();

    readerFut.get(5, TimeUnit.SECONDS);

    // The outcome of this wait is deliberately not asserted, exactly as before.
    GridTestUtils.waitForCondition(writerFut::isCancelled, 5000);

    stopGrid(0);
}
Also used : IgniteEx(org.apache.ignite.internal.IgniteEx) CheckpointReadWriteLock(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock) CountDownLatch(java.util.concurrent.CountDownLatch) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 4 with CheckpointReadWriteLock

use of org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock in project ignite by apache.

the class IgnitePdsCacheEntriesExpirationTest method testDeadlockBetweenCachePutAndEntryExpiration.

/**
 * Verifies scenario of a deadlock between a thread modifying a cache entry (acquires cp read lock and entry lock),
 * the ttl thread expiring the entry (acquires cp read lock and entry lock) and the checkpoint thread (acquires cp
 * write lock).
 *
 * The checkpoint thread is not used but emulated by the test to avoid a test hang (the interruptible API for
 * acquiring the write lock is used).
 *
 * For more details see <a href="https://ggsystems.atlassian.net/browse/GG-23135">GG-23135</a>.
 *
 * <p> <strong>Important note</strong> Implementation of this test relies heavily on the structure of existing code in
 * {@link GridCacheOffheapManager.GridCacheDataStore#purgeExpiredInternal(GridCacheContext, IgniteInClosure2X, int)}
 * and {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} methods.
 *
 * Any changes to those methods could break the logic inside the test, so if new failures of the test occur the test
 * code itself may require refactoring. </p>
 *
 * @throws Exception If failed.
 */
@Test
public void testDeadlockBetweenCachePutAndEntryExpiration() throws Exception {
    // Set by the helper thread below once TIMEOUT has elapsed; stops the updater loop.
    AtomicBoolean timeoutReached = new AtomicBoolean(false);
    // Set by the cp-write-lock-holder thread right before it requests the write lock.
    AtomicBoolean cpWriteLocked = new AtomicBoolean(false);
    // Id of the partition most recently created through the overridden partition factory.
    AtomicInteger partId = new AtomicInteger();
    // Counted down once by the ttl-cleanup-worker thread and once by the updater thread.
    CountDownLatch ttlLatch = new CountDownLatch(2);
    IgniteEx srv0 = startGrids(2);
    srv0.cluster().active(true);
    awaitPartitionMapExchange();
    srv0.getOrCreateCache(DEFAULT_CACHE_NAME);
    GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) srv0.cachex(DEFAULT_CACHE_NAME).context().topology();
    top.partitionFactory((ctx, grp, id, recovery) -> {
        partId.set(id);
        return new GridDhtLocalPartition(ctx, grp, id, recovery) {

            /**
             * This method is modified to bring threads into a deadlock situation.
             * Idea is the following: updater thread (see code below) on its way to
             * {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} call stops here
             * (already having entry lock acquired) and waits until checkpoint write lock is acquired
             * by another special thread emulating checkpointer thread (cp-write-lock-holder, see code below).
             * After that it enables ttl-cleanup-worker thread to proceed
             * (by counting down ttlLatch, see next overridden method) and reproduce deadlock scenario.
             */
            @Override
            public IgniteCacheOffheapManager.CacheDataStore dataStore() {
                Thread t = Thread.currentThread();
                String tName = t.getName();
                if (tName == null || !tName.contains("updater"))
                    return super.dataStore();
                // Only calls made from under an "unswap" frame are intercepted.
                boolean unswapFoundInST = false;
                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("unswap")) {
                        unswapFoundInST = true;
                        break;
                    }
                }
                if (!unswapFoundInST)
                    return super.dataStore();
                while (!cpWriteLocked.get()) {
                    try {
                        Thread.sleep(10);
                    } catch (InterruptedException ignored) {
                        log.warning(">>> Thread caught InterruptedException while waiting " + "for cp write lock to be locked");
                    }
                }
                ttlLatch.countDown();
                return super.dataStore();
            }

            /**
             * This method is modified to bring threads into a deadlock situation.
             * Idea is the following: internal ttl-cleanup-worker thread wakes up to cleanup expired entries,
             * reaches this method after calling purgeExpiredInternal (thus having checkpoint readlock acquired)
             * and stops on ttlLatch until updater thread comes in, acquires entry lock and gets stuck
             * on acquiring cp read lock
             * (because of special cp-write-lock-holder thread already holding cp write lock).
             *
             * So situation of three threads stuck in deadlock is reproduced.
             */
            @Override
            public boolean reserve() {
                Thread t = Thread.currentThread();
                String tName = t.getName();
                if (tName == null || !tName.contains("ttl-cleanup-worker"))
                    return super.reserve();
                // Only calls made from under a "purgeExpiredInternal" frame are intercepted.
                boolean purgeExpiredFoundInST = false;
                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("purgeExpiredInternal")) {
                        purgeExpiredFoundInST = true;
                        break;
                    }
                }
                if (!purgeExpiredFoundInST)
                    return super.reserve();
                ttlLatch.countDown();
                try {
                    ttlLatch.await();
                } catch (InterruptedException ignored) {
                    log.warning(">>> Thread caught InterruptedException while waiting for ttl latch" + " to be released by updater thread");
                }
                return super.reserve();
            }
        };
    });
    stopGrid(1);
    // change BLT to force new partition creation with modified GridDhtLocalPartition class
    srv0.cluster().setBaselineTopology(srv0.cluster().topologyVersion());
    Thread.sleep(500);
    IgniteCache<Object, Object> cache = srv0.getOrCreateCache(DEFAULT_CACHE_NAME);
    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) srv0.context().cache().context().database();
    // The underlying lock objects are private, so they are pulled out reflectively by field name.
    CheckpointReadWriteLock checkpointReadWriteLock = U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");
    // Find the first non-negative key that maps to the partition recorded by the factory above.
    int key = 0;
    while (true) {
        if (srv0.affinity(DEFAULT_CACHE_NAME).partition(key) != partId.get())
            key++;
        else
            break;
    }
    cache.put(key, 1);
    int finalKey = key;
    IgniteInternalFuture updateFut = GridTestUtils.runAsync(() -> {
        log.info(">>> Updater thread has started, updating key " + finalKey);
        int i = 10;
        while (!timeoutReached.get()) {
            cache.put(finalKey, i++);
            try {
                Thread.sleep(300);
            } catch (InterruptedException e) {
                log.warning(">>> Updater thread sleep was interrupted");
            }
        }
    }, "updater-thread");
    IgniteInternalFuture writeLockHolderFut = GridTestUtils.runAsync(() -> {
        // Wait until exactly one of the two participants has counted the latch down.
        while (ttlLatch.getCount() != 1) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException e) {
                log.warning(">>> Write lock holder thread sleep was interrupted");
                break;
            }
        }
        try {
            // The flag is raised before the lock request because the request itself may block.
            cpWriteLocked.set(true);
            rwLock.writeLock().lockInterruptibly();
            ttlLatch.await();
        } catch (InterruptedException e) {
            log.warning(">>> Write lock holder thread was interrupted while obtaining write lock.");
        } finally {
            // lockInterruptibly() may be interrupted before the lock is obtained (the future is cancelled
            // by the test); unlocking a write lock that is not held throws IllegalMonitorStateException.
            if (rwLock.writeLock().isHeldByCurrentThread())
                rwLock.writeLock().unlock();
        }
    }, "cp-write-lock-holder");
    GridTestUtils.runAsync(() -> {
        long start = System.currentTimeMillis();
        while (System.currentTimeMillis() - start < TIMEOUT) doSleep(1_000);
        timeoutReached.set(true);
    });
    try {
        // The updater only finishes if it never got stuck, i.e. the deadlock did not occur.
        updateFut.get(TIMEOUT * 2);
    } catch (IgniteFutureTimeoutCheckedException ignored) {
        fail("Failed to wait for futures for doubled timeout");
    } finally {
        // Release everything that may still be parked on the latch before cancelling the helpers.
        while (ttlLatch.getCount() > 0) ttlLatch.countDown();
        writeLockHolderFut.cancel();
        updateFut.cancel();
    }
}
Also used : CheckpointReadWriteLock(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock) CountDownLatch(java.util.concurrent.CountDownLatch) ReentrantReadWriteLockWithTracking(org.apache.ignite.internal.util.ReentrantReadWriteLockWithTracking) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IgniteCacheOffheapManager(org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteEx(org.apache.ignite.internal.IgniteEx) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) CacheObject(org.apache.ignite.internal.processors.cache.CacheObject) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) GridDhtPartitionTopologyImpl(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Aggregations

CountDownLatch (java.util.concurrent.CountDownLatch)4 IgniteEx (org.apache.ignite.internal.IgniteEx)4 CheckpointReadWriteLock (org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock)4 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)4 Test (org.junit.Test)4 IgniteInternalFuture (org.apache.ignite.internal.IgniteInternalFuture)3 ReentrantReadWriteLockWithTracking (org.apache.ignite.internal.util.ReentrantReadWriteLockWithTracking)3 WithSystemProperty (org.apache.ignite.testframework.junits.WithSystemProperty)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 IgniteFutureTimeoutCheckedException (org.apache.ignite.internal.IgniteFutureTimeoutCheckedException)1 IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException)1 CacheObject (org.apache.ignite.internal.processors.cache.CacheObject)1 IgniteCacheOffheapManager (org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager)1 GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)1 GridDhtPartitionTopologyImpl (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl)1 ListeningTestLogger (org.apache.ignite.testframework.ListeningTestLogger)1 LogListener (org.apache.ignite.testframework.LogListener)1