
Example 1 with GridDhtPartitionTopologyImpl

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.

The class CacheRentingStateRepairTest, method testRebalanceRentingPartitionAndNodeJoin.

/**
 * @param client {@code True} for client node join.
 * @param delay Delay.
 *
 * @throws Exception If failed.
 */
private void testRebalanceRentingPartitionAndNodeJoin(boolean client, long delay) throws Exception {
    try {
        IgniteEx g0 = startGrids(2);
        g0.cluster().baselineAutoAdjustEnabled(false);
        g0.cluster().active(true);
        awaitPartitionMapExchange();
        List<Integer> parts = evictingPartitionsAfterJoin(g0, g0.cache(DEFAULT_CACHE_NAME), 20);
        int delayEvictPart = parts.get(0);
        List<Integer> keys = partitionKeys(g0.cache(DEFAULT_CACHE_NAME), delayEvictPart, 2_000, 0);
        for (Integer key : keys) g0.cache(DEFAULT_CACHE_NAME).put(key, key);
        GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) dht(g0.cache(DEFAULT_CACHE_NAME)).topology();
        GridDhtLocalPartition part = top.localPartition(delayEvictPart);
        assertNotNull(part);
        // Reserve the partition to prevent its eviction while the new node joins.
        part.reserve();
        startGrid(2);
        resetBaselineTopology();
        part.release();
        // Wait for eviction. Same could be achieved by calling awaitPartitionMapExchange(true, true, null, true);
        part.rent().get();
        CountDownLatch l1 = new CountDownLatch(1);
        CountDownLatch l2 = new CountDownLatch(1);
        // Create race between processing of final supply message and partition clearing.
        // Evicted partition will be recreated using supplied factory.
        top.partitionFactory((ctx, grp, id, recovery) -> id != delayEvictPart
            ? new GridDhtLocalPartition(ctx, grp, id, recovery)
            : new GridDhtLocalPartition(ctx, grp, id, recovery) {
                @Override
                public void beforeApplyBatch(boolean last) {
                    if (last) {
                        l1.countDown();
                        U.awaitQuiet(l2);

                        // Delay rebalance finish to enforce race with clearing.
                        if (delay > 0)
                            doSleep(delay);
                    }
                }
            });
        stopGrid(2);
        // Trigger rebalance for delayEvictPart after eviction.
        resetBaselineTopology();
        IgniteInternalFuture<?> fut = multithreadedAsync(new Runnable() {

            @Override
            public void run() {
                try {
                    l1.await();
                    // Trigger partition clear on next topology version.
                    if (client)
                        startClientGrid(CLIENT);
                    else
                        startGrid(2);
                    // Finish partition rebalance after initiating clear.
                    l2.countDown();
                } catch (Exception e) {
                    fail(X.getFullStackTrace(e));
                }
            }
        }, 1);
        fut.get();
        awaitPartitionMapExchange(true, true, null, true);
        assertPartitionsSame(idleVerify(g0));
    } finally {
        stopAllGrids();
    }
}
Also used : IgniteEx(org.apache.ignite.internal.IgniteEx) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) CountDownLatch(java.util.concurrent.CountDownLatch) GridDhtPartitionTopologyImpl(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl)
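
The core technique in this example is the partitionFactory(...) hook on GridDhtPartitionTopologyImpl, which lets a test substitute its own GridDhtLocalPartition subclass for any partition created afterwards. A minimal sketch of the hook in isolation, assuming a started IgniteEx named grid and the same DEFAULT_CACHE_NAME cache as above (both placeholders):

GridDhtPartitionTopologyImpl top =
    (GridDhtPartitionTopologyImpl)grid.cachex(DEFAULT_CACHE_NAME).context().topology();

// Partitions created from this point on are built by the supplied factory,
// so the overridden hook fires when a rebalanced supply batch is applied.
top.partitionFactory((ctx, grp, id, recovery) -> new GridDhtLocalPartition(ctx, grp, id, recovery) {
    @Override
    public void beforeApplyBatch(boolean last) {
        // Test-specific synchronization (latches, delays) would go here.
    }
});

The factory only affects partitions created after it is installed, which is why the test first waits for the old partition to be evicted (part.rent().get()) before triggering the rebalance that recreates it.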

Example 2 with GridDhtPartitionTopologyImpl

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.

The class GridCacheRebalancingPartitionCountersTest, method checkUpdCounter.

/**
 * Compares per-partition update counters of the given node against previously collected values,
 * recording a line in {@code issues} for every mismatch or zero counter.
 */
private void checkUpdCounter(IgniteEx ignite, List<String> issues, HashMap<Integer, Long> partMap) {
    final CacheGroupContext grpCtx = ignite.context().cache().cacheGroup(CU.cacheId(CACHE_NAME));
    assertNotNull(grpCtx);
    GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) grpCtx.topology();
    List<GridDhtLocalPartition> locParts = top.localPartitions();
    for (GridDhtLocalPartition part : locParts) {
        Long cnt = partMap.get(part.id());
        if (cnt == null)
            partMap.put(part.id(), part.updateCounter());
        if ((cnt != null && part.updateCounter() != cnt) || part.updateCounter() == 0)
            issues.add("Node name: " + ignite.name() + ", part = " + part.id() + ", updCounter = " + part.updateCounter());
    }
}
Also used : GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) GridDhtPartitionTopologyImpl(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl)
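
The same counters can be collected into a fresh map per node and diffed across the cluster. A hedged sketch of that collection step, assuming a started IgniteEx named ignite and an existing cache name CACHE_NAME (placeholders):

CacheGroupContext grpCtx = ignite.context().cache().cacheGroup(CU.cacheId(CACHE_NAME));

GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl)grpCtx.topology();

// Snapshot the update counter of every locally owned partition.
Map<Integer, Long> counters = new HashMap<>();

for (GridDhtLocalPartition part : top.localPartitions())
    counters.put(part.id(), part.updateCounter());

Running this on each node after rebalance and comparing the resulting maps reproduces the cross-node consistency check above.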

Example 3 with GridDhtPartitionTopologyImpl

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.

The class TxRollbackOnMapOnInvalidTopologyTest, method doTestRollback.

/**
 * Test scenario: mock a partition so its primary check fails on the new topology version, then start a new node.
 * Expected result: the transaction is rolled back.
 *
 * @param near Near node.
 * @param node Owner node.
 * @throws Exception If failed.
 */
private void doTestRollback(Ignite near, IgniteEx node) throws Exception {
    List<Integer> primKeys = primaryKeys(node.cache(DEFAULT_CACHE_NAME), 100);
    List<Integer> movingKeys = movingKeysAfterJoin(node, DEFAULT_CACHE_NAME, 100);
    primKeys.removeAll(movingKeys);
    // primKeys now contains only keys from stable partitions (not moved by the node join).
    int part = primKeys.get(0);
    IgniteEx grid = (IgniteEx) grid(node.affinity(DEFAULT_CACHE_NAME).mapPartitionToNode(part));
    GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) grid.cachex(DEFAULT_CACHE_NAME).context().topology();
    AffinityTopologyVersion failCheckVer = new AffinityTopologyVersion(GRIDS + 2, 1);
    top.partitionFactory((ctx, grp, id, recovery) -> new GridDhtLocalPartition(ctx, grp, id, recovery) {

        @Override
        public boolean primary(AffinityTopologyVersion topVer) {
            return !(id == part && topVer.equals(failCheckVer)) && super.primary(topVer);
        }
    });
    // Re-create the mocked partition.
    GridDhtLocalPartition p0 = top.localPartition(part);
    p0.rent().get();
    assertTrue(p0.state() == EVICTED);
    ReadWriteLock lock = U.field(top, "lock");
    lock.writeLock().lock();
    try {
        p0 = top.getOrCreatePartition(part);
        p0.own();
    }
    finally {
        lock.writeLock().unlock();
    }
    startGrid(GRIDS);
    awaitPartitionMapExchange();
    try (Transaction tx = near.transactions().txStart()) {
        near.cache(DEFAULT_CACHE_NAME).put(part, part);
        tx.commit();
        fail();
    } catch (TransactionRollbackException ignore) {
    // Expected.
    } catch (Exception e) {
        fail(X.getFullStackTrace(e));
    }
}
Also used : Transaction(org.apache.ignite.transactions.Transaction) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) IgniteEx(org.apache.ignite.internal.IgniteEx) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) TransactionRollbackException(org.apache.ignite.transactions.TransactionRollbackException) GridDhtPartitionTopologyImpl(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl)
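
The evict-and-recreate step deserves a closer look: getOrCreatePartition(...) must run under the topology's internal write lock, which the test obtains reflectively via U.field (an Ignite-internal field name that may change between versions). A hedged sketch of just that step, with the lock released in a finally block:

GridDhtLocalPartition p = top.localPartition(partId);

// Wait until the partition is fully evicted.
p.rent().get();

ReadWriteLock topLock = U.field(top, "lock");

topLock.writeLock().lock();

try {
    // Recreated through the registered partition factory, so the mock applies.
    p = top.getOrCreatePartition(partId);

    p.own();
}
finally {
    topLock.writeLock().unlock();
}

Holding the write lock excludes concurrent topology updates while the partition is swapped in.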

Example 4 with GridDhtPartitionTopologyImpl

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.

The class CacheGroupContext, method start.

/**
 * @throws IgniteCheckedException If failed.
 */
public void start() throws IgniteCheckedException {
    GridAffinityAssignmentCache affCache = ctx.affinity().groupAffinity(grpId);
    aff = affCache == null ? GridAffinityAssignmentCache.create(ctx.kernalContext(), ccfg.getAffinity(), ccfg) : affCache;
    if (ccfg.getCacheMode() != LOCAL) {
        // The DHT partition topology is created for distributed caches only; LOCAL caches never get one.
        top = ctx.kernalContext().resource().resolve(new GridDhtPartitionTopologyImpl(ctx, this));
        metrics.onTopologyInitialized();
    }
    try {
        offheapMgr = ctx.kernalContext().resource().resolve(persistenceEnabled ? new GridCacheOffheapManager() : new IgniteCacheOffheapManagerImpl());
    } catch (Exception e) {
        throw new IgniteCheckedException("Failed to initialize offheap manager", e);
    }
    offheapMgr.start(ctx, this);
    if (!isRecoveryMode()) {
        initializeIO();
        ctx.affinity().onCacheGroupCreated(this);
        ctx.evict().onCacheGroupStarted(this);
    }
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) GridAffinityAssignmentCache(org.apache.ignite.internal.processors.affinity.GridAffinityAssignmentCache) GridCacheOffheapManager(org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager) GridDhtPartitionTopologyImpl(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl) IgniteClientDisconnectedCheckedException(org.apache.ignite.internal.IgniteClientDisconnectedCheckedException)
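
Note that start() creates the DHT topology only for non-LOCAL caches, so code that later retrieves it (as the test examples above do) implicitly assumes a distributed cache. A minimal hedged sketch of that retrieval, assuming a started IgniteEx named ignite and an existing distributed cache called "myCache" (placeholders):

CacheGroupContext grp = ignite.context().cache().cacheGroup(CU.cacheId("myCache"));

// Valid only for distributed caches: a LOCAL cache group never initializes its topology in start().
GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl)grp.topology();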

Example 5 with GridDhtPartitionTopologyImpl

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.

The class IgnitePdsCacheEntriesExpirationTest, method testDeadlockBetweenCachePutAndEntryExpiration.

/**
 * Verifies a deadlock scenario between a thread modifying a cache entry (acquires checkpoint read lock
 * and entry lock), the TTL cleanup thread expiring the entry (acquires checkpoint read lock and entry lock)
 * and the checkpoint thread (acquires checkpoint write lock).
 *
 * The checkpoint thread is not actually used but is emulated by the test to avoid a test hang
 * (an interruptible API is used for acquiring the write lock).
 *
 * For more details see <a href="https://ggsystems.atlassian.net/browse/GG-23135">GG-23135</a>.
 *
 * <p> <strong>Important note:</strong> the implementation of this test relies heavily on the structure of existing code in
 * {@link GridCacheOffheapManager.GridCacheDataStore#purgeExpiredInternal(GridCacheContext, IgniteInClosure2X, int)}
 * and {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} methods.
 *
 * Any changes to those methods could break the logic inside the test, so if new failures of the test occur,
 * the test code itself may require refactoring. </p>
 *
 * @throws Exception If failed.
 */
@Test
public void testDeadlockBetweenCachePutAndEntryExpiration() throws Exception {
    AtomicBoolean timeoutReached = new AtomicBoolean(false);
    AtomicBoolean cpWriteLocked = new AtomicBoolean(false);
    AtomicInteger partId = new AtomicInteger();
    CountDownLatch ttlLatch = new CountDownLatch(2);
    IgniteEx srv0 = startGrids(2);
    srv0.cluster().active(true);
    awaitPartitionMapExchange();
    srv0.getOrCreateCache(DEFAULT_CACHE_NAME);
    GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) srv0.cachex(DEFAULT_CACHE_NAME).context().topology();
    top.partitionFactory((ctx, grp, id, recovery) -> {
        partId.set(id);
        return new GridDhtLocalPartition(ctx, grp, id, recovery) {

            /**
             * This method is modified to bring the threads into a deadlock situation.
             * The idea is the following: the updater thread (see code below), on its way to the
             * {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} call, stops here
             * (already holding the entry lock) and waits until the checkpoint write lock is acquired
             * by a special thread emulating the checkpointer thread (cp-write-lock-holder, see code below).
             * After that it allows the ttl-cleanup-worker thread to proceed
             * (by counting down ttlLatch, see the next overridden method) and reproduces the deadlock scenario.
             */
            @Override
            public IgniteCacheOffheapManager.CacheDataStore dataStore() {
                Thread t = Thread.currentThread();
                String tName = t.getName();
                if (tName == null || !tName.contains("updater"))
                    return super.dataStore();
                boolean unswapFoundInST = false;
                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("unswap")) {
                        unswapFoundInST = true;
                        break;
                    }
                }
                if (!unswapFoundInST)
                    return super.dataStore();
                while (!cpWriteLocked.get()) {
                    try {
                        Thread.sleep(10);
                    } catch (InterruptedException ignored) {
                        log.warning(">>> Thread caught InterruptedException while waiting " + "for cp write lock to be locked");
                    }
                }
                ttlLatch.countDown();
                return super.dataStore();
            }

            /**
             * This method is modified to bring the threads into a deadlock situation.
             * The idea is the following: the internal ttl-cleanup-worker thread wakes up to clean up expired
             * entries, reaches this method from purgeExpiredInternal (thus holding the checkpoint read lock)
             * and parks on ttlLatch until the updater thread comes in, acquires the entry lock and gets stuck
             * acquiring the checkpoint read lock
             * (because the special cp-write-lock-holder thread already holds the checkpoint write lock).
             *
             * This reproduces the situation of three threads stuck in a deadlock.
             */
            @Override
            public boolean reserve() {
                Thread t = Thread.currentThread();
                String tName = t.getName();
                if (tName == null || !tName.contains("ttl-cleanup-worker"))
                    return super.reserve();
                boolean purgeExpiredFoundInST = false;
                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("purgeExpiredInternal")) {
                        purgeExpiredFoundInST = true;
                        break;
                    }
                }
                if (!purgeExpiredFoundInST)
                    return super.reserve();
                ttlLatch.countDown();
                try {
                    ttlLatch.await();
                } catch (InterruptedException ignored) {
                    log.warning(">>> Thread caught InterruptedException while waiting for ttl latch" + " to be released by updater thread");
                }
                return super.reserve();
            }
        };
    });
    stopGrid(1);
    // Change BLT to force new partition creation with the modified GridDhtLocalPartition class.
    srv0.cluster().setBaselineTopology(srv0.cluster().topologyVersion());
    Thread.sleep(500);
    IgniteCache<Object, Object> cache = srv0.getOrCreateCache(DEFAULT_CACHE_NAME);
    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) srv0.context().cache().context().database();
    CheckpointReadWriteLock checkpointReadWriteLock = U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
    ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");
    // Find a key that maps to the partition created by the mocked factory.
    int key = 0;
    while (srv0.affinity(DEFAULT_CACHE_NAME).partition(key) != partId.get())
        key++;
    cache.put(key, 1);
    int finalKey = key;
    IgniteInternalFuture updateFut = GridTestUtils.runAsync(() -> {
        log.info(">>> Updater thread has started, updating key " + finalKey);
        int i = 10;
        while (!timeoutReached.get()) {
            cache.put(finalKey, i++);
            try {
                Thread.sleep(300);
            } catch (InterruptedException e) {
                log.warning(">>> Updater thread sleep was interrupted");
            }
        }
    }, "updater-thread");
    IgniteInternalFuture writeLockHolderFut = GridTestUtils.runAsync(() -> {
        while (ttlLatch.getCount() != 1) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException e) {
                log.warning(">>> Write lock holder thread sleep was interrupted");
                break;
            }
        }
        try {
            cpWriteLocked.set(true);
            rwLock.writeLock().lockInterruptibly();
            ttlLatch.await();
        } catch (InterruptedException e) {
            log.warning(">>> Write lock holder thread was interrupted while obtaining write lock.");
        } finally {
            rwLock.writeLock().unlock();
        }
    }, "cp-write-lock-holder");
    GridTestUtils.runAsync(() -> {
        long start = System.currentTimeMillis();
        while (System.currentTimeMillis() - start < TIMEOUT) doSleep(1_000);
        timeoutReached.set(true);
    });
    try {
        updateFut.get(TIMEOUT * 2);
    } catch (IgniteFutureTimeoutCheckedException ignored) {
        fail("Failed to wait for futures for doubled timeout");
    } finally {
        while (ttlLatch.getCount() > 0) ttlLatch.countDown();
        writeLockHolderFut.cancel();
        updateFut.cancel();
    }
}
Also used : CheckpointReadWriteLock(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointReadWriteLock) CountDownLatch(java.util.concurrent.CountDownLatch) ReentrantReadWriteLockWithTracking(org.apache.ignite.internal.util.ReentrantReadWriteLockWithTracking) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IgniteCacheOffheapManager(org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteEx(org.apache.ignite.internal.IgniteEx) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) CacheObject(org.apache.ignite.internal.processors.cache.CacheObject) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) GridDhtPartitionTopologyImpl(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)
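
One small pattern from this test that recurs in partition-level testing is locating a key that maps to a particular partition, as the while loop above does for the partition produced by the mocked factory. A hedged sketch, assuming a started IgniteEx named ignite and a target partition id targetPartId (placeholders); the partitionKeys(...) helper in Example 1 does the same job in bulk:

// Walk the key space until affinity maps a key to the desired partition.
int key = 0;

while (ignite.affinity(DEFAULT_CACHE_NAME).partition(key) != targetPartId)
    key++;

ignite.cache(DEFAULT_CACHE_NAME).put(key, 1);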

Aggregations

GridDhtPartitionTopologyImpl (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl): 7
GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition): 5
IgniteEx (org.apache.ignite.internal.IgniteEx): 4
CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext): 3
CountDownLatch (java.util.concurrent.CountDownLatch): 2
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2
GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest): 2
Transaction (org.apache.ignite.transactions.Transaction): 2
Test (org.junit.Test): 2
Arrays (java.util.Arrays): 1
List (java.util.List): 1
Map (java.util.Map): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1
ReadWriteLock (java.util.concurrent.locks.ReadWriteLock): 1
Stream (java.util.stream.Stream): 1
IgniteCache (org.apache.ignite.IgniteCache): 1
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 1
TRANSACTIONAL (org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL): 1
FULL_SYNC (org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC): 1