Search in sources :

Example 1 with GridConcurrentSkipListSet

Use of org.apache.ignite.internal.util.GridConcurrentSkipListSet in the Apache Ignite project.

In class TxCrossCacheMapOnInvalidTopologyTest, method doTestCrossCacheTxMapOnInvalidTopology:

/**
 * Test scenario: cross-cache tx is started when node is left in the middle of rebalance, first cache is rebalanced
 * and second is partially rebalanced.
 *
 * First cache map request will trigger client compatible remap for pessimistic txs,
 * second cache map request should use new topology version.
 *
 * For optimistic tx remap is enforced if more than one mapping in transaction or all enlisted caches have compatible
 * assignments.
 *
 * Success: tx is finished on ideal topology version over all mapped nodes.
 *
 * @param concurrency Concurrency.
 * @param isolation Isolation.
 * @throws Exception If failed.
 */
private void doTestCrossCacheTxMapOnInvalidTopology(TransactionConcurrency concurrency, TransactionIsolation isolation) throws Exception {
    try {
        IgniteEx crd = startGrid(0);
        IgniteEx g1 = startGrid(1);

        awaitPartitionMapExchange();

        IgniteEx client = startClientGrid("client");

        assertNotNull(client.cache(CACHE1));
        assertNotNull(client.cache(CACHE2));

        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE1)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++) {
                streamer.addData(k, new byte[10]);
            }
        }

        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE2)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++) {
                streamer.addData(k, new byte[10]);
            }
        }

        TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(crd);

        // Topology versions this test reasons about: the version the supply messages of the initial rebalance
        // are matched against, the version awaited after g1 is closed, and the subsequent ideal switch.
        final AffinityTopologyVersion joinVer = new AffinityTopologyVersion(4, 0);
        AffinityTopologyVersion leftVer = new AffinityTopologyVersion(5, 0);
        AffinityTopologyVersion idealVer = new AffinityTopologyVersion(5, 1);

        // Partitions carried by the single cache 2 supply batch that is let through on joinVer.
        AtomicReference<Set<Integer>> full = new AtomicReference<>();

        // Partitions seen in (blocked) cache 2 supply messages on leftVer.
        GridConcurrentSkipListSet<Integer> leftVerParts = new GridConcurrentSkipListSet<>();

        crdSpi.blockMessages((node, m) -> {
            if (m instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage msg = (GridDhtPartitionSupplyMessage) m;

                // Allow full rebalance for cache 1 and system cache.
                if (msg.groupId() != CU.cacheId(CACHE2))
                    return false;

                // Allow only first batch for cache 2.
                if (msg.topologyVersion().equals(joinVer)) {
                    if (full.get() == null) {
                        Map<Integer, Long> last = U.field(msg, "last");

                        full.set(last.keySet());

                        return false;
                    }

                    return true;
                }

                // Record and block every cache 2 supply message sent on leftVer.
                if (msg.topologyVersion().equals(leftVer)) {
                    Map<Integer, Long> last = U.field(msg, "last");

                    leftVerParts.addAll(last.keySet());

                    return true;
                }
            } else if (m instanceof GridDhtPartitionsFullMessage) {
                GridDhtPartitionsFullMessage msg = (GridDhtPartitionsFullMessage) m;

                // Delay full message for ideal topology switch.
                GridDhtPartitionExchangeId exchId = msg.exchangeId();

                if (exchId != null && exchId.topologyVersion().equals(idealVer))
                    return true;
            }

            return false;
        });

        // Block all cache 2 supply messages sent by g1.
        TestRecordingCommunicationSpi g1Spi = TestRecordingCommunicationSpi.spi(g1);

        g1Spi.blockMessages((node, msg) -> {
            if (msg instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage m = (GridDhtPartitionSupplyMessage) msg;

                return m.groupId() == CU.cacheId(CACHE2);
            }

            return false;
        });

        startGrid(2);

        crdSpi.waitForBlocked();
        g1Spi.waitForBlocked();

        // Wait partial owning.
        assertTrue("Timed out while waiting for rebalance", GridTestUtils.waitForCondition(() -> {
            // Await full rebalance for cache 1.
            GridDhtPartitionTopology top0 = grid(2).cachex(CACHE1).context().topology();

            for (int p = 0; p < PARTS_CNT; p++) {
                if (top0.localPartition(p).state() != OWNING)
                    return false;
            }

            // Await partial rebalance for cache 2 (only partitions from the first supply batch).
            GridDhtPartitionTopology top1 = grid(2).cachex(CACHE2).context().topology();

            for (Integer part : full.get()) {
                if (top1.localPartition(part).state() != OWNING)
                    return false;
            }

            return true;
        }, 10_000));

        // At this point cache 1 is fully rebalanced and cache 2 is partially rebalanced.
        // Stop supplier in the middle of rebalance.
        g1.close();

        // Wait for topologies and calculate required partitions.
        grid(0).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(0).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();

        AffinityAssignment assignment0 = grid(0).cachex(CACHE1).context().affinity().assignment(leftVer);
        AffinityAssignment assignment = grid(0).cachex(CACHE2).context().affinity().assignment(leftVer);

        // Search for a partition with incompatible assignment.
        // Partition for cache1 which is mapped for both late and ideal topologies to the same primary.
        int stablePart = -1;

        // Partition for cache2 which is mapped for both late and ideal topologies on different primaries.
        int movingPart = -1;

        // The stable partition is looked up in the cache1 assignment: it is the key later put into CACHE1.
        for (int p = 0; p < assignment0.assignment().size(); p++) {
            List<ClusterNode> curr = assignment0.assignment().get(p);
            List<ClusterNode> ideal = assignment0.idealAssignment().get(p);

            if (curr.equals(ideal) && curr.get(0).order() == 1) {
                stablePart = p;

                break;
            }
        }

        assertFalse("Failed to find a stable partition for cache1", stablePart == -1);

        for (int p = 0; p < assignment.assignment().size(); p++) {
            List<ClusterNode> curr = assignment.assignment().get(p);
            List<ClusterNode> ideal = assignment.idealAssignment().get(p);

            if (!curr.equals(ideal) && curr.get(0).order() == 1) {
                movingPart = p;

                break;
            }
        }

        assertFalse("Failed to find a moving partition for cache2", movingPart == -1);

        TestRecordingCommunicationSpi clientSpi = TestRecordingCommunicationSpi.spi(client);

        // Hold the first mapping request of the tx: lock request for pessimistic, prepare request otherwise.
        clientSpi.blockMessages((node, msg) -> concurrency == PESSIMISTIC
            ? msg instanceof GridNearLockRequest
            : msg instanceof GridNearTxPrepareRequest);

        // Effectively final copies for use inside the lambda below.
        final int finalStablePart = stablePart;
        final int finalMovingPart = movingPart;

        IgniteInternalFuture<?> txFut = multithreadedAsync(() -> {
            try (Transaction tx = client.transactions().txStart(concurrency, isolation)) {
                // Will map on crd(order=1).
                client.cache(CACHE1).put(finalStablePart, 0);

                // Next request will remap to ideal topology, but it's not ready on other node except crd.
                client.cache(CACHE2).put(finalMovingPart, 0);

                tx.commit();
            }
        }, 1, "tx-thread");

        // Wait until all missing supply messages are blocked.
        assertTrue(GridTestUtils.waitForCondition(() -> leftVerParts.size() == PARTS_CNT - full.get().size(), 5_000));

        // Delay first lock request on late topology.
        clientSpi.waitForBlocked();

        // At this point only supply messages should be blocked.
        // Unblock to continue rebalance and trigger ideal topology switch.
        crdSpi.stopBlock(true, null, false, true);

        // Wait until ideal topology is ready on crd.
        crd.context().cache().context().exchange().affinityReadyFuture(idealVer).get(10_000);

        // Other node must wait for full message.
        assertFalse(GridTestUtils.waitForCondition(() -> grid(2).context().cache().context().exchange().affinityReadyFuture(idealVer).isDone(), 1_000));

        // Map on unstable topology (PME is in progress on other node).
        clientSpi.stopBlock();

        // Capture local transaction.
        IgniteInternalTx tx0 = client.context().cache().context().tm().activeTransactions().iterator().next();

        // Expected behavior: tx must hang (both pessimistic and optimistic) because topology is not ready.
        try {
            txFut.get(3_000);

            fail("TX must not complete");
        } catch (IgniteFutureTimeoutCheckedException ignored) {
            // Expected.
        }

        // Release the remaining blocked messages so that the tx can finish.
        crdSpi.stopBlock();

        txFut.get();

        // Check transaction map version. Should be mapped on ideal topology.
        assertEquals(idealVer, tx0.topologyVersionSnapshot());

        awaitPartitionMapExchange();

        checkFutures();
    } finally {
        stopAllGrids();
    }
}
Also used : AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) GridConcurrentSkipListSet(org.apache.ignite.internal.util.GridConcurrentSkipListSet) Set(java.util.Set) GridDhtPartitionSupplyMessage(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage) GridDhtPartitionsFullMessage(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsFullMessage) Message(org.apache.ignite.plugin.extensions.communication.Message) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) GridNearLockRequest(org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest) GridDhtPartitionExchangeId(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId) GridDhtPartitionsFullMessage(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsFullMessage) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) AtomicReference(java.util.concurrent.atomic.AtomicReference) GridDhtPartitionSupplyMessage(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage) TestRecordingCommunicationSpi(org.apache.ignite.internal.TestRecordingCommunicationSpi) Transaction(org.apache.ignite.transactions.Transaction) IgniteEx(org.apache.ignite.internal.IgniteEx) GridNearTxPrepareRequest(org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxPrepareRequest) GridConcurrentSkipListSet(org.apache.ignite.internal.util.GridConcurrentSkipListSet)

Aggregations

Set (java.util.Set)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 ClusterNode (org.apache.ignite.cluster.ClusterNode)1 IgniteEx (org.apache.ignite.internal.IgniteEx)1 IgniteFutureTimeoutCheckedException (org.apache.ignite.internal.IgniteFutureTimeoutCheckedException)1 TestRecordingCommunicationSpi (org.apache.ignite.internal.TestRecordingCommunicationSpi)1 AffinityAssignment (org.apache.ignite.internal.processors.affinity.AffinityAssignment)1 AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)1 GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId)1 GridDhtPartitionSupplyMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage)1 GridDhtPartitionsFullMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsFullMessage)1 GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology)1 GridNearLockRequest (org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest)1 GridNearTxPrepareRequest (org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxPrepareRequest)1 GridConcurrentSkipListSet (org.apache.ignite.internal.util.GridConcurrentSkipListSet)1 Message (org.apache.ignite.plugin.extensions.communication.Message)1 Transaction (org.apache.ignite.transactions.Transaction)1