Example 26 with GridDhtPartitionTopology

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

the class GridNearPessimisticTxPrepareFuture method preparePessimistic.

/**
 */
@SuppressWarnings("unchecked")
private void preparePessimistic() {
    // Non-mvcc implicit-single tx goes fast commit way.
    assert !tx.implicitSingle() || tx.txState().mvccEnabled();
    Map<UUID, GridDistributedTxMapping> mappings = new HashMap<>();
    AffinityTopologyVersion topVer = tx.topologyVersion();
    boolean hasNearCache = false;
    Map<UUID, Collection<UUID>> txNodes;
    if (tx.txState().mvccEnabled()) {
        Collection<GridDistributedTxMapping> mvccMappings = tx.implicitSingle() ? Collections.singleton(tx.mappings().singleMapping()) : tx.mappings().mappings();
        txNodes = new HashMap<>(mvccMappings.size());
        for (GridDistributedTxMapping m : mvccMappings) {
            mappings.put(m.primary().id(), m);
            txNodes.put(m.primary().id(), m.backups());
        }
    } else {
        GridDhtTxMapping txMapping = new GridDhtTxMapping();
        for (IgniteTxEntry txEntry : tx.allEntries()) {
            txEntry.clearEntryReadVersion();
            GridCacheContext cacheCtx = txEntry.context();
            if (cacheCtx.isNear())
                hasNearCache = true;
            List<ClusterNode> nodes;
            if (!cacheCtx.isLocal()) {
                GridDhtPartitionTopology top = cacheCtx.topology();
                nodes = top.nodes(cacheCtx.affinity().partition(txEntry.key()), topVer);
            } else
                nodes = cacheCtx.affinity().nodesByKey(txEntry.key(), topVer);
            if (F.isEmpty(nodes)) {
                onDone(new ClusterTopologyServerNotFoundException("Failed to map keys to nodes (partition " + "is not mapped to any node) [key=" + txEntry.key() + ", partition=" + cacheCtx.affinity().partition(txEntry.key()) + ", topVer=" + topVer + ']'));
                return;
            }
            ClusterNode primary = nodes.get(0);
            GridDistributedTxMapping nodeMapping = mappings.get(primary.id());
            if (nodeMapping == null)
                mappings.put(primary.id(), nodeMapping = new GridDistributedTxMapping(primary));
            txEntry.nodeId(primary.id());
            nodeMapping.add(txEntry);
            txMapping.addMapping(nodes);
        }
        txNodes = txMapping.transactionNodes();
    }
    tx.transactionNodes(txNodes);
    if (!hasNearCache)
        checkOnePhase(txNodes);
    long timeout = tx.remainingTime();
    if (timeout == -1) {
        onDone(new IgniteTxTimeoutCheckedException("Transaction timed out and was rolled back: " + tx));
        return;
    }
    int miniId = 0;
    for (final GridDistributedTxMapping m : mappings.values()) {
        final ClusterNode primary = m.primary();
        if (primary.isLocal()) {
            if (m.hasNearCacheEntries() && m.hasColocatedCacheEntries()) {
                GridNearTxPrepareRequest nearReq = createRequest(txNodes, m, timeout, m.nearEntriesReads(), m.nearEntriesWrites());
                prepareLocal(nearReq, m, ++miniId, true);
                GridNearTxPrepareRequest colocatedReq = createRequest(txNodes, m, timeout, m.colocatedEntriesReads(), m.colocatedEntriesWrites());
                prepareLocal(colocatedReq, m, ++miniId, false);
            } else {
                GridNearTxPrepareRequest req = createRequest(txNodes, m, timeout, m.reads(), m.writes());
                prepareLocal(req, m, ++miniId, m.hasNearCacheEntries());
            }
        } else {
            GridNearTxPrepareRequest req = createRequest(txNodes, m, timeout, m.reads(), m.writes());
            final MiniFuture fut = new MiniFuture(m, ++miniId);
            req.miniId(fut.futureId());
            add((IgniteInternalFuture) fut);
            try {
                cctx.io().send(primary, req, tx.ioPolicy());
                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("Near pessimistic prepare, sent request [txId=" + tx.nearXidVersion() + ", node=" + primary.id() + ']');
                }
            } catch (ClusterTopologyCheckedException e) {
                e.retryReadyFuture(cctx.nextAffinityReadyFuture(topVer));
                fut.onNodeLeft(e);
            } catch (IgniteCheckedException e) {
                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("Near pessimistic prepare, failed send request [txId=" + tx.nearXidVersion() + ", node=" + primary.id() + ", err=" + e + ']');
                }
                fut.onError(e);
                break;
            }
        }
    }
    markInitialized();
}
Also used : IgniteTxEntry(org.apache.ignite.internal.processors.cache.transactions.IgniteTxEntry) ClusterNode(org.apache.ignite.cluster.ClusterNode) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) HashMap(java.util.HashMap) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) GridDistributedTxMapping(org.apache.ignite.internal.processors.cache.distributed.GridDistributedTxMapping) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) GridDhtTxMapping(org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxMapping) ClusterTopologyServerNotFoundException(org.apache.ignite.internal.cluster.ClusterTopologyServerNotFoundException) Collection(java.util.Collection) IgniteTxTimeoutCheckedException(org.apache.ignite.internal.transactions.IgniteTxTimeoutCheckedException) UUID(java.util.UUID) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)
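
The heart of the mapping loop above is a three-step lookup: derive the key's partition from the cache affinity, ask the DHT topology for the owner list at the transaction's topology version, and take the first node as primary (the local-cache branch falls back to affinity directly). A minimal sketch of that lookup, assuming ignite-core internals are on the classpath; the helper class and method names are illustrative, not Ignite API:

import java.util.List;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.GridCacheContext;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology;

/** Illustrative helper: resolves the primary node for a key the way the mapping loop above does. */
class PrimaryResolver {
    /** @return Primary node for {@code key} at {@code topVer}, or {@code null} if the partition has no owners. */
    static ClusterNode primaryFor(GridCacheContext<?, ?> cacheCtx, Object key, AffinityTopologyVersion topVer) {
        // The affinity function maps the key to a partition number.
        int part = cacheCtx.affinity().partition(key);

        // The DHT topology returns the owner list for that partition at the given topology version,
        // with the primary node first.
        GridDhtPartitionTopology top = cacheCtx.topology();
        List<ClusterNode> nodes = top.nodes(part, topVer);

        return nodes == null || nodes.isEmpty() ? null : nodes.get(0);
    }
}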

Example 27 with GridDhtPartitionTopology

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

the class TxCrossCacheMapOnInvalidTopologyTest method doTestCrossCacheTxMapOnInvalidTopology.

/**
 * Test scenario: a cross-cache tx is started after a node has left in the middle of rebalance; the first cache is
 * fully rebalanced and the second is only partially rebalanced.
 *
 * The first cache map request triggers a client-compatible remap for pessimistic txs;
 * the second cache map request should use the new topology version.
 *
 * For an optimistic tx, remap is enforced if the transaction has more than one mapping or if all enlisted caches
 * have compatible assignments.
 *
 * Success: the tx finishes on the ideal topology version across all mapped nodes.
 *
 * @param concurrency Concurrency.
 * @param isolation Isolation.
 */
private void doTestCrossCacheTxMapOnInvalidTopology(TransactionConcurrency concurrency, TransactionIsolation isolation) throws Exception {
    try {
        IgniteEx crd = startGrid(0);
        IgniteEx g1 = startGrid(1);
        awaitPartitionMapExchange();
        IgniteEx client = startClientGrid("client");
        assertNotNull(client.cache(CACHE1));
        assertNotNull(client.cache(CACHE2));
        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE1)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++) streamer.addData(k, new byte[10]);
        }
        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE2)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++) streamer.addData(k, new byte[10]);
        }
        TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(crd);
        final AffinityTopologyVersion joinVer = new AffinityTopologyVersion(4, 0);
        AffinityTopologyVersion leftVer = new AffinityTopologyVersion(5, 0);
        AffinityTopologyVersion idealVer = new AffinityTopologyVersion(5, 1);
        AtomicReference<Set<Integer>> full = new AtomicReference<>();
        GridConcurrentSkipListSet<Integer> leftVerParts = new GridConcurrentSkipListSet<>();
        crdSpi.blockMessages((node, m) -> {
            if (m instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage msg = (GridDhtPartitionSupplyMessage) m;
                // Allow full rebalance for cache 1 and system cache.
                if (msg.groupId() != CU.cacheId(CACHE2))
                    return false;
                // Allow only first batch for cache 2.
                if (msg.topologyVersion().equals(joinVer)) {
                    if (full.get() == null) {
                        Map<Integer, Long> last = U.field(msg, "last");
                        full.set(last.keySet());
                        return false;
                    }
                    return true;
                }
                if (msg.topologyVersion().equals(leftVer)) {
                    Map<Integer, Long> last = U.field(msg, "last");
                    leftVerParts.addAll(last.keySet());
                    return true;
                }
            } else if (m instanceof GridDhtPartitionsFullMessage) {
                GridDhtPartitionsFullMessage msg = (GridDhtPartitionsFullMessage) m;
                // Delay full message for ideal topology switch.
                GridDhtPartitionExchangeId exchId = msg.exchangeId();
                if (exchId != null && exchId.topologyVersion().equals(idealVer))
                    return true;
            }
            return false;
        });
        TestRecordingCommunicationSpi g1Spi = TestRecordingCommunicationSpi.spi(g1);
        g1Spi.blockMessages((node, msg) -> {
            if (msg instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage m = (GridDhtPartitionSupplyMessage) msg;
                return m.groupId() == CU.cacheId(CACHE2);
            }
            return false;
        });
        startGrid(2);
        crdSpi.waitForBlocked();
        g1Spi.waitForBlocked();
        // Wait partial owning.
        assertTrue("Timed out while waiting for rebalance", GridTestUtils.waitForCondition(() -> {
            // Await full rebalance for cache 1.
            GridDhtPartitionTopology top0 = grid(2).cachex(CACHE1).context().topology();
            for (int p = 0; p < PARTS_CNT; p++) {
                if (top0.localPartition(p).state() != OWNING)
                    return false;
            }
            // Await partial rebalance for cache 2.
            GridDhtPartitionTopology top1 = grid(2).cachex(CACHE2).context().topology();
            for (Integer part : full.get()) {
                if (top1.localPartition(part).state() != OWNING)
                    return false;
            }
            return true;
        }, 10_000));
        // At this point cache 1 is fully rebalanced and cache 2 is partially rebalanced.
        // Stop supplier in the middle of rebalance.
        g1.close();
        // Wait for topologies and calculate required partitions.
        grid(0).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(0).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        AffinityAssignment assignment0 = grid(0).cachex(CACHE1).context().affinity().assignment(leftVer);
        AffinityAssignment assignment = grid(0).cachex(CACHE2).context().affinity().assignment(leftVer);
        // Search for a partition with incompatible assignment.
        // Partition for cache1 which is mapped for both late and ideal topologies to the same primary.
        int stablePart = -1;
        // Partition for cache2 which is mapped for both late and ideal topologies on different primaries.
        int movingPart = -1;
        for (int p = 0; p < assignment0.assignment().size(); p++) {
            List<ClusterNode> curr = assignment.assignment().get(p);
            List<ClusterNode> ideal = assignment.idealAssignment().get(p);
            if (curr.equals(ideal) && curr.get(0).order() == 1) {
                stablePart = p;
                break;
            }
        }
        assertFalse(stablePart == -1);
        for (int p = 0; p < assignment.assignment().size(); p++) {
            List<ClusterNode> curr = assignment.assignment().get(p);
            List<ClusterNode> ideal = assignment.idealAssignment().get(p);
            if (!curr.equals(ideal) && curr.get(0).order() == 1) {
                movingPart = p;
                break;
            }
        }
        assertFalse(movingPart == -1);
        TestRecordingCommunicationSpi.spi(client).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {

            @Override
            public boolean apply(ClusterNode node, Message msg) {
                if (concurrency == PESSIMISTIC)
                    return msg instanceof GridNearLockRequest;
                else
                    return msg instanceof GridNearTxPrepareRequest;
            }
        });
        final int finalStablePart = stablePart;
        final int finalMovingPart = movingPart;
        IgniteInternalFuture<?> txFut = multithreadedAsync(() -> {
            try (Transaction tx = client.transactions().txStart(concurrency, isolation)) {
                // Will map on crd(order=1).
                client.cache(CACHE1).put(finalStablePart, 0);
                // Next request will remap to ideal topology, but it's not ready on other node except crd.
                client.cache(CACHE2).put(finalMovingPart, 0);
                tx.commit();
            }
        }, 1, "tx-thread");
        // Wait until all missing supply messages are blocked.
        assertTrue(GridTestUtils.waitForCondition(() -> leftVerParts.size() == PARTS_CNT - full.get().size(), 5_000));
        // Delay first lock request on late topology.
        TestRecordingCommunicationSpi.spi(client).waitForBlocked();
        // At this point only supply messages should be blocked.
        // Unblock to continue rebalance and trigger ideal topology switch.
        crdSpi.stopBlock(true, null, false, true);
        // Wait until ideal topology is ready on crd.
        crd.context().cache().context().exchange().affinityReadyFuture(idealVer).get(10_000);
        // Other node must wait for full message.
        assertFalse(GridTestUtils.waitForCondition(() -> grid(2).context().cache().context().exchange().affinityReadyFuture(idealVer).isDone(), 1_000));
        // Map on unstable topology (PME is in progress on other node).
        TestRecordingCommunicationSpi.spi(client).stopBlock();
        // Capture local transaction.
        IgniteInternalTx tx0 = client.context().cache().context().tm().activeTransactions().iterator().next();
        // Expected behavior: tx must hang (both pessimistic and optimistic) because topology is not ready.
        try {
            txFut.get(3_000);
            fail("TX must not complete");
        } catch (IgniteFutureTimeoutCheckedException e) {
            // Expected.
        }
        crdSpi.stopBlock();
        txFut.get();
        // Check transaction map version. Should be mapped on ideal topology.
        assertEquals(tx0.topologyVersionSnapshot(), idealVer);
        awaitPartitionMapExchange();
        checkFutures();
    } finally {
        stopAllGrids();
    }
}
Also used : AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) GridConcurrentSkipListSet(org.apache.ignite.internal.util.GridConcurrentSkipListSet) Set(java.util.Set) GridDhtPartitionSupplyMessage(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage) GridDhtPartitionsFullMessage(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsFullMessage) Message(org.apache.ignite.plugin.extensions.communication.Message) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) GridNearLockRequest(org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest) GridDhtPartitionExchangeId(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) AtomicReference(java.util.concurrent.atomic.AtomicReference) TestRecordingCommunicationSpi(org.apache.ignite.internal.TestRecordingCommunicationSpi) Transaction(org.apache.ignite.transactions.Transaction) IgniteEx(org.apache.ignite.internal.IgniteEx) GridNearTxPrepareRequest(org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxPrepareRequest)
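
The "wait partial owning" condition above reduces to polling partition states through a node's DHT topology. A minimal sketch of that polling as a reusable helper, assuming a started IgniteEx node, the test utilities used above, and that GridDhtPartitionState resides in the same topology package as GridDhtLocalPartition; the helper name is illustrative:

import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology;
import org.apache.ignite.testframework.GridTestUtils;

// Assumed import path, mirroring the topology package used in the examples above.
import static org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING;

/** Illustrative helper: waits until every local partition of the given cache is OWNING on the given node. */
class OwnershipAwait {
    static boolean awaitAllOwning(IgniteEx node, String cacheName, int parts, long timeoutMs) throws Exception {
        GridDhtPartitionTopology top = node.cachex(cacheName).context().topology();

        return GridTestUtils.waitForCondition(() -> {
            for (int p = 0; p < parts; p++) {
                // localPartition() may return null if the partition was never created locally.
                GridDhtLocalPartition part = top.localPartition(p);

                if (part == null || part.state() != OWNING)
                    return false;
            }
            return true;
        }, timeoutMs);
    }
}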

Example 28 with GridDhtPartitionTopology

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

the class IgniteCacheClientNodePartitionsExchangeTest method waitForTopologyUpdate.

/**
 * @param expNodes Expected number of nodes.
 * @param topVer Expected topology version.
 * @throws Exception If failed.
 */
private void waitForTopologyUpdate(int expNodes, final AffinityTopologyVersion topVer) throws Exception {
    List<Ignite> nodes = G.allGrids();
    assertEquals(expNodes, nodes.size());
    for (Ignite ignite : nodes) {
        final IgniteKernal kernal = (IgniteKernal) ignite;
        GridTestUtils.waitForCondition(new GridAbsPredicate() {

            @Override
            public boolean apply() {
                return topVer.equals(kernal.context().cache().context().exchange().readyAffinityVersion());
            }
        }, 10_000);
        assertEquals("Unexpected affinity version for " + ignite.name(), topVer, kernal.context().cache().context().exchange().readyAffinityVersion());
    }
    Iterator<Ignite> it = nodes.iterator();
    Ignite ignite0 = it.next();
    Affinity<Integer> aff0 = ignite0.affinity(DEFAULT_CACHE_NAME);
    while (it.hasNext()) {
        Ignite ignite = it.next();
        Affinity<Integer> aff = ignite.affinity(DEFAULT_CACHE_NAME);
        assertEquals(aff0.partitions(), aff.partitions());
        for (int part = 0; part < aff.partitions(); part++) assertEquals(aff0.mapPartitionToPrimaryAndBackups(part), aff.mapPartitionToPrimaryAndBackups(part));
    }
    for (Ignite ignite : nodes) {
        final IgniteKernal kernal = (IgniteKernal) ignite;
        for (IgniteInternalCache cache : kernal.context().cache().caches()) {
            GridDhtPartitionTopology top = cache.context().topology();
            waitForReadyTopology(top, topVer);
            assertEquals("Unexpected topology version [node=" + ignite.name() + ", cache=" + cache.name() + ']', topVer, top.readyTopologyVersion());
        }
    }
    awaitPartitionMapExchange();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteKernal(org.apache.ignite.internal.IgniteKernal) GridAbsPredicate(org.apache.ignite.internal.util.lang.GridAbsPredicate) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) IgniteInternalCache(org.apache.ignite.internal.processors.cache.IgniteInternalCache) Ignite(org.apache.ignite.Ignite)
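
The per-node checks above combine two views of the same thing: the exchange manager's ready affinity version and each cache topology's ready topology version. A minimal sketch that folds them into one predicate, using only calls that appear in the example; the class and method names are illustrative:

import org.apache.ignite.Ignite;
import org.apache.ignite.internal.IgniteKernal;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.IgniteInternalCache;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology;

/** Illustrative predicate: true when the node and all its caches report the expected topology version. */
class TopologyVersionCheck {
    @SuppressWarnings("rawtypes")
    static boolean allCachesOnVersion(Ignite ignite, AffinityTopologyVersion expVer) {
        IgniteKernal kernal = (IgniteKernal) ignite;

        // The exchange manager tracks the latest affinity version for which PME finished on this node.
        if (!expVer.equals(kernal.context().cache().context().exchange().readyAffinityVersion()))
            return false;

        // Each cache's DHT topology exposes the version it is currently initialized for.
        for (IgniteInternalCache cache : kernal.context().cache().caches()) {
            GridDhtPartitionTopology top = cache.context().topology();

            if (!expVer.equals(top.readyTopologyVersion()))
                return false;
        }

        return true;
    }
}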

Example 29 with GridDhtPartitionTopology

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

the class IgnitePdsPartitionsStateRecoveryTest method testPartitionsStateConsistencyAfterRecoveryNoCheckpoints.

/**
 * Test checks that partition state is recovered properly if only logical updates exist.
 *
 * @throws Exception If failed.
 */
@Test
public void testPartitionsStateConsistencyAfterRecoveryNoCheckpoints() throws Exception {
    Assume.assumeFalse("https://issues.apache.org/jira/browse/IGNITE-10603", MvccFeatureChecker.forcedMvcc());
    IgniteEx ignite = startGrid(0);
    ignite.cluster().active(true);
    IgniteCache<Object, Object> cache = ignite.cache(DEFAULT_CACHE_NAME);
    forceCheckpoint();
    for (int key = 0; key < 4096; key++) {
        int[] payload = new int[4096];
        Arrays.fill(payload, key);
        cache.put(key, payload);
    }
    GridDhtPartitionTopology topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse(topology.hasMovingPartitions());
    log.info("Stopping grid...");
    stopGrid(0);
    ignite = startGrid(0);
    awaitPartitionMapExchange();
    topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse("Node restored moving partitions after join to topology.", topology.hasMovingPartitions());
}
Also used : GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) IgniteEx(org.apache.ignite.internal.IgniteEx) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 30 with GridDhtPartitionTopology

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

the class IgnitePdsPartitionsStateRecoveryTest method testPartitionsStateConsistencyAfterRecovery.

/**
 * Test checks that partition state is recovered properly if last checkpoint was skipped and there are logical updates to apply.
 *
 * @throws Exception If failed.
 */
@Test
public void testPartitionsStateConsistencyAfterRecovery() throws Exception {
    IgniteEx ignite = startGrid(0);
    ignite.cluster().active(true);
    IgniteCache<Object, Object> cache = ignite.cache(DEFAULT_CACHE_NAME);
    for (int key = 0; key < 4096; key++) cache.put(key, key);
    forceCheckpoint();
    for (int key = 0; key < 4096; key++) {
        int[] payload = new int[4096];
        Arrays.fill(payload, key);
        cache.put(key, payload);
    }
    GridDhtPartitionTopology topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse(topology.hasMovingPartitions());
    log.info("Stopping grid...");
    stopGrid(0);
    ignite = startGrid(0);
    awaitPartitionMapExchange();
    topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse("Node restored moving partitions after join to topology.", topology.hasMovingPartitions());
}
Also used : GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) IgniteEx(org.apache.ignite.internal.IgniteEx) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)
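
Both recovery tests above end with the same post-restart assertion: after the node rejoins, the cache's DHT topology must not report MOVING partitions. A minimal sketch of that check as a standalone helper, assuming a started IgniteEx node; the class and method names are illustrative:

import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology;

/** Illustrative helper: verifies that no partition came back in MOVING state after a restart. */
class RecoveryStateCheck {
    static void assertNoMovingPartitions(IgniteEx node, String cacheName) {
        GridDhtPartitionTopology top = node.cachex(cacheName).context().topology();

        // A correctly recovered node owns its partitions; MOVING here means partition state was lost on restart.
        if (top.hasMovingPartitions())
            throw new AssertionError("Node restored moving partitions after restart [cache=" + cacheName + ']');
    }
}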

Aggregations

GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) 64
AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) 24
GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) 21
ClusterNode (org.apache.ignite.cluster.ClusterNode) 20
Map (java.util.Map) 18
IgniteCheckedException (org.apache.ignite.IgniteCheckedException) 18
CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext) 17
HashMap (java.util.HashMap) 15
ArrayList (java.util.ArrayList) 14
Ignite (org.apache.ignite.Ignite) 14
GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) 12
Test (org.junit.Test) 12
IgniteEx (org.apache.ignite.internal.IgniteEx) 11
UUID (java.util.UUID) 10
IgniteKernal (org.apache.ignite.internal.IgniteKernal) 10
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 9
IgniteException (org.apache.ignite.IgniteException) 9
GridCacheContext (org.apache.ignite.internal.processors.cache.GridCacheContext) 9
GridDhtPartitionMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) 9
HashSet (java.util.HashSet) 8