Example 6 with GridDhtPartitionExchangeId

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId in project ignite by apache.

Source: class TxCrossCacheMapOnInvalidTopologyTest, method doTestCrossCacheTxMapOnInvalidTopology.

/**
 * Test scenario: a cross-cache tx is started while a node has left in the middle of rebalance; the first cache is
 * fully rebalanced and the second is only partially rebalanced.
 *
 * The first cache map request triggers a client-compatible remap for pessimistic txs; the second cache map request
 * should use the new topology version.
 *
 * For optimistic txs, a remap is enforced if the transaction has more than one mapping or if all enlisted caches
 * have compatible assignments.
 *
 * Success: the tx is finished on the ideal topology version across all mapped nodes.
 *
 * @param concurrency Concurrency.
 * @param isolation Isolation.
 */
private void doTestCrossCacheTxMapOnInvalidTopology(TransactionConcurrency concurrency, TransactionIsolation isolation) throws Exception {
    try {
        IgniteEx crd = startGrid(0);
        IgniteEx g1 = startGrid(1);
        awaitPartitionMapExchange();
        IgniteEx client = startClientGrid("client");
        assertNotNull(client.cache(CACHE1));
        assertNotNull(client.cache(CACHE2));
        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE1)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++)
                streamer.addData(k, new byte[10]);
        }
        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE2)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++)
                streamer.addData(k, new byte[10]);
        }
        TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(crd);
        final AffinityTopologyVersion joinVer = new AffinityTopologyVersion(4, 0);
        AffinityTopologyVersion leftVer = new AffinityTopologyVersion(5, 0);
        AffinityTopologyVersion idealVer = new AffinityTopologyVersion(5, 1);
        AtomicReference<Set<Integer>> full = new AtomicReference<>();
        GridConcurrentSkipListSet<Integer> leftVerParts = new GridConcurrentSkipListSet<>();
        crdSpi.blockMessages((node, m) -> {
            if (m instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage msg = (GridDhtPartitionSupplyMessage) m;
                // Allow full rebalance for cache 1 and system cache.
                if (msg.groupId() != CU.cacheId(CACHE2))
                    return false;
                // Allow only first batch for cache 2.
                if (msg.topologyVersion().equals(joinVer)) {
                    if (full.get() == null) {
                        Map<Integer, Long> last = U.field(msg, "last");
                        full.set(last.keySet());
                        return false;
                    }
                    return true;
                }
                if (msg.topologyVersion().equals(leftVer)) {
                    Map<Integer, Long> last = U.field(msg, "last");
                    leftVerParts.addAll(last.keySet());
                    return true;
                }
            } else if (m instanceof GridDhtPartitionsFullMessage) {
                GridDhtPartitionsFullMessage msg = (GridDhtPartitionsFullMessage) m;
                // Delay full message for ideal topology switch.
                GridDhtPartitionExchangeId exchId = msg.exchangeId();
                if (exchId != null && exchId.topologyVersion().equals(idealVer))
                    return true;
            }
            return false;
        });
        TestRecordingCommunicationSpi g1Spi = TestRecordingCommunicationSpi.spi(g1);
        g1Spi.blockMessages((node, msg) -> {
            if (msg instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage m = (GridDhtPartitionSupplyMessage) msg;
                return m.groupId() == CU.cacheId(CACHE2);
            }
            return false;
        });
        startGrid(2);
        crdSpi.waitForBlocked();
        g1Spi.waitForBlocked();
        // Wait partial owning.
        assertTrue("Timed out while waiting for rebalance", GridTestUtils.waitForCondition(() -> {
            // Await full rebalance for cache 1.
            GridDhtPartitionTopology top0 = grid(2).cachex(CACHE1).context().topology();
            for (int p = 0; p < PARTS_CNT; p++) {
                if (top0.localPartition(p).state() != OWNING)
                    return false;
            }
            // Await partial rebalance for cache 2.
            GridDhtPartitionTopology top1 = grid(2).cachex(CACHE2).context().topology();
            for (Integer part : full.get()) {
                if (top1.localPartition(part).state() != OWNING)
                    return false;
            }
            return true;
        }, 10_000));
        // At this point cache 1 is fully rebalanced and cache 2 is partially rebalanced.
        // Stop supplier in the middle of rebalance.
        g1.close();
        // Wait for topologies and calculate required partitions.
        grid(0).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(0).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        AffinityAssignment assignment0 = grid(0).cachex(CACHE1).context().affinity().assignment(leftVer);
        AffinityAssignment assignment = grid(0).cachex(CACHE2).context().affinity().assignment(leftVer);
        // Search for a partition with incompatible assignment.
        // Partition for cache1 which is mapped for both late and ideal topologies to the same primary.
        int stablePart = -1;
        // Partition for cache2 which is mapped for both late and ideal topologies on different primaries.
        int movingPart = -1;
        for (int p = 0; p < assignment0.assignment().size(); p++) {
            List<ClusterNode> curr = assignment0.assignment().get(p);
            List<ClusterNode> ideal = assignment0.idealAssignment().get(p);
            if (curr.equals(ideal) && curr.get(0).order() == 1) {
                stablePart = p;
                break;
            }
        }
        assertFalse(stablePart == -1);
        for (int p = 0; p < assignment.assignment().size(); p++) {
            List<ClusterNode> curr = assignment.assignment().get(p);
            List<ClusterNode> ideal = assignment.idealAssignment().get(p);
            if (!curr.equals(ideal) && curr.get(0).order() == 1) {
                movingPart = p;
                break;
            }
        }
        assertFalse(movingPart == -1);
        TestRecordingCommunicationSpi.spi(client).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
            @Override
            public boolean apply(ClusterNode node, Message msg) {
                if (concurrency == PESSIMISTIC)
                    return msg instanceof GridNearLockRequest;
                else
                    return msg instanceof GridNearTxPrepareRequest;
            }
        });
        final int finalStablePart = stablePart;
        final int finalMovingPart = movingPart;
        IgniteInternalFuture<?> txFut = multithreadedAsync(() -> {
            try (Transaction tx = client.transactions().txStart(concurrency, isolation)) {
                // Will map on crd(order=1).
                client.cache(CACHE1).put(finalStablePart, 0);
                // Next request will remap to ideal topology, but it's not ready on other node except crd.
                client.cache(CACHE2).put(finalMovingPart, 0);
                tx.commit();
            }
        }, 1, "tx-thread");
        // Wait until all missing supply messages are blocked.
        assertTrue(GridTestUtils.waitForCondition(() -> leftVerParts.size() == PARTS_CNT - full.get().size(), 5_000));
        // Delay first lock request on late topology.
        TestRecordingCommunicationSpi.spi(client).waitForBlocked();
        // At this point only supply messages should be blocked.
        // Unblock to continue rebalance and trigger ideal topology switch.
        crdSpi.stopBlock(true, null, false, true);
        // Wait until ideal topology is ready on crd.
        crd.context().cache().context().exchange().affinityReadyFuture(idealVer).get(10_000);
        // Other node must wait for full message.
        assertFalse(GridTestUtils.waitForCondition(() -> grid(2).context().cache().context().exchange().affinityReadyFuture(idealVer).isDone(), 1_000));
        // Map on unstable topology (PME is in progress on other node).
        TestRecordingCommunicationSpi.spi(client).stopBlock();
        // Capture local transaction.
        IgniteInternalTx tx0 = client.context().cache().context().tm().activeTransactions().iterator().next();
        // Expected behavior: tx must hang (both pessimistic and optimistic) because topology is not ready.
        try {
            txFut.get(3_000);
            fail("TX must not complete");
        } catch (IgniteFutureTimeoutCheckedException e) {
            // Expected.
        }
        crdSpi.stopBlock();
        txFut.get();
        // Check transaction map version. Should be mapped on ideal topology.
        assertEquals(tx0.topologyVersionSnapshot(), idealVer);
        awaitPartitionMapExchange();
        checkFutures();
    } finally {
        stopAllGrids();
    }
}
Also used:
    AffinityAssignment (org.apache.ignite.internal.processors.affinity.AffinityAssignment)
    AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)
    AtomicReference (java.util.concurrent.atomic.AtomicReference)
    ClusterNode (org.apache.ignite.cluster.ClusterNode)
    GridConcurrentSkipListSet (org.apache.ignite.internal.util.GridConcurrentSkipListSet)
    GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId)
    GridDhtPartitionSupplyMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage)
    GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology)
    GridDhtPartitionsFullMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsFullMessage)
    GridNearLockRequest (org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest)
    GridNearTxPrepareRequest (org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxPrepareRequest)
    IgniteEx (org.apache.ignite.internal.IgniteEx)
    IgniteFutureTimeoutCheckedException (org.apache.ignite.internal.IgniteFutureTimeoutCheckedException)
    Message (org.apache.ignite.plugin.extensions.communication.Message)
    Set (java.util.Set)
    TestRecordingCommunicationSpi (org.apache.ignite.internal.TestRecordingCommunicationSpi)
    Transaction (org.apache.ignite.transactions.Transaction)
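
The block/wait/release cycle with TestRecordingCommunicationSpi is the backbone of this test and is reusable on its own. Below is a minimal sketch of the pattern in isolation, assuming an Ignite test class whose startGrid helper configures TestRecordingCommunicationSpi on every node (the method name and grid indices are illustrative):

public void blockSupplyMessagesSketch() throws Exception {
    IgniteEx node = startGrid(0);

    TestRecordingCommunicationSpi spi = TestRecordingCommunicationSpi.spi(node);

    // Hold all rebalance supply messages instead of sending them.
    spi.blockMessages((dest, msg) -> msg instanceof GridDhtPartitionSupplyMessage);

    // A joining node triggers rebalance, which produces supply messages.
    startGrid(1);

    // Park until at least one matching message has been intercepted.
    spi.waitForBlocked();

    // ... assert on the frozen, partially rebalanced state here ...

    // Release the held messages and stop intercepting.
    spi.stopBlock();
}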

Example 7 with GridDhtPartitionExchangeId

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId in project ignite by apache.

Source: class IgniteClusterSnapshotSelfTest, method testClusterSnapshotOnMovingPartitionsCoordinatorLeft.

/**
 * @throws Exception If failed.
 */
@Test
public void testClusterSnapshotOnMovingPartitionsCoordinatorLeft() throws Exception {
    startGridsWithCache(2, dfltCacheCfg, CACHE_KEYS_RANGE);
    for (Ignite grid : G.allGrids()) {
        TestRecordingCommunicationSpi.spi(grid).blockMessages((node, msg) -> msg instanceof GridDhtPartitionSupplyMessage);
    }
    Ignite ignite = startGrid(2);
    ignite.cluster().setBaselineTopology(ignite.cluster().topologyVersion());
    TestRecordingCommunicationSpi.spi(grid(0)).waitForBlocked();
    CountDownLatch latch = new CountDownLatch(G.allGrids().size());
    IgniteInternalFuture<?> stopFut = GridTestUtils.runAsync(() -> {
        try {
            U.await(latch);
            stopGrid(0);
        } catch (IgniteInterruptedCheckedException e) {
            fail("Must not fail here: " + e.getMessage());
        }
    });
    Queue<T2<GridDhtPartitionExchangeId, Boolean>> exchFuts = new ConcurrentLinkedQueue<>();
    for (Ignite ig : G.allGrids()) {
        ((IgniteEx) ig).context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() {
            /** {@inheritDoc} */
            @Override
            public void onInitBeforeTopologyLock(GridDhtPartitionsExchangeFuture fut) {
                if (!(fut.firstEvent() instanceof DiscoveryCustomEvent))
                    return;
                try {
                    exchFuts.add(new T2<>(fut.exchangeId(), fut.rebalanced()));
                    latch.countDown();
                    stopFut.get();
                } catch (IgniteCheckedException e) {
                    U.log(log, "Interrupted on coordinator: " + e.getMessage());
                }
            }
        });
    }
    IgniteFuture<Void> fut = ignite.snapshot().createSnapshot(SNAPSHOT_NAME);
    stopFut.get();
    assertThrowsAnyCause(log, fut::get, IgniteException.class, "Snapshot creation has been finished with an error");
    assertEquals("Snapshot futures expected: " + exchFuts, 3, exchFuts.size());
    for (T2<GridDhtPartitionExchangeId, Boolean> exch : exchFuts)
        assertFalse("Snapshot `rebalanced` must be false with moving partitions: " + exch.get1(), exch.get2());
}
Also used:
    AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)
    ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)
    CountDownLatch (java.util.concurrent.CountDownLatch)
    DiscoveryCustomEvent (org.apache.ignite.internal.events.DiscoveryCustomEvent)
    GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId)
    GridDhtPartitionSupplyMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage)
    GridDhtPartitionsExchangeFuture (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture)
    Ignite (org.apache.ignite.Ignite)
    IgniteCheckedException (org.apache.ignite.IgniteCheckedException)
    IgniteEx (org.apache.ignite.internal.IgniteEx)
    IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException)
    PartitionsExchangeAware (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware)
    T2 (org.apache.ignite.internal.util.typedef.T2)
    Test (org.junit.Test)
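
The exchange hook registration used in this test is a general observation point for PME events. A minimal sketch of a logging PartitionsExchangeAware component, assuming ignite is a started IgniteEx instance (the method name and log output are illustrative):

void registerExchangeHook(IgniteEx ignite) {
    ignite.context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() {
        /** Invoked before the exchange future takes the topology write lock. */
        @Override public void onInitBeforeTopologyLock(GridDhtPartitionsExchangeFuture fut) {
            System.out.println("Exchange starting [id=" + fut.exchangeId() + ", rebalanced=" + fut.rebalanced() + ']');
        }
    });
}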

Example 8 with GridDhtPartitionExchangeId

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId in project ignite by apache.

Source: class GridCachePartitionExchangeManager, method onKernalStart0.

/** {@inheritDoc} */
@Override
protected void onKernalStart0(boolean reconnect) throws IgniteCheckedException {
    super.onKernalStart0(reconnect);
    ClusterNode loc = cctx.localNode();
    long startTime = loc.metrics().getStartTime();
    assert startTime > 0;
    // Generate dummy discovery event for local node joining.
    T2<DiscoveryEvent, DiscoCache> locJoin = cctx.discovery().localJoin();
    DiscoveryEvent discoEvt = locJoin.get1();
    DiscoCache discoCache = locJoin.get2();
    GridDhtPartitionExchangeId exchId = initialExchangeId();
    GridDhtPartitionsExchangeFuture fut = exchangeFuture(exchId, discoEvt, discoCache, null, null);
    if (reconnect)
        reconnectExchangeFut = new GridFutureAdapter<>();
    exchWorker.addFirstExchangeFuture(fut);
    if (!cctx.kernalContext().clientNode()) {
        for (int cnt = 0; cnt < cctx.gridConfig().getRebalanceThreadPoolSize(); cnt++) {
            final int idx = cnt;
            cctx.io().addOrderedHandler(rebalanceTopic(cnt), new CI2<UUID, GridCacheMessage>() {
                @Override
                public void apply(final UUID id, final GridCacheMessage m) {
                    if (!enterBusy())
                        return;
                    try {
                        GridCacheContext cacheCtx = cctx.cacheContext(m.cacheId);
                        if (cacheCtx != null) {
                            if (m instanceof GridDhtPartitionSupplyMessage)
                                cacheCtx.preloader().handleSupplyMessage(idx, id, (GridDhtPartitionSupplyMessage) m);
                            else if (m instanceof GridDhtPartitionDemandMessage)
                                cacheCtx.preloader().handleDemandMessage(idx, id, (GridDhtPartitionDemandMessage) m);
                            else
                                U.error(log, "Unsupported message type: " + m.getClass().getName());
                        }
                    } finally {
                        leaveBusy();
                    }
                }
            });
        }
    }
    new IgniteThread(cctx.igniteInstanceName(), "exchange-worker", exchWorker).start();
    if (reconnect) {
        fut.listen(new CI1<IgniteInternalFuture<AffinityTopologyVersion>>() {
            @Override
            public void apply(IgniteInternalFuture<AffinityTopologyVersion> fut) {
                try {
                    fut.get();
                    for (GridCacheContext cacheCtx : cctx.cacheContexts())
                        cacheCtx.preloader().onInitialExchangeComplete(null);
                    reconnectExchangeFut.onDone();
                } catch (IgniteCheckedException e) {
                    for (GridCacheContext cacheCtx : cctx.cacheContexts())
                        cacheCtx.preloader().onInitialExchangeComplete(e);
                    reconnectExchangeFut.onDone(e);
                }
            }
        });
    } else {
        if (log.isDebugEnabled())
            log.debug("Beginning to wait on local exchange future: " + fut);
        boolean first = true;
        while (true) {
            try {
                fut.get(cctx.preloadExchangeTimeout());
                break;
            } catch (IgniteFutureTimeoutCheckedException ignored) {
                if (first) {
                    U.warn(log, "Failed to wait for initial partition map exchange. " + "Possible reasons are: " + U.nl() + "  ^-- Transactions in deadlock." + U.nl() + "  ^-- Long running transactions (ignore if this is the case)." + U.nl() + "  ^-- Unreleased explicit locks.");
                    first = false;
                } else
                    U.warn(log, "Still waiting for initial partition map exchange [fut=" + fut + ']');
            } catch (IgniteNeedReconnectException e) {
                throw e;
            } catch (Exception e) {
                if (fut.reconnectOnError(e))
                    throw new IgniteNeedReconnectException(cctx.localNode(), e);
                throw e;
            }
        }
        AffinityTopologyVersion nodeStartVer = new AffinityTopologyVersion(discoEvt.topologyVersion(), 0);
        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
            if (nodeStartVer.equals(cacheCtx.startTopologyVersion()))
                cacheCtx.preloader().onInitialExchangeComplete(null);
        }
        if (log.isDebugEnabled())
            log.debug("Finished waiting for initial exchange: " + fut.exchangeId());
    }
}
Also used:
    AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)
    ClusterNode (org.apache.ignite.cluster.ClusterNode)
    ClusterTopologyCheckedException (org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)
    DiscoCache (org.apache.ignite.internal.managers.discovery.DiscoCache)
    DiscoveryEvent (org.apache.ignite.events.DiscoveryEvent)
    GridDhtPartitionDemandMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemandMessage)
    GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId)
    GridDhtPartitionSupplyMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage)
    GridDhtPartitionsExchangeFuture (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture)
    GridFutureAdapter (org.apache.ignite.internal.util.future.GridFutureAdapter)
    IgniteCheckedException (org.apache.ignite.IgniteCheckedException)
    IgniteClientDisconnectedCheckedException (org.apache.ignite.internal.IgniteClientDisconnectedCheckedException)
    IgniteFutureTimeoutCheckedException (org.apache.ignite.internal.IgniteFutureTimeoutCheckedException)
    IgniteInternalFuture (org.apache.ignite.internal.IgniteInternalFuture)
    IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException)
    IgniteNeedReconnectException (org.apache.ignite.internal.IgniteNeedReconnectException)
    IgniteThread (org.apache.ignite.thread.IgniteThread)
    UUID (java.util.UUID)
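
The timeout-and-warn loop at the end of onKernalStart0 is a general pattern for diagnosing stuck internal futures: wait in bounded slices and log progress instead of blocking indefinitely. A minimal sketch under the assumption that the future's get(long) throws IgniteFutureTimeoutCheckedException on timeout, as in the code above (the helper name and messages are illustrative):

static <T> T awaitWithWarnings(IgniteInternalFuture<T> fut, long sliceMs) throws IgniteCheckedException {
    boolean first = true;

    while (true) {
        try {
            // Returns the result, or throws if the slice expires before completion.
            return fut.get(sliceMs);
        }
        catch (IgniteFutureTimeoutCheckedException ignored) {
            // Slice expired: emit a diagnostic and keep waiting.
            System.err.println((first ? "Future not completed yet, waiting: " : "Still waiting for future: ") + fut);

            first = false;
        }
    }
}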

Example 9 with GridDhtPartitionExchangeId

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId in project ignite by apache.

Source: class GridDhtPartitionTopologyImpl, method initPartitions0.

/**
 * @param exchFut Exchange future.
 * @param updateSeq Update sequence.
 */
private void initPartitions0(GridDhtPartitionsExchangeFuture exchFut, long updateSeq) {
    ClusterNode loc = cctx.localNode();
    ClusterNode oldest = discoCache.oldestAliveServerNodeWithCache();
    GridDhtPartitionExchangeId exchId = exchFut.exchangeId();
    assert topVer.equals(exchFut.topologyVersion()) : "Invalid topology [topVer=" + topVer + ", cache=" + cctx.name() + ", futVer=" + exchFut.topologyVersion() + ", fut=" + exchFut + ']';
    assert cctx.affinity().affinityTopologyVersion().equals(exchFut.topologyVersion()) : "Invalid affinity [topVer=" + cctx.affinity().affinityTopologyVersion() + ", cache=" + cctx.name() + ", futVer=" + exchFut.topologyVersion() + ", fut=" + exchFut + ']';
    List<List<ClusterNode>> aff = cctx.affinity().assignments(exchFut.topologyVersion());
    int num = cctx.affinity().partitions();
    if (cctx.rebalanceEnabled()) {
        boolean added = exchFut.cacheAddedOnExchange(cctx.cacheId(), cctx.receivedFrom());
        boolean first = (loc.equals(oldest) && loc.id().equals(exchId.nodeId()) && exchId.isJoined()) || added;
        if (first) {
            assert exchId.isJoined() || added;
            for (int p = 0; p < num; p++) {
                if (localNode(p, aff)) {
                    GridDhtLocalPartition locPart = createPartition(p);
                    boolean owned = locPart.own();
                    assert owned : "Failed to own partition for oldest node [cacheName" + cctx.name() + ", part=" + locPart + ']';
                    if (log.isDebugEnabled())
                        log.debug("Owned partition for oldest node: " + locPart);
                    updateSeq = updateLocal(p, locPart.state(), updateSeq);
                }
            }
        } else
            createPartitions(aff, updateSeq);
    } else {
        // If rebalancing is disabled, then we simply clear out
        // the partitions this node is not responsible for.
        for (int p = 0; p < num; p++) {
            GridDhtLocalPartition locPart = localPartition(p, topVer, false, false);
            boolean belongs = localNode(p, aff);
            if (locPart != null) {
                if (!belongs) {
                    GridDhtPartitionState state = locPart.state();
                    if (state.active()) {
                        locPart.rent(false);
                        updateSeq = updateLocal(p, locPart.state(), updateSeq);
                        if (log.isDebugEnabled())
                            log.debug("Evicting partition with rebalancing disabled " + "(it does not belong to affinity): " + locPart);
                    }
                } else
                    locPart.own();
            } else if (belongs) {
                locPart = createPartition(p);
                locPart.own();
                updateLocal(p, locPart.state(), updateSeq);
            }
        }
    }
    if (node2part != null && node2part.valid())
        checkEvictions(updateSeq, aff);
    updateRebalanceVersion(aff);
}
Also used:
    ArrayList (java.util.ArrayList)
    ClusterNode (org.apache.ignite.cluster.ClusterNode)
    GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId)
    List (java.util.List)

Example 10 with GridDhtPartitionExchangeId

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId in project ignite by apache.

Source: class GridDhtPartitionTopologyImpl, method initPartitions0 (cache group variant).

/**
 * @param affVer Affinity version to use.
 * @param exchFut Exchange future.
 * @param updateSeq Update sequence.
 * @return {@code True} if partitions must be refreshed.
 */
private boolean initPartitions0(AffinityTopologyVersion affVer, GridDhtPartitionsExchangeFuture exchFut, long updateSeq) {
    List<List<ClusterNode>> aff = grp.affinity().readyAssignments(affVer);
    boolean needRefresh = false;
    if (grp.affinityNode()) {
        ClusterNode loc = ctx.localNode();
        ClusterNode oldest = discoCache.oldestAliveServerNode();
        GridDhtPartitionExchangeId exchId = exchFut.exchangeId();
        assert grp.affinity().lastVersion().equals(affVer) : "Invalid affinity [topVer=" + grp.affinity().lastVersion() + ", grp=" + grp.cacheOrGroupName() + ", affVer=" + affVer + ", fut=" + exchFut + ']';
        int num = grp.affinity().partitions();
        if (grp.rebalanceEnabled()) {
            boolean added = exchFut.cacheGroupAddedOnExchange(grp.groupId(), grp.receivedFrom());
            boolean first = added || (loc.equals(oldest) && loc.id().equals(exchId.nodeId()) && exchId.isJoined());
            if (first) {
                assert exchId.isJoined() || added;
                for (int p = 0; p < num; p++) {
                    if (localNode(p, aff) || initLocalPartition(p, discoCache)) {
                        GridDhtLocalPartition locPart = createPartition(p);
                        if (grp.persistenceEnabled()) {
                            GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) grp.shared().database();
                            locPart.restoreState(db.readPartitionState(grp, locPart.id()));
                        } else {
                            boolean owned = locPart.own();
                            assert owned : "Failed to own partition for oldest node [grp=" + grp.cacheOrGroupName() + ", part=" + locPart + ']';
                            if (log.isDebugEnabled())
                                log.debug("Owned partition for oldest node [grp=" + grp.cacheOrGroupName() + ", part=" + locPart + ']');
                        }
                        needRefresh = true;
                        updateSeq = updateLocal(p, locPart.state(), updateSeq, affVer);
                    }
                }
            } else
                createPartitions(affVer, aff, updateSeq);
        } else {
            // If rebalancing is disabled, then we simply clear out
            // the partitions this node is not responsible for.
            for (int p = 0; p < num; p++) {
                GridDhtLocalPartition locPart = localPartition0(p, affVer, false, true, false);
                boolean belongs = localNode(p, aff);
                if (locPart != null) {
                    if (!belongs) {
                        GridDhtPartitionState state = locPart.state();
                        if (state.active()) {
                            locPart.rent(false);
                            updateSeq = updateLocal(p, locPart.state(), updateSeq, affVer);
                            if (log.isDebugEnabled()) {
                                log.debug("Evicting partition with rebalancing disabled (it does not belong to " + "affinity) [grp=" + grp.cacheOrGroupName() + ", part=" + locPart + ']');
                            }
                        }
                    } else
                        locPart.own();
                } else if (belongs) {
                    locPart = createPartition(p);
                    locPart.own();
                    updateLocal(p, locPart.state(), updateSeq, affVer);
                }
            }
        }
    }
    updateRebalanceVersion(aff);
    return needRefresh;
}
Also used:
    ArrayList (java.util.ArrayList)
    ClusterNode (org.apache.ignite.cluster.ClusterNode)
    GridCacheDatabaseSharedManager (org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager)
    GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId)
    List (java.util.List)

Aggregations

GridDhtPartitionExchangeId (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId): 12
ClusterNode (org.apache.ignite.cluster.ClusterNode): 10
AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion): 5
GridDhtPartitionsExchangeFuture (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture): 5
DiscoveryEvent (org.apache.ignite.events.DiscoveryEvent): 4
IgniteInternalFuture (org.apache.ignite.internal.IgniteInternalFuture): 4
GridDhtPartitionSupplyMessage (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage): 4
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 3
IgniteEx (org.apache.ignite.internal.IgniteEx): 3
IgniteFutureTimeoutCheckedException (org.apache.ignite.internal.IgniteFutureTimeoutCheckedException): 3
IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException): 3
ArrayList (java.util.ArrayList): 2
List (java.util.List): 2
UUID (java.util.UUID): 2
CountDownLatch (java.util.concurrent.CountDownLatch): 2
IgniteClientDisconnectedCheckedException (org.apache.ignite.internal.IgniteClientDisconnectedCheckedException): 2
IgniteNeedReconnectException (org.apache.ignite.internal.IgniteNeedReconnectException): 2
TestRecordingCommunicationSpi (org.apache.ignite.internal.TestRecordingCommunicationSpi): 2
ClusterTopologyCheckedException (org.apache.ignite.internal.cluster.ClusterTopologyCheckedException): 2
DiscoveryCustomEvent (org.apache.ignite.internal.events.DiscoveryCustomEvent): 2