
Example 6 with AffinityAssignment

Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.

The class GridDhtPartitionDemander, method handleSupplyMessage.

/**
 * Handles a supply message from {@code nodeId} with the specified {@code topicId}.
 *
 * A supply message contains entries used to populate the rebalancing partitions.
 *
 * Rebalancing is a cyclic process: the rebalancing partitions are populated with
 * entries from the supply message, and if not all partitions specified in
 * {@link #rebalanceFut} have been rebalanced or marked as missed, a new demand
 * message is sent to request the next batch of entries.
 *
 * @param topicId Topic id.
 * @param nodeId Node id.
 * @param supply Supply message.
 */
public void handleSupplyMessage(int topicId, final UUID nodeId, final GridDhtPartitionSupplyMessage supply) {
    AffinityTopologyVersion topVer = supply.topologyVersion();
    final RebalanceFuture fut = rebalanceFut;
    ClusterNode node = ctx.node(nodeId);
    if (node == null)
        return;
    // Topology already changed (for the future that the supply message is based on).
    if (topologyChanged(fut))
        return;
    if (!fut.isActual(supply.rebalanceId())) {
        // Supply message is based on another future.
        return;
    }
    if (log.isDebugEnabled())
        log.debug("Received supply message [grp=" + grp.cacheOrGroupName() + ", msg=" + supply + ']');
    // Check whether there were class loading errors on unmarshal
    if (supply.classError() != null) {
        U.warn(log, "Rebalancing from node cancelled [grp=" + grp.cacheOrGroupName() + ", node=" + nodeId + "]. Class got undeployed during preloading: " + supply.classError());
        fut.cancel(nodeId);
        return;
    }
    final GridDhtPartitionTopology top = grp.topology();
    if (grp.sharedGroup()) {
        for (GridCacheContext cctx : grp.caches()) {
            if (cctx.statisticsEnabled()) {
                long keysCnt = supply.keysForCache(cctx.cacheId());
                if (keysCnt != -1)
                    cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(keysCnt);
                // Cannot be calculated per cache.
                cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
            }
        }
    } else {
        GridCacheContext cctx = grp.singleCacheContext();
        if (cctx.statisticsEnabled()) {
            if (supply.estimatedKeysCount() != -1)
                cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(supply.estimatedKeysCount());
            cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
        }
    }
    try {
        AffinityAssignment aff = grp.affinity().cachedAffinity(topVer);
        GridCacheContext cctx = grp.sharedGroup() ? null : grp.singleCacheContext();
        // Preload.
        for (Map.Entry<Integer, CacheEntryInfoCollection> e : supply.infos().entrySet()) {
            int p = e.getKey();
            if (aff.get(p).contains(ctx.localNode())) {
                GridDhtLocalPartition part = top.localPartition(p, topVer, true);
                assert part != null;
                boolean last = supply.last().containsKey(p);
                if (part.state() == MOVING) {
                    boolean reserved = part.reserve();
                    assert reserved : "Failed to reserve partition [igniteInstanceName=" + ctx.igniteInstanceName() + ", grp=" + grp.cacheOrGroupName() + ", part=" + part + ']';
                    part.lock();
                    try {
                        // Loop through all received entries and try to preload them.
                        for (GridCacheEntryInfo entry : e.getValue().infos()) {
                            if (!preloadEntry(node, p, entry, topVer)) {
                                if (log.isDebugEnabled())
                                    log.debug("Got entries for invalid partition during " + "preloading (will skip) [p=" + p + ", entry=" + entry + ']');
                                break;
                            }
                            if (grp.sharedGroup() && (cctx == null || cctx.cacheId() != entry.cacheId()))
                                cctx = ctx.cacheContext(entry.cacheId());
                            if (cctx != null && cctx.statisticsEnabled())
                                cctx.cache().metrics0().onRebalanceKeyReceived();
                        }
                        // If this was the last supply message for the partition, take ownership.
                        if (last) {
                            top.own(part);
                            fut.partitionDone(nodeId, p);
                            if (log.isDebugEnabled())
                                log.debug("Finished rebalancing partition: " + part);
                        }
                    } finally {
                        part.unlock();
                        part.release();
                    }
                } else {
                    if (last)
                        fut.partitionDone(nodeId, p);
                    if (log.isDebugEnabled())
                        log.debug("Skipping rebalancing partition (state is not MOVING): " + part);
                }
            } else {
                fut.partitionDone(nodeId, p);
                if (log.isDebugEnabled())
                    log.debug("Skipping rebalancing partition (it does not belong on current node): " + p);
            }
        }
        // Only request partitions based on latest topology version.
        for (Integer miss : supply.missed()) {
            if (aff.get(miss).contains(ctx.localNode()))
                fut.partitionMissed(nodeId, miss);
        }
        for (Integer miss : supply.missed()) fut.partitionDone(nodeId, miss);
        GridDhtPartitionDemandMessage d = new GridDhtPartitionDemandMessage(supply.rebalanceId(), supply.topologyVersion(), grp.groupId());
        d.timeout(grp.config().getRebalanceTimeout());
        d.topic(rebalanceTopics.get(topicId));
        if (!topologyChanged(fut) && !fut.isDone()) {
            // Send demand message.
            try {
                ctx.io().sendOrderedMessage(node, rebalanceTopics.get(topicId), d.convertIfNeeded(node.version()), grp.ioPolicy(), grp.config().getRebalanceTimeout());
            } catch (ClusterTopologyCheckedException e) {
                if (log.isDebugEnabled()) {
                    log.debug("Node left during rebalancing [grp=" + grp.cacheOrGroupName() + ", node=" + node.id() + ", msg=" + e.getMessage() + ']');
                }
            }
        }
    } catch (IgniteSpiException | IgniteCheckedException e) {
        LT.error(log, e, "Error during rebalancing [grp=" + grp.cacheOrGroupName() + ", srcNode=" + node.id() + ", err=" + e + ']');
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) GridCacheEntryInfo(org.apache.ignite.internal.processors.cache.GridCacheEntryInfo) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) CacheEntryInfoCollection(org.apache.ignite.internal.processors.cache.CacheEntryInfoCollection) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) Map(java.util.Map) HashMap(java.util.HashMap) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)
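
The core AffinityAssignment usage in the method above is the check aff.get(p).contains(ctx.localNode()): supplied entries are applied only for partitions that the cached affinity of the supply message's topology version assigns to the local node. A minimal sketch of that filtering step, assuming the internal APIs shown in the snippet (partitionsToApply is a hypothetical helper name, not part of Ignite):

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

// Hypothetical helper: of the supplied partition ids, keep only those that the
// given affinity assignment maps to the local node; handleSupplyMessage reports
// all others as done immediately.
static List<Integer> partitionsToApply(AffinityAssignment aff,
    Collection<Integer> suppliedParts, ClusterNode locNode) {
    List<Integer> res = new ArrayList<>();

    for (Integer p : suppliedParts) {
        // aff.get(p) is the list of nodes assigned to partition p at aff's topology version.
        if (aff.get(p).contains(locNode))
            res.add(p);
    }

    return res;
}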

Example 7 with AffinityAssignment

Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.

The class CacheGroupMetricsImpl, method getAffinityPartitionsAssignmentMap.

/**
 * @return Mapping of partition number to the IDs of the nodes assigned to that partition.
 */
public Map<Integer, List<String>> getAffinityPartitionsAssignmentMap() {
    if (ctx.affinity().lastVersion().topologyVersion() < 0)
        return Collections.emptyMap();
    AffinityAssignment assignment = ctx.affinity().cachedAffinity(AffinityTopologyVersion.NONE);
    int part = 0;
    Map<Integer, List<String>> assignmentMap = new LinkedHashMap<>();
    for (List<ClusterNode> partAssignment : assignment.assignment()) {
        List<String> partNodeIds = new ArrayList<>(partAssignment.size());
        for (ClusterNode node : partAssignment) partNodeIds.add(node.id().toString());
        assignmentMap.put(part, partNodeIds);
        part++;
    }
    return assignmentMap;
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap)
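
A short usage sketch for the method above; grpMetrics stands for a hypothetical reference to a CacheGroupMetricsImpl instance. Because the result is a LinkedHashMap filled in ascending partition order, it can be dumped directly:

// Hypothetical usage: print each partition's assigned node IDs in partition order.
Map<Integer, List<String>> assignment = grpMetrics.getAffinityPartitionsAssignmentMap();

for (Map.Entry<Integer, List<String>> e : assignment.entrySet())
    System.out.println("part=" + e.getKey() + " -> nodes=" + e.getValue());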

Example 8 with AffinityAssignment

Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.

The class GridDhtPartitionTopologyImpl, method beforeExchange.

/**
 * {@inheritDoc}
 */
@Override
public void beforeExchange(GridDhtPartitionsExchangeFuture exchFut, boolean affReady, boolean updateMoving) throws IgniteCheckedException {
    ctx.database().checkpointReadLock();
    try {
        U.writeLock(lock);
        try {
            if (stopping)
                return;
            assert lastTopChangeVer.equals(exchFut.initialVersion()) : "Invalid topology version [topVer=" + lastTopChangeVer + ", exchId=" + exchFut.exchangeId() + ']';
            ExchangeDiscoveryEvents evts = exchFut.context().events();
            if (affReady) {
                assert grp.affinity().lastVersion().equals(evts.topologyVersion()) : "Invalid affinity version [" + "grp=" + grp.cacheOrGroupName() + ", affVer=" + grp.affinity().lastVersion() + ", evtsVer=" + evts.topologyVersion() + ']';
                lastTopChangeVer = readyTopVer = evts.topologyVersion();
                discoCache = evts.discoveryCache();
            }
            if (log.isDebugEnabled()) {
                log.debug("Partition map beforeExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
            }
            long updateSeq = this.updateSeq.incrementAndGet();
            if (exchFut.exchangeType() == ALL && !exchFut.rebalanced())
                cntrMap.clear();
            initializeFullMap(updateSeq);
            boolean grpStarted = exchFut.cacheGroupAddedOnExchange(grp.groupId(), grp.receivedFrom());
            if (evts.hasServerLeft()) {
                for (DiscoveryEvent evt : evts.events()) {
                    if (ExchangeDiscoveryEvents.serverLeftEvent(evt))
                        removeNode(evt.eventNode().id());
                }
            } else if (affReady && grpStarted && exchFut.exchangeType() == NONE) {
                assert !exchFut.context().mergeExchanges() : exchFut;
                assert node2part != null && node2part.valid() : exchFut;
                // Initialize node maps if group was started from joining client.
                final List<ClusterNode> nodes = exchFut.firstEventCache().cacheGroupAffinityNodes(grp.groupId());
                for (ClusterNode node : nodes) {
                    if (!node2part.containsKey(node.id()) && ctx.discovery().alive(node)) {
                        final GridDhtPartitionMap partMap = new GridDhtPartitionMap(node.id(), 1L, exchFut.initialVersion(), new GridPartitionStateMap(), false);
                        final AffinityAssignment aff = grp.affinity().cachedAffinity(exchFut.initialVersion());
                        for (Integer p0 : aff.primaryPartitions(node.id())) partMap.put(p0, OWNING);
                        for (Integer p0 : aff.backupPartitions(node.id())) partMap.put(p0, OWNING);
                        node2part.put(node.id(), partMap);
                    }
                }
            }
            if (grp.affinityNode()) {
                if (grpStarted || exchFut.firstEvent().type() == EVT_DISCOVERY_CUSTOM_EVT || exchFut.serverNodeDiscoveryEvent()) {
                    AffinityTopologyVersion affVer;
                    List<List<ClusterNode>> affAssignment;
                    if (affReady) {
                        affVer = evts.topologyVersion();
                        assert grp.affinity().lastVersion().equals(affVer) : "Invalid affinity [topVer=" + grp.affinity().lastVersion() + ", grp=" + grp.cacheOrGroupName() + ", affVer=" + affVer + ", fut=" + exchFut + ']';
                        affAssignment = grp.affinity().readyAssignments(affVer);
                    } else {
                        assert !exchFut.context().mergeExchanges();
                        affVer = exchFut.initialVersion();
                        affAssignment = grp.affinity().idealAssignmentRaw();
                    }
                    initPartitions(affVer, affAssignment, exchFut, updateSeq);
                }
            }
            consistencyCheck();
            if (updateMoving) {
                assert grp.affinity().lastVersion().equals(evts.topologyVersion());
                createMovingPartitions(grp.affinity().readyAffinity(evts.topologyVersion()));
            }
            if (log.isDebugEnabled()) {
                log.debug("Partition map after beforeExchange [grp=" + grp.cacheOrGroupName() + ", " + "exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
            }
            if (log.isTraceEnabled()) {
                log.trace("Partition states after beforeExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
            }
        } finally {
            lock.writeLock().unlock();
        }
    } finally {
        ctx.database().checkpointReadUnlock();
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) GridLongList(org.apache.ignite.internal.util.GridLongList) List(java.util.List) ArrayList(java.util.ArrayList) ExchangeDiscoveryEvents(org.apache.ignite.internal.processors.cache.ExchangeDiscoveryEvents) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap)
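
The AffinityAssignment calls that matter above are primaryPartitions(nodeId) and backupPartitions(nodeId), which seed a joining node's partition map with OWNING states. A minimal sketch of that step in isolation; buildOwningMap is a hypothetical helper, and the import path of GridDhtPartitionState is assumed to match the dht package used elsewhere on this page (it moved between Ignite releases):

import java.util.UUID;

import org.apache.ignite.internal.processors.affinity.AffinityAssignment;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap;
import org.apache.ignite.internal.util.GridPartitionStateMap;

import static org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionState.OWNING;

// Hypothetical helper: mark every partition the assignment gives to the node,
// whether as primary or as backup, as OWNING in a fresh partition map, as
// beforeExchange does when initializing node maps.
static GridDhtPartitionMap buildOwningMap(UUID nodeId, AffinityAssignment aff,
    AffinityTopologyVersion topVer) {
    GridDhtPartitionMap partMap =
        new GridDhtPartitionMap(nodeId, 1L, topVer, new GridPartitionStateMap(), false);

    for (Integer p : aff.primaryPartitions(nodeId))
        partMap.put(p, OWNING);

    for (Integer p : aff.backupPartitions(nodeId))
        partMap.put(p, OWNING);

    return partMap;
}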

Example 9 with AffinityAssignment

Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.

The class GridDhtPartitionTopologyImpl, method nodes.

/**
 * {@inheritDoc}
 */
@Override
public List<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) {
    AffinityAssignment affAssignment = grp.affinity().cachedAffinity(topVer);
    List<ClusterNode> affNodes = affAssignment.get(p);
    List<ClusterNode> nodes = nodes0(p, affAssignment, affNodes);
    return nodes != null ? nodes : affNodes;
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment)
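
By Ignite's affinity convention, the first node in a partition's assignment list is the primary and the remaining nodes are backups, so the list returned by nodes(p, topVer) above can be split accordingly. A minimal sketch under that assumption:

// Sketch: split a partition's affinity nodes into primary and backups,
// assuming the first entry of the assignment list is the primary.
List<ClusterNode> affNodes = grp.affinity().cachedAffinity(topVer).get(p);

ClusterNode primary = affNodes.isEmpty() ? null : affNodes.get(0);
List<ClusterNode> backups = affNodes.size() > 1
    ? affNodes.subList(1, affNodes.size())
    : Collections.<ClusterNode>emptyList();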

Example 10 with AffinityAssignment

Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.

The class GridDhtPartitionTopologyImpl, method afterExchange.

/**
 * {@inheritDoc}
 */
@Override
public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut) {
    boolean changed = false;
    int partitions = grp.affinity().partitions();
    AffinityTopologyVersion topVer = exchFut.context().events().topologyVersion();
    assert grp.affinity().lastVersion().equals(topVer) : "Affinity is not initialized " + "[grp=" + grp.cacheOrGroupName() + ", topVer=" + topVer + ", affVer=" + grp.affinity().lastVersion() + ", fut=" + exchFut + ']';
    ctx.database().checkpointReadLock();
    try {
        lock.writeLock().lock();
        try {
            if (stopping)
                return false;
            assert readyTopVer.initialized() : readyTopVer;
            assert lastTopChangeVer.equals(readyTopVer);
            if (log.isDebugEnabled()) {
                log.debug("Partition map before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
            }
            if (log.isTraceEnabled()) {
                log.trace("Partition states before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
            }
            long updateSeq = this.updateSeq.incrementAndGet();
            // Skip partition updates if this is not a real exchange.
            if (!ctx.localNode().isClient() && exchFut.exchangeType() == ALL) {
                for (int p = 0; p < partitions; p++) {
                    GridDhtLocalPartition locPart = localPartition0(p, topVer, false, true);
                    if (partitionLocalNode(p, topVer)) {
                        // Prepare the partition for rebalancing if that did not happen during the full map update phase.
                        if (locPart == null || locPart.state() == RENTING || locPart.state() == EVICTED)
                            locPart = rebalancePartition(p, true, exchFut);
                        GridDhtPartitionState state = locPart.state();
                        if (state == MOVING) {
                            if (grp.rebalanceEnabled()) {
                                Collection<ClusterNode> owners = owners(p);
                                // If there are no owners, a new exchange should be started with detecting lost partitions.
                                if (!F.isEmpty(owners)) {
                                    if (log.isDebugEnabled())
                                        log.debug("Will not own partition (there are owners to rebalance from) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", owners = " + owners + ']');
                                }
                            } else
                                updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
                        }
                    } else {
                        if (locPart != null) {
                            GridDhtPartitionState state = locPart.state();
                            if (state == MOVING) {
                                locPart.rent();
                                updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
                                changed = true;
                                if (log.isDebugEnabled()) {
                                    log.debug("Evicting MOVING partition (it does not belong to affinity) [" + "grp=" + grp.cacheOrGroupName() + ", p=" + locPart.id() + ']');
                                }
                            }
                        }
                    }
                }
            }
            AffinityAssignment aff = grp.affinity().readyAffinity(topVer);
            if (node2part != null && node2part.valid())
                changed |= checkEvictions(updateSeq, aff);
            updateRebalanceVersion(aff.topologyVersion(), aff.assignment());
            consistencyCheck();
            if (log.isTraceEnabled()) {
                log.trace("Partition states after afterExchange [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
            }
        } finally {
            lock.writeLock().unlock();
        }
    } finally {
        ctx.database().checkpointReadUnlock();
    }
    return changed;
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)
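
The eviction rule above boils down to: a local partition in MOVING state that the ready affinity no longer assigns to this node gets rented (scheduled for eviction). A minimal sketch of that predicate; shouldEvict is a hypothetical name, and the GridDhtPartitionState import path is assumed as in the other examples on this page:

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition;

import static org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionState.MOVING;

// Hypothetical predicate mirroring the else-branch in afterExchange: evict a
// MOVING partition once the affinity assignment no longer contains this node.
static boolean shouldEvict(GridDhtLocalPartition locPart, AffinityAssignment aff,
    ClusterNode locNode) {
    return locPart != null
        && locPart.state() == MOVING
        && !aff.get(locPart.id()).contains(locNode);
}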

Aggregations

AffinityAssignment (org.apache.ignite.internal.processors.affinity.AffinityAssignment): 32
ClusterNode (org.apache.ignite.cluster.ClusterNode): 19
AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion): 17
UUID (java.util.UUID): 11
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 10
ArrayList (java.util.ArrayList): 9
HashMap (java.util.HashMap): 7
Map (java.util.Map): 7
GridDhtPartitionMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap): 7
GridCacheEntryRemovedException (org.apache.ignite.internal.processors.cache.GridCacheEntryRemovedException): 6
GridTimeoutObject (org.apache.ignite.internal.processors.timeout.GridTimeoutObject): 6
List (java.util.List): 5
ClusterTopologyCheckedException (org.apache.ignite.internal.cluster.ClusterTopologyCheckedException): 5
CacheObject (org.apache.ignite.internal.processors.cache.CacheObject): 5
CacheStorePartialUpdateException (org.apache.ignite.internal.processors.cache.CacheStorePartialUpdateException): 5
GridCacheOperation (org.apache.ignite.internal.processors.cache.GridCacheOperation): 5
GridCacheUpdateAtomicResult (org.apache.ignite.internal.processors.cache.GridCacheUpdateAtomicResult): 5
KeyCacheObject (org.apache.ignite.internal.processors.cache.KeyCacheObject): 5
GridDhtCacheEntry (org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry): 5
IgniteBiTuple (org.apache.ignite.lang.IgniteBiTuple): 5