Search in sources :

Example 91 with ClusterNode

use of org.apache.ignite.cluster.ClusterNode in project ignite by apache.

the class GridDhtPartitionTopologyImpl method allOwners.

/**
 * {@inheritDoc}
 * <p>
 * Builds, under the topology read lock, one owner list per partition index. A node taken from
 * {@code discoCache.cacheGroupAffinityNodes(...)} is appended to the list of every partition
 * that its entry in {@code node2part} reports as {@code OWNING}.
 */
@Override
public List<List<ClusterNode>> allOwners() {
    lock.readLock().lock();

    try {
        int partCnt = partitions();

        // Pre-create an empty owner list for every partition index.
        List<List<ClusterNode>> ownersByPart = new ArrayList<>(partCnt);

        for (int idx = 0; idx < partCnt; idx++)
            ownersByPart.add(new ArrayList<>());

        for (ClusterNode affNode : discoCache.cacheGroupAffinityNodes(grp.groupId())) {
            GridDhtPartitionMap partMap = node2part.get(affNode.id());

            // No partition map is known for this node, so it contributes nothing.
            if (partMap == null)
                continue;

            for (Map.Entry<Integer, GridDhtPartitionState> stateEntry : partMap.map().entrySet()) {
                if (stateEntry.getValue() != OWNING)
                    continue;

                int partId = stateEntry.getKey();

                ownersByPart.get(partId).add(affNode);
            }
        }

        return ownersByPart;
    } finally {
        lock.readLock().unlock();
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) ArrayList(java.util.ArrayList) GridLongList(org.apache.ignite.internal.util.GridLongList) List(java.util.List) ArrayList(java.util.ArrayList) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) Map(java.util.Map) HashMap(java.util.HashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)

Example 92 with ClusterNode

use of org.apache.ignite.cluster.ClusterNode in project ignite by apache.

the class GridDhtPartitionTopologyImpl method resetOwners.

/**
 * {@inheritDoc}
 * <p>
 * For every partition listed in {@code ownersByUpdCounters}, takes ownership away from copies held by
 * nodes that are not in the partition's owner set: the local partition is passed to
 * {@code rebalancePartition(...)}, and entries in remote node maps are switched from {@code OWNING}
 * to {@code MOVING}. Afterwards every reset partition is registered in the affinity wait group.
 * <p>
 * The whole operation runs under the checkpoint read lock; state mutation additionally runs under
 * the topology write lock.
 *
 * @param ownersByUpdCounters Partition id mapped to the ids of nodes that are allowed to stay owners
 *      (judging by the parameter name, presumably the nodes with the highest update counter - TODO confirm).
 * @param haveHist Partition ids that are reported as "historical" in the warning below and are removed
 *      from the returned sets.
 * @param exchFut Exchange future; its discovery events are used to find joined nodes.
 * @return Node id mapped to the partitions reset on that node. Note that partitions contained in
 *      {@code haveHist} are removed from these sets before the method returns.
 */
@Override
public Map<UUID, Set<Integer>> resetOwners(Map<Integer, Set<UUID>> ownersByUpdCounters, Set<Integer> haveHist, GridDhtPartitionsExchangeFuture exchFut) {
    Map<UUID, Set<Integer>> res = new HashMap<>();
    // Collect ids of nodes that have an EVT_NODE_JOINED event in this exchange.
    Collection<DiscoveryEvent> evts = exchFut.events().events();
    Set<UUID> joinedNodes = U.newHashSet(evts.size());
    for (DiscoveryEvent evt : evts) {
        if (evt.type() == EVT_NODE_JOINED)
            joinedNodes.add(evt.eventNode().id());
    }
    // Checkpoint read lock is acquired before the topology write lock and released after it.
    ctx.database().checkpointReadLock();
    try {
        // Per-node copy of all reset partitions (taken before historical ones are filtered out of 'res').
        Map<UUID, Set<Integer>> addToWaitGroups = new HashMap<>();
        lock.writeLock().lock();
        try {
            // First process local partitions.
            UUID locNodeId = ctx.localNodeId();
            for (Map.Entry<Integer, Set<UUID>> entry : ownersByUpdCounters.entrySet()) {
                int part = entry.getKey();
                Set<UUID> maxCounterPartOwners = entry.getValue();
                GridDhtLocalPartition locPart = localPartition(part);
                // Only local partitions that are currently OWNING are candidates for reset.
                if (locPart == null || locPart.state() != OWNING)
                    continue;
                // Partition state should be mutated only on joining nodes, if any exist for the exchange.
                // Hence the local partition is reset only when no node joined and the local node is not an allowed owner.
                if (joinedNodes.isEmpty() && !maxCounterPartOwners.contains(locNodeId)) {
                    rebalancePartition(part, !haveHist.contains(part), exchFut);
                    res.computeIfAbsent(locNodeId, n -> new HashSet<>()).add(part);
                }
            }
            // Then process node maps.
            for (Map.Entry<Integer, Set<UUID>> entry : ownersByUpdCounters.entrySet()) {
                int part = entry.getKey();
                Set<UUID> maxCounterPartOwners = entry.getValue();
                for (Map.Entry<UUID, GridDhtPartitionMap> remotes : node2part.entrySet()) {
                    UUID remoteNodeId = remotes.getKey();
                    // When some nodes joined in this exchange, only their maps are touched.
                    if (!joinedNodes.isEmpty() && !joinedNodes.contains(remoteNodeId))
                        continue;
                    GridDhtPartitionMap partMap = remotes.getValue();
                    GridDhtPartitionState state = partMap.get(part);
                    if (state != OWNING)
                        continue;
                    if (!maxCounterPartOwners.contains(remoteNodeId)) {
                        partMap.put(part, MOVING);
                        // Bump the map's own update sequence while keeping its topology version.
                        partMap.updateSequence(partMap.updateSequence() + 1, partMap.topologyVersion());
                        // If this map belongs to the local node, make sure the topology-wide sequence is not behind it.
                        if (partMap.nodeId().equals(locNodeId))
                            updateSeq.setIfGreater(partMap.updateSequence());
                        res.computeIfAbsent(remoteNodeId, n -> new HashSet<>()).add(part);
                    }
                }
            }
            for (Map.Entry<UUID, Set<Integer>> entry : res.entrySet()) {
                UUID nodeId = entry.getKey();
                Set<Integer> rebalancedParts = entry.getValue();
                // Snapshot the full set first: the set stored in 'res' is mutated by removeAll() below.
                addToWaitGroups.put(nodeId, new HashSet<>(rebalancedParts));
                if (!rebalancedParts.isEmpty()) {
                    Set<Integer> historical = rebalancedParts.stream().filter(haveHist::contains).collect(Collectors.toSet());
                    // Filter out partitions having WAL history.
                    rebalancedParts.removeAll(historical);
                    U.warn(log, "Partitions have been scheduled for rebalancing due to outdated update counter " + "[grp=" + grp.cacheOrGroupName() + ", readyTopVer=" + readyTopVer + ", topVer=" + exchFut.initialVersion() + ", nodeId=" + nodeId + ", partsFull=" + S.compact(rebalancedParts) + ", partsHistorical=" + S.compact(historical) + "]");
                }
            }
            // Replace the full map with a new instance built from the current one and a freshly incremented sequence.
            node2part = new GridDhtPartitionFullMap(node2part, updateSeq.incrementAndGet());
        } finally {
            lock.writeLock().unlock();
        }
        // Wait-group registration happens outside the write lock but still under the checkpoint read lock.
        List<List<ClusterNode>> ideal = ctx.affinity().affinity(groupId()).idealAssignmentRaw();
        for (Map.Entry<UUID, Set<Integer>> entry : addToWaitGroups.entrySet()) {
            // Add to wait groups to ensure late assignment switch after all partitions are rebalanced.
            for (Integer part : entry.getValue()) {
                ctx.cache().context().affinity().addToWaitGroup(groupId(), part, topologyVersionFuture().initialVersion(), ideal.get(part));
            }
        }
    } finally {
        ctx.database().checkpointReadUnlock();
    }
    return res;
}
Also used : GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) WALPointer(org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer) AtomicReferenceArray(java.util.concurrent.atomic.AtomicReferenceArray) GridLongList(org.apache.ignite.internal.util.GridLongList) EVT_CACHE_REBALANCE_PART_DATA_LOST(org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_PART_DATA_LOST) RollbackRecord(org.apache.ignite.internal.pagemem.wal.record.RollbackRecord) SB(org.apache.ignite.internal.util.typedef.internal.SB) Map(java.util.Map) X(org.apache.ignite.internal.util.typedef.X) GridDhtTopologyFuture(org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTopologyFuture) EventType(org.apache.ignite.events.EventType) GridToStringExclude(org.apache.ignite.internal.util.tostring.GridToStringExclude) Collection(java.util.Collection) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) Set(java.util.Set) ALL(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.ExchangeType.ALL) OWNING(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Nullable(org.jetbrains.annotations.Nullable) List(java.util.List) CU(org.apache.ignite.internal.util.typedef.internal.CU) LOST(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.LOST) GridClusterStateProcessor(org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor) ExchangeDiscoveryEvents(org.apache.ignite.internal.processors.cache.ExchangeDiscoveryEvents) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) NotNull(org.jetbrains.annotations.NotNull) 
GridDhtCacheEntry(org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry) RENTING(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.RENTING) GridAtomicLong(org.apache.ignite.internal.util.GridAtomicLong) DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) U(org.apache.ignite.internal.util.typedef.internal.U) EVT_DISCOVERY_CUSTOM_EVT(org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT) HashMap(java.util.HashMap) IgniteLogger(org.apache.ignite.IgniteLogger) DiscoCache(org.apache.ignite.internal.managers.discovery.DiscoCache) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) HashSet(java.util.HashSet) ClusterNode(org.apache.ignite.cluster.ClusterNode) F0(org.apache.ignite.internal.util.F0) GridDhtPartitionsExchangeFuture(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture) S(org.apache.ignite.internal.util.typedef.internal.S) MOVING(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.MOVING) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) NoSuchElementException(java.util.NoSuchElementException) EVICTED(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.EVICTED) IGNORE(org.apache.ignite.cache.PartitionLossPolicy.IGNORE) F(org.apache.ignite.internal.util.typedef.F) EVT_NODE_JOINED(org.apache.ignite.events.EventType.EVT_NODE_JOINED) Iterator(java.util.Iterator) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) 
AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) StripedCompositeReadWriteLock(org.apache.ignite.internal.util.StripedCompositeReadWriteLock) NONE(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.ExchangeType.NONE) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) GridCacheSharedContext(org.apache.ignite.internal.processors.cache.GridCacheSharedContext) GridDhtPartitionExchangeId(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionExchangeId) Collections(java.util.Collections) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) HashMap(java.util.HashMap) DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) GridLongList(org.apache.ignite.internal.util.GridLongList) List(java.util.List) ArrayList(java.util.ArrayList) UUID(java.util.UUID) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) Map(java.util.Map) HashMap(java.util.HashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) HashSet(java.util.HashSet)

Example 93 with ClusterNode

use of org.apache.ignite.cluster.ClusterNode in project ignite by apache.

the class GridDhtPartitionTopologyImpl method afterExchange.

/**
 * {@inheritDoc}
 * <p>
 * Runs under the checkpoint read lock and the topology write lock. When the local node is not a client
 * and the exchange type is {@code ALL}, walks over all partitions of the group: partitions for which
 * {@code partitionLocalNode(p, topVer)} is true are prepared for rebalancing if the local copy is missing,
 * {@code RENTING} or {@code EVICTED}; local {@code MOVING} copies of all other partitions are rented.
 * After that eviction candidates are checked (if the node-to-partition map is valid), the rebalance
 * version is updated and a consistency check is run.
 *
 * @param exchFut Exchange future that has just completed.
 * @return {@code True} if a {@code MOVING} partition not belonging to the local node was rented or
 *      {@code checkEvictions(...)} reported a change; {@code false} otherwise, including the case
 *      when the topology is stopping.
 */
@Override
public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut) {
    boolean changed = false;
    int partitions = grp.affinity().partitions();
    AffinityTopologyVersion topVer = exchFut.context().events().topologyVersion();
    assert grp.affinity().lastVersion().equals(topVer) : "Affinity is not initialized " + "[grp=" + grp.cacheOrGroupName() + ", topVer=" + topVer + ", affVer=" + grp.affinity().lastVersion() + ", fut=" + exchFut + ']';
    // Checkpoint read lock is acquired before the topology write lock and released after it.
    ctx.database().checkpointReadLock();
    try {
        lock.writeLock().lock();
        try {
            // Nothing to do when the topology is stopping; both locks are released by the finally blocks.
            if (stopping)
                return false;
            assert readyTopVer.initialized() : readyTopVer;
            assert lastTopChangeVer.equals(readyTopVer);
            if (log.isDebugEnabled()) {
                log.debug("Partition map before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
            }
            if (log.isTraceEnabled()) {
                log.trace("Partition states before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
            }
            // Local copy that shadows the field; it is threaded through updateLocal(...) calls below.
            long updateSeq = this.updateSeq.incrementAndGet();
            // Skip partition updates in case of not real exchange.
            if (!ctx.localNode().isClient() && exchFut.exchangeType() == ALL) {
                for (int p = 0; p < partitions; p++) {
                    GridDhtLocalPartition locPart = localPartition0(p, topVer, false, true);
                    if (partitionLocalNode(p, topVer)) {
                        // Prepare partition to rebalance if it's not happened on full map update phase.
                        if (locPart == null || locPart.state() == RENTING || locPart.state() == EVICTED)
                            locPart = rebalancePartition(p, true, exchFut);
                        GridDhtPartitionState state = locPart.state();
                        if (state == MOVING) {
                            if (grp.rebalanceEnabled()) {
                                Collection<ClusterNode> owners = owners(p);
                                // NOTE(review): the first half of this comment appears to have been lost; presumably it read
                                // "If an owner node left during exchange," - confirm against the upstream source.
                                // then new exchange should be started with detecting lost partitions.
                                // With rebalancing enabled the partition stays MOVING here; the only action is a debug message.
                                if (!F.isEmpty(owners)) {
                                    if (log.isDebugEnabled())
                                        log.debug("Will not own partition (there are owners to rebalance from) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", owners = " + owners + ']');
                                }
                            } else
                                // Rebalancing is disabled for the group: just publish the current local state.
                                updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
                        }
                    } else {
                        // Partition is not assigned to the local node: a MOVING local copy is rented.
                        if (locPart != null) {
                            GridDhtPartitionState state = locPart.state();
                            if (state == MOVING) {
                                locPart.rent();
                                updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
                                changed = true;
                                if (log.isDebugEnabled()) {
                                    log.debug("Evicting MOVING partition (it does not belong to affinity) [" + "grp=" + grp.cacheOrGroupName() + ", p=" + locPart.id() + ']');
                                }
                            }
                        }
                    }
                }
            }
            AffinityAssignment aff = grp.affinity().readyAffinity(topVer);
            // Evictions are checked only against an existing, valid node-to-partition map.
            if (node2part != null && node2part.valid())
                changed |= checkEvictions(updateSeq, aff);
            updateRebalanceVersion(aff.topologyVersion(), aff.assignment());
            consistencyCheck();
            if (log.isTraceEnabled()) {
                log.trace("Partition states after afterExchange [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
            }
        } finally {
            lock.writeLock().unlock();
        }
    } finally {
        ctx.database().checkpointReadUnlock();
    }
    return changed;
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)

Example 94 with ClusterNode

use of org.apache.ignite.cluster.ClusterNode in project ignite by apache.

the class GridDhtPartitionTopologyImpl method nodes.

/**
 * Collects the cache group affinity nodes that hold partition {@code p} in one of the requested states.
 * <p>
 * Candidate node ids are taken from the discovery cache before the topology read lock is acquired;
 * the state check and node lookup happen under that lock. A node is skipped when discovery no longer
 * knows it or when its order is greater than the requested topology version.
 *
 * @param p Partition.
 * @param topVer Topology version (a negative {@code topologyVersion()} disables filtering by node order).
 * @param state Partition state.
 * @param states Additional partition states.
 * @return List of nodes for the partition.
 */
private List<ClusterNode> nodes(int p, AffinityTopologyVersion topVer, GridDhtPartitionState state, GridDhtPartitionState... states) {
    Collection<UUID> candidateIds = F.nodeIds(discoCache.cacheGroupAffinityNodes(grp.groupId()));

    lock.readLock().lock();

    try {
        assert node2part != null && node2part.valid() : "Invalid node-to-partitions map [topVer=" + topVer + ", grp=" + grp.cacheOrGroupName() + ", allIds=" + candidateIds + ", node2part=" + node2part + ']';

        // The capacity is an empirical guess: most calls filter by OWNING, which matches only a part of the nodes.
        List<ClusterNode> matched = new ArrayList<>(candidateIds.size() / 2 + 1);

        for (UUID candidateId : candidateIds) {
            if (!hasState(p, candidateId, state, states))
                continue;

            ClusterNode node = ctx.discovery().node(candidateId);

            // Discovery may return null for a node that has already disappeared.
            if (node == null)
                continue;

            boolean visibleAtVer = topVer.topologyVersion() < 0 || node.order() <= topVer.topologyVersion();

            if (visibleAtVer)
                matched.add(node);
        }

        return matched;
    } finally {
        lock.readLock().unlock();
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) ArrayList(java.util.ArrayList) UUID(java.util.UUID)

Example 95 with ClusterNode

use of org.apache.ignite.cluster.ClusterNode in project ignite by apache.

the class GridDhtPartitionTopologyImpl method checkEvictions.

/**
 * Scans local partitions and rents those the local node does not have to keep according to affinity.
 * <p>
 * A local partition in an active state whose affinity nodes do not include the local node is rented when
 * either every affinity node is already among the {@code OWNING} nodes, or there are more owners than
 * affinity nodes and the local node is one of the surplus owners at the head of the owner list sorted with
 * {@code CU.nodeComparator(true)}.
 * <p>
 * The topology write lock must be held by the calling thread.
 *
 * @param updateSeq Update sequence.
 * @param aff Affinity assignments.
 * @return {@code True} if at least one local partition changed its state as a result of renting.
 */
private boolean checkEvictions(long updateSeq, AffinityAssignment aff) {
    assert lock.isWriteLockedByCurrentThread();

    if (!ctx.kernalContext().state().evictionsAllowed())
        return false;

    boolean anyStateChanged = false;
    UUID locNodeId = ctx.localNodeId();

    for (int partIdx = 0; partIdx < locParts.length(); partIdx++) {
        GridDhtLocalPartition locPart = locParts.get(partIdx);

        if (locPart == null || !locPart.state().active())
            continue;

        List<ClusterNode> affinityNodes = aff.get(partIdx);

        // The local node is an affinity node for this partition, so it must keep it.
        if (affinityNodes.contains(ctx.localNode()))
            continue;

        List<ClusterNode> ownerNodes = nodes(partIdx, aff.topologyVersion(), OWNING);
        Collection<UUID> ownerIds = F.nodeIds(ownerNodes);

        // Stays null unless one of the two eviction conditions holds; otherwise carries the log message prefix.
        String evictMsg = null;

        if (ownerIds.containsAll(F.nodeIds(affinityNodes)))
            evictMsg = "Partition has been scheduled for eviction (all affinity nodes are owners) ";
        else {
            int ownerCnt = ownerIds.size();
            int affCnt = affinityNodes.size();

            if (ownerCnt > affCnt) {
                // Sort by node orders in ascending order.
                Collections.sort(ownerNodes, CU.nodeComparator(true));

                int surplus = ownerNodes.size() - affCnt;

                for (int i = 0; i < surplus; i++) {
                    if (locNodeId.equals(ownerNodes.get(i).id())) {
                        evictMsg = "Partition has been scheduled for eviction (this node is oldest non-affinity node) ";

                        break;
                    }
                }
            }
        }

        if (evictMsg == null)
            continue;

        GridDhtPartitionState prevState = locPart.state();

        locPart.rent();

        updateSeq = updateLocal(locPart.id(), locPart.state(), updateSeq, aff.topologyVersion());

        // Renting is counted only if it actually moved the partition to another state.
        boolean stateChanged = prevState != locPart.state();

        anyStateChanged |= stateChanged;

        if (stateChanged && log.isDebugEnabled()) {
            log.debug(evictMsg + "[grp=" + grp.cacheOrGroupName() + ", p=" + locPart.id() + ", prevState=" + prevState + ", state=" + locPart.state() + "]");
        }
    }

    return anyStateChanged;
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) UUID(java.util.UUID)

Aggregations

ClusterNode (org.apache.ignite.cluster.ClusterNode)1104 UUID (java.util.UUID)281 ArrayList (java.util.ArrayList)280 Test (org.junit.Test)276 Ignite (org.apache.ignite.Ignite)239 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)239 HashMap (java.util.HashMap)184 Map (java.util.Map)182 List (java.util.List)165 IgniteException (org.apache.ignite.IgniteException)147 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)147 AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)143 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)126 Collection (java.util.Collection)113 Message (org.apache.ignite.plugin.extensions.communication.Message)106 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)87 HashSet (java.util.HashSet)85 ClusterTopologyCheckedException (org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)82 CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration)81 IgniteEx (org.apache.ignite.internal.IgniteEx)81