Search in sources :

Example 51 with GridDhtPartitionMap

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in the Apache Ignite project.

Class GridDhtPartitionTopologyImpl, method removeNode.

/**
 * Drops the given node from the local {@link #node2part} full map and purges it from the
 * per-partition node sets kept in {@link #diffFromAffinity}.
 * <p>
 * The full map is always replaced by a fresh copy before the node is removed from it. When the
 * local node is the oldest alive server node but the current full map carries a different node ID,
 * the copy is created under the local node's ID and order with the current update sequence;
 * otherwise the copy keeps the update sequence of the existing map.
 * <p>
 * Must be called with the topology write lock held by the current thread. Does nothing beyond the
 * assertions when {@link #node2part} is not set.
 *
 * @param nodeId ID of the node to remove.
 */
private void removeNode(UUID nodeId) {
    assert nodeId != null;
    assert lock.isWriteLockedByCurrentThread();

    ClusterNode oldestSrv = discoCache.oldestAliveServerNode();

    assert oldestSrv != null || ctx.kernalContext().clientNode();

    ClusterNode locNode = ctx.localNode();

    // Nothing to clean up until a full map has been received.
    if (node2part == null)
        return;

    boolean rebindToLocal = locNode.equals(oldestSrv) && !node2part.nodeId().equals(locNode.id());

    node2part = rebindToLocal
        ? new GridDhtPartitionFullMap(locNode.id(), locNode.order(), updateSeq.get(), node2part, false)
        : new GridDhtPartitionFullMap(node2part, node2part.updateSequence());

    GridDhtPartitionMap removedParts = node2part.remove(nodeId);

    // The affinity diff is only touched for non-replicated groups, and only if the node had a map.
    if (!grp.isReplicated() && removedParts != null) {
        for (Integer part : removedParts.keySet()) {
            Set<UUID> diffIds = diffFromAffinity.get(part);

            if (diffIds != null)
                diffIds.remove(nodeId);
        }
    }

    consistencyCheck();
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) UUID(java.util.UUID)

Example 52 with GridDhtPartitionMap

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in the Apache Ignite project.

Class GridDhtPartitionTopologyImpl, method rebuildDiff.

/**
 * Brings {@link #diffFromAffinity} in line with the supplied affinity assignment and stamps
 * {@link #diffFromAffinityVer} with the assignment's topology version.
 * <p>
 * Returns immediately, leaving the version untouched, while {@link #node2part} is not set.
 * Two strategies exist, selected by {@code FAST_DIFF_REBUILD}:
 * <ul>
 *     <li>fast: only the entries already present in the diff are pruned;</li>
 *     <li>full: every partition of every node in the full map is re-evaluated.</li>
 * </ul>
 * Must be called with the topology write lock held by the current thread.
 *
 * @param affAssignment New affinity assignment.
 */
private void rebuildDiff(AffinityAssignment affAssignment) {
    assert lock.isWriteLockedByCurrentThread();

    if (node2part == null)
        return;

    if (!FAST_DIFF_REBUILD) {
        for (Map.Entry<UUID, GridDhtPartitionMap> nodeEntry : node2part.entrySet()) {
            UUID nodeId = nodeEntry.getKey();

            for (Map.Entry<Integer, GridDhtPartitionState> partEntry : nodeEntry.getValue().entrySet()) {
                Integer part = partEntry.getKey();
                GridDhtPartitionState partState = partEntry.getValue();

                Set<UUID> diffIds = diffFromAffinity.get(part);

                boolean trackedState = partState == MOVING || partState == OWNING || partState == RENTING;

                // A node belongs to the diff when it holds the partition in one of the tracked
                // states although the assignment does not list it for that partition.
                if (trackedState && !affAssignment.getIds(part).contains(nodeId)) {
                    if (diffIds == null) {
                        diffIds = U.newHashSet(3);

                        diffFromAffinity.put(part, diffIds);
                    }

                    diffIds.add(nodeId);
                } else if (diffIds != null)
                    diffIds.remove(nodeId);
            }
        }
    } else {
        Collection<UUID> affNodeIds = F.nodeIds(ctx.discovery().cacheGroupAffinityNodes(grp.groupId(), affAssignment.topologyVersion()));

        for (Map.Entry<Integer, Set<UUID>> diffEntry : diffFromAffinity.entrySet()) {
            int part = diffEntry.getKey();

            for (Iterator<UUID> it = diffEntry.getValue().iterator(); it.hasNext(); ) {
                UUID nodeId = it.next();

                boolean isAffNode = affNodeIds.contains(nodeId);

                // Drop nodes that are no longer affinity nodes of the group, as well as nodes
                // that the new assignment now lists for this partition.
                if (!isAffNode || affAssignment.getIds(part).contains(nodeId))
                    it.remove();
            }
        }
    }

    diffFromAffinityVer = affAssignment.topologyVersion();
}
Also used : Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) UUID(java.util.UUID) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) Map(java.util.Map) HashMap(java.util.HashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)

Example 53 with GridDhtPartitionMap

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in the Apache Ignite project.

Class GridDhtPartitionTopologyImpl, method createMovingPartitions.

/**
 * Walks every node entry of {@link #node2part} and hands that node's partition map to
 * {@code addMoving} twice: first together with the node's backup partitions, then with its
 * primary partitions, both taken from the given assignment.
 *
 * @param aff Affinity.
 */
private void createMovingPartitions(AffinityAssignment aff) {
    for (Map.Entry<UUID, GridDhtPartitionMap> nodeEntry : node2part.entrySet()) {
        GridDhtPartitionMap nodeParts = nodeEntry.getValue();
        UUID nodeId = nodeEntry.getKey();

        // Backups are processed before primaries, as in the original call order.
        addMoving(nodeParts, aff.backupPartitions(nodeId));
        addMoving(nodeParts, aff.primaryPartitions(nodeId));
    }
}
Also used : GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) UUID(java.util.UUID) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) Map(java.util.Map) HashMap(java.util.HashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)

Example 54 with GridDhtPartitionMap

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in the Apache Ignite project.

Class GridDhtPartitionTopologyImpl, method update.

/**
 * {@inheritDoc}
 * <p>
 * Implementation outline (all steps run under the checkpoint read lock and the topology write lock):
 * <ol>
 *     <li>Bail out with {@code false} if the topology is stopping, not initialized, or the message is
 *     not tied to an exchange while an exchange is in progress.</li>
 *     <li>Apply incoming update counters, if any.</li>
 *     <li>Bail out with {@code false} if {@code exchangeVer} is stale.</li>
 *     <li>Merge {@code partMap} with the current {@link #node2part}, keeping per-node maps that
 *     {@code shouldOverridePartitionMap} does not allow to be overridden, and drop nodes unknown to
 *     the discovery cache of the exchange future.</li>
 *     <li>Bail out with {@code false} if nothing was updated.</li>
 *     <li>Install the merged map, apply lost partitions, refresh {@link #diffFromAffinity}, own or
 *     rebalance local partitions, and check evictions.</li>
 * </ol>
 *
 * @param exchangeVer Exchange version, or {@code null} when the message is not part of an exchange.
 * @param partMap Full partition map from the message; must not be {@code null}. It is modified in
 *      place and becomes the new {@link #node2part}.
 * @param incomeCntrMap Incoming partition update counters, may be {@code null}.
 * @param partsToReload Partitions passed to the {@code contains} check that feeds {@code rebalancePartition}.
 * @param partSizes Global partition sizes; stored when not {@code null}.
 * @param msgTopVer Not read by this implementation.
 * @param exchFut Exchange future; when {@code null}, the last finished exchange future is used to
 *      filter out left nodes.
 * @param lostParts Lost partitions from the full message, may be {@code null}.
 * @return {@code true} if a local partition was moved to OWNING, a local partition was handed to
 *      {@code rebalancePartition}, or {@code checkEvictions} reported a change; {@code false} otherwise,
 *      including all early exits.
 */
@Override
public boolean update(@Nullable AffinityTopologyVersion exchangeVer, GridDhtPartitionFullMap partMap, @Nullable CachePartitionFullCountersMap incomeCntrMap, Set<Integer> partsToReload, @Nullable Map<Integer, Long> partSizes, @Nullable AffinityTopologyVersion msgTopVer, @Nullable GridDhtPartitionsExchangeFuture exchFut, @Nullable Set<Integer> lostParts) {
    if (log.isDebugEnabled()) {
        log.debug("Updating full partition map " + "[grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", fullMap=" + fullMapString() + ']');
    }
    assert partMap != null;
    // The checkpoint read lock is acquired before the topology write lock and released after it.
    ctx.database().checkpointReadLock();
    try {
        lock.writeLock().lock();
        try {
            if (log.isTraceEnabled() && exchangeVer != null) {
                log.trace("Partition states before full update [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", states=" + dumpPartitionStates() + ']');
            }
            // Ignore message not-related to exchange if exchange is in progress.
            if (stopping || !lastTopChangeVer.initialized() || (exchangeVer == null && !lastTopChangeVer.equals(readyTopVer)))
                return false;
            if (incomeCntrMap != null) {
                // update local counters in partitions
                for (int i = 0; i < locParts.length(); i++) {
                    cntrMap.updateCounter(i, incomeCntrMap.updateCounter(i));
                    GridDhtLocalPartition part = locParts.get(i);
                    if (part == null)
                        continue;
                    // Only OWNING and MOVING local partitions receive the incoming counter.
                    if (part.state() == OWNING || part.state() == MOVING) {
                        long updCntr = incomeCntrMap.updateCounter(part.id());
                        long curCntr = part.updateCounter();
                        // Avoid zero counter update to empty partition to prevent lazy init.
                        if (updCntr != 0 || curCntr != 0) {
                            part.updateCounter(updCntr);
                            if (updCntr > curCntr) {
                                if (log.isDebugEnabled())
                                    log.debug("Partition update counter has updated [grp=" + grp.cacheOrGroupName() + ", p=" + part.id() + ", state=" + part.state() + ", prevCntr=" + curCntr + ", nextCntr=" + updCntr + "]");
                            }
                        }
                    }
                }
            }
            // NOTE: the counters above are applied before the staleness check below.
            // TODO FIXME https://issues.apache.org/jira/browse/IGNITE-11800
            if (exchangeVer != null) {
                // Ignore if exchange already finished or new exchange started.
                if (readyTopVer.after(exchangeVer) || lastTopChangeVer.after(exchangeVer)) {
                    U.warn(log, "Stale exchange id for full partition map update (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastTopChange=" + lastTopChangeVer + ", readTopVer=" + readyTopVer + ", exchVer=" + exchangeVer + ']');
                    return false;
                }
            }
            // With no current map at all, the incoming one always counts as an update.
            boolean fullMapUpdated = node2part == null;
            if (node2part != null) {
                // Merge maps.
                for (GridDhtPartitionMap part : node2part.values()) {
                    GridDhtPartitionMap newPart = partMap.get(part.nodeId());
                    if (shouldOverridePartitionMap(part, newPart)) {
                        fullMapUpdated = true;
                        if (log.isDebugEnabled()) {
                            log.debug("Overriding partition map in full update map [" + "node=" + part.nodeId() + ", grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
                        }
                        // Keep the local update sequence at least as high as the one received for the local node.
                        if (newPart.nodeId().equals(ctx.localNodeId()))
                            updateSeq.setIfGreater(newPart.updateSequence());
                    } else {
                        // If for some nodes current partition has a newer map, then we keep the newer value.
                        if (log.isDebugEnabled()) {
                            log.debug("Partitions map for the node keeps newer value than message [" + "node=" + part.nodeId() + ", grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
                        }
                        partMap.put(part.nodeId(), part);
                    }
                }
                // Check that we have new nodes.
                for (GridDhtPartitionMap part : partMap.values()) {
                    if (fullMapUpdated)
                        break;
                    fullMapUpdated = !node2part.containsKey(part.nodeId());
                }
                GridDhtPartitionsExchangeFuture topFut = exchFut == null ? ctx.exchange().lastFinishedFuture() : exchFut;
                // topFut can be null if lastFinishedFuture has completed with error.
                if (topFut != null) {
                    // Drop entries for nodes that the future's discovery cache does not know.
                    for (Iterator<UUID> it = partMap.keySet().iterator(); it.hasNext(); ) {
                        UUID nodeId = it.next();
                        final ClusterNode node = topFut.events().discoveryCache().node(nodeId);
                        if (node == null) {
                            if (log.isTraceEnabled())
                                log.trace("Removing left node from full map update [grp=" + grp.cacheOrGroupName() + ", exchTopVer=" + exchangeVer + ", futVer=" + topFut.initialVersion() + ", nodeId=" + nodeId + ", partMap=" + partMap + ']');
                            it.remove();
                        }
                    }
                }
            } else {
                GridDhtPartitionMap locNodeMap = partMap.get(ctx.localNodeId());
                if (locNodeMap != null)
                    updateSeq.setIfGreater(locNodeMap.updateSequence());
            }
            if (!fullMapUpdated) {
                if (log.isTraceEnabled()) {
                    log.trace("No updates for full partition map (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastExch=" + lastTopChangeVer + ", exchVer=" + exchangeVer + ", curMap=" + node2part + ", newMap=" + partMap + ']');
                }
                return false;
            }
            if (exchangeVer != null) {
                assert exchangeVer.compareTo(readyTopVer) >= 0 && exchangeVer.compareTo(lastTopChangeVer) >= 0;
                lastTopChangeVer = readyTopVer = exchangeVer;
                // Apply lost partitions from full message.
                if (lostParts != null) {
                    this.lostParts = new HashSet<>(lostParts);
                    for (Integer part : lostParts) {
                        GridDhtLocalPartition locPart = localPartition(part);
                        // New partition should be created instead.
                        if (locPart != null && locPart.state() != EVICTED) {
                            locPart.markLost();
                            GridDhtPartitionMap locMap = partMap.get(ctx.localNodeId());
                            locMap.put(part, LOST);
                        }
                    }
                }
            }
            // From here on the (merged and filtered) incoming map is the current full map.
            node2part = partMap;
            if (log.isDebugEnabled()) {
                log.debug("Partition map after processFullMessage [grp=" + grp.cacheOrGroupName() + ", exchId=" + (exchFut == null ? null : exchFut.exchangeId()) + ", fullMap=" + fullMapString() + ']');
            }
            // Outside of an exchange, refresh the affinity diff for non-replicated groups.
            if (exchangeVer == null && !grp.isReplicated() && (readyTopVer.initialized() && readyTopVer.compareTo(diffFromAffinityVer) >= 0)) {
                AffinityAssignment affAssignment = grp.affinity().readyAffinity(readyTopVer);
                for (Map.Entry<UUID, GridDhtPartitionMap> e : partMap.entrySet()) {
                    for (Map.Entry<Integer, GridDhtPartitionState> e0 : e.getValue().entrySet()) {
                        int p = e0.getKey();
                        Set<UUID> diffIds = diffFromAffinity.get(p);
                        if ((e0.getValue() == MOVING || e0.getValue() == OWNING || e0.getValue() == RENTING) && !affAssignment.getIds(p).contains(e.getKey())) {
                            if (diffIds == null)
                                diffFromAffinity.put(p, diffIds = U.newHashSet(3));
                            diffIds.add(e.getKey());
                        } else {
                            // Empty per-partition sets are removed from the diff map.
                            if (diffIds != null && diffIds.remove(e.getKey())) {
                                if (diffIds.isEmpty())
                                    diffFromAffinity.remove(p);
                            }
                        }
                    }
                }
                diffFromAffinityVer = readyTopVer;
            }
            boolean changed = false;
            GridDhtPartitionMap nodeMap = partMap.get(ctx.localNodeId());
            // Only in real exchange occurred.
            if (exchangeVer != null && nodeMap != null && grp.persistenceEnabled() && readyTopVer.initialized()) {
                assert exchFut != null;
                for (Map.Entry<Integer, GridDhtPartitionState> e : nodeMap.entrySet()) {
                    int p = e.getKey();
                    GridDhtPartitionState state = e.getValue();
                    if (state == OWNING) {
                        GridDhtLocalPartition locPart = locParts.get(p);
                        assert locPart != null : grp.cacheOrGroupName();
                        // The full map says OWNING while the local partition is still MOVING: own it.
                        if (locPart.state() == MOVING) {
                            boolean success = locPart.own();
                            assert success : locPart;
                            changed |= success;
                        }
                    } else if (state == MOVING) {
                        GridDhtLocalPartition locPart = locParts.get(p);
                        rebalancePartition(p, partsToReload.contains(p) || locPart != null && locPart.state() == MOVING && exchFut.localJoinExchange(), exchFut);
                        changed = true;
                    }
                }
            }
            // Local variable intentionally shadows the field: it holds the freshly incremented value.
            long updateSeq = this.updateSeq.incrementAndGet();
            if (readyTopVer.initialized() && readyTopVer.equals(lastTopChangeVer)) {
                AffinityAssignment aff = grp.affinity().readyAffinity(readyTopVer);
                // Evictions on exchange are checked in exchange worker thread before rebalancing.
                if (exchangeVer == null)
                    changed |= checkEvictions(updateSeq, aff);
                updateRebalanceVersion(aff.topologyVersion(), aff.assignment());
            }
            if (partSizes != null)
                this.globalPartSizes = partSizes;
            consistencyCheck();
            if (log.isDebugEnabled()) {
                log.debug("Partition map after full update [grp=" + grp.cacheOrGroupName() + ", map=" + fullMapString() + ']');
            }
            if (log.isTraceEnabled() && exchangeVer != null) {
                log.trace("Partition states after full update [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", states=" + dumpPartitionStates() + ']');
            }
            if (changed) {
                // Message format fixed: "grp=" key and balanced brackets, matching the other log lines here.
                if (log.isDebugEnabled())
                    log.debug("Partitions have been scheduled to resend [reason=" + "Full map update [grp=" + grp.cacheOrGroupName() + "]]");
                ctx.exchange().scheduleResendPartitions();
            }
            return changed;
        } finally {
            lock.writeLock().unlock();
        }
    } finally {
        ctx.database().checkpointReadUnlock();
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) GridDhtPartitionsExchangeFuture(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) UUID(java.util.UUID) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) Map(java.util.Map) HashMap(java.util.HashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)

Example 55 with GridDhtPartitionMap

Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in the Apache Ignite project.

Class GridDhtPartitionTopologyImpl, method detectLostPartitions.

/**
 * {@inheritDoc}
 * <p>
 * Implementation outline (runs under the checkpoint read lock and the topology write lock):
 * for every partition of the group that is either already listed in {@code lostParts} or has no
 * node reporting it as OWNING in {@link #node2part}, the local partition (if any, and not already
 * LOST) is marked lost or owned, and the entries of remote nodes in the full map are rewritten
 * to LOST or OWNING respectively.
 * <p>
 * The "safe" mode (mark as LOST, record the partition in {@code lostParts}) is used unless the
 * partition loss policy is IGNORE and the cluster is in-memory with baseline auto-adjust enabled
 * and a zero auto-adjust timeout; in that remaining case partitions are owned instead and newly
 * ownerless partitions are not recorded.
 *
 * @param resTopVer Topology version passed to local partition lookup and local map update.
 * @param fut Exchange future; its first event is attached to data-lost events unless the future
 *      reports cluster activation.
 * @return {@code true} if the state of at least one local partition was changed by this call;
 *      {@code false} otherwise, including when {@link #node2part} is not set.
 */
@Override
public boolean detectLostPartitions(AffinityTopologyVersion resTopVer, GridDhtPartitionsExchangeFuture fut) {
    // The checkpoint read lock is acquired before the topology write lock and released after it.
    ctx.database().checkpointReadLock();
    try {
        lock.writeLock().lock();
        try {
            if (node2part == null)
                return false;
            // Do not trigger lost partition events on activation.
            DiscoveryEvent discoEvt = fut.activateCluster() ? null : fut.firstEvent();
            final GridClusterStateProcessor state = grp.shared().kernalContext().state();
            boolean isInMemoryCluster = CU.isInMemoryCluster(grp.shared().kernalContext().discovery().allNodes(), grp.shared().kernalContext().marshallerContext().jdkMarshaller(), U.resolveClassLoader(grp.shared().kernalContext().config()));
            boolean compatibleWithIgnorePlc = isInMemoryCluster && state.isBaselineAutoAdjustEnabled() && state.baselineAutoAdjustTimeout() == 0L;
            // Calculate how data loss is handled.
            boolean safe = grp.config().getPartitionLossPolicy() != IGNORE || !compatibleWithIgnorePlc;
            int parts = grp.affinity().partitions();
            Set<Integer> recentlyLost = null;
            boolean changed = false;
            for (int part = 0; part < parts; part++) {
                boolean lost = F.contains(lostParts, part);
                if (!lost) {
                    boolean hasOwner = false;
                    // Detect if all owners are left.
                    for (GridDhtPartitionMap partMap : node2part.values()) {
                        if (partMap.get(part) == OWNING) {
                            hasOwner = true;
                            break;
                        }
                    }
                    if (!hasOwner) {
                        lost = true;
                        // Do not detect and record lost partition in IGNORE mode.
                        if (safe) {
                            if (lostParts == null)
                                lostParts = new TreeSet<>();
                            lostParts.add(part);
                            // Without a discovery event (activation) no event is recorded and no warning is logged.
                            if (discoEvt != null) {
                                if (recentlyLost == null)
                                    recentlyLost = new HashSet<>();
                                recentlyLost.add(part);
                                if (grp.eventRecordable(EventType.EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
                                    grp.addRebalanceEvent(part, EVT_CACHE_REBALANCE_PART_DATA_LOST, discoEvt.eventNode(), discoEvt.type(), discoEvt.timestamp());
                                }
                            }
                        }
                    }
                }
                if (lost) {
                    GridDhtLocalPartition locPart = localPartition(part, resTopVer, false, true);
                    if (locPart != null) {
                        // NOTE: this also skips the remote map update below for this partition.
                        if (locPart.state() == LOST)
                            continue;
                        final GridDhtPartitionState prevState = locPart.state();
                        // Track the outcome per partition and accumulate it, so that a later
                        // partition whose state did not change cannot reset the overall result.
                        boolean stateChanged = safe ? locPart.markLost() : locPart.own();
                        if (stateChanged) {
                            changed = true;
                            long updSeq = updateSeq.incrementAndGet();
                            updateLocal(locPart.id(), locPart.state(), updSeq, resTopVer);
                            // If a partition was lost while rebalancing reset it's counter to force demander mode.
                            if (prevState == MOVING)
                                locPart.resetUpdateCounter();
                        }
                    }
                    // Update remote maps according to policy.
                    for (Map.Entry<UUID, GridDhtPartitionMap> entry : node2part.entrySet()) {
                        if (entry.getKey().equals(ctx.localNodeId()))
                            continue;
                        GridDhtPartitionState p0 = entry.getValue().get(part);
                        if (p0 != null && p0 != EVICTED)
                            entry.getValue().put(part, safe ? LOST : OWNING);
                    }
                }
            }
            if (recentlyLost != null) {
                U.warn(log, "Detected lost partitions" + (!safe ? " (will ignore)" : "") + " [grp=" + grp.cacheOrGroupName() + ", parts=" + S.compact(recentlyLost) + ", topVer=" + resTopVer + "]");
            }
            return changed;
        } finally {
            lock.writeLock().unlock();
        }
    } finally {
        ctx.database().checkpointReadUnlock();
    }
}
Also used : DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) GridClusterStateProcessor(org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) TreeSet(java.util.TreeSet) UUID(java.util.UUID) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) CachePartitionFullCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap) Map(java.util.Map) HashMap(java.util.HashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) CachePartitionPartialCountersMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap) HashSet(java.util.HashSet)

Aggregations

GridDhtPartitionMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)67 UUID (java.util.UUID)47 GridDhtPartitionFullMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap)46 Map (java.util.Map)41 HashMap (java.util.HashMap)36 GridPartitionStateMap (org.apache.ignite.internal.util.GridPartitionStateMap)32 CachePartitionPartialCountersMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap)30 CachePartitionFullCountersMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionFullCountersMap)28 ClusterNode (org.apache.ignite.cluster.ClusterNode)20 HashSet (java.util.HashSet)14 AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)10 Set (java.util.Set)9 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)8 AffinityAssignment (org.apache.ignite.internal.processors.affinity.AffinityAssignment)8 ArrayList (java.util.ArrayList)7 Ignite (org.apache.ignite.Ignite)7 IgniteKernal (org.apache.ignite.internal.IgniteKernal)7 GridDhtPartitionState (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState)7 GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology)7 List (java.util.List)6