Example 6 with GridDhtPartitionTopology

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

From class GridDhtPartitionsExchangeFuture, method validatePartitionsState.

/**
 * Validates that partition update counters and cache sizes for all caches are consistent.
 */
private void validatePartitionsState() {
    try {
        U.doInParallel(cctx.kernalContext().pools().getSystemExecutorService(), nonLocalCacheGroupDescriptors(), grpDesc -> {
            CacheGroupContext grpCtx = cctx.cache().cacheGroup(grpDesc.groupId());

            GridDhtPartitionTopology top = grpCtx != null ? grpCtx.topology()
                : cctx.exchange().clientTopology(grpDesc.groupId(), events().discoveryCache());

            // Skip validation for read-through or write-through caches, caches backed by a store,
            // caches with rebalancing disabled or deferred, caches with a non-eternal ExpiryPolicy,
            // and when partition size validation is disabled explicitly.
            boolean customExpiryPlc = Optional.ofNullable(grpCtx)
                .map(CacheGroupContext::caches)
                .orElseGet(Collections::emptyList)
                .stream()
                .anyMatch(ctx -> ctx.expiry() != null && !(ctx.expiry() instanceof EternalExpiryPolicy));

            if (grpCtx == null
                || grpCtx.config().isReadThrough()
                || grpCtx.config().isWriteThrough()
                || grpCtx.config().getCacheStoreFactory() != null
                || grpCtx.config().getRebalanceDelay() == -1
                || grpCtx.config().getRebalanceMode() == CacheRebalanceMode.NONE
                || customExpiryPlc
                || SKIP_PARTITION_SIZE_VALIDATION)
                return null;

            try {
                validator.validatePartitionCountersAndSizes(GridDhtPartitionsExchangeFuture.this, top, msgs);
            }
            catch (IgniteCheckedException ex) {
                log.warning(String.format(PARTITION_STATE_FAILED_MSG, grpCtx.cacheOrGroupName(), ex.getMessage()));
                // TODO: Handle such errors https://issues.apache.org/jira/browse/IGNITE-7833
            }

            return null;
        });
    } catch (IgniteCheckedException e) {
        throw new IgniteException("Failed to validate partitions state", e);
    }
    timeBag.finishGlobalStage("Validate partitions states");
}
Also used: IgniteCheckedException (org.apache.ignite.IgniteCheckedException), GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology), IgniteException (org.apache.ignite.IgniteException), EternalExpiryPolicy (javax.cache.expiry.EternalExpiryPolicy), CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext), Collections (java.util.Collections)
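The null-check-and-fall-back idiom for obtaining a topology recurs throughout this class (see also Examples 8 and 9 below). A minimal sketch of how it could be factored into a helper; the method name resolveTopology is hypothetical, but the body uses only the accessors visible in the snippet above, as if added to the same class:

    // Hypothetical helper capturing the topology-resolution idiom used above.
    private GridDhtPartitionTopology resolveTopology(int grpId) {
        CacheGroupContext grp = cctx.cache().cacheGroup(grpId);

        // Groups hosted on this node expose their topology directly; for groups this
        // node does not host, fall back to the client topology tracked by the exchange manager.
        return grp != null
            ? grp.topology()
            : cctx.exchange().clientTopology(grpId, events().discoveryCache());
    }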

Example 7 with GridDhtPartitionTopology

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

From class GridDhtPartitionsExchangeFuture, method updatePartitionSingleMap.

/**
 * Updates partition map in all caches.
 *
 * @param nodeId ID of the node the message was received from.
 * @param msg Partitions single message.
 */
private void updatePartitionSingleMap(UUID nodeId, GridDhtPartitionsSingleMessage msg) {
    msgs.put(nodeId, msg);
    for (Map.Entry<Integer, GridDhtPartitionMap> entry : msg.partitions().entrySet()) {
        Integer grpId = entry.getKey();
        CacheGroupContext grp = cctx.cache().cacheGroup(grpId);
        GridDhtPartitionTopology top = grp != null ? grp.topology()
            : cctx.exchange().clientTopology(grpId, events().discoveryCache());
        top.update(exchId, entry.getValue(), false);
    }
}
Also used: GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology), CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext), Map (java.util.Map), LinkedHashMap (java.util.LinkedHashMap), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap), ConcurrentMap (java.util.concurrent.ConcurrentMap)
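For orientation, msg.partitions() maps each cache group ID to the sender's GridDhtPartitionMap, which in turn maps partition IDs to partition states (OWNING, MOVING, RENTING, ...). A small illustrative sketch, written as if added next to updatePartitionSingleMap so it can reuse the class's log field; the map() accessor on GridDhtPartitionMap is an assumption here, not verified against the exact Ignite version:

    // Illustrative only: dump the per-group partition states carried by one single message.
    private void dumpSingleMessage(UUID nodeId, GridDhtPartitionsSingleMessage msg) {
        for (Map.Entry<Integer, GridDhtPartitionMap> e : msg.partitions().entrySet()) {
            GridDhtPartitionMap partMap = e.getValue();

            // partMap.map() is assumed to yield partition ID -> GridDhtPartitionState.
            log.info("Group " + e.getKey() + " from node " + nodeId + ": " + partMap.map());
        }
    }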

Example 8 with GridDhtPartitionTopology

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

From class GridDhtPartitionsExchangeFuture, method finishExchangeOnCoordinator.

/**
 * Finishes the exchange on the coordinator and sends the full partition map to the other participants.
 *
 * @param sndResNodes Additional nodes to send the finish message to.
 */
private void finishExchangeOnCoordinator(@Nullable Collection<ClusterNode> sndResNodes) {
    if (isDone() || !enterBusy())
        return;
    try {
        if (!F.isEmpty(exchangeGlobalExceptions) && dynamicCacheStartExchange() && isRollbackSupported()) {
            sendExchangeFailureMessage();
            return;
        }
        AffinityTopologyVersion resTopVer = exchCtx.events().topologyVersion();
        if (log.isInfoEnabled()) {
            log.info("finishExchangeOnCoordinator [topVer=" + initialVersion() + ", resVer=" + resTopVer + ']');
        }
        Map<Integer, CacheGroupAffinityMessage> idealAffDiff = null;
        // Reserve at least 2 threads for system operations.
        int parallelismLvl = U.availableThreadCount(cctx.kernalContext(), GridIoPolicy.SYSTEM_POOL, 2);
        if (exchCtx.mergeExchanges()) {
            synchronized (mux) {
                if (mergedJoinExchMsgs != null) {
                    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : mergedJoinExchMsgs.entrySet()) {
                        msgs.put(e.getKey(), e.getValue());
                        updatePartitionSingleMap(e.getKey(), e.getValue());
                    }
                }
            }
            assert exchCtx.events().hasServerJoin() || exchCtx.events().hasServerLeft();
            exchCtx.events().processEvents(this);
            if (exchCtx.events().hasServerLeft())
                idealAffDiff = cctx.affinity().onServerLeftWithExchangeMergeProtocol(this);
            else
                cctx.affinity().onServerJoinWithExchangeMergeProtocol(this, true);
            doInParallel(
                parallelismLvl,
                cctx.kernalContext().pools().getSystemExecutorService(),
                cctx.affinity().cacheGroups().values(),
                desc -> {
                    if (desc.config().getCacheMode() == CacheMode.LOCAL)
                        return null;

                    CacheGroupContext grp = cctx.cache().cacheGroup(desc.groupId());

                    GridDhtPartitionTopology top = grp != null ? grp.topology()
                        : cctx.exchange().clientTopology(desc.groupId(), events().discoveryCache());

                    top.beforeExchange(this, true, true);

                    return null;
                });
        }
        span.addLog(() -> "Affinity recalculation (crd)");
        timeBag.finishGlobalStage("Affinity recalculation (crd)");
        Map<Integer, CacheGroupAffinityMessage> joinedNodeAff = new ConcurrentHashMap<>(cctx.cache().cacheGroups().size());
        doInParallel(
            parallelismLvl,
            cctx.kernalContext().pools().getSystemExecutorService(),
            msgs.values(),
            msg -> {
                processSingleMessageOnCrdFinish(msg, joinedNodeAff);

                return null;
            });
        timeBag.finishGlobalStage("Collect update counters and create affinity messages");
        if (firstDiscoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
            assert firstDiscoEvt instanceof DiscoveryCustomEvent;
            if (activateCluster() || changedBaseline())
                assignPartitionsStates(null);
            DiscoveryCustomMessage discoveryCustomMessage = ((DiscoveryCustomEvent) firstDiscoEvt).customMessage();
            if (discoveryCustomMessage instanceof DynamicCacheChangeBatch) {
                if (exchActions != null) {
                    Set<String> caches = exchActions.cachesToResetLostPartitions();
                    if (!F.isEmpty(caches))
                        resetLostPartitions(caches);
                    Set<Integer> cacheGroupsToResetOwners = concat(
                        exchActions.cacheGroupsToStart().stream().map(grp -> grp.descriptor().groupId()),
                        exchActions.cachesToResetLostPartitions().stream().map(CU::cacheId))
                        .collect(Collectors.toSet());

                    assignPartitionsStates(cacheGroupsToResetOwners);
                }
            } else if (discoveryCustomMessage instanceof SnapshotDiscoveryMessage
                && ((SnapshotDiscoveryMessage) discoveryCustomMessage).needAssignPartitions()) {
                markAffinityReassign();
                assignPartitionsStates(null);
            }
        } else if (exchCtx.events().hasServerJoin())
            assignPartitionsStates(null);
        else if (exchCtx.events().hasServerLeft())
            assignPartitionsStates(emptySet());
        // Validation should happen after resetting owners to avoid false desync reporting.
        validatePartitionsState();
        // Recalculate new affinity based on partitions availability.
        if (!exchCtx.mergeExchanges() && forceAffReassignment) {
            idealAffDiff = cctx.affinity().onCustomEventWithEnforcedAffinityReassignment(this);
            timeBag.finishGlobalStage("Ideal affinity diff calculation (enforced)");
        }
        for (CacheGroupContext grpCtx : cctx.cache().cacheGroups()) {
            if (!grpCtx.isLocal())
                grpCtx.topology().applyUpdateCounters();
        }
        timeBag.finishGlobalStage("Apply update counters");
        updateLastVersion(cctx.versions().last());
        cctx.versions().onExchange(lastVer.get().order());
        IgniteProductVersion minVer = exchCtx.events().discoveryCache().minimumNodeVersion();
        GridDhtPartitionsFullMessage msg = createPartitionsMessage(true,
            minVer.compareToIgnoreTimestamp(PARTIAL_COUNTERS_MAP_SINCE) >= 0);
        if (!cctx.affinity().rebalanceRequired() && !deactivateCluster())
            msg.rebalanced(true);
        if (exchCtx.mergeExchanges()) {
            assert !centralizedAff;
            msg.resultTopologyVersion(resTopVer);
            if (exchCtx.events().hasServerLeft())
                msg.idealAffinityDiff(idealAffDiff);
        } else if (forceAffReassignment)
            msg.idealAffinityDiff(idealAffDiff);
        msg.prepareMarshal(cctx);
        timeBag.finishGlobalStage("Full message preparing");
        synchronized (mux) {
            finishState = new FinishState(crd.id(), resTopVer, msg);
            state = ExchangeLocalState.DONE;
        }
        if (centralizedAff) {
            assert !exchCtx.mergeExchanges();
            IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>> fut = cctx.affinity().initAffinityOnNodeLeft(this);
            if (!fut.isDone())
                fut.listen(this::onAffinityInitialized);
            else
                onAffinityInitialized(fut);
        } else {
            Set<ClusterNode> nodes;
            Map<UUID, GridDhtPartitionsSingleMessage> mergedJoinExchMsgs0;
            synchronized (mux) {
                srvNodes.remove(cctx.localNode());
                nodes = new LinkedHashSet<>(srvNodes);
                mergedJoinExchMsgs0 = mergedJoinExchMsgs;
                if (mergedJoinExchMsgs != null) {
                    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : mergedJoinExchMsgs.entrySet()) {
                        if (e.getValue() != null) {
                            ClusterNode node = cctx.discovery().node(e.getKey());
                            if (node != null)
                                nodes.add(node);
                        }
                    }
                } else
                    mergedJoinExchMsgs0 = Collections.emptyMap();
                if (!F.isEmpty(sndResNodes))
                    nodes.addAll(sndResNodes);
            }
            if (msg.rebalanced())
                markRebalanced();
            if (!nodes.isEmpty())
                sendAllPartitions(msg, nodes, mergedJoinExchMsgs0, joinedNodeAff);
            timeBag.finishGlobalStage("Full message sending");
            discoveryLag = calculateDiscoveryLag(msgs, mergedJoinExchMsgs0);
            if (!stateChangeExchange())
                onDone(exchCtx.events().topologyVersion(), null);
            for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : pendingSingleMsgs.entrySet()) {
                if (log.isInfoEnabled()) {
                    log.info("Process pending message on coordinator [node=" + e.getKey() + ", ver=" + initialVersion() + ", resVer=" + resTopVer + ']');
                }
                processSingleMessage(e.getKey(), e.getValue());
            }
        }
        if (stateChangeExchange()) {
            StateChangeRequest req = exchActions.stateChangeRequest();
            assert req != null : exchActions;
            boolean stateChangeErr = false;
            if (!F.isEmpty(exchangeGlobalExceptions)) {
                stateChangeErr = true;
                cctx.kernalContext().state().onStateChangeError(exchangeGlobalExceptions, req);
            } else {
                boolean hasMoving = !partsToReload.isEmpty();
                Set<Integer> waitGrps = cctx.affinity().waitGroups();
                if (!hasMoving) {
                    for (CacheGroupContext grpCtx : cctx.cache().cacheGroups()) {
                        if (waitGrps.contains(grpCtx.groupId()) && grpCtx.topology().hasMovingPartitions()) {
                            hasMoving = true;
                            break;
                        }
                    }
                }
                cctx.kernalContext().state().onExchangeFinishedOnCoordinator(this, hasMoving);
            }
            if (!cctx.kernalContext().state().clusterState().localBaselineAutoAdjustment()) {
                ClusterState state = stateChangeErr ? ClusterState.INACTIVE : req.state();
                ChangeGlobalStateFinishMessage stateFinishMsg = new ChangeGlobalStateFinishMessage(
                    req.requestId(), state, !stateChangeErr);
                cctx.discovery().sendCustomEvent(stateFinishMsg);
            }
            timeBag.finishGlobalStage("State finish message sending");
            if (!centralizedAff)
                onDone(exchCtx.events().topologyVersion(), null);
        }
        // Try switch late affinity right now if an exchange has been completed normally.
        if (!centralizedAff && isDone() && error() == null && !cctx.kernalContext().isStopping())
            cctx.exchange().checkRebalanceState();
    } catch (IgniteCheckedException e) {
        if (reconnectOnError(e))
            onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
        else
            onDone(e);
    } finally {
        leaveBusy();
    }
}
Also used: GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology), StateChangeRequest (org.apache.ignite.internal.processors.cache.StateChangeRequest), IgniteProductVersion (org.apache.ignite.lang.IgniteProductVersion), DiscoveryCustomMessage (org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage), IgniteCheckedException (org.apache.ignite.IgniteCheckedException), DynamicCacheChangeBatch (org.apache.ignite.internal.processors.cache.DynamicCacheChangeBatch), ArrayList (java.util.ArrayList), List (java.util.List), UUID (java.util.UUID), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), IgniteNeedReconnectException (org.apache.ignite.internal.IgniteNeedReconnectException), ClusterNode (org.apache.ignite.cluster.ClusterNode), ClusterState (org.apache.ignite.cluster.ClusterState), DiscoveryDataClusterState (org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState), AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion), DiscoveryCustomEvent (org.apache.ignite.internal.events.DiscoveryCustomEvent), CU (org.apache.ignite.internal.util.typedef.internal.CU), SnapshotDiscoveryMessage (org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage), ChangeGlobalStateFinishMessage (org.apache.ignite.internal.processors.cluster.ChangeGlobalStateFinishMessage), CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext), Map (java.util.Map), LinkedHashMap (java.util.LinkedHashMap), HashMap (java.util.HashMap), ConcurrentMap (java.util.concurrent.ConcurrentMap)
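Both parallel sections above follow the same pattern: derive a parallelism level that leaves system-pool headroom (at least two threads reserved), then fan the per-group work out and join. A self-contained sketch of that pattern using plain java.util.concurrent; Ignite's U.availableThreadCount and doInParallel are internal helpers, so this only approximates their behavior:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
import java.util.function.Consumer;

final class ParallelFanOut {
    /** Runs {@code task} over {@code items} on {@code pool}, keeping two threads free. */
    static <T> void forEachInParallel(ExecutorService pool, int poolSize, List<T> items,
        Consumer<T> task) throws Exception {
        // Reserve at least 2 threads for system operations, as the exchange code does.
        int parallelism = Math.max(1, poolSize - 2);

        Semaphore permits = new Semaphore(parallelism);
        List<Future<?>> futs = new ArrayList<>();

        for (T item : items) {
            permits.acquire(); // bound the number of in-flight tasks

            futs.add(pool.submit(() -> {
                try {
                    task.accept(item);
                }
                finally {
                    permits.release();
                }
            }));
        }

        for (Future<?> fut : futs)
            fut.get(); // join and propagate the first failure
    }
}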

Example 9 with GridDhtPartitionTopology

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

From class GridDhtPartitionsExchangeFuture, method detectLostPartitions.

/**
 * Detects lost partitions after a node has left or failed. On the topology coordinator this is
 * called once all {@link GridDhtPartitionsSingleMessage} messages have been received; on other
 * nodes it is called when the exchange future is completed by a {@link GridDhtPartitionsFullMessage}.
 *
 * @param resTopVer Result topology version.
 */
private void detectLostPartitions(AffinityTopologyVersion resTopVer) {
    try {
        // Reserve at least 2 threads for system operations.
        doInParallelUninterruptibly(
            U.availableThreadCount(cctx.kernalContext(), GridIoPolicy.SYSTEM_POOL, 2),
            cctx.kernalContext().pools().getSystemExecutorService(),
            cctx.affinity().cacheGroups().values(),
            desc -> {
                if (desc.config().getCacheMode() == CacheMode.LOCAL)
                    return null;

                CacheGroupContext grp = cctx.cache().cacheGroup(desc.groupId());

                GridDhtPartitionTopology top = grp != null ? grp.topology()
                    : cctx.exchange().clientTopology(desc.groupId(), events().discoveryCache());

                top.detectLostPartitions(resTopVer, this);

                return null;
            });
    } catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }
    timeBag.finishGlobalStage("Detect lost partitions");
}
Also used: IgniteCheckedException (org.apache.ignite.IgniteCheckedException), GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology), IgniteException (org.apache.ignite.IgniteException), CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext)
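The lost-partition state computed here surfaces through Ignite's public API, where applications can inspect and reset it. A brief usage sketch; the cache name "myCache" is hypothetical:

import java.util.Collection;
import java.util.Collections;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;

class LostPartitionsCheck {
    /** Checks for LOST partitions after a topology change and resets them. */
    static void handleLostPartitions(Ignite ignite) {
        IgniteCache<Integer, String> cache = ignite.cache("myCache"); // hypothetical cache name

        Collection<Integer> lost = cache.lostPartitions();

        // Once the application deems the remaining data acceptable (per its
        // PartitionLossPolicy), return the partitions to normal operation.
        if (!lost.isEmpty())
            ignite.resetLostPartitions(Collections.singleton("myCache"));
    }
}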

Example 10 with GridDhtPartitionTopology

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.

From class GridCacheQueryAdapter, method nodes.

/**
 * @return Nodes to execute on.
 */
private Collection<ClusterNode> nodes() throws IgniteCheckedException {
    CacheMode cacheMode = cctx.config().getCacheMode();
    Integer part = partition();
    switch (cacheMode) {
        case LOCAL:
            if (prj != null)
                U.warn(log, "Ignoring query projection because it's executed over LOCAL cache " +
                    "(only local node will be queried): " + this);

            if (type == SCAN && cctx.config().getCacheMode() == LOCAL && part != null
                && part >= cctx.affinity().partitions())
                throw new IgniteCheckedException("Invalid partition number: " + part);

            return Collections.singletonList(cctx.localNode());
        case REPLICATED:
            if (prj != null || part != null)
                return nodes(cctx, prj, part);
            GridDhtPartitionTopology topology = cctx.topology();
            if (cctx.affinityNode() && !topology.localPartitionMap().hasMovingPartitions())
                return Collections.singletonList(cctx.localNode());
            topology.readLock();
            try {
                Collection<ClusterNode> affNodes = nodes(cctx, null, null);
                List<ClusterNode> nodes = new ArrayList<>(affNodes);
                Collections.shuffle(nodes);
                for (ClusterNode node : nodes) {
                    if (!topology.partitions(node.id()).hasMovingPartitions())
                        return Collections.singletonList(node);
                }
                return affNodes;
            } finally {
                topology.readUnlock();
            }
        case PARTITIONED:
            return nodes(cctx, prj, part);
        default:
            throw new IllegalStateException("Unknown cache distribution mode: " + cacheMode);
    }
}
Also used: ClusterNode (org.apache.ignite.cluster.ClusterNode), IgniteCheckedException (org.apache.ignite.IgniteCheckedException), GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology), ArrayList (java.util.ArrayList), CacheMode (org.apache.ignite.cache.CacheMode)
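The REPLICATED branch implements a simple "prefer a fully rebalanced replica" selection: shuffle the affinity nodes, pick the first with no moving partitions, otherwise accept the whole set. Extracted as a generic sketch so it compiles outside Ignite internals; the hasMoving predicate stands in for topology.partitions(node.id()).hasMovingPartitions():

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.function.Predicate;

final class StableReplicaSelection {
    /** @return A singleton with a stable node, or all candidates if every node is rebalancing. */
    static <N> Collection<N> pickStableNode(Collection<N> affNodes, Predicate<N> hasMoving) {
        List<N> shuffled = new ArrayList<>(affNodes);

        Collections.shuffle(shuffled); // randomize to spread query load across replicas

        for (N node : shuffled) {
            if (!hasMoving.test(node))
                return Collections.singletonList(node);
        }

        return affNodes;
    }
}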

Aggregations

GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology): 64 usages
AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion): 24
GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition): 21
ClusterNode (org.apache.ignite.cluster.ClusterNode): 20
Map (java.util.Map): 18
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 18
CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext): 17
HashMap (java.util.HashMap): 15
ArrayList (java.util.ArrayList): 14
Ignite (org.apache.ignite.Ignite): 14
GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest): 12
Test (org.junit.Test): 12
IgniteEx (org.apache.ignite.internal.IgniteEx): 11
UUID (java.util.UUID): 10
IgniteKernal (org.apache.ignite.internal.IgniteKernal): 10
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 9
IgniteException (org.apache.ignite.IgniteException): 9
GridCacheContext (org.apache.ignite.internal.processors.cache.GridCacheContext): 9
GridDhtPartitionMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap): 9
HashSet (java.util.HashSet): 8