Example 11 with GridDhtLocalPartition

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

The class IgniteTxAdapter, method applyTxSizes.

/**
 * Makes cache size changes accumulated during the transaction visible outside of the transaction.
 */
protected void applyTxSizes() {
    TxCounters txCntrs = txCounters(false);
    if (txCntrs == null)
        return;
    Map<Integer, ? extends Map<Integer, AtomicLong>> sizeDeltas = txCntrs.sizeDeltas();
    for (Map.Entry<Integer, ? extends Map<Integer, AtomicLong>> entry : sizeDeltas.entrySet()) {
        Integer cacheId = entry.getKey();
        Map<Integer, AtomicLong> deltas = entry.getValue();
        assert !F.isEmpty(deltas);
        GridDhtPartitionTopology top = cctx.cacheContext(cacheId).topology();
        // Need to reserve on backups only
        boolean reserve = dht() && remote();
        for (Map.Entry<Integer, AtomicLong> e : deltas.entrySet()) {
            boolean invalid = false;
            int p = e.getKey();
            long delta = e.getValue().get();
            try {
                GridDhtLocalPartition part = top.localPartition(p);
                if (!reserve || part != null && part.reserve()) {
                    assert part != null;
                    try {
                        if (part.state() != GridDhtPartitionState.RENTING)
                            part.dataStore().updateSize(cacheId, delta);
                        else
                            invalid = true;
                    } finally {
                        if (reserve)
                            part.release();
                    }
                } else
                    invalid = true;
            } catch (GridDhtInvalidPartitionException e1) {
                invalid = true;
            }
            if (invalid) {
                assert reserve;
                if (log.isDebugEnabled())
                    log.debug("Trying to apply size delta for invalid partition: " + "[cacheId=" + cacheId + ", part=" + p + "]");
            }
        }
    }
}
Also used : GridDhtInvalidPartitionException(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtInvalidPartitionException) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) AtomicLong(java.util.concurrent.atomic.AtomicLong) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap)
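
A note on the data structure behind this example: the deltas come from TxCounters, which accumulates one signed counter per (cacheId, partition) pair while the transaction runs, and applyTxSizes() drains that map into the partition data stores on commit. Below is a minimal standalone sketch of such an accumulator using only java.util types; the class name TxSizeDeltas and its methods are illustrative stand-ins, not Ignite API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

/** Hypothetical stand-in for a per-transaction size accumulator (not the real TxCounters). */
class TxSizeDeltas {
    /** cacheId -> (partition -> accumulated size delta). */
    private final Map<Integer, Map<Integer, AtomicLong>> sizeDeltas = new ConcurrentHashMap<>();

    /** Records a put (+1) or remove (-1) against a cache partition while the transaction is running. */
    void accumulate(int cacheId, int part, long delta) {
        sizeDeltas
            .computeIfAbsent(cacheId, id -> new ConcurrentHashMap<>())
            .computeIfAbsent(part, p -> new AtomicLong())
            .addAndGet(delta);
    }

    /** Read view consumed on commit, mirroring txCntrs.sizeDeltas() in applyTxSizes(). */
    Map<Integer, Map<Integer, AtomicLong>> sizeDeltas() {
        return sizeDeltas;
    }
}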

Example 12 with GridDhtLocalPartition

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

The class IgniteTxHandler, method applyPartitionsUpdatesCounters.

/**
 * Applies partition counter updates for transactions.
 * <p>
 * Called after entries are written to the WAL on commit, or during rollback, to close gaps in the update counter sequence.
 * <p>
 * On rollback, counters should be applied on the primary only after the backup nodes. Otherwise, if the primary fails
 * before sending rollback requests to the backups, remote transactions can be committed by the recovery protocol and
 * partition consistency will not be restored when the primary returns to the grid, because a RollbackRecord was already
 * written (relevant for persistent mode only).
 *
 * @param counters Counter values to be updated.
 * @param rollback {@code True} if applied during rollbacks.
 * @param rollbackOnPrimary {@code True} if rollback happens on primary node. Passed to CQ engine.
 */
public void applyPartitionsUpdatesCounters(Iterable<PartitionUpdateCountersMessage> counters, boolean rollback, boolean rollbackOnPrimary) throws IgniteCheckedException {
    if (counters == null)
        return;
    WALPointer ptr = null;
    try {
        for (PartitionUpdateCountersMessage counter : counters) {
            GridCacheContext ctx0 = ctx.cacheContext(counter.cacheId());
            GridDhtPartitionTopology top = ctx0.topology();
            assert top != null;
            AffinityTopologyVersion topVer = top.readyTopologyVersion();
            for (int i = 0; i < counter.size(); i++) {
                boolean invalid = false;
                try {
                    GridDhtLocalPartition part = top.localPartition(counter.partition(i));
                    if (part != null && part.reserve()) {
                        try {
                            if (part.state() != RENTING) {
                                // This check is relevant only for backup nodes.
                                long start = counter.initialCounter(i);
                                long delta = counter.updatesCount(i);
                                boolean updated = part.updateCounter(start, delta);
                                // Need to log rolled back range for logical recovery.
                                if (updated && rollback) {
                                    CacheGroupContext grpCtx = part.group();
                                    if (grpCtx.persistenceEnabled() && grpCtx.walEnabled() && !grpCtx.mvccEnabled()) {
                                        RollbackRecord rec = new RollbackRecord(grpCtx.groupId(), part.id(), start, delta);
                                        ptr = ctx.wal().log(rec);
                                    }
                                    for (int cntr = 1; cntr <= delta; cntr++) {
                                        ctx0.continuousQueries().skipUpdateCounter(null, part.id(), start + cntr, topVer, rollbackOnPrimary);
                                    }
                                }
                            } else
                                invalid = true;
                        } finally {
                            part.release();
                        }
                    } else
                        invalid = true;
                } catch (GridDhtInvalidPartitionException e) {
                    invalid = true;
                }
                if (log.isDebugEnabled() && invalid) {
                    log.debug("Received partition update counters message for invalid partition, ignoring: " + "[cacheId=" + counter.cacheId() + ", part=" + counter.partition(i) + ']');
                }
            }
        }
    } finally {
        if (ptr != null)
            ctx.wal().flush(ptr, false);
    }
}
Also used : GridDhtInvalidPartitionException(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtInvalidPartitionException) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) PartitionUpdateCountersMessage(org.apache.ignite.internal.processors.cache.distributed.dht.PartitionUpdateCountersMessage) RollbackRecord(org.apache.ignite.internal.pagemem.wal.record.RollbackRecord) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) WALPointer(org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer)
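
The message carries a (start, delta) pair per partition, and on rollback the range start + 1 .. start + delta is applied anyway so the update-counter sequence stays gapless. A deliberately simplified sketch of that idea in plain Java follows; PartitionCounter is a hypothetical class and ignores the out-of-order gap tracking that the real GridDhtLocalPartition performs.

/** Hypothetical partition update counter illustrating gap closing on rollback. */
class PartitionCounter {
    /** Highest counter value applied so far. */
    private long cntr;

    /**
     * Applies the reserved range [start + 1, start + delta].
     * Returns {@code true} if the counter actually advanced, loosely mirroring part.updateCounter(start, delta).
     */
    synchronized boolean update(long start, long delta) {
        long hwm = start + delta;

        if (hwm <= cntr)
            return false; // Range already covered, nothing to close.

        cntr = hwm;

        return true;
    }

    synchronized long get() {
        return cntr;
    }
}

// Usage: a rolled-back transaction that reserved counters 10..12 still closes its range.
// PartitionCounter c = new PartitionCounter();
// c.update(0, 9);  // Committed updates 1..9.
// c.update(9, 3);  // Rollback of reserved range 10..12 -> counter becomes 12, no gap remains.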

Example 13 with GridDhtLocalPartition

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

The class GridCommandHandlerIndexingUtils, method breakSqlIndex.

/**
 * Deletes records from the index, bypassing the cache.
 *
 * @param internalCache Cache.
 * @param partId Partition number.
 * @param filter Row filter.
 * @throws Exception If failed.
 */
static <K, V> void breakSqlIndex(IgniteInternalCache<K, V> internalCache, int partId, @Nullable Predicate<CacheDataRow> filter) throws Exception {
    requireNonNull(internalCache);
    GridCacheContext<K, V> cacheCtx = internalCache.context();
    GridDhtLocalPartition locPart = cacheCtx.topology().localPartitions().get(partId);
    GridIterator<CacheDataRow> cacheDataGridIter = cacheCtx.group().offheap().partitionIterator(locPart.id());
    GridQueryProcessor qryProcessor = internalCache.context().kernalContext().query();
    while (cacheDataGridIter.hasNextX()) {
        CacheDataRow cacheDataRow = cacheDataGridIter.nextX();
        if (nonNull(filter) && !filter.test(cacheDataRow))
            continue;
        cacheCtx.shared().database().checkpointReadLock();
        try {
            qryProcessor.remove(cacheCtx, cacheDataRow);
        } finally {
            cacheCtx.shared().database().checkpointReadUnlock();
        }
    }
}
Also used : CacheDataRow(org.apache.ignite.internal.processors.cache.persistence.CacheDataRow) GridQueryProcessor(org.apache.ignite.internal.processors.query.GridQueryProcessor) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)
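
The helper above walks the partition's off-heap rows and removes each matching row from the SQL index while holding the checkpoint read lock for every removal. A generic sketch of that iterate, filter, remove-under-lock shape is shown below; the rows list, the remover callback and the ReentrantReadWriteLock are stand-ins for the Ignite row iterator, GridQueryProcessor.remove() and the checkpoint lock, introduced only for illustration.

import java.util.List;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Consumer;
import java.util.function.Predicate;

class IndexBreaker {
    /** Stand-in for the checkpoint lock that guards index mutations. */
    private final ReentrantReadWriteLock checkpointLock = new ReentrantReadWriteLock();

    /** Removes every row matching {@code filter} via {@code remover}, taking the read lock around each removal. */
    <R> void removeMatching(List<R> rows, Predicate<R> filter, Consumer<R> remover) {
        for (R row : rows) {
            if (filter != null && !filter.test(row))
                continue;

            checkpointLock.readLock().lock();

            try {
                remover.accept(row); // qryProcessor.remove(cacheCtx, cacheDataRow) in the original.
            }
            finally {
                checkpointLock.readLock().unlock();
            }
        }
    }
}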

Example 14 with GridDhtLocalPartition

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

The class GridDhtPartitionDemander, method handleSupplyMessage.

/**
 * Handles supply message from {@code nodeId} with specified {@code topicId}.
 *
 * The supply message contains entries to populate the rebalancing partitions.
 *
 * This is a cyclic process: rebalancing partitions are populated with entries from the supply message;
 * if not all partitions tracked by {@link #rebalanceFut} have been rebalanced or marked as missed,
 * a new demand message is sent to request the next batch of entries.
 *
 * @param nodeId Node id.
 * @param supplyMsg Supply message.
 */
public void handleSupplyMessage(final UUID nodeId, final GridDhtPartitionSupplyMessage supplyMsg) {
    AffinityTopologyVersion topVer = supplyMsg.topologyVersion();
    RebalanceFuture fut = rebalanceFut;
    ClusterNode node = ctx.node(nodeId);
    fut.cancelLock.readLock().lock();
    try {
        String errMsg = null;
        if (fut.isDone())
            errMsg = "rebalance completed";
        else if (node == null)
            errMsg = "supplier has left cluster";
        else if (!rebalanceFut.isActual(supplyMsg.rebalanceId()))
            errMsg = "topology changed";
        if (errMsg != null) {
            if (log.isDebugEnabled()) {
                log.debug("Supply message has been ignored (" + errMsg + ") [" + demandRoutineInfo(nodeId, supplyMsg) + ']');
            }
            return;
        }
        if (log.isDebugEnabled())
            log.debug("Received supply message [" + demandRoutineInfo(nodeId, supplyMsg) + ']');
        // Check whether there was an error during the supply process.
        Throwable msgExc = null;
        final GridDhtPartitionTopology top = grp.topology();
        if (supplyMsg.classError() != null)
            msgExc = supplyMsg.classError();
        else if (supplyMsg.error() != null)
            msgExc = supplyMsg.error();
        if (msgExc != null) {
            GridDhtPartitionMap partMap = top.localPartitionMap();
            Set<Integer> unstableParts = supplyMsg.infos().keySet().stream().filter(p -> partMap.get(p) == MOVING).collect(Collectors.toSet());
            U.error(log, "Rebalancing routine has failed, some partitions could be unavailable for reading" + " [" + demandRoutineInfo(nodeId, supplyMsg) + ", unavailablePartitions=" + S.compact(unstableParts) + ']', msgExc);
            fut.error(nodeId);
            return;
        }
        fut.receivedBytes.addAndGet(supplyMsg.messageSize());
        if (grp.sharedGroup()) {
            for (GridCacheContext cctx : grp.caches()) {
                if (cctx.statisticsEnabled()) {
                    long keysCnt = supplyMsg.keysForCache(cctx.cacheId());
                    if (keysCnt != -1)
                        cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(keysCnt);
                    // Can not be calculated per cache.
                    cctx.cache().metrics0().onRebalanceBatchReceived(supplyMsg.messageSize());
                }
            }
        } else {
            GridCacheContext cctx = grp.singleCacheContext();
            if (cctx.statisticsEnabled()) {
                if (supplyMsg.estimatedKeysCount() != -1)
                    cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(supplyMsg.estimatedKeysCount());
                cctx.cache().metrics0().onRebalanceBatchReceived(supplyMsg.messageSize());
            }
        }
        try {
            AffinityAssignment aff = grp.affinity().cachedAffinity(topVer);
            // Preload.
            for (Map.Entry<Integer, CacheEntryInfoCollection> e : supplyMsg.infos().entrySet()) {
                int p = e.getKey();
                if (aff.get(p).contains(ctx.localNode())) {
                    GridDhtLocalPartition part;
                    try {
                        part = top.localPartition(p, topVer, true);
                    } catch (GridDhtInvalidPartitionException err) {
                        assert !topVer.equals(top.lastTopologyChangeVersion());
                        if (log.isDebugEnabled()) {
                            log.debug("Failed to get partition for rebalancing [" + "grp=" + grp.cacheOrGroupName() + ", err=" + err + ", p=" + p + ", topVer=" + topVer + ", lastTopVer=" + top.lastTopologyChangeVersion() + ']');
                        }
                        continue;
                    }
                    assert part != null;
                    boolean last = supplyMsg.last().containsKey(p);
                    if (part.state() == MOVING) {
                        boolean reserved = part.reserve();
                        assert reserved : "Failed to reserve partition [igniteInstanceName=" + ctx.igniteInstanceName() + ", grp=" + grp.cacheOrGroupName() + ", part=" + part + ']';
                        part.beforeApplyBatch(last);
                        try {
                            long[] byteRcv = { 0 };
                            GridIterableAdapter<GridCacheEntryInfo> infosWrap = new GridIterableAdapter<>(new IteratorWrapper<GridCacheEntryInfo>(e.getValue().infos().iterator()) {

                                /**
                                 * {@inheritDoc}
                                 */
                                @Override
                                public GridCacheEntryInfo nextX() throws IgniteCheckedException {
                                    GridCacheEntryInfo i = super.nextX();
                                    byteRcv[0] += i.marshalledSize(ctx.cacheObjectContext(i.cacheId()));
                                    return i;
                                }
                            });
                            try {
                                if (grp.mvccEnabled())
                                    mvccPreloadEntries(topVer, node, p, infosWrap);
                                else {
                                    preloadEntries(topVer, part, infosWrap);
                                    rebalanceFut.onReceivedKeys(p, e.getValue().infos().size(), node);
                                }
                            } catch (GridDhtInvalidPartitionException ignored) {
                                if (log.isDebugEnabled())
                                    log.debug("Partition became invalid during rebalancing (will ignore): " + p);
                            }
                            fut.processed.get(p).increment();
                            fut.onReceivedBytes(p, byteRcv[0], node);
                            // If message was last for this partition, then we take ownership.
                            if (last)
                                ownPartition(fut, p, nodeId, supplyMsg);
                        } finally {
                            part.release();
                        }
                    } else {
                        if (last)
                            fut.partitionDone(nodeId, p, false);
                        if (log.isDebugEnabled())
                            log.debug("Skipping rebalancing partition (state is not MOVING): " + '[' + demandRoutineInfo(nodeId, supplyMsg) + ", p=" + p + ']');
                    }
                } else {
                    fut.partitionDone(nodeId, p, false);
                    if (log.isDebugEnabled())
                        log.debug("Skipping rebalancing partition (affinity changed): " + '[' + demandRoutineInfo(nodeId, supplyMsg) + ", p=" + p + ']');
                }
            }
            // Only request partitions based on latest topology version.
            for (Integer miss : supplyMsg.missed()) {
                if (aff.get(miss).contains(ctx.localNode()))
                    fut.partitionMissed(nodeId, miss);
            }
            for (Integer miss : supplyMsg.missed()) fut.partitionDone(nodeId, miss, false);
            GridDhtPartitionDemandMessage d = new GridDhtPartitionDemandMessage(supplyMsg.rebalanceId(), supplyMsg.topologyVersion(), grp.groupId());
            d.timeout(grp.preloader().timeout());
            if (!fut.isDone()) {
                // Send demand message.
                try {
                    ctx.io().sendOrderedMessage(node, d.topic(), d.convertIfNeeded(node.version()), grp.ioPolicy(), grp.preloader().timeout());
                    if (log.isDebugEnabled())
                        log.debug("Send next demand message [" + demandRoutineInfo(nodeId, supplyMsg) + "]");
                } catch (ClusterTopologyCheckedException e) {
                    if (log.isDebugEnabled())
                        log.debug("Supplier has left [" + demandRoutineInfo(nodeId, supplyMsg) + ", errMsg=" + e.getMessage() + ']');
                }
            } else {
                if (log.isDebugEnabled())
                    log.debug("Will not request next demand message [" + demandRoutineInfo(nodeId, supplyMsg) + ", rebalanceFuture=" + fut + ']');
            }
        } catch (IgniteSpiException | IgniteCheckedException e) {
            fut.error(nodeId);
            LT.error(log, e, "Error during rebalancing [" + demandRoutineInfo(nodeId, supplyMsg) + ", err=" + e + ']');
        }
    } finally {
        fut.cancelLock.readLock().unlock();
    }
}
Also used : IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) GridFutureAdapter(org.apache.ignite.internal.util.future.GridFutureAdapter) GridFinishedFuture(org.apache.ignite.internal.util.future.GridFinishedFuture) Collectors.counting(java.util.stream.Collectors.counting) IteratorWrapper(org.apache.ignite.internal.util.lang.GridIterableAdapter.IteratorWrapper) CacheRebalanceMode(org.apache.ignite.cache.CacheRebalanceMode) EVT_CACHE_REBALANCE_STARTED(org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_STARTED) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) EVT_CACHE_REBALANCE_STOPPED(org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_STOPPED) EVT_CACHE_REBALANCE_PART_LOADED(org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_PART_LOADED) MetricUtils.metricName(org.apache.ignite.internal.processors.metric.impl.MetricUtils.metricName) Collectors.toSet(java.util.stream.Collectors.toSet) CACHE_GROUP_METRICS_PREFIX(org.apache.ignite.internal.processors.cache.CacheGroupMetricsImpl.CACHE_GROUP_METRICS_PREFIX) GridCacheEntryInfo(org.apache.ignite.internal.processors.cache.GridCacheEntryInfo) IgniteInClosure(org.apache.ignite.lang.IgniteInClosure) AtomicReferenceFieldUpdater(java.util.concurrent.atomic.AtomicReferenceFieldUpdater) GridToStringExclude(org.apache.ignite.internal.util.tostring.GridToStringExclude) Collection(java.util.Collection) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) DR_PRELOAD(org.apache.ignite.internal.processors.dr.GridDrType.DR_PRELOAD) Set(java.util.Set) NavigableSet(java.util.NavigableSet) UUID(java.util.UUID) GridCacheEntryRemovedException(org.apache.ignite.internal.processors.cache.GridCacheEntryRemovedException) CacheDataRow(org.apache.ignite.internal.processors.cache.persistence.CacheDataRow) Collectors(java.util.stream.Collectors) Nullable(org.jetbrains.annotations.Nullable) List(java.util.List) IgniteConfiguration(org.apache.ignite.configuration.IgniteConfiguration) Stream(java.util.stream.Stream) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) CU(org.apache.ignite.internal.util.typedef.internal.CU) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) IntHashMap(org.apache.ignite.internal.util.collection.IntHashMap) Objects.nonNull(java.util.Objects.nonNull) GridCacheEntryEx(org.apache.ignite.internal.processors.cache.GridCacheEntryEx) EVT_CACHE_REBALANCE_OBJECT_LOADED(org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_OBJECT_LOADED) GridCachePartitionExchangeManager(org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager) LongAdder(java.util.concurrent.atomic.LongAdder) DR_NONE(org.apache.ignite.internal.processors.dr.GridDrType.DR_NONE) DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) CacheEntryInfoCollection(org.apache.ignite.internal.processors.cache.CacheEntryInfoCollection) CheckpointProgress(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointProgress) GridCompoundFuture(org.apache.ignite.internal.util.future.GridCompoundFuture) IgnitePredicateX(org.apache.ignite.internal.util.lang.IgnitePredicateX) U(org.apache.ignite.internal.util.typedef.internal.U) HashMap(java.util.HashMap) IgniteLogger(org.apache.ignite.IgniteLogger) 
ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) AtomicReference(java.util.concurrent.atomic.AtomicReference) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) LT(org.apache.ignite.internal.util.typedef.internal.LT) ArrayList(java.util.ArrayList) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) HashSet(java.util.HashSet) ClusterNode(org.apache.ignite.cluster.ClusterNode) CI1(org.apache.ignite.internal.util.typedef.CI1) GridTimeoutObjectAdapter(org.apache.ignite.internal.processors.timeout.GridTimeoutObjectAdapter) S(org.apache.ignite.internal.util.typedef.internal.S) MOVING(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.MOVING) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) PAGE_SNAPSHOT_TAKEN(org.apache.ignite.internal.processors.cache.persistence.CheckpointState.PAGE_SNAPSHOT_TAKEN) FINISHED(org.apache.ignite.internal.processors.cache.persistence.CheckpointState.FINISHED) F(org.apache.ignite.internal.util.typedef.F) GridIterableAdapter(org.apache.ignite.internal.util.lang.GridIterableAdapter) Iterator(java.util.Iterator) GridTimeoutObject(org.apache.ignite.internal.processors.timeout.GridTimeoutObject) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException) TTL_ETERNAL(org.apache.ignite.internal.processors.cache.GridCacheUtils.TTL_ETERNAL) GridMutableLong(org.apache.ignite.internal.util.GridMutableLong) GridDhtInvalidPartitionException(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtInvalidPartitionException) MetricRegistry(org.apache.ignite.internal.processors.metric.MetricRegistry) GridToStringInclude(org.apache.ignite.internal.util.tostring.GridToStringInclude) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) GridCacheSharedContext(org.apache.ignite.internal.processors.cache.GridCacheSharedContext) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) WalStateManager(org.apache.ignite.internal.processors.cache.WalStateManager) GridPlainRunnable(org.apache.ignite.internal.util.lang.GridPlainRunnable) CacheMetricsImpl(org.apache.ignite.internal.processors.cache.CacheMetricsImpl) GridCacheMvccEntryInfo(org.apache.ignite.internal.processors.cache.GridCacheMvccEntryInfo) Collections(java.util.Collections) TxState(org.apache.ignite.internal.processors.cache.mvcc.txlog.TxState) PRELOAD_SIZE_UNDER_CHECKPOINT_LOCK(org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl.PRELOAD_SIZE_UNDER_CHECKPOINT_LOCK) AffinityAssignment(org.apache.ignite.internal.processors.affinity.AffinityAssignment) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) CacheEntryInfoCollection(org.apache.ignite.internal.processors.cache.CacheEntryInfoCollection) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) ClusterNode(org.apache.ignite.cluster.ClusterNode) 
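
One detail worth isolating from handleSupplyMessage is how received bytes are tallied: the entry iterator is wrapped so that every nextX() call adds the entry's marshalled size to a one-element long array captured by the anonymous class. A plain-Java sketch of that counting-iterator pattern follows; CountingIterator and the sizer function are illustrative names, not Ignite API.

import java.util.Iterator;
import java.util.function.ToLongFunction;

class CountingIterator<T> implements Iterator<T> {
    private final Iterator<T> delegate;
    private final ToLongFunction<T> sizer;
    /** Mutable byte counter, mirroring the long[] byteRcv = {0} trick in handleSupplyMessage. */
    private final long[] bytes;

    CountingIterator(Iterator<T> delegate, ToLongFunction<T> sizer, long[] bytes) {
        this.delegate = delegate;
        this.sizer = sizer;
        this.bytes = bytes;
    }

    @Override public boolean hasNext() {
        return delegate.hasNext();
    }

    @Override public T next() {
        T item = delegate.next();

        bytes[0] += sizer.applyAsLong(item); // Count bytes as entries stream past the preloader.

        return item;
    }
}

// Usage:
// long[] byteRcv = {0};
// new CountingIterator<>(java.util.List.of("a", "bb").iterator(), s -> s.length(), byteRcv)
//     .forEachRemaining(x -> {});  // byteRcv[0] == 3 afterwards.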

Example 15 with GridDhtLocalPartition

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

The class GridDhtPartitionSupplier, method handleDemandMessage.

/**
 * For each demand message, the method looks up (or creates a new) supply context and starts iterating entries across the requested partitions.
 * Each entry from the iterator is placed into a prepared supply message.
 *
 * If the supply message size in bytes becomes greater than {@link IgniteConfiguration#getRebalanceBatchSize()},
 * the method sends this message to the demander node and saves the partial state of the iterated entries in the supply context,
 * then restores the context after a new demand message with the same context id arrives.
 *
 * @param topicId Id of the topic used for the supply-demand communication.
 * @param nodeId Id of the node which sent the demand message.
 * @param demandMsg Demand message.
 */
public void handleDemandMessage(int topicId, UUID nodeId, GridDhtPartitionDemandMessage demandMsg) {
    assert demandMsg != null;
    assert nodeId != null;
    T3<UUID, Integer, AffinityTopologyVersion> contextId = new T3<>(nodeId, topicId, demandMsg.topologyVersion());
    if (demandMsg.rebalanceId() < 0) {
        // Demand node requested context cleanup.
        synchronized (scMap) {
            SupplyContext sctx = scMap.get(contextId);
            if (sctx != null && sctx.rebalanceId == -demandMsg.rebalanceId()) {
                clearContext(scMap.remove(contextId), log);
                if (log.isDebugEnabled())
                    log.debug("Supply context cleaned [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
            } else {
                if (log.isDebugEnabled())
                    log.debug("Stale supply context cleanup message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
            }
            return;
        }
    }
    ClusterNode demanderNode = grp.shared().discovery().node(nodeId);
    if (demanderNode == null) {
        if (log.isDebugEnabled())
            log.debug("Demand message rejected (demander left cluster) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
        return;
    }
    IgniteRebalanceIterator iter = null;
    SupplyContext sctx = null;
    Set<Integer> remainingParts = null;
    GridDhtPartitionSupplyMessage supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
    try {
        synchronized (scMap) {
            sctx = scMap.remove(contextId);
            if (sctx != null && demandMsg.rebalanceId() < sctx.rebalanceId) {
                // Stale message, return context back and return.
                scMap.put(contextId, sctx);
                if (log.isDebugEnabled())
                    log.debug("Stale demand message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", actualContext=" + sctx + "]");
                return;
            }
        }
        // Demand request should not contain empty partitions if no supply context is associated with it.
        if (sctx == null && (demandMsg.partitions() == null || demandMsg.partitions().isEmpty())) {
            if (log.isDebugEnabled())
                log.debug("Empty demand message (no context and partitions) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
            return;
        }
        if (log.isDebugEnabled())
            log.debug("Demand message accepted [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
        assert !(sctx != null && !demandMsg.partitions().isEmpty());
        long maxBatchesCnt = /* Each thread should gain prefetched batches. */
        grp.preloader().batchesPrefetchCount() * grp.shared().gridConfig().getRebalanceThreadPoolSize();
        if (sctx == null) {
            if (log.isDebugEnabled())
                log.debug("Starting supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", fullPartitions=" + S.compact(demandMsg.partitions().fullSet()) + ", histPartitions=" + S.compact(demandMsg.partitions().historicalSet()) + "]");
        } else
            maxBatchesCnt = 1;
        if (sctx == null || sctx.iterator == null) {
            remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
            CachePartitionPartialCountersMap histMap = demandMsg.partitions().historicalMap();
            for (int i = 0; i < histMap.size(); i++) {
                int p = histMap.partitionAt(i);
                remainingParts.add(p);
            }
            iter = grp.offheap().rebalanceIterator(demandMsg.partitions(), demandMsg.topologyVersion());
            for (Integer part : demandMsg.partitions().fullSet()) {
                if (iter.isPartitionMissing(part))
                    continue;
                GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
                assert loc != null && loc.state() == GridDhtPartitionState.OWNING : "Partition should be in OWNING state: " + loc;
                supplyMsg.addEstimatedKeysCount(loc.dataStore().fullSize());
            }
            for (int i = 0; i < histMap.size(); i++) {
                int p = histMap.partitionAt(i);
                if (iter.isPartitionMissing(p))
                    continue;
                supplyMsg.addEstimatedKeysCount(histMap.updateCounterAt(i) - histMap.initialUpdateCounterAt(i));
            }
        } else {
            iter = sctx.iterator;
            remainingParts = sctx.remainingParts;
        }
        final int msgMaxSize = grp.preloader().batchSize();
        long batchesCnt = 0;
        CacheDataRow prevRow = null;
        while (iter.hasNext()) {
            CacheDataRow row = iter.peek();
            // Prevent mvcc entry history splitting into separate batches.
            boolean canFlushHistory = !grp.mvccEnabled() || prevRow != null && ((grp.sharedGroup() && row.cacheId() != prevRow.cacheId()) || !row.key().equals(prevRow.key()));
            if (canFlushHistory && supplyMsg.messageSize() >= msgMaxSize) {
                if (++batchesCnt >= maxBatchesCnt) {
                    saveSupplyContext(contextId, iter, remainingParts, demandMsg.rebalanceId());
                    reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
                    return;
                } else {
                    if (!reply(topicId, demanderNode, demandMsg, supplyMsg, contextId))
                        return;
                    supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
                }
            }
            row = iter.next();
            prevRow = row;
            int part = row.partition();
            GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
            assert (loc != null && loc.state() == OWNING && loc.reservations() > 0) || iter.isPartitionMissing(part) : "Partition should be in OWNING state and has at least 1 reservation " + loc;
            if (iter.isPartitionMissing(part) && remainingParts.contains(part)) {
                supplyMsg.missed(part);
                remainingParts.remove(part);
                if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
                    grp.addRebalanceMissEvent(part);
                if (log.isDebugEnabled())
                    log.debug("Requested partition is marked as missing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", p=" + part + "]");
                continue;
            }
            if (!remainingParts.contains(part))
                continue;
            GridCacheEntryInfo info = extractEntryInfo(row);
            if (info == null)
                continue;
            supplyMsg.addEntry0(part, iter.historical(part), info, grp.shared(), grp.cacheObjectContext());
            if (iter.isPartitionDone(part)) {
                supplyMsg.last(part, loc.updateCounter());
                remainingParts.remove(part);
                if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
                    grp.addRebalanceSupplyEvent(part);
            }
        }
        Iterator<Integer> remainingIter = remainingParts.iterator();
        while (remainingIter.hasNext()) {
            int p = remainingIter.next();
            if (iter.isPartitionDone(p)) {
                GridDhtLocalPartition loc = top.localPartition(p, demandMsg.topologyVersion(), false);
                assert loc != null : "Supply partition is gone: grp=" + grp.cacheOrGroupName() + ", p=" + p;
                supplyMsg.last(p, loc.updateCounter());
                remainingIter.remove();
                if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
                    grp.addRebalanceSupplyEvent(p);
            } else if (iter.isPartitionMissing(p)) {
                supplyMsg.missed(p);
                remainingIter.remove();
                if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
                    grp.addRebalanceMissEvent(p);
            }
        }
        assert remainingParts.isEmpty() : "Partitions after rebalance should be either done or missing: " + remainingParts;
        if (sctx != null)
            clearContext(sctx, log);
        else
            iter.close();
        reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
        if (log.isInfoEnabled())
            log.info("Finished supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
    } catch (Throwable t) {
        if (iter != null && !iter.isClosed()) {
            try {
                iter.close();
            } catch (IgniteCheckedException e) {
                t.addSuppressed(e);
            }
        }
        if (grp.shared().kernalContext().isStopping())
            return;
        // Sending supply messages with error requires new protocol.
        boolean sendErrMsg = demanderNode.version().compareTo(GridDhtPartitionSupplyMessageV2.AVAILABLE_SINCE) >= 0;
        if (t instanceof IgniteSpiException) {
            if (log.isDebugEnabled())
                log.debug("Failed to send message to node (current node is stopping?) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", msg=" + t.getMessage() + ']');
            sendErrMsg = false;
        } else
            U.error(log, "Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t);
        try {
            if (sctx != null)
                clearContext(sctx, log);
        } catch (Throwable t1) {
            U.error(log, "Failed to cleanup supplying context [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
        }
        if (!sendErrMsg)
            return;
        boolean fallbackToFullRebalance = X.hasCause(t, IgniteHistoricalIteratorException.class);
        try {
            GridDhtPartitionSupplyMessage errMsg;
            if (fallbackToFullRebalance) {
                // Mark the last checkpoint as not applicable for WAL rebalance.
                grp.shared().database().lastCheckpointInapplicableForWalRebalance(grp.groupId());
                // Mark all remaining partitions as missed to trigger full rebalance.
                if (iter == null && F.isEmpty(remainingParts)) {
                    remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
                    remainingParts.addAll(demandMsg.partitions().historicalSet());
                }
                for (int p : Optional.ofNullable(remainingParts).orElseGet(Collections::emptySet)) supplyMsg.missed(p);
                errMsg = supplyMsg;
            } else {
                errMsg = new GridDhtPartitionSupplyMessageV2(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled(), t);
            }
            reply(topicId, demanderNode, demandMsg, errMsg, contextId);
        } catch (Throwable t1) {
            U.error(log, "Failed to send supply error message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
        }
        // If we fell back to full rebalance, rely on it instead of triggering the failure handler.
        if (!fallbackToFullRebalance) {
            grp.shared().kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, new IgniteCheckedException("Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t)));
        }
    }
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) FailureContext(org.apache.ignite.failure.FailureContext) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) UUID(java.util.UUID) T3(org.apache.ignite.internal.util.typedef.T3) HashSet(java.util.HashSet) ClusterNode(org.apache.ignite.cluster.ClusterNode) CacheDataRow(org.apache.ignite.internal.processors.cache.persistence.CacheDataRow) GridCacheEntryInfo(org.apache.ignite.internal.processors.cache.GridCacheEntryInfo) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) IgniteRebalanceIterator(org.apache.ignite.internal.processors.cache.IgniteRebalanceIterator)
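
The heart of the supplier is a size-bounded batching loop: entries are appended to the current supply message until it crosses the configured batch size, the message is sent, and after a fixed number of prefetched batches the iterator position is parked in a supply context for the next demand round. A simplified, self-contained sketch of that control flow is given below; BatchingSupplier, sink and the returned "context" are illustrative simplifications, not the Ignite implementation.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

class BatchingSupplier<T> {
    /** Emits full batches into {@code sink} until {@code maxBatches} is reached, then returns the iterator as the saved context. */
    Iterator<T> supply(Iterator<T> it, int maxBatchSize, int maxBatches, List<List<T>> sink) {
        List<T> batch = new ArrayList<>();
        int batchesSent = 0;

        while (it.hasNext()) {
            if (batch.size() >= maxBatchSize) {
                sink.add(batch); // reply(...) in the original.
                batch = new ArrayList<>();

                if (++batchesSent >= maxBatches)
                    return it; // saveSupplyContext(...): resume from this position on the next demand message.
            }

            batch.add(it.next());
        }

        sink.add(batch); // Final (possibly partial) batch.

        return null; // Fully drained, no context to save.
    }
}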

Aggregations

GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) 95
GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) 21
IgniteCheckedException (org.apache.ignite.IgniteCheckedException) 19
IgniteEx (org.apache.ignite.internal.IgniteEx) 19
CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext) 19
ArrayList (java.util.ArrayList) 18
Map (java.util.Map) 18
Test (org.junit.Test) 18
AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) 16
GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) 16
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 15
ClusterNode (org.apache.ignite.cluster.ClusterNode) 15
GridCacheContext (org.apache.ignite.internal.processors.cache.GridCacheContext) 15
HashMap (java.util.HashMap) 14
HashSet (java.util.HashSet) 13
AtomicLong (java.util.concurrent.atomic.AtomicLong) 13
CacheDataRow (org.apache.ignite.internal.processors.cache.persistence.CacheDataRow) 13
Ignite (org.apache.ignite.Ignite) 12
KeyCacheObject (org.apache.ignite.internal.processors.cache.KeyCacheObject) 12
IgniteException (org.apache.ignite.IgniteException) 11