Search in sources :

Example 1 with GridDhtPartitionState

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in the Apache Ignite project.

From class CacheAffinitySharedManager, method initAffinityBasedOnPartitionsAvailability:

/**
 * Initializes current affinity assignment based on partitions availability. Nodes that have most recent data will
 * be considered affinity nodes.
 * <p>
 * For every registered cache group the ideal assignment is compared, partition by partition, with the assignment
 * of the group's last affinity version. When the primary changes and the new primary is not in {@code OWNING}
 * state, a replacement node list is obtained from {@code latePrimaryAssignment(...)}; when the ideal assignment
 * for a partition is empty, the current nodes that are still present among the event's server nodes are kept.
 * <p>
 * Side effects visible in this method: when {@code initAff} is set, each group's affinity is initialized with the
 * adjusted assignment; the collected {@code WaitRebalanceInfo} (or {@code null} if it is empty) is stored into
 * {@code waitInfo} under {@code mux}.
 *
 * @param topVer Topology version.
 * @param fut Exchange future.
 * @param c Closure converting affinity diff.
 * @param initAff {@code True} if need initialize affinity.
 * @param <T> Type produced by {@code c} for every node of an adjusted partition assignment.
 * @return Affinity assignment for each of registered cache group: group id -> partition -> converted nodes.
 *      Only partitions whose node list was adjusted (differs from the ideal one) are present.
 */
private <T> Map<Integer, Map<Integer, List<T>>> initAffinityBasedOnPartitionsAvailability(final AffinityTopologyVersion topVer, final GridDhtPartitionsExchangeFuture fut, final IgniteClosure<ClusterNode, T> c, final boolean initAff) {
    // Whether the first event of this exchange demands centralized affinity assignment.
    final boolean enforcedCentralizedAssignment = DiscoveryCustomEvent.requiresCentralizedAffinityAssignment(fut.firstEvent());
    // Wait info is versioned by the exchange topology version in the centralized case,
    // otherwise by the version of the last server event.
    final WaitRebalanceInfo waitRebalanceInfo = enforcedCentralizedAssignment ? new WaitRebalanceInfo(fut.exchangeId().topologyVersion()) : new WaitRebalanceInfo(fut.context().events().lastServerEventVersion());
    // Server nodes from the discovery cache of this exchange; used below as the set of nodes allowed in assignments.
    final Collection<ClusterNode> evtNodes = fut.context().events().discoveryCache().serverNodes();
    final Map<Integer, Map<Integer, List<T>>> assignment = new ConcurrentHashMap<>();
    forAllRegisteredCacheGroups(new IgniteInClosureX<CacheGroupDescriptor>() {

        @Override
        public void applyx(CacheGroupDescriptor desc) throws IgniteCheckedException {
            CacheGroupHolder grpHolder = getOrCreateGroupHolder(topVer, desc);
            // Skip groups with rebalance disabled, and groups added on this very exchange
            // (unless centralized assignment is enforced).
            if (!grpHolder.rebalanceEnabled || (fut.cacheGroupAddedOnExchange(desc.groupId(), desc.receivedFrom()) && !enforcedCentralizedAssignment))
                return;
            AffinityTopologyVersion affTopVer = grpHolder.affinity().lastVersion();
            assert (affTopVer.topologyVersion() > 0 && !affTopVer.equals(topVer)) || enforcedCentralizedAssignment : "Invalid affinity version [last=" + affTopVer + ", futVer=" + topVer + ", grp=" + desc.cacheOrGroupName() + ']';
            List<List<ClusterNode>> curAssignment = grpHolder.affinity().assignments(affTopVer);
            List<List<ClusterNode>> newAssignment = grpHolder.affinity().idealAssignmentRaw();
            assert newAssignment != null;
            // Mutable copy of the ideal assignment that receives per-partition overrides;
            // only needed when affinity is initialized by this call.
            List<List<ClusterNode>> newAssignment0 = initAff ? new ArrayList<>(newAssignment) : null;
            GridDhtPartitionTopology top = grpHolder.topology(fut.context().events().discoveryCache());
            // Created lazily: stays null when no partition of this group needs an adjusted assignment.
            Map<Integer, List<T>> cacheAssignment = null;
            for (int p = 0; p < newAssignment.size(); p++) {
                List<ClusterNode> newNodes = newAssignment.get(p);
                List<ClusterNode> curNodes = curAssignment.get(p);
                assert evtNodes.containsAll(newNodes) : "Invalid new assignment [grp=" + grpHolder.aff.cacheOrGroupName() + ", nodes=" + newNodes + ", topVer=" + fut.context().events().discoveryCache().version() + ", evts=" + fut.context().events().events() + "]";
                ClusterNode curPrimary = !curNodes.isEmpty() ? curNodes.get(0) : null;
                ClusterNode newPrimary = !newNodes.isEmpty() ? newNodes.get(0) : null;
                // Adjusted node list for this partition; null means the ideal assignment is used as is.
                List<ClusterNode> newNodes0 = null;
                assert newPrimary == null || evtNodes.contains(newPrimary) : "Invalid new primary [" + "grp=" + desc.cacheOrGroupName() + ", node=" + newPrimary + ", topVer=" + topVer + ']';
                List<ClusterNode> owners = top.owners(p, topVer);
                // It is essential that curPrimary node has partition in OWNING state.
                if (!owners.isEmpty() && !owners.contains(curPrimary))
                    curPrimary = owners.get(0);
                // If new assignment is empty preserve current ownership for alive nodes.
                if (curPrimary != null && newPrimary == null) {
                    newNodes0 = new ArrayList<>(curNodes.size());
                    for (ClusterNode node : curNodes) {
                        if (evtNodes.contains(node))
                            newNodes0.add(node);
                    }
                } else if (curPrimary != null && !curPrimary.equals(newPrimary)) {
                    // Primary is changing. newPrimary is non-null here: the null case with a non-null
                    // curPrimary is consumed by the branch above.
                    GridDhtPartitionState state = top.partitionState(newPrimary.id(), p);
                    if (evtNodes.contains(curPrimary)) {
                        // Current primary is still among event nodes: keep it as primary until the new one owns the partition.
                        if (state != OWNING) {
                            newNodes0 = latePrimaryAssignment(grpHolder.affinity(), p, curPrimary, newNodes, waitRebalanceInfo);
                        }
                    } else {
                        if (state != OWNING) {
                            // Current primary is gone: first try current backups (index 0 is the primary, hence i = 1)
                            // that own the partition and are among event nodes.
                            for (int i = 1; i < curNodes.size(); i++) {
                                ClusterNode curNode = curNodes.get(i);
                                if (top.partitionState(curNode.id(), p) == OWNING && evtNodes.contains(curNode)) {
                                    newNodes0 = latePrimaryAssignment(grpHolder.affinity(), p, curNode, newNodes, waitRebalanceInfo);
                                    break;
                                }
                            }
                            // No suitable backup: fall back to the first owner that is among event nodes.
                            if (newNodes0 == null) {
                                for (ClusterNode owner : owners) {
                                    if (evtNodes.contains(owner)) {
                                        newNodes0 = latePrimaryAssignment(grpHolder.affinity(), p, owner, newNodes, waitRebalanceInfo);
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
                // This will happen if no primary has changed but some backups still need to be rebalanced.
                if (!owners.isEmpty() && !owners.containsAll(newNodes) && !top.lostPartitions().contains(p))
                    waitRebalanceInfo.add(grpHolder.groupId(), p, newNodes);
                if (newNodes0 != null) {
                    assert evtNodes.containsAll(newNodes0) : "Invalid late assignment [grp=" + grpHolder.aff.cacheOrGroupName() + ", nodes=" + newNodes + ", topVer=" + fut.context().events().discoveryCache().version() + ", evts=" + fut.context().events().events() + "]";
                    if (newAssignment0 != null)
                        newAssignment0.set(p, newNodes0);
                    if (cacheAssignment == null)
                        cacheAssignment = new HashMap<>();
                    // Convert the adjusted node list through the caller-supplied closure for the returned diff.
                    List<T> n = new ArrayList<>(newNodes0.size());
                    for (int i = 0; i < newNodes0.size(); i++) n.add(c.apply(newNodes0.get(i)));
                    cacheAssignment.put(p, n);
                }
            }
            // Groups without any adjusted partition are not reported in the result.
            if (cacheAssignment != null)
                assignment.put(grpHolder.groupId(), cacheAssignment);
            if (initAff)
                grpHolder.affinity().initialize(topVer, newAssignment0);
            fut.timeBag().finishLocalStage("Affinity recalculation (partitions availability) " + "[grp=" + desc.cacheOrGroupName() + "]");
        }
    });
    if (log.isDebugEnabled()) {
        log.debug("Computed new affinity after node left [topVer=" + topVer + ", waitGrps=" + groupNames(waitRebalanceInfo.waitGrps.keySet()) + ']');
    }
    // Publish the collected wait info; an empty one is replaced with null.
    synchronized (mux) {
        waitInfo = !waitRebalanceInfo.empty() ? waitRebalanceInfo : null;
    }
    return assignment;
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) ArrayList(java.util.ArrayList) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) EVT_NODE_LEFT(org.apache.ignite.events.EventType.EVT_NODE_LEFT) GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState) GridLongList(org.apache.ignite.internal.util.GridLongList) List(java.util.List) ArrayList(java.util.ArrayList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) GridPartitionStateMap(org.apache.ignite.internal.util.GridPartitionStateMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) LinkedHashMap(java.util.LinkedHashMap) GridDhtPartitionFullMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap) GridDhtPartitionMap(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)

Example 2 with GridDhtPartitionState

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in the Apache Ignite project.

From class CacheMetricsImpl, method getEntriesStat:

/**
 * Gathers entry-count and partition-count metrics for this cache, walking the local partitions only once.
 *
 * @return Collected metrics, or the result of {@code unknownEntriesStat()} when the affinity topology version is
 *      {@link AffinityTopologyVersion#NONE} or any exception is thrown while collecting.
 */
public EntriesStatMetrics getEntriesStat() {
    int ownedParts = 0;
    int movingParts = 0;
    long offHeapTotal = 0L;
    long offHeapPrimary = 0L;
    long offHeapBackup = 0L;
    long onHeap = 0L;
    int locSize = 0;
    long locSizeLong = 0L;

    try {
        AffinityTopologyVersion curTopVer = cctx.affinity().affinityTopologyVersion();

        if (AffinityTopologyVersion.NONE.equals(curTopVer))
            return unknownEntriesStat();

        final GridCacheAdapter<?, ?> adapter = cctx.cache();
        final boolean hasCache = adapter != null;

        if (hasCache) {
            offHeapTotal = adapter.offHeapEntriesCount();
            locSize = adapter.localSize(null);
            locSizeLong = adapter.localSizeLong(null);
        }

        if (!cctx.isLocal()) {
            IntSet primaryIds = ImmutableIntSet.wrap(cctx.affinity().primaryPartitions(cctx.localNodeId(), curTopVer));
            IntSet backupIds = ImmutableIntSet.wrap(cctx.affinity().backupPartitions(cctx.localNodeId(), curTopVer));

            if (cctx.isNear() && hasCache)
                onHeap = adapter.nearSize();

            for (GridDhtLocalPartition locPart : cctx.topology().currentLocalPartitions()) {
                // Partition counters are maintained even when the cache adapter is unavailable.
                GridDhtPartitionState st = locPart.state();

                if (st == GridDhtPartitionState.OWNING)
                    ownedParts++;
                else if (st == GridDhtPartitionState.MOVING)
                    movingParts++;

                // Entry counters need the cache adapter.
                if (!hasCache)
                    continue;

                long partCacheSize = locPart.dataStore().cacheSize(cctx.cacheId());
                int partId = locPart.id();

                if (primaryIds.contains(partId))
                    offHeapPrimary += partCacheSize;
                else if (backupIds.contains(partId))
                    offHeapBackup += partCacheSize;

                onHeap += locPart.publicSize(cctx.cacheId());
            }
        }
        else if (hasCache) {
            // Local cache: every off-heap entry is counted as primary.
            offHeapPrimary = offHeapTotal;
            onHeap = adapter.sizeLong();
        }
    }
    catch (Exception ignored) {
        // Metrics are best-effort: any failure is reported as "unknown" statistics.
        return unknownEntriesStat();
    }

    EntriesStatMetrics res = new EntriesStatMetrics();

    res.offHeapEntriesCount(offHeapTotal);
    res.offHeapPrimaryEntriesCount(offHeapPrimary);
    res.offHeapBackupEntriesCount(offHeapBackup);
    res.heapEntriesCount(onHeap);
    res.size(locSize);
    res.cacheSize(locSizeLong);
    res.keySize(locSize);
    // Emptiness is derived from the off-heap entries count only.
    res.isEmpty(offHeapTotal == 0);
    res.totalPartitionsCount(ownedParts + movingParts);
    res.rebalancingPartitionsCount(movingParts);

    return res;
}
Also used : AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) IntSet(org.apache.ignite.internal.util.collection.IntSet) ImmutableIntSet(org.apache.ignite.internal.util.collection.ImmutableIntSet) GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)

Example 3 with GridDhtPartitionState

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in the Apache Ignite project.

From class GridDhtPartitionsExchangeFuture, method assignPartitionStates:

/**
 * Gathers partition update counters for {@code top} from the received single messages and from the local
 * partitions, then hands them to {@code assignHistoricalSuppliers(...)} and, when requested, to
 * {@code resetOwnersByCounter(...)}.
 *
 * @param top Topology to assign.
 * @param resetOwners True if need to reset partition state considering of counter, false otherwise.
 * @return Partitions supply info list.
 */
private List<SupplyPartitionInfo> assignPartitionStates(GridDhtPartitionTopology top, boolean resetOwners) {
    // Per partition: the highest counter seen among OWNING copies together with the nodes that reported it.
    Map<Integer, CounterWithNodes> maxCntrs = new HashMap<>();
    // Per partition: every distinct counter seen among OWNING and MOVING copies.
    Map<Integer, TreeSet<Long>> varCntrs = new HashMap<>();

    // Counters reported by remote nodes.
    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> entry : msgs.entrySet()) {
        UUID rmtId = entry.getKey();
        GridDhtPartitionsSingleMessage msg = entry.getValue();

        CachePartitionPartialCountersMap cntrsMap = msg.partitionUpdateCounters(top.groupId(), top.partitions());

        assert cntrsMap != null;

        for (int idx = 0; idx < cntrsMap.size(); idx++) {
            int partId = cntrsMap.partitionAt(idx);

            GridDhtPartitionState partState = top.partitionState(rmtId, partId);

            boolean owning = partState == GridDhtPartitionState.OWNING;
            boolean moving = partState == GridDhtPartitionState.MOVING;

            if (!(owning || moving))
                continue;

            // MOVING copies contribute their initial counter, OWNING copies the current one.
            long cntr = moving ? cntrsMap.initialUpdateCounterAt(idx) : cntrsMap.updateCounterAt(idx);

            varCntrs.computeIfAbsent(partId, k -> new TreeSet<>()).add(cntr);

            // Only OWNING copies compete for the maximum counter.
            if (!owning)
                continue;

            CounterWithNodes curMax = maxCntrs.get(partId);

            if (curMax == null || cntr > curMax.cnt)
                maxCntrs.put(partId, new CounterWithNodes(cntr, msg.partitionSizes(top.groupId()).get(partId), rmtId));
            else if (cntr == curMax.cnt)
                curMax.nodes.add(rmtId);
        }
    }

    // Also must process counters from the local node.
    for (GridDhtLocalPartition locPart : top.currentLocalPartitions()) {
        GridDhtPartitionState partState = top.partitionState(cctx.localNodeId(), locPart.id());

        boolean owning = partState == GridDhtPartitionState.OWNING;
        boolean moving = partState == GridDhtPartitionState.MOVING;

        if (!(owning || moving))
            continue;

        final long cntr = moving ? locPart.initialUpdateCounter() : locPart.updateCounter();

        varCntrs.computeIfAbsent(locPart.id(), k -> new TreeSet<>()).add(cntr);

        if (!owning)
            continue;

        CounterWithNodes curMax = maxCntrs.get(locPart.id());

        if (curMax == null) {
            if (cntr == 0) {
                // No remote maximum and a zero local counter: register the local node with zero size and
                // add every remote node whose copy is OWNING according to the topology.
                CounterWithNodes zeroCntr = new CounterWithNodes(0, 0L, cctx.localNodeId());

                for (UUID otherId : msgs.keySet()) {
                    if (top.partitionState(otherId, locPart.id()) == GridDhtPartitionState.OWNING)
                        zeroCntr.nodes.add(otherId);
                }

                maxCntrs.put(locPart.id(), zeroCntr);
            }
            else
                maxCntrs.put(locPart.id(), new CounterWithNodes(cntr, locPart.fullSize(), cctx.localNodeId()));
        }
        else if (cntr > curMax.cnt)
            maxCntrs.put(locPart.id(), new CounterWithNodes(cntr, locPart.fullSize(), cctx.localNodeId()));
        else if (cntr == curMax.cnt)
            curMax.nodes.add(cctx.localNodeId());
    }

    Set<Integer> haveHistory = new HashSet<>();

    List<SupplyPartitionInfo> supplyInfos = assignHistoricalSuppliers(top, maxCntrs, varCntrs, haveHistory);

    if (resetOwners)
        resetOwnersByCounter(top, maxCntrs, haveHistory);

    return supplyInfos;
}
Also used : GridCacheMvccCandidate(org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate) GridFutureAdapter(org.apache.ignite.internal.util.future.GridFutureAdapter) EVT_NODE_LEFT(org.apache.ignite.events.EventType.EVT_NODE_LEFT) NoopSpan(org.apache.ignite.internal.processors.tracing.NoopSpan) Map(java.util.Map) GridIoPolicy(org.apache.ignite.internal.managers.communication.GridIoPolicy) ExchangeContext(org.apache.ignite.internal.processors.cache.ExchangeContext) BaselineTopology(org.apache.ignite.internal.processors.cluster.BaselineTopology) Latch(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.Latch) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) IgniteInClosure(org.apache.ignite.lang.IgniteInClosure) GridToStringExclude(org.apache.ignite.internal.util.tostring.GridToStringExclude) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) CacheGroupDescriptor(org.apache.ignite.internal.processors.cache.CacheGroupDescriptor) Set(java.util.Set) ChangeGlobalStateFinishMessage(org.apache.ignite.internal.processors.cluster.ChangeGlobalStateFinishMessage) IgniteRunnable(org.apache.ignite.lang.IgniteRunnable) IGNITE_PARTITION_RELEASE_FUTURE_DUMP_THRESHOLD(org.apache.ignite.IgniteSystemProperties.IGNITE_PARTITION_RELEASE_FUTURE_DUMP_THRESHOLD) CountDownLatch(java.util.concurrent.CountDownLatch) SnapshotDiscoveryMessage(org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage) EternalExpiryPolicy(javax.cache.expiry.EternalExpiryPolicy) IgniteConfiguration(org.apache.ignite.configuration.IgniteConfiguration) Stream(java.util.stream.Stream) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT(org.apache.ignite.IgniteSystemProperties.IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT) 
SecurityContext(org.apache.ignite.internal.processors.security.SecurityContext) ExchangeRecord(org.apache.ignite.internal.pagemem.wal.record.ExchangeRecord) SecurityUtils.remoteSecurityContext(org.apache.ignite.internal.processors.security.SecurityUtils.remoteSecurityContext) ClusterState(org.apache.ignite.cluster.ClusterState) U(org.apache.ignite.internal.util.typedef.internal.U) EVT_DISCOVERY_CUSTOM_EVT(org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT) IgniteLogger(org.apache.ignite.IgniteLogger) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) GridKernalContext(org.apache.ignite.internal.GridKernalContext) LinkedHashMap(java.util.LinkedHashMap) GridCacheDatabaseSharedManager(org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager) ClusterNode(org.apache.ignite.cluster.ClusterNode) CI1(org.apache.ignite.internal.util.typedef.CI1) DiscoveryCustomMessage(org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage) S(org.apache.ignite.internal.util.typedef.internal.S) IgniteDiagnosticAware(org.apache.ignite.internal.IgniteDiagnosticAware) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) Stream.concat(java.util.stream.Stream.concat) LinkedHashSet(java.util.LinkedHashSet) DynamicCacheChangeFailureMessage(org.apache.ignite.internal.processors.cache.DynamicCacheChangeFailureMessage) IOException(java.io.IOException) T2(org.apache.ignite.internal.util.typedef.T2) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) Lock(java.util.concurrent.locks.Lock) GridCacheSharedContext(org.apache.ignite.internal.processors.cache.GridCacheSharedContext) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) DatabaseLifecycleListener(org.apache.ignite.internal.processors.cache.persistence.DatabaseLifecycleListener) 
GridCacheProcessor(org.apache.ignite.internal.processors.cache.GridCacheProcessor) ATTR_DYNAMIC_CACHE_START_ROLLBACK_SUPPORTED(org.apache.ignite.internal.IgniteNodeAttributes.ATTR_DYNAMIC_CACHE_START_ROLLBACK_SUPPORTED) CacheMode(org.apache.ignite.cache.CacheMode) DynamicCacheChangeBatch(org.apache.ignite.internal.processors.cache.DynamicCacheChangeBatch) DynamicCacheChangeRequest(org.apache.ignite.internal.processors.cache.DynamicCacheChangeRequest) ExchangeDiscoveryEvents.serverLeftEvent(org.apache.ignite.internal.processors.cache.ExchangeDiscoveryEvents.serverLeftEvent) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) DiscoveryCustomEvent(org.apache.ignite.internal.events.DiscoveryCustomEvent) IgniteSystemProperties.getBoolean(org.apache.ignite.IgniteSystemProperties.getBoolean) TimeBag(org.apache.ignite.internal.util.TimeBag) StateChangeRequest(org.apache.ignite.internal.processors.cache.StateChangeRequest) GridDhtPartitionsStateValidator(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionsStateValidator) SYSTEM_POOL(org.apache.ignite.internal.managers.communication.GridIoPolicy.SYSTEM_POOL) IgniteChangeGlobalStateSupport(org.apache.ignite.internal.processors.cluster.IgniteChangeGlobalStateSupport) CacheRebalanceMode(org.apache.ignite.cache.CacheRebalanceMode) IgniteSystemProperties(org.apache.ignite.IgniteSystemProperties) X(org.apache.ignite.internal.util.typedef.X) GridPlainCallable(org.apache.ignite.internal.util.lang.GridPlainCallable) GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState) FailureType(org.apache.ignite.failure.FailureType) IgniteClientDisconnectedCheckedException(org.apache.ignite.internal.IgniteClientDisconnectedCheckedException) Collection(java.util.Collection) IgniteException(org.apache.ignite.IgniteException) SpanTags(org.apache.ignite.internal.processors.tracing.SpanTags) 
ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IgniteSnapshotManager.isSnapshotOperation(org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.isSnapshotOperation) NavigableSet(java.util.NavigableSet) IgniteDiagnosticPrepareContext(org.apache.ignite.internal.IgniteDiagnosticPrepareContext) IgniteNeedReconnectException(org.apache.ignite.internal.IgniteNeedReconnectException) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) IgniteUtils.doInParallelUninterruptibly(org.apache.ignite.internal.util.IgniteUtils.doInParallelUninterruptibly) IgniteSystemProperties.getLong(org.apache.ignite.IgniteSystemProperties.getLong) ExchangeActions(org.apache.ignite.internal.processors.cache.ExchangeActions) Nullable(org.jetbrains.annotations.Nullable) List(java.util.List) EVT_NODE_FAILED(org.apache.ignite.events.EventType.EVT_NODE_FAILED) IgniteTxKey(org.apache.ignite.internal.processors.cache.transactions.IgniteTxKey) CU(org.apache.ignite.internal.util.typedef.internal.CU) IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT_LIMIT(org.apache.ignite.IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT_LIMIT) Optional(java.util.Optional) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) ExchangeDiscoveryEvents(org.apache.ignite.internal.processors.cache.ExchangeDiscoveryEvents) IgniteProductVersion(org.apache.ignite.lang.IgniteProductVersion) DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) GridCacheAdapter(org.apache.ignite.internal.processors.cache.GridCacheAdapter) Span(org.apache.ignite.internal.processors.tracing.Span) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) GridCacheVersion(org.apache.ignite.internal.processors.cache.version.GridCacheVersion) HashMap(java.util.HashMap) WalStateAbstractMessage(org.apache.ignite.internal.processors.cache.WalStateAbstractMessage) DiscoCache(org.apache.ignite.internal.managers.discovery.DiscoCache) 
AtomicReference(java.util.concurrent.atomic.AtomicReference) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) GridClientPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridClientPartitionTopology) FailureContext(org.apache.ignite.failure.FailureContext) IgniteUtils.doInParallel(org.apache.ignite.internal.util.IgniteUtils.doInParallel) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) CachePartitionExchangeWorkerTask(org.apache.ignite.internal.processors.cache.CachePartitionExchangeWorkerTask) GridDhtTopologyFutureAdapter(org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTopologyFutureAdapter) F(org.apache.ignite.internal.util.typedef.F) EVT_NODE_JOINED(org.apache.ignite.events.EventType.EVT_NODE_JOINED) Collections.emptySet(java.util.Collections.emptySet) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) DynamicCacheDescriptor(org.apache.ignite.internal.processors.cache.DynamicCacheDescriptor) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException) ExchangeDiscoveryEvents.serverJoinEvent(org.apache.ignite.internal.processors.cache.ExchangeDiscoveryEvents.serverJoinEvent) GridAffinityAssignmentCache(org.apache.ignite.internal.processors.affinity.GridAffinityAssignmentCache) PARTIAL_COUNTERS_MAP_SINCE(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.CachePartitionPartialCountersMap.PARTIAL_COUNTERS_MAP_SINCE) GridToStringInclude(org.apache.ignite.internal.util.tostring.GridToStringInclude) TimeUnit(java.util.concurrent.TimeUnit) ChangeGlobalStateMessage(org.apache.ignite.internal.processors.cluster.ChangeGlobalStateMessage) DiscoveryDataClusterState(org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState) 
GridMetricManager(org.apache.ignite.internal.processors.metric.GridMetricManager) Collections(java.util.Collections) GridCacheUtils(org.apache.ignite.internal.processors.cache.GridCacheUtils) CacheAffinityChangeMessage(org.apache.ignite.internal.processors.cache.CacheAffinityChangeMessage) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) UUID(java.util.UUID) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) LinkedHashSet(java.util.LinkedHashSet) HashSet(java.util.HashSet)

Example 4 with GridDhtPartitionState

Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in the Apache Ignite project.

From class GridCacheOffheapManager, method saveStoreMetadata:

/**
 * @param store Store to save metadata.
 * @throws IgniteCheckedException If failed.
 */
private void saveStoreMetadata(CacheDataStore store, Context ctx, boolean beforeDestroy, boolean needSnapshot) throws IgniteCheckedException {
    RowStore rowStore0 = store.rowStore();
    if (rowStore0 != null && (partitionStatesRestored || grp.isLocal())) {
        ((CacheFreeList) rowStore0.freeList()).saveMetadata(grp.statisticsHolderData());
        PartitionMetaStorage<SimpleDataRow> partStore = store.partStorage();
        long updCntr = store.updateCounter();
        long size = store.fullSize();
        long rmvId = globalRemoveId().get();
        byte[] updCntrsBytes = store.partUpdateCounter().getBytes();
        PageMemoryEx pageMem = (PageMemoryEx) grp.dataRegion().pageMemory();
        IgniteWriteAheadLogManager wal = this.ctx.wal();
        GridEncryptionManager encMgr = this.ctx.kernalContext().encryption();
        if (size > 0 || updCntr > 0 || !store.partUpdateCounter().sequential() || (grp.config().isEncryptionEnabled() && encMgr.getEncryptionState(grp.groupId(), store.partId()) > 0)) {
            GridDhtPartitionState state = null;
            // localPartition will not acquire writeLock here because create=false.
            GridDhtLocalPartition part = null;
            if (!grp.isLocal()) {
                if (beforeDestroy)
                    state = GridDhtPartitionState.EVICTED;
                else {
                    part = getPartition(store);
                    if (part != null && part.state() != GridDhtPartitionState.EVICTED)
                        state = part.state();
                }
                // Do not save meta for evicted partitions on next checkpoints.
                if (state == null)
                    return;
            }
            int grpId = grp.groupId();
            long partMetaId = pageMem.partitionMetaPageId(grpId, store.partId());
            long partMetaPage = pageMem.acquirePage(grpId, partMetaId);
            try {
                long partMetaPageAddr = pageMem.writeLock(grpId, partMetaId, partMetaPage);
                if (partMetaPageAddr == 0L) {
                    U.warn(log, "Failed to acquire write lock for meta page [metaPage=" + partMetaPage + ", beforeDestroy=" + beforeDestroy + ", size=" + size + ", updCntr=" + updCntr + ", state=" + state + ']');
                    return;
                }
                boolean changed = false;
                try {
                    PagePartitionMetaIOV3 io = PageIO.getPageIO(partMetaPageAddr);
                    long link = io.getGapsLink(partMetaPageAddr);
                    if (updCntrsBytes == null && link != 0) {
                        partStore.removeDataRowByLink(link, grp.statisticsHolderData());
                        io.setGapsLink(partMetaPageAddr, (link = 0));
                        changed = true;
                    } else if (updCntrsBytes != null && link == 0) {
                        SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes);
                        partStore.insertDataRow(row, grp.statisticsHolderData());
                        io.setGapsLink(partMetaPageAddr, (link = row.link()));
                        changed = true;
                    } else if (updCntrsBytes != null && link != 0) {
                        byte[] prev = partStore.readRow(link);
                        assert prev != null : "Read null gaps using link=" + link;
                        if (!Arrays.equals(prev, updCntrsBytes)) {
                            partStore.removeDataRowByLink(link, grp.statisticsHolderData());
                            SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes);
                            partStore.insertDataRow(row, grp.statisticsHolderData());
                            io.setGapsLink(partMetaPageAddr, (link = row.link()));
                            changed = true;
                        }
                    }
                    if (changed)
                        partStore.saveMetadata(grp.statisticsHolderData());
                    changed |= io.setUpdateCounter(partMetaPageAddr, updCntr);
                    changed |= io.setGlobalRemoveId(partMetaPageAddr, rmvId);
                    changed |= io.setSize(partMetaPageAddr, size);
                    int encryptIdx = 0;
                    int encryptCnt = 0;
                    if (grp.config().isEncryptionEnabled()) {
                        long reencryptState = encMgr.getEncryptionState(grpId, store.partId());
                        if (reencryptState != 0) {
                            encryptIdx = ReencryptStateUtils.pageIndex(reencryptState);
                            encryptCnt = ReencryptStateUtils.pageCount(reencryptState);
                            if (encryptIdx == encryptCnt) {
                                encMgr.setEncryptionState(grp, store.partId(), 0, 0);
                                encryptIdx = encryptCnt = 0;
                            }
                            changed |= io.setEncryptedPageIndex(partMetaPageAddr, encryptIdx);
                            changed |= io.setEncryptedPageCount(partMetaPageAddr, encryptCnt);
                        }
                    }
                    if (state != null)
                        changed |= io.setPartitionState(partMetaPageAddr, (byte) state.ordinal());
                    else
                        assert grp.isLocal() : grp.cacheOrGroupName();
                    long cntrsPageId;
                    if (grp.sharedGroup()) {
                        long initCntrPageId = io.getCountersPageId(partMetaPageAddr);
                        Map<Integer, Long> newSizes = store.cacheSizes();
                        Map<Integer, Long> prevSizes = readSharedGroupCacheSizes(pageMem, grpId, initCntrPageId);
                        if (prevSizes != null && prevSizes.equals(newSizes))
                            // Preventing modification of sizes pages for store
                            cntrsPageId = initCntrPageId;
                        else {
                            cntrsPageId = writeSharedGroupCacheSizes(pageMem, grpId, initCntrPageId, store.partId(), newSizes);
                            if (initCntrPageId == 0 && cntrsPageId != 0) {
                                io.setCountersPageId(partMetaPageAddr, cntrsPageId);
                                changed = true;
                            }
                        }
                    } else
                        cntrsPageId = 0L;
                    int pageCnt;
                    if (needSnapshot) {
                        pageCnt = this.ctx.pageStore().pages(grpId, store.partId());
                        io.setCandidatePageCount(partMetaPageAddr, size == 0 ? 0 : pageCnt);
                        if (state == OWNING) {
                            assert part != null;
                            if (!addPartition(part, ctx.partitionStatMap(), partMetaPageAddr, io, grpId, store.partId(), this.ctx.pageStore().pages(grpId, store.partId()), store.fullSize()))
                                U.warn(log, "Partition was concurrently evicted grpId=" + grpId + ", partitionId=" + part.id());
                        } else if (state == MOVING || state == RENTING) {
                            if (ctx.partitionStatMap().forceSkipIndexPartition(grpId)) {
                                if (log.isInfoEnabled())
                                    log.info("Will not include SQL indexes to snapshot because there is " + "a partition not in " + OWNING + " state [grp=" + grp.cacheOrGroupName() + ", partId=" + store.partId() + ", state=" + state + ']');
                            }
                        }
                        changed = true;
                    } else
                        pageCnt = io.getCandidatePageCount(partMetaPageAddr);
                    if (changed && isWalDeltaRecordNeeded(pageMem, grpId, partMetaId, partMetaPage, wal, null))
                        wal.log(new MetaPageUpdatePartitionDataRecordV3(grpId, partMetaId, updCntr, rmvId, // TODO: Partition size may be long
                        (int) size, cntrsPageId, state == null ? -1 : (byte) state.ordinal(), pageCnt, link, encryptIdx, encryptCnt));
                    if (changed) {
                        partStore.saveMetadata(grp.statisticsHolderData());
                        io.setPartitionMetaStoreReuseListRoot(partMetaPageAddr, partStore.metaPageId());
                    }
                } finally {
                    pageMem.writeUnlock(grpId, partMetaId, partMetaPage, null, changed);
                }
            } finally {
                pageMem.releasePage(grpId, partMetaId, partMetaPage);
            }
        } else if (needSnapshot)
            tryAddEmptyPartitionToSnapshot(store, ctx);
    } else if (needSnapshot)
        tryAddEmptyPartitionToSnapshot(store, ctx);
}
Also used : GridEncryptionManager(org.apache.ignite.internal.managers.encryption.GridEncryptionManager) IgniteWriteAheadLogManager(org.apache.ignite.internal.pagemem.wal.IgniteWriteAheadLogManager) CacheFreeList(org.apache.ignite.internal.processors.cache.persistence.freelist.CacheFreeList) CacheDataRowStore(org.apache.ignite.internal.processors.cache.tree.CacheDataRowStore) PagePartitionMetaIOV3(org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIOV3) SimpleDataRow(org.apache.ignite.internal.processors.cache.persistence.freelist.SimpleDataRow) MetaPageUpdatePartitionDataRecordV3(org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecordV3) AtomicLong(java.util.concurrent.atomic.AtomicLong) GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState) PageMemoryEx(org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryEx) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)

Example 5 with GridDhtPartitionState

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in project ignite by apache.

the class GridCacheOffheapManager method updateState.

/**
 * Restores the state of the given partition from a stored state ordinal.
 * <p>
 * An ordinal of {@code -1} is treated as "nothing to restore" and leaves the partition untouched.
 * A decoded {@code EVICTED} state is never restored as is: the partition is put into {@code RENTING} instead.
 *
 * @param part Partition to restore state for.
 * @param stateId State enum ordinal, or {@code -1} to skip restoring.
 */
private void updateState(GridDhtLocalPartition part, int stateId) {
    // No state ordinal supplied - leave the partition as it is.
    if (stateId == -1)
        return;

    GridDhtPartitionState restored = GridDhtPartitionState.fromOrdinal(stateId);

    assert restored != null;

    // EVICTED is replaced with RENTING before being applied to the partition.
    if (restored == EVICTED)
        part.restoreState(RENTING);
    else
        part.restoreState(restored);
}
Also used : GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState)

Aggregations

GridDhtPartitionState (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState)25 Map (java.util.Map)13 GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)11 HashMap (java.util.HashMap)10 ArrayList (java.util.ArrayList)8 UUID (java.util.UUID)8 GridDhtPartitionMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap)8 GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology)7 ClusterNode (org.apache.ignite.cluster.ClusterNode)6 AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)6 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 Ignite (org.apache.ignite.Ignite)5 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)5 IgniteException (org.apache.ignite.IgniteException)5 IgniteKernal (org.apache.ignite.internal.IgniteKernal)5 GridDhtPartitionFullMap (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionFullMap)5 HashSet (java.util.HashSet)4 GridPartitionStateMap (org.apache.ignite.internal.util.GridPartitionStateMap)4 GridFutureAdapter (org.apache.ignite.internal.util.future.GridFutureAdapter)4 U (org.apache.ignite.internal.util.typedef.internal.U)4