Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
Class GridDhtPartitionsExchangeFuture, method validatePartitionsState.
/**
* Validates that partition update counters and cache sizes for all caches are consistent.
*/
private void validatePartitionsState() {
    try {
        U.doInParallel(
            cctx.kernalContext().pools().getSystemExecutorService(),
            nonLocalCacheGroupDescriptors(),
            grpDesc -> {
                CacheGroupContext grpCtx = cctx.cache().cacheGroup(grpDesc.groupId());

                GridDhtPartitionTopology top = grpCtx != null
                    ? grpCtx.topology()
                    : cctx.exchange().clientTopology(grpDesc.groupId(), events().discoveryCache());

                // Skip validation for read-through or write-through caches, caches with rebalancing
                // disabled, caches with a non-eternal ExpiryPolicy, or when validation is disabled explicitly.
                boolean customExpiryPlc = Optional.ofNullable(grpCtx)
                    .map(CacheGroupContext::caches)
                    .orElseGet(Collections::emptyList)
                    .stream()
                    .anyMatch(ctx -> ctx.expiry() != null && !(ctx.expiry() instanceof EternalExpiryPolicy));

                if (grpCtx == null || grpCtx.config().isReadThrough() || grpCtx.config().isWriteThrough()
                    || grpCtx.config().getCacheStoreFactory() != null || grpCtx.config().getRebalanceDelay() == -1
                    || grpCtx.config().getRebalanceMode() == CacheRebalanceMode.NONE || customExpiryPlc
                    || SKIP_PARTITION_SIZE_VALIDATION)
                    return null;

                try {
                    validator.validatePartitionCountersAndSizes(GridDhtPartitionsExchangeFuture.this, top, msgs);
                }
                catch (IgniteCheckedException ex) {
                    log.warning(String.format(PARTITION_STATE_FAILED_MSG, grpCtx.cacheOrGroupName(), ex.getMessage()));
                    // TODO: Handle such errors https://issues.apache.org/jira/browse/IGNITE-7833
                }

                return null;
            });
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException("Failed to validate partitions state", e);
    }

    timeBag.finishGlobalStage("Validate partitions states");
}
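The validation above is skipped for caches whose configuration makes update counters and sizes legitimately diverge across nodes (the SKIP_PARTITION_SIZE_VALIDATION flag is additionally driven by an Ignite system property that disables the check globally). The following is a minimal sketch, using only the public CacheConfiguration API, of settings that would exclude a cache group from this check; the cache name is arbitrary and any single setting is enough:

import javax.cache.expiry.CreatedExpiryPolicy;
import javax.cache.expiry.Duration;
import org.apache.ignite.cache.CacheRebalanceMode;
import org.apache.ignite.configuration.CacheConfiguration;

public class SkippedValidationConfig {
    /** Builds a configuration that validatePartitionsState() would skip. */
    public static CacheConfiguration<Integer, String> config() {
        CacheConfiguration<Integer, String> ccfg = new CacheConfiguration<>("exampleCache");

        // Each of these settings, on its own, excludes the group from counter/size validation:
        ccfg.setReadThrough(true);                      // read-through store (a real setup also needs a CacheStoreFactory)
        ccfg.setWriteThrough(true);                     // write-through store
        ccfg.setRebalanceMode(CacheRebalanceMode.NONE); // rebalancing disabled
        ccfg.setRebalanceDelay(-1);                     // manual rebalancing
        ccfg.setExpiryPolicyFactory(                    // non-eternal expiry policy
            CreatedExpiryPolicy.factoryOf(Duration.ONE_MINUTE));

        return ccfg;
    }
}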
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
Class GridDhtPartitionsExchangeFuture, method updatePartitionSingleMap.
/**
* Updates partition map in all caches.
*
* @param nodeId ID of the node the message was received from.
* @param msg Partitions single message.
*/
private void updatePartitionSingleMap(UUID nodeId, GridDhtPartitionsSingleMessage msg) {
    msgs.put(nodeId, msg);

    for (Map.Entry<Integer, GridDhtPartitionMap> entry : msg.partitions().entrySet()) {
        Integer grpId = entry.getKey();

        CacheGroupContext grp = cctx.cache().cacheGroup(grpId);

        GridDhtPartitionTopology top = grp != null
            ? grp.topology()
            : cctx.exchange().clientTopology(grpId, events().discoveryCache());

        top.update(exchId, entry.getValue(), false);
    }
}
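The lookup pattern above — use the started cache group's topology when the group exists on this node, otherwise fall back to the exchange manager's client topology — recurs in several methods of this class. A hypothetical helper (not present in the Ignite code base) that captures it could look like this:

private GridDhtPartitionTopology topologyOf(int grpId) {
    CacheGroupContext grp = cctx.cache().cacheGroup(grpId);

    // Nodes that host the group have a full DHT topology; other nodes track it via a client topology.
    return grp != null
        ? grp.topology()
        : cctx.exchange().clientTopology(grpId, events().discoveryCache());
}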
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
Class GridDhtPartitionsExchangeFuture, method finishExchangeOnCoordinator.
/**
* @param sndResNodes Additional nodes to send the finish message to.
*/
private void finishExchangeOnCoordinator(@Nullable Collection<ClusterNode> sndResNodes) {
    if (isDone() || !enterBusy())
        return;

    try {
        if (!F.isEmpty(exchangeGlobalExceptions) && dynamicCacheStartExchange() && isRollbackSupported()) {
            sendExchangeFailureMessage();

            return;
        }

        AffinityTopologyVersion resTopVer = exchCtx.events().topologyVersion();

        if (log.isInfoEnabled()) {
            log.info("finishExchangeOnCoordinator [topVer=" + initialVersion() + ", resVer=" + resTopVer + ']');
        }

        Map<Integer, CacheGroupAffinityMessage> idealAffDiff = null;

        // Reserve at least 2 threads for system operations.
        int parallelismLvl = U.availableThreadCount(cctx.kernalContext(), GridIoPolicy.SYSTEM_POOL, 2);

        if (exchCtx.mergeExchanges()) {
            synchronized (mux) {
                if (mergedJoinExchMsgs != null) {
                    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : mergedJoinExchMsgs.entrySet()) {
                        msgs.put(e.getKey(), e.getValue());

                        updatePartitionSingleMap(e.getKey(), e.getValue());
                    }
                }
            }

            assert exchCtx.events().hasServerJoin() || exchCtx.events().hasServerLeft();

            exchCtx.events().processEvents(this);

            if (exchCtx.events().hasServerLeft())
                idealAffDiff = cctx.affinity().onServerLeftWithExchangeMergeProtocol(this);
            else
                cctx.affinity().onServerJoinWithExchangeMergeProtocol(this, true);

            doInParallel(
                parallelismLvl,
                cctx.kernalContext().pools().getSystemExecutorService(),
                cctx.affinity().cacheGroups().values(),
                desc -> {
                    if (desc.config().getCacheMode() == CacheMode.LOCAL)
                        return null;

                    CacheGroupContext grp = cctx.cache().cacheGroup(desc.groupId());

                    GridDhtPartitionTopology top = grp != null
                        ? grp.topology()
                        : cctx.exchange().clientTopology(desc.groupId(), events().discoveryCache());

                    top.beforeExchange(this, true, true);

                    return null;
                });
        }

        span.addLog(() -> "Affinity recalculation (crd)");

        timeBag.finishGlobalStage("Affinity recalculation (crd)");

        Map<Integer, CacheGroupAffinityMessage> joinedNodeAff = new ConcurrentHashMap<>(cctx.cache().cacheGroups().size());

        doInParallel(
            parallelismLvl,
            cctx.kernalContext().pools().getSystemExecutorService(),
            msgs.values(),
            msg -> {
                processSingleMessageOnCrdFinish(msg, joinedNodeAff);

                return null;
            });

        timeBag.finishGlobalStage("Collect update counters and create affinity messages");

        if (firstDiscoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
            assert firstDiscoEvt instanceof DiscoveryCustomEvent;

            if (activateCluster() || changedBaseline())
                assignPartitionsStates(null);

            DiscoveryCustomMessage discoveryCustomMessage = ((DiscoveryCustomEvent)firstDiscoEvt).customMessage();

            if (discoveryCustomMessage instanceof DynamicCacheChangeBatch) {
                if (exchActions != null) {
                    Set<String> caches = exchActions.cachesToResetLostPartitions();

                    if (!F.isEmpty(caches))
                        resetLostPartitions(caches);

                    Set<Integer> cacheGroupsToResetOwners = concat(
                        exchActions.cacheGroupsToStart().stream().map(grp -> grp.descriptor().groupId()),
                        exchActions.cachesToResetLostPartitions().stream().map(CU::cacheId))
                        .collect(Collectors.toSet());

                    assignPartitionsStates(cacheGroupsToResetOwners);
                }
            }
            else if (discoveryCustomMessage instanceof SnapshotDiscoveryMessage
                && ((SnapshotDiscoveryMessage)discoveryCustomMessage).needAssignPartitions()) {
                markAffinityReassign();

                assignPartitionsStates(null);
            }
        }
        else if (exchCtx.events().hasServerJoin())
            assignPartitionsStates(null);
        else if (exchCtx.events().hasServerLeft())
            assignPartitionsStates(emptySet());

        // Validation should happen after resetting owners to avoid false desync reporting.
        validatePartitionsState();

        // Recalculate new affinity based on partition availability.
        if (!exchCtx.mergeExchanges() && forceAffReassignment) {
            idealAffDiff = cctx.affinity().onCustomEventWithEnforcedAffinityReassignment(this);

            timeBag.finishGlobalStage("Ideal affinity diff calculation (enforced)");
        }

        for (CacheGroupContext grpCtx : cctx.cache().cacheGroups()) {
            if (!grpCtx.isLocal())
                grpCtx.topology().applyUpdateCounters();
        }

        timeBag.finishGlobalStage("Apply update counters");

        updateLastVersion(cctx.versions().last());

        cctx.versions().onExchange(lastVer.get().order());

        IgniteProductVersion minVer = exchCtx.events().discoveryCache().minimumNodeVersion();

        GridDhtPartitionsFullMessage msg = createPartitionsMessage(true,
            minVer.compareToIgnoreTimestamp(PARTIAL_COUNTERS_MAP_SINCE) >= 0);

        if (!cctx.affinity().rebalanceRequired() && !deactivateCluster())
            msg.rebalanced(true);

        if (exchCtx.mergeExchanges()) {
            assert !centralizedAff;

            msg.resultTopologyVersion(resTopVer);

            if (exchCtx.events().hasServerLeft())
                msg.idealAffinityDiff(idealAffDiff);
        }
        else if (forceAffReassignment)
            msg.idealAffinityDiff(idealAffDiff);

        msg.prepareMarshal(cctx);

        timeBag.finishGlobalStage("Full message preparing");

        synchronized (mux) {
            finishState = new FinishState(crd.id(), resTopVer, msg);

            state = ExchangeLocalState.DONE;
        }

        if (centralizedAff) {
            assert !exchCtx.mergeExchanges();

            IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>> fut = cctx.affinity().initAffinityOnNodeLeft(this);

            if (!fut.isDone())
                fut.listen(this::onAffinityInitialized);
            else
                onAffinityInitialized(fut);
        }
        else {
            Set<ClusterNode> nodes;

            Map<UUID, GridDhtPartitionsSingleMessage> mergedJoinExchMsgs0;

            synchronized (mux) {
                srvNodes.remove(cctx.localNode());

                nodes = new LinkedHashSet<>(srvNodes);

                mergedJoinExchMsgs0 = mergedJoinExchMsgs;

                if (mergedJoinExchMsgs != null) {
                    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : mergedJoinExchMsgs.entrySet()) {
                        if (e.getValue() != null) {
                            ClusterNode node = cctx.discovery().node(e.getKey());

                            if (node != null)
                                nodes.add(node);
                        }
                    }
                }
                else
                    mergedJoinExchMsgs0 = Collections.emptyMap();

                if (!F.isEmpty(sndResNodes))
                    nodes.addAll(sndResNodes);
            }

            if (msg.rebalanced())
                markRebalanced();

            if (!nodes.isEmpty())
                sendAllPartitions(msg, nodes, mergedJoinExchMsgs0, joinedNodeAff);

            timeBag.finishGlobalStage("Full message sending");

            discoveryLag = calculateDiscoveryLag(msgs, mergedJoinExchMsgs0);

            if (!stateChangeExchange())
                onDone(exchCtx.events().topologyVersion(), null);

            for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : pendingSingleMsgs.entrySet()) {
                if (log.isInfoEnabled()) {
                    log.info("Process pending message on coordinator [node=" + e.getKey() +
                        ", ver=" + initialVersion() + ", resVer=" + resTopVer + ']');
                }

                processSingleMessage(e.getKey(), e.getValue());
            }
        }

        if (stateChangeExchange()) {
            StateChangeRequest req = exchActions.stateChangeRequest();

            assert req != null : exchActions;

            boolean stateChangeErr = false;

            if (!F.isEmpty(exchangeGlobalExceptions)) {
                stateChangeErr = true;

                cctx.kernalContext().state().onStateChangeError(exchangeGlobalExceptions, req);
            }
            else {
                boolean hasMoving = !partsToReload.isEmpty();

                Set<Integer> waitGrps = cctx.affinity().waitGroups();

                if (!hasMoving) {
                    for (CacheGroupContext grpCtx : cctx.cache().cacheGroups()) {
                        if (waitGrps.contains(grpCtx.groupId()) && grpCtx.topology().hasMovingPartitions()) {
                            hasMoving = true;

                            break;
                        }
                    }
                }

                cctx.kernalContext().state().onExchangeFinishedOnCoordinator(this, hasMoving);
            }

            if (!cctx.kernalContext().state().clusterState().localBaselineAutoAdjustment()) {
                ClusterState state = stateChangeErr ? ClusterState.INACTIVE : req.state();

                ChangeGlobalStateFinishMessage stateFinishMsg =
                    new ChangeGlobalStateFinishMessage(req.requestId(), state, !stateChangeErr);

                cctx.discovery().sendCustomEvent(stateFinishMsg);
            }

            timeBag.finishGlobalStage("State finish message sending");

            if (!centralizedAff)
                onDone(exchCtx.events().topologyVersion(), null);
        }

        // Try to switch late affinity right away if the exchange completed normally.
        if (!centralizedAff && isDone() && error() == null && !cctx.kernalContext().isStopping())
            cctx.exchange().checkRebalanceState();
    }
    catch (IgniteCheckedException e) {
        if (reconnectOnError(e))
            onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
        else
            onDone(e);
    }
    finally {
        leaveBusy();
    }
}
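A state-change exchange of the kind finished above is what a user triggers when activating or deactivating the cluster. Below is a minimal sketch against the public API, assuming Ignite 2.9+ (where IgniteCluster#state(ClusterState) and the baseline auto-adjust setters are available); the configuration path is a placeholder:

import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cluster.ClusterState;

public class ActivateClusterExample {
    public static void main(String[] args) {
        // "config/ignite.xml" stands in for an actual node configuration file.
        try (Ignite ignite = Ignition.start("config/ignite.xml")) {
            // Sends a state-change request that is completed by a coordinator-driven exchange.
            ignite.cluster().state(ClusterState.ACTIVE);

            // Let the baseline topology follow server joins and leaves automatically.
            ignite.cluster().baselineAutoAdjustEnabled(true);
            ignite.cluster().baselineAutoAdjustTimeout(60_000);
        }
    }
}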
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
Class GridDhtPartitionsExchangeFuture, method detectLostPartitions.
/**
 * Detects lost partitions when a node has left or failed. On the topology coordinator this is called once all
 * {@link GridDhtPartitionsSingleMessage}s have been received; on other nodes it is called when the exchange
 * future is completed by a {@link GridDhtPartitionsFullMessage}.
 *
 * @param resTopVer Result topology version.
 */
private void detectLostPartitions(AffinityTopologyVersion resTopVer) {
    try {
        // Reserve at least 2 threads for system operations.
        doInParallelUninterruptibly(
            U.availableThreadCount(cctx.kernalContext(), GridIoPolicy.SYSTEM_POOL, 2),
            cctx.kernalContext().pools().getSystemExecutorService(),
            cctx.affinity().cacheGroups().values(),
            desc -> {
                if (desc.config().getCacheMode() == CacheMode.LOCAL)
                    return null;

                CacheGroupContext grp = cctx.cache().cacheGroup(desc.groupId());

                GridDhtPartitionTopology top = grp != null
                    ? grp.topology()
                    : cctx.exchange().clientTopology(desc.groupId(), events().discoveryCache());

                top.detectLostPartitions(resTopVer, this);

                return null;
            });
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }

    timeBag.finishGlobalStage("Detect lost partitions");
}
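On the user side, partitions marked LOST by this method become visible through the public cache API. A short sketch, assuming a cache named "myCache" with a non-default partition loss policy; lostPartitions() and resetLostPartitions() are part of the public Ignite interfaces:

import java.util.Collections;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.cache.PartitionLossPolicy;
import org.apache.ignite.configuration.CacheConfiguration;

public class LostPartitionsExample {
    public static void handle(Ignite ignite) {
        CacheConfiguration<Integer, String> ccfg = new CacheConfiguration<Integer, String>("myCache")
            .setPartitionLossPolicy(PartitionLossPolicy.READ_ONLY_SAFE);

        IgniteCache<Integer, String> cache = ignite.getOrCreateCache(ccfg);

        // Partitions marked LOST by detectLostPartitions() show up here.
        if (!cache.lostPartitions().isEmpty()) {
            // Once the data is available again (e.g. the owning nodes rejoined), clear the LOST state.
            ignite.resetLostPartitions(Collections.singleton("myCache"));
        }
    }
}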
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
Class GridCacheQueryAdapter, method nodes.
/**
* @return Nodes to execute on.
*/
private Collection<ClusterNode> nodes() throws IgniteCheckedException {
    CacheMode cacheMode = cctx.config().getCacheMode();

    Integer part = partition();

    switch (cacheMode) {
        case LOCAL:
            if (prj != null)
                U.warn(log, "Ignoring query projection because it's executed over LOCAL cache " +
                    "(only local node will be queried): " + this);

            if (type == SCAN && cctx.config().getCacheMode() == LOCAL && part != null && part >= cctx.affinity().partitions())
                throw new IgniteCheckedException("Invalid partition number: " + part);

            return Collections.singletonList(cctx.localNode());

        case REPLICATED:
            if (prj != null || part != null)
                return nodes(cctx, prj, part);

            GridDhtPartitionTopology topology = cctx.topology();

            if (cctx.affinityNode() && !topology.localPartitionMap().hasMovingPartitions())
                return Collections.singletonList(cctx.localNode());

            topology.readLock();

            try {
                Collection<ClusterNode> affNodes = nodes(cctx, null, null);

                List<ClusterNode> nodes = new ArrayList<>(affNodes);

                Collections.shuffle(nodes);

                for (ClusterNode node : nodes) {
                    if (!topology.partitions(node.id()).hasMovingPartitions())
                        return Collections.singletonList(node);
                }

                return affNodes;
            }
            finally {
                topology.readUnlock();
            }

        case PARTITIONED:
            return nodes(cctx, prj, part);

        default:
            throw new IllegalStateException("Unknown cache distribution mode: " + cacheMode);
    }
}
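The partition argument handled above corresponds to the public ScanQuery#setPartition option, which restricts a scan to the nodes owning a single partition. A minimal sketch (the cache name "myCache" is an assumption):

import javax.cache.Cache;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.cache.query.QueryCursor;
import org.apache.ignite.cache.query.ScanQuery;

public class PartitionScanExample {
    public static void scan(Ignite ignite, int key) {
        IgniteCache<Integer, String> cache = ignite.cache("myCache");

        // Resolve the partition that owns the key, then restrict the scan to it.
        int part = ignite.affinity("myCache").partition(key);

        try (QueryCursor<Cache.Entry<Integer, String>> cur =
                 cache.query(new ScanQuery<Integer, String>().setPartition(part))) {
            for (Cache.Entry<Integer, String> e : cur)
                System.out.println(e.getKey() + " -> " + e.getValue());
        }
    }
}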