Use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
The class CacheAffinitySharedManager, method initAffinityBasedOnPartitionsAvailability.
/**
* Initializes the current affinity assignment based on partition availability. Nodes that have the most recent data
* will be considered affinity nodes.
*
* @param topVer Topology version.
* @param fut Exchange future.
* @param c Closure converting nodes of the affinity diff to the result type.
* @param initAff {@code True} if affinity needs to be initialized.
* @return Affinity assignment for each registered cache group.
*/
private <T> Map<Integer, Map<Integer, List<T>>> initAffinityBasedOnPartitionsAvailability(final AffinityTopologyVersion topVer, final GridDhtPartitionsExchangeFuture fut, final IgniteClosure<ClusterNode, T> c, final boolean initAff) {
final boolean enforcedCentralizedAssignment = DiscoveryCustomEvent.requiresCentralizedAffinityAssignment(fut.firstEvent());
final WaitRebalanceInfo waitRebalanceInfo = enforcedCentralizedAssignment ? new WaitRebalanceInfo(fut.exchangeId().topologyVersion()) : new WaitRebalanceInfo(fut.context().events().lastServerEventVersion());
final Collection<ClusterNode> evtNodes = fut.context().events().discoveryCache().serverNodes();
final Map<Integer, Map<Integer, List<T>>> assignment = new ConcurrentHashMap<>();
forAllRegisteredCacheGroups(new IgniteInClosureX<CacheGroupDescriptor>() {
@Override
public void applyx(CacheGroupDescriptor desc) throws IgniteCheckedException {
CacheGroupHolder grpHolder = getOrCreateGroupHolder(topVer, desc);
if (!grpHolder.rebalanceEnabled || (fut.cacheGroupAddedOnExchange(desc.groupId(), desc.receivedFrom()) && !enforcedCentralizedAssignment))
return;
AffinityTopologyVersion affTopVer = grpHolder.affinity().lastVersion();
assert (affTopVer.topologyVersion() > 0 && !affTopVer.equals(topVer)) || enforcedCentralizedAssignment : "Invalid affinity version [last=" + affTopVer + ", futVer=" + topVer + ", grp=" + desc.cacheOrGroupName() + ']';
List<List<ClusterNode>> curAssignment = grpHolder.affinity().assignments(affTopVer);
List<List<ClusterNode>> newAssignment = grpHolder.affinity().idealAssignmentRaw();
assert newAssignment != null;
List<List<ClusterNode>> newAssignment0 = initAff ? new ArrayList<>(newAssignment) : null;
GridDhtPartitionTopology top = grpHolder.topology(fut.context().events().discoveryCache());
Map<Integer, List<T>> cacheAssignment = null;
for (int p = 0; p < newAssignment.size(); p++) {
List<ClusterNode> newNodes = newAssignment.get(p);
List<ClusterNode> curNodes = curAssignment.get(p);
assert evtNodes.containsAll(newNodes) : "Invalid new assignment [grp=" + grpHolder.aff.cacheOrGroupName() + ", nodes=" + newNodes + ", topVer=" + fut.context().events().discoveryCache().version() + ", evts=" + fut.context().events().events() + "]";
ClusterNode curPrimary = !curNodes.isEmpty() ? curNodes.get(0) : null;
ClusterNode newPrimary = !newNodes.isEmpty() ? newNodes.get(0) : null;
List<ClusterNode> newNodes0 = null;
assert newPrimary == null || evtNodes.contains(newPrimary) : "Invalid new primary [" + "grp=" + desc.cacheOrGroupName() + ", node=" + newPrimary + ", topVer=" + topVer + ']';
List<ClusterNode> owners = top.owners(p, topVer);
// It is essential that the curPrimary node has the partition in OWNING state.
if (!owners.isEmpty() && !owners.contains(curPrimary))
curPrimary = owners.get(0);
// If the new assignment is empty, preserve current ownership for alive nodes.
if (curPrimary != null && newPrimary == null) {
newNodes0 = new ArrayList<>(curNodes.size());
for (ClusterNode node : curNodes) {
if (evtNodes.contains(node))
newNodes0.add(node);
}
} else if (curPrimary != null && !curPrimary.equals(newPrimary)) {
GridDhtPartitionState state = top.partitionState(newPrimary.id(), p);
if (evtNodes.contains(curPrimary)) {
if (state != OWNING) {
newNodes0 = latePrimaryAssignment(grpHolder.affinity(), p, curPrimary, newNodes, waitRebalanceInfo);
}
} else {
if (state != OWNING) {
for (int i = 1; i < curNodes.size(); i++) {
ClusterNode curNode = curNodes.get(i);
if (top.partitionState(curNode.id(), p) == OWNING && evtNodes.contains(curNode)) {
newNodes0 = latePrimaryAssignment(grpHolder.affinity(), p, curNode, newNodes, waitRebalanceInfo);
break;
}
}
if (newNodes0 == null) {
for (ClusterNode owner : owners) {
if (evtNodes.contains(owner)) {
newNodes0 = latePrimaryAssignment(grpHolder.affinity(), p, owner, newNodes, waitRebalanceInfo);
break;
}
}
}
}
}
}
// This will happen if no primary has changed but some backups still need to be rebalanced.
if (!owners.isEmpty() && !owners.containsAll(newNodes) && !top.lostPartitions().contains(p))
waitRebalanceInfo.add(grpHolder.groupId(), p, newNodes);
if (newNodes0 != null) {
assert evtNodes.containsAll(newNodes0) : "Invalid late assignment [grp=" + grpHolder.aff.cacheOrGroupName() + ", nodes=" + newNodes + ", topVer=" + fut.context().events().discoveryCache().version() + ", evts=" + fut.context().events().events() + "]";
if (newAssignment0 != null)
newAssignment0.set(p, newNodes0);
if (cacheAssignment == null)
cacheAssignment = new HashMap<>();
List<T> n = new ArrayList<>(newNodes0.size());
for (int i = 0; i < newNodes0.size(); i++) n.add(c.apply(newNodes0.get(i)));
cacheAssignment.put(p, n);
}
}
if (cacheAssignment != null)
assignment.put(grpHolder.groupId(), cacheAssignment);
if (initAff)
grpHolder.affinity().initialize(topVer, newAssignment0);
fut.timeBag().finishLocalStage("Affinity recalculation (partitions availability) " + "[grp=" + desc.cacheOrGroupName() + "]");
}
});
if (log.isDebugEnabled()) {
log.debug("Computed new affinity after node left [topVer=" + topVer + ", waitGrps=" + groupNames(waitRebalanceInfo.waitGrps.keySet()) + ']');
}
synchronized (mux) {
waitInfo = !waitRebalanceInfo.empty() ? waitRebalanceInfo : null;
}
return assignment;
}
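The result type T above is determined entirely by the closure c, which maps each selected affinity node to whatever per-partition representation the caller needs. A minimal sketch of such a closure, assuming the common case of reducing nodes to their IDs (the class name NodeToIdClosure is hypothetical and used only for illustration):

import java.util.UUID;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.lang.IgniteClosure;

// Hedged sketch: a closure of the kind passed as the 'c' parameter, converting each affinity
// node into a compact value (here, the node ID) stored in the per-partition result lists.
class NodeToIdClosure implements IgniteClosure<ClusterNode, UUID> {
    @Override public UUID apply(ClusterNode node) {
        return node.id();
    }
}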
Use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
The class CacheAffinitySharedManager, method processClientCachesRequests.
/**
* Processes client cache start/close requests from non-affinity nodes; called from the exchange thread.
*
* @param msg Change request.
*/
void processClientCachesRequests(ClientCacheChangeDummyDiscoveryMessage msg) {
// Get ready exchange version.
AffinityTopologyVersion topVer = cctx.exchange().readyAffinityVersion();
DiscoCache discoCache = cctx.discovery().discoCache(topVer);
ClusterNode node = discoCache.oldestAliveServerNode();
// Resolve coordinator for specific version.
boolean crd = node != null && node.isLocal();
Map<Integer, Boolean> startedCaches = null;
Set<Integer> closedCaches = null;
// Check and start caches via dummy message.
if (msg.startRequests() != null)
startedCaches = processClientCacheStartRequests(crd, msg, topVer, discoCache);
// Check and close caches via dummy message.
if (msg.cachesToClose() != null)
closedCaches = processCacheCloseRequests(msg, topVer);
// Schedule the client change message.
if (startedCaches != null || closedCaches != null)
scheduleClientChangeMessage(startedCaches, closedCaches);
}
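The coordinator check above relies on the convention that the exchange coordinator for a given topology version is the oldest alive server node, which is what discoCache.oldestAliveServerNode() returns. A standalone sketch of that convention (illustrative only, not the Ignite implementation):

import java.util.Collection;
import java.util.Comparator;
import org.apache.ignite.cluster.ClusterNode;

// Illustrative sketch: the coordinator is the alive server node with the smallest join order.
final class OldestServerResolver {
    static ClusterNode oldestAliveServer(Collection<ClusterNode> aliveSrvNodes) {
        return aliveSrvNodes.stream()
            .min(Comparator.comparingLong(ClusterNode::order))
            .orElse(null);
    }
}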
Use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
The class CacheMetricsImpl, method getEntriesStat.
/**
* Calculates entry count and partition count metrics in a single iteration over the local partitions.
*/
public EntriesStatMetrics getEntriesStat() {
int owningPartCnt = 0;
int movingPartCnt = 0;
long offHeapEntriesCnt = 0L;
long offHeapPrimaryEntriesCnt = 0L;
long offHeapBackupEntriesCnt = 0L;
long heapEntriesCnt = 0L;
int size = 0;
long sizeLong = 0L;
boolean isEmpty;
try {
AffinityTopologyVersion topVer = cctx.affinity().affinityTopologyVersion();
if (AffinityTopologyVersion.NONE.equals(topVer))
return unknownEntriesStat();
final GridCacheAdapter<?, ?> cache = cctx.cache();
if (cache != null) {
offHeapEntriesCnt = cache.offHeapEntriesCount();
size = cache.localSize(null);
sizeLong = cache.localSizeLong(null);
}
if (cctx.isLocal()) {
if (cache != null) {
offHeapPrimaryEntriesCnt = offHeapEntriesCnt;
heapEntriesCnt = cache.sizeLong();
}
} else {
IntSet primaries = ImmutableIntSet.wrap(cctx.affinity().primaryPartitions(cctx.localNodeId(), topVer));
IntSet backups = ImmutableIntSet.wrap(cctx.affinity().backupPartitions(cctx.localNodeId(), topVer));
if (cctx.isNear() && cache != null)
heapEntriesCnt = cache.nearSize();
for (GridDhtLocalPartition part : cctx.topology().currentLocalPartitions()) {
// Partitions count.
GridDhtPartitionState partState = part.state();
if (partState == GridDhtPartitionState.OWNING)
owningPartCnt++;
if (partState == GridDhtPartitionState.MOVING)
movingPartCnt++;
// Off-heap entries count.
if (cache == null)
continue;
long cacheSize = part.dataStore().cacheSize(cctx.cacheId());
if (primaries.contains(part.id()))
offHeapPrimaryEntriesCnt += cacheSize;
else if (backups.contains(part.id()))
offHeapBackupEntriesCnt += cacheSize;
heapEntriesCnt += part.publicSize(cctx.cacheId());
}
}
} catch (Exception e) {
return unknownEntriesStat();
}
isEmpty = (offHeapEntriesCnt == 0);
EntriesStatMetrics stat = new EntriesStatMetrics();
stat.offHeapEntriesCount(offHeapEntriesCnt);
stat.offHeapPrimaryEntriesCount(offHeapPrimaryEntriesCnt);
stat.offHeapBackupEntriesCount(offHeapBackupEntriesCnt);
stat.heapEntriesCount(heapEntriesCnt);
stat.size(size);
stat.cacheSize(sizeLong);
stat.keySize(size);
stat.isEmpty(isEmpty);
stat.totalPartitionsCount(owningPartCnt + movingPartCnt);
stat.rebalancingPartitionsCount(movingPartCnt);
return stat;
}
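The primary/backup split above uses internal affinity structures, but the same per-node partition roles are visible through the public Affinity API. A small sketch, assuming a running Ignite instance and a cache named "myCache" (both names are assumptions for illustration):

import org.apache.ignite.Ignite;
import org.apache.ignite.cache.affinity.Affinity;
import org.apache.ignite.cluster.ClusterNode;

// Sketch using the public API: report how many partitions the local node holds as primary and as backup.
final class LocalPartitionRoles {
    static void print(Ignite ignite) {
        Affinity<Object> aff = ignite.affinity("myCache"); // cache name is a hypothetical example
        ClusterNode loc = ignite.cluster().localNode();
        int[] primaries = aff.primaryPartitions(loc); // partitions for which this node is primary
        int[] backups = aff.backupPartitions(loc);    // partitions for which this node is a backup
        System.out.println("Primary partitions: " + primaries.length + ", backup partitions: " + backups.length);
    }
}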
Use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
The class GridDhtPartitionsExchangeFuture, method processFullMessage.
/**
* @param checkCrd If {@code true}, verifies that the message was sent by the current exchange coordinator.
* @param node Sender node.
* @param msg Message.
*/
private void processFullMessage(boolean checkCrd, ClusterNode node, GridDhtPartitionsFullMessage msg) {
try {
assert exchId.equals(msg.exchangeId()) : msg;
assert msg.lastVersion() != null : msg;
timeBag.finishGlobalStage("Waiting for Full message");
if (checkCrd) {
assert node != null;
synchronized (mux) {
if (crd == null) {
if (log.isInfoEnabled())
log.info("Ignore full message, all server nodes left: " + msg);
return;
}
switch(state) {
case CRD:
case BECOME_CRD:
{
if (log.isInfoEnabled())
log.info("Ignore full message, node is coordinator: " + msg);
return;
}
case DONE:
{
if (log.isInfoEnabled())
log.info("Ignore full message, future is done: " + msg);
return;
}
case SRV:
case CLIENT:
{
if (!crd.equals(node)) {
if (log.isInfoEnabled()) {
log.info("Received full message from non-coordinator [node=" + node.id() + ", nodeOrder=" + node.order() + ", crd=" + crd.id() + ", crdOrder=" + crd.order() + ']');
}
if (node.order() > crd.order())
fullMsgs.put(node, msg);
return;
} else {
if (!F.isEmpty(msg.getErrorsMap())) {
Exception e = msg.getErrorsMap().get(cctx.localNodeId());
if (e instanceof IgniteNeedReconnectException) {
onDone(e);
return;
}
}
AffinityTopologyVersion resVer = msg.resultTopologyVersion() != null ? msg.resultTopologyVersion() : initialVersion();
if (log.isInfoEnabled()) {
log.info("Received full message, will finish exchange [node=" + node.id() + ", resVer=" + resVer + ']');
}
finishState = new FinishState(crd.id(), resVer, msg);
state = ExchangeLocalState.DONE;
break;
}
}
}
}
} else
assert node == null : node;
AffinityTopologyVersion resTopVer = initialVersion();
if (exchCtx.mergeExchanges()) {
if (msg.resultTopologyVersion() != null && !initialVersion().equals(msg.resultTopologyVersion())) {
if (log.isInfoEnabled()) {
log.info("Received full message, need merge [curFut=" + initialVersion() + ", resVer=" + msg.resultTopologyVersion() + ']');
}
resTopVer = msg.resultTopologyVersion();
if (cctx.exchange().mergeExchanges(this, msg)) {
assert cctx.kernalContext().isStopping() || cctx.kernalContext().clientDisconnected();
// Node is stopping; no need to process the exchange further.
return;
}
assert resTopVer.equals(exchCtx.events().topologyVersion()) : "Unexpected result version [" + "msgVer=" + resTopVer + ", locVer=" + exchCtx.events().topologyVersion() + ']';
}
exchCtx.events().processEvents(this);
if (localJoinExchange()) {
Set<Integer> noAffinityGroups = cctx.affinity().onLocalJoin(this, msg.joinedNodeAffinity(), resTopVer);
// Prevent cache usage by a user.
if (!noAffinityGroups.isEmpty()) {
List<GridCacheAdapter> closedCaches = cctx.cache().blockGateways(noAffinityGroups);
closedCaches.forEach(cache -> log.warning("Affinity for cache " + cache.context().name() + " has not been received from the coordinator during local join. " + "Probably the cache is already stopped but not yet processed on the local node. " + "The cache proxy will be closed for user interactions for safety."));
}
} else {
if (exchCtx.events().hasServerLeft())
cctx.affinity().applyAffinityFromFullMessage(this, msg.idealAffinityDiff());
else
cctx.affinity().onServerJoinWithExchangeMergeProtocol(this, false);
for (CacheGroupContext grp : cctx.cache().cacheGroups()) {
if (grp.isLocal() || cacheGroupStopping(grp.groupId()))
continue;
grp.topology().beforeExchange(this, true, false);
}
}
} else if (localJoinExchange() && !exchCtx.fetchAffinityOnJoin())
cctx.affinity().onLocalJoin(this, msg.joinedNodeAffinity(), resTopVer);
else if (forceAffReassignment)
cctx.affinity().applyAffinityFromFullMessage(this, msg.idealAffinityDiff());
timeBag.finishGlobalStage("Affinity recalculation");
if (dynamicCacheStartExchange() && !F.isEmpty(exchangeGlobalExceptions)) {
assert cctx.localNode().isClient();
// TODO: https://issues.apache.org/jira/browse/IGNITE-8796
// The current exchange has been successfully completed on all server nodes,
// but has failed on that client node for some reason.
// It looks like that we need to rollback dynamically started caches on the client node,
// complete DynamicCacheStartFutures (if they are registered) with the cause of that failure
// and complete current exchange without errors.
onDone(exchangeLocE);
return;
}
updatePartitionFullMap(resTopVer, msg);
if (msg.rebalanced())
markRebalanced();
if (stateChangeExchange() && !F.isEmpty(msg.getErrorsMap()))
cctx.kernalContext().state().onStateChangeError(msg.getErrorsMap(), exchActions.stateChangeRequest());
if (firstDiscoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
DiscoveryCustomMessage discoveryCustomMessage = ((DiscoveryCustomEvent) firstDiscoEvt).customMessage();
if (discoveryCustomMessage instanceof SnapshotDiscoveryMessage && ((SnapshotDiscoveryMessage) discoveryCustomMessage).needAssignPartitions())
markAffinityReassign();
}
onDone(resTopVer, null);
} catch (IgniteCheckedException e) {
onDone(e);
}
}
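In the SRV/CLIENT branch above, a full message from a non-coordinator sender is kept in fullMsgs only when the sender joined after the current coordinator (node.order() > crd.order()), since such a node may become the next coordinator and its message may then need to be replayed. A simplified sketch of that buffering decision (types and names are illustrative, not the internal implementation):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.ignite.cluster.ClusterNode;

// Simplified sketch of the buffering rule: stash a message from a non-coordinator sender only if
// the sender could take over as coordinator later (it has a higher join order than the current one).
final class FullMessageBuffer<M> {
    private final Map<ClusterNode, M> pending = new ConcurrentHashMap<>();

    void onMessage(ClusterNode sender, ClusterNode crd, M msg) {
        if (!sender.equals(crd) && sender.order() > crd.order())
            pending.put(sender, msg);
    }
}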
Use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
The class GridDhtPartitionsExchangeFuture, method waitAndReplyToNode.
/**
* @param nodeId Node ID.
* @param msg Client's message.
*/
public void waitAndReplyToNode(final UUID nodeId, final GridDhtPartitionsSingleMessage msg) {
if (log.isDebugEnabled())
log.debug("Single message will be handled on completion of exchange future: " + this);
listen(failureHandlerWrapper(new CI1<IgniteInternalFuture<AffinityTopologyVersion>>() {
@Override
public void apply(IgniteInternalFuture<AffinityTopologyVersion> fut) {
if (cctx.kernalContext().isStopping())
return;
// A cache change failure message has already been sent, so there is no need to create and send GridDhtPartitionsFullMessage.
if (cacheChangeFailureMsgSent)
return;
FinishState finishState0;
synchronized (mux) {
finishState0 = finishState;
}
if (finishState0 == null) {
assert (firstDiscoEvt.type() == EVT_NODE_JOINED && firstDiscoEvt.eventNode().isClient()) : GridDhtPartitionsExchangeFuture.this;
ClusterNode node = cctx.node(nodeId);
if (node == null) {
if (log.isDebugEnabled()) {
log.debug("No node found for nodeId: " + nodeId + ", handling of single message will be stopped: " + msg);
}
return;
}
GridDhtPartitionsFullMessage fullMsg = createPartitionsMessage(true, node.version().compareToIgnoreTimestamp(PARTIAL_COUNTERS_MAP_SINCE) >= 0);
fullMsg.rebalanced(rebalanced());
finishState0 = new FinishState(cctx.localNodeId(), initialVersion(), fullMsg);
}
sendAllPartitionsToNode(finishState0, msg, nodeId);
}
}));
}
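Rather than blocking the thread that received the single message, the method above registers a listener on the exchange future and replies once the exchange completes. A generic sketch of the same deferred-reply pattern using the public IgniteFuture API (replyAction is a hypothetical stand-in for the sendAllPartitionsToNode(...) call):

import org.apache.ignite.lang.IgniteFuture;

// Generic sketch: run the reply action when the future completes instead of blocking on it.
final class DeferredReply {
    static <T> void replyOnCompletion(IgniteFuture<T> fut, Runnable replyAction) {
        fut.listen(f -> replyAction.run()); // invoked once the future finishes
    }
}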