Use of org.apache.ignite.internal.processors.cache.StateChangeRequest in project ignite by apache.
Class GridDhtPartitionsExchangeFuture, method finishExchangeOnCoordinator:
/**
 * @param sndResNodes Additional nodes to send finish message to.
 */
private void finishExchangeOnCoordinator(@Nullable Collection<ClusterNode> sndResNodes) {
    if (isDone() || !enterBusy())
        return;

    try {
        if (!F.isEmpty(exchangeGlobalExceptions) && dynamicCacheStartExchange() && isRollbackSupported()) {
            sendExchangeFailureMessage();

            return;
        }

        AffinityTopologyVersion resTopVer = exchCtx.events().topologyVersion();

        if (log.isInfoEnabled()) {
            log.info("finishExchangeOnCoordinator [topVer=" + initialVersion() + ", resVer=" + resTopVer + ']');
        }

        Map<Integer, CacheGroupAffinityMessage> idealAffDiff = null;

        // Reserve at least 2 threads for system operations.
        int parallelismLvl = U.availableThreadCount(cctx.kernalContext(), GridIoPolicy.SYSTEM_POOL, 2);

        if (exchCtx.mergeExchanges()) {
            synchronized (mux) {
                if (mergedJoinExchMsgs != null) {
                    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : mergedJoinExchMsgs.entrySet()) {
                        msgs.put(e.getKey(), e.getValue());

                        updatePartitionSingleMap(e.getKey(), e.getValue());
                    }
                }
            }

            assert exchCtx.events().hasServerJoin() || exchCtx.events().hasServerLeft();

            exchCtx.events().processEvents(this);

            if (exchCtx.events().hasServerLeft())
                idealAffDiff = cctx.affinity().onServerLeftWithExchangeMergeProtocol(this);
            else
                cctx.affinity().onServerJoinWithExchangeMergeProtocol(this, true);

            doInParallel(
                parallelismLvl,
                cctx.kernalContext().pools().getSystemExecutorService(),
                cctx.affinity().cacheGroups().values(),
                desc -> {
                    if (desc.config().getCacheMode() == CacheMode.LOCAL)
                        return null;

                    CacheGroupContext grp = cctx.cache().cacheGroup(desc.groupId());

                    GridDhtPartitionTopology top = grp != null
                        ? grp.topology()
                        : cctx.exchange().clientTopology(desc.groupId(), events().discoveryCache());

                    top.beforeExchange(this, true, true);

                    return null;
                });
        }

        span.addLog(() -> "Affinity recalculation (crd)");

        timeBag.finishGlobalStage("Affinity recalculation (crd)");

        Map<Integer, CacheGroupAffinityMessage> joinedNodeAff = new ConcurrentHashMap<>(cctx.cache().cacheGroups().size());

        doInParallel(
            parallelismLvl,
            cctx.kernalContext().pools().getSystemExecutorService(),
            msgs.values(),
            msg -> {
                processSingleMessageOnCrdFinish(msg, joinedNodeAff);

                return null;
            });

        timeBag.finishGlobalStage("Collect update counters and create affinity messages");

        if (firstDiscoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
            assert firstDiscoEvt instanceof DiscoveryCustomEvent;

            if (activateCluster() || changedBaseline())
                assignPartitionsStates(null);

            DiscoveryCustomMessage discoveryCustomMessage = ((DiscoveryCustomEvent)firstDiscoEvt).customMessage();

            if (discoveryCustomMessage instanceof DynamicCacheChangeBatch) {
                if (exchActions != null) {
                    Set<String> caches = exchActions.cachesToResetLostPartitions();

                    if (!F.isEmpty(caches))
                        resetLostPartitions(caches);

                    Set<Integer> cacheGroupsToResetOwners = concat(
                        exchActions.cacheGroupsToStart().stream().map(grp -> grp.descriptor().groupId()),
                        exchActions.cachesToResetLostPartitions().stream().map(CU::cacheId))
                        .collect(Collectors.toSet());

                    assignPartitionsStates(cacheGroupsToResetOwners);
                }
            }
            else if (discoveryCustomMessage instanceof SnapshotDiscoveryMessage
                && ((SnapshotDiscoveryMessage)discoveryCustomMessage).needAssignPartitions()) {
                markAffinityReassign();

                assignPartitionsStates(null);
            }
        }
        else if (exchCtx.events().hasServerJoin())
            assignPartitionsStates(null);
        else if (exchCtx.events().hasServerLeft())
            assignPartitionsStates(emptySet());

        // Validation should happen after resetting owners to avoid false desync reporting.
        validatePartitionsState();

        // Recalculate new affinity based on partitions availability.
        if (!exchCtx.mergeExchanges() && forceAffReassignment) {
            idealAffDiff = cctx.affinity().onCustomEventWithEnforcedAffinityReassignment(this);

            timeBag.finishGlobalStage("Ideal affinity diff calculation (enforced)");
        }

        for (CacheGroupContext grpCtx : cctx.cache().cacheGroups()) {
            if (!grpCtx.isLocal())
                grpCtx.topology().applyUpdateCounters();
        }

        timeBag.finishGlobalStage("Apply update counters");

        updateLastVersion(cctx.versions().last());

        cctx.versions().onExchange(lastVer.get().order());

        IgniteProductVersion minVer = exchCtx.events().discoveryCache().minimumNodeVersion();

        GridDhtPartitionsFullMessage msg = createPartitionsMessage(true,
            minVer.compareToIgnoreTimestamp(PARTIAL_COUNTERS_MAP_SINCE) >= 0);

        if (!cctx.affinity().rebalanceRequired() && !deactivateCluster())
            msg.rebalanced(true);

        if (exchCtx.mergeExchanges()) {
            assert !centralizedAff;

            msg.resultTopologyVersion(resTopVer);

            if (exchCtx.events().hasServerLeft())
                msg.idealAffinityDiff(idealAffDiff);
        }
        else if (forceAffReassignment)
            msg.idealAffinityDiff(idealAffDiff);

        msg.prepareMarshal(cctx);

        timeBag.finishGlobalStage("Full message preparing");

        synchronized (mux) {
            finishState = new FinishState(crd.id(), resTopVer, msg);

            state = ExchangeLocalState.DONE;
        }

        if (centralizedAff) {
            assert !exchCtx.mergeExchanges();

            IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>> fut = cctx.affinity().initAffinityOnNodeLeft(this);

            if (!fut.isDone())
                fut.listen(this::onAffinityInitialized);
            else
                onAffinityInitialized(fut);
        }
        else {
            Set<ClusterNode> nodes;

            Map<UUID, GridDhtPartitionsSingleMessage> mergedJoinExchMsgs0;

            synchronized (mux) {
                srvNodes.remove(cctx.localNode());

                nodes = new LinkedHashSet<>(srvNodes);

                mergedJoinExchMsgs0 = mergedJoinExchMsgs;

                if (mergedJoinExchMsgs != null) {
                    for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : mergedJoinExchMsgs.entrySet()) {
                        if (e.getValue() != null) {
                            ClusterNode node = cctx.discovery().node(e.getKey());

                            if (node != null)
                                nodes.add(node);
                        }
                    }
                }
                else
                    mergedJoinExchMsgs0 = Collections.emptyMap();

                if (!F.isEmpty(sndResNodes))
                    nodes.addAll(sndResNodes);
            }

            if (msg.rebalanced())
                markRebalanced();

            if (!nodes.isEmpty())
                sendAllPartitions(msg, nodes, mergedJoinExchMsgs0, joinedNodeAff);

            timeBag.finishGlobalStage("Full message sending");

            discoveryLag = calculateDiscoveryLag(msgs, mergedJoinExchMsgs0);

            if (!stateChangeExchange())
                onDone(exchCtx.events().topologyVersion(), null);

            for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> e : pendingSingleMsgs.entrySet()) {
                if (log.isInfoEnabled()) {
                    log.info("Process pending message on coordinator [node=" + e.getKey() +
                        ", ver=" + initialVersion() + ", resVer=" + resTopVer + ']');
                }

                processSingleMessage(e.getKey(), e.getValue());
            }
        }

        if (stateChangeExchange()) {
            StateChangeRequest req = exchActions.stateChangeRequest();

            assert req != null : exchActions;

            boolean stateChangeErr = false;

            if (!F.isEmpty(exchangeGlobalExceptions)) {
                stateChangeErr = true;

                cctx.kernalContext().state().onStateChangeError(exchangeGlobalExceptions, req);
            }
            else {
                boolean hasMoving = !partsToReload.isEmpty();

                Set<Integer> waitGrps = cctx.affinity().waitGroups();

                if (!hasMoving) {
                    for (CacheGroupContext grpCtx : cctx.cache().cacheGroups()) {
                        if (waitGrps.contains(grpCtx.groupId()) && grpCtx.topology().hasMovingPartitions()) {
                            hasMoving = true;

                            break;
                        }
                    }
                }

                cctx.kernalContext().state().onExchangeFinishedOnCoordinator(this, hasMoving);
            }

            if (!cctx.kernalContext().state().clusterState().localBaselineAutoAdjustment()) {
                ClusterState state = stateChangeErr ? ClusterState.INACTIVE : req.state();

                ChangeGlobalStateFinishMessage stateFinishMsg =
                    new ChangeGlobalStateFinishMessage(req.requestId(), state, !stateChangeErr);

                cctx.discovery().sendCustomEvent(stateFinishMsg);
            }

            timeBag.finishGlobalStage("State finish message sending");

            if (!centralizedAff)
                onDone(exchCtx.events().topologyVersion(), null);
        }

        // Try to switch late affinity right now if the exchange has completed normally.
        if (!centralizedAff && isDone() && error() == null && !cctx.kernalContext().isStopping())
            cctx.exchange().checkRebalanceState();
    }
    catch (IgniteCheckedException e) {
        if (reconnectOnError(e))
            onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
        else
            onDone(e);
    }
    finally {
        leaveBusy();
    }
}
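The two doInParallel calls above fan work out over the system pool while a parallelism level computed by U.availableThreadCount leaves threads free for other system operations. doInParallel is an internal Ignite utility, so the sketch below is a minimal, self-contained approximation of the same pattern in plain java.util.concurrent; the class and method names here are illustrative only, not the actual Ignite API:

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.*;
import java.util.function.Consumer;

public final class ParallelFanOut {
    /**
     * Runs a task for each element on the given executor, capping in-flight
     * tasks at parallelismLvl, roughly mirroring how finishExchangeOnCoordinator
     * processes cache groups and single messages in parallel.
     */
    public static <T> void doInParallel(int parallelismLvl, ExecutorService exec,
        Collection<T> items, Consumer<T> task) throws Exception {
        Semaphore permits = new Semaphore(parallelismLvl);
        List<Future<?>> futs = new ArrayList<>(items.size());

        for (T item : items) {
            permits.acquire(); // Cap concurrency at the requested level.

            futs.add(exec.submit(() -> {
                try {
                    task.accept(item);
                }
                finally {
                    permits.release();
                }
            }));
        }

        for (Future<?> fut : futs)
            fut.get(); // Join all tasks; the first failure surfaces as ExecutionException.
    }
}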
Use of org.apache.ignite.internal.processors.cache.StateChangeRequest in project ignite by apache.
Class GridClusterStateProcessor, method autoAdjustExchangeActions:
/**
 * Adds a fake state change request to the exchange actions if the cluster is not persistent and baseline
 * auto-adjustment is enabled with zero timeout.
 *
 * @param exchActs Current exchange actions.
 * @return New exchange actions.
 */
public ExchangeActions autoAdjustExchangeActions(ExchangeActions exchActs) {
    DiscoveryDataClusterState clusterState = globalState;

    if (clusterState.localBaselineAutoAdjustment()) {
        BaselineTopology blt = clusterState.baselineTopology();

        ChangeGlobalStateMessage msg = new ChangeGlobalStateMessage(
            UUID.randomUUID(),
            ctx.localNodeId(),
            null,
            clusterState.state().active() ? clusterState.state() : ACTIVE,
            true,
            blt,
            true,
            System.currentTimeMillis()
        );

        StateChangeRequest stateChangeReq =
            new StateChangeRequest(msg, BaselineTopologyHistoryItem.fromBaseline(blt), msg.state(), null);

        if (exchActs == null)
            exchActs = new ExchangeActions();

        exchActs.stateChangeRequest(stateChangeReq);
    }

    return exchActs;
}
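Note that the method is null-tolerant: a caller can pass in whatever exchange actions it already has, or none at all, and always gets back a usable object carrying the synthetic request. A minimal sketch of that create-if-absent-and-decorate shape, with hypothetical Actions and Request classes standing in for the Ignite types:

/** Hypothetical stand-ins for ExchangeActions / StateChangeRequest. */
final class Actions {
    private Request stateChangeReq;

    void stateChangeRequest(Request req) {
        this.stateChangeReq = req;
    }
}

final class Request { }

final class AutoAdjust {
    /** Decorates the given actions with a synthetic request, creating the container if needed. */
    static Actions decorate(Actions acts, boolean autoAdjustEnabled) {
        if (autoAdjustEnabled) {
            if (acts == null)
                acts = new Actions(); // Create-if-absent, as autoAdjustExchangeActions does.

            acts.stateChangeRequest(new Request()); // Inject the synthetic ("fake") request.
        }

        return acts; // May still be null when no adjustment applies.
    }
}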
Use of org.apache.ignite.internal.processors.cache.StateChangeRequest in project ignite by apache.
Class GridClusterStateProcessor, method onStateChangeMessage:
/** {@inheritDoc} */
@Override public boolean onStateChangeMessage(AffinityTopologyVersion topVer, ChangeGlobalStateMessage msg, DiscoCache discoCache) {
    DiscoveryDataClusterState state = globalState;

    if (log.isInfoEnabled()) {
        String baseline = msg.baselineTopology() == null ? ": null" : " [id=" + msg.baselineTopology().id() + ']';

        U.log(log, "Received " + prettyStr(msg.state()) + " request with BaselineTopology" + baseline +
            ", initiator node ID: " + msg.initiatorNodeId());
    }

    if (msg.baselineTopology() != null)
        compatibilityMode = false;

    if (state.transition()) {
        if (isApplicable(msg, state)) {
            GridChangeGlobalStateFuture fut = changeStateFuture(msg);

            if (fut != null)
                fut.onDone(concurrentStateChangeError(msg.state(), state.state()));
        }
        else {
            final GridChangeGlobalStateFuture stateFut = changeStateFuture(msg);

            GridFutureAdapter<Void> transitionFut = transitionFuts.get(state.transitionRequestId());

            if (stateFut != null && transitionFut != null) {
                transitionFut.listen(new IgniteInClosure<IgniteInternalFuture<Void>>() {
                    @Override public void apply(IgniteInternalFuture<Void> fut) {
                        try {
                            fut.get();

                            stateFut.onDone();
                        }
                        catch (Exception ex) {
                            stateFut.onDone(ex);
                        }
                    }
                });
            }
        }
    }
    else if (isApplicable(msg, state)) {
        if (msg.state() == INACTIVE && !msg.forceDeactivation() &&
            allNodesSupports(ctx.discovery().serverNodes(topVer), SAFE_CLUSTER_DEACTIVATION)) {
            List<String> inMemCaches = listInMemoryUserCaches();

            if (!inMemCaches.isEmpty()) {
                GridChangeGlobalStateFuture stateFut = changeStateFuture(msg);

                if (stateFut != null) {
                    stateFut.onDone(new IgniteException(DATA_LOST_ON_DEACTIVATION_WARNING +
                        " In-memory caches: " + inMemCaches + ". To deactivate the cluster, pass the '--force' flag."));
                }

                return false;
            }
        }

        ExchangeActions exchangeActions;

        try {
            exchangeActions = ctx.cache().onStateChangeRequest(msg, topVer, state);
        }
        catch (IgniteCheckedException e) {
            GridChangeGlobalStateFuture fut = changeStateFuture(msg);

            if (fut != null)
                fut.onDone(e);

            return false;
        }

        Set<UUID> nodeIds = U.newHashSet(discoCache.allNodes().size());

        for (ClusterNode node : discoCache.allNodes())
            nodeIds.add(node.id());

        GridChangeGlobalStateFuture fut = changeStateFuture(msg);

        if (fut != null)
            fut.setRemaining(nodeIds, topVer.nextMinorVersion());

        if (log.isInfoEnabled())
            log.info("Started state transition: " + prettyStr(msg.state()));

        BaselineTopologyHistoryItem bltHistItem = BaselineTopologyHistoryItem.fromBaseline(state.baselineTopology());

        transitionFuts.put(msg.requestId(), new GridFutureAdapter<Void>());

        DiscoveryDataClusterState newState = globalState = DiscoveryDataClusterState.createTransitionState(
            msg.state(),
            state,
            activate(state.state(), msg.state()) || msg.forceChangeBaselineTopology()
                ? msg.baselineTopology()
                : state.baselineTopology(),
            msg.requestId(),
            topVer,
            nodeIds
        );

        ctx.durableBackgroundTask().onStateChangeStarted(msg);

        if (msg.forceChangeBaselineTopology())
            newState.setTransitionResult(msg.requestId(), msg.state());

        AffinityTopologyVersion stateChangeTopVer = topVer.nextMinorVersion();

        StateChangeRequest req = new StateChangeRequest(msg, bltHistItem, state.state(), stateChangeTopVer);

        exchangeActions.stateChangeRequest(req);

        msg.exchangeActions(exchangeActions);

        if (newState.state() != state.state()) {
            if (ctx.event().isRecordable(EventType.EVT_CLUSTER_STATE_CHANGE_STARTED)) {
                ctx.pools().getStripedExecutorService().execute(() -> ctx.event().record(new ClusterStateChangeStartedEvent(
                    state.state(),
                    newState.state(),
                    ctx.discovery().localNode(),
                    "Cluster state change started."
                )));
            }
        }

        return true;
    }
    else {
        // State already changed.
        GridChangeGlobalStateFuture stateFut = changeStateFuture(msg);

        if (stateFut != null)
            stateFut.onDone();
    }

    return false;
}
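When a transition is already in progress and the new message is not applicable, the code parks the incoming state future behind the in-flight transition via transitionFut.listen(...). The Ignite future types here are internal, so the sketch below expresses the same chaining with java.util.concurrent.CompletableFuture as an illustrative equivalent, not the actual API:

import java.util.concurrent.CompletableFuture;

public final class TransitionChaining {
    /**
     * Completes stateFut when the in-flight transition finishes, propagating
     * failure, just like the anonymous IgniteInClosure above.
     */
    static void chain(CompletableFuture<Void> transitionFut, CompletableFuture<Void> stateFut) {
        transitionFut.whenComplete((res, err) -> {
            if (err != null)
                stateFut.completeExceptionally(err); // Mirrors stateFut.onDone(ex).
            else
                stateFut.complete(null);             // Mirrors stateFut.onDone().
        });
    }

    public static void main(String[] args) {
        CompletableFuture<Void> transition = new CompletableFuture<>();
        CompletableFuture<Void> state = new CompletableFuture<>();

        chain(transition, state);

        transition.complete(null); // Finishing the transition releases the waiter.

        System.out.println("state future done: " + state.isDone()); // Prints true.
    }
}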
Use of org.apache.ignite.internal.processors.cache.StateChangeRequest in project ignite by apache.
Class GridDhtPartitionsExchangeFuture, method onClusterStateChangeRequest:
/**
 * @param crd Coordinator flag.
 * @return Exchange type.
 */
private ExchangeType onClusterStateChangeRequest(boolean crd) {
    assert exchActions != null && !exchActions.empty() : this;

    StateChangeRequest req = exchActions.stateChangeRequest();

    assert req != null : exchActions;

    GridKernalContext kctx = cctx.kernalContext();

    DiscoveryDataClusterState state = kctx.state().clusterState();

    if (state.transitionError() != null)
        exchangeLocE = state.transitionError();

    if (req.activeChanged()) {
        if (req.state().active()) {
            if (log.isInfoEnabled()) {
                log.info("Start activation process [nodeId=" + cctx.localNodeId() +
                    ", client=" + kctx.clientNode() +
                    ", topVer=" + initialVersion() + "]. New state: " + req.state());
            }

            try {
                cctx.exchange().exchangerBlockingSectionBegin();

                try {
                    cctx.activate();
                }
                finally {
                    cctx.exchange().exchangerBlockingSectionEnd();
                }

                assert registerCachesFuture == null : "No caches registration should be scheduled before new caches have started.";

                cctx.exchange().exchangerBlockingSectionBegin();

                try {
                    registerCachesFuture = cctx.affinity().onCacheChangeRequest(this, crd, exchActions);

                    if (!kctx.clientNode())
                        cctx.cache().shutdownNotFinishedRecoveryCaches();
                }
                finally {
                    cctx.exchange().exchangerBlockingSectionEnd();
                }

                if (log.isInfoEnabled()) {
                    log.info("Successfully activated caches [nodeId=" + cctx.localNodeId() +
                        ", client=" + kctx.clientNode() +
                        ", topVer=" + initialVersion() +
                        ", newState=" + req.state() + "]");
                }
            }
            catch (Exception e) {
                U.error(log, "Failed to activate node components [nodeId=" + cctx.localNodeId() +
                    ", client=" + kctx.clientNode() +
                    ", topVer=" + initialVersion() +
                    ", newState=" + req.state() + "]", e);

                exchangeLocE = e;

                if (crd) {
                    cctx.exchange().exchangerBlockingSectionBegin();

                    try {
                        synchronized (mux) {
                            exchangeGlobalExceptions.put(cctx.localNodeId(), e);
                        }
                    }
                    finally {
                        cctx.exchange().exchangerBlockingSectionEnd();
                    }
                }
            }
        }
        else {
            if (log.isInfoEnabled()) {
                log.info("Start deactivation process [nodeId=" + cctx.localNodeId() +
                    ", client=" + kctx.clientNode() +
                    ", topVer=" + initialVersion() + "]");
            }

            cctx.exchange().exchangerBlockingSectionBegin();

            try {
                kctx.dataStructures().onDeActivate(kctx);

                assert registerCachesFuture == null : "No caches registration should be scheduled before new caches have started.";

                registerCachesFuture = cctx.affinity().onCacheChangeRequest(this, crd, exchActions);

                kctx.encryption().onDeActivate(kctx);

                ((IgniteChangeGlobalStateSupport)kctx.distributedMetastorage()).onDeActivate(kctx);

                if (log.isInfoEnabled()) {
                    log.info("Successfully deactivated data structures, services and caches [nodeId=" + cctx.localNodeId() +
                        ", client=" + kctx.clientNode() +
                        ", topVer=" + initialVersion() + "]");
                }
            }
            catch (Exception e) {
                U.error(log, "Failed to deactivate node components [nodeId=" + cctx.localNodeId() +
                    ", client=" + kctx.clientNode() +
                    ", topVer=" + initialVersion() + "]", e);

                exchangeLocE = e;
            }
            finally {
                cctx.exchange().exchangerBlockingSectionEnd();
            }
        }
    }
    else if (req.state().active()) {
        cctx.exchange().exchangerBlockingSectionBegin();

        // TODO: BLT changes on an inactive cluster can't be handled easily because persistent storage hasn't been initialized yet.
        try {
            if (!forceAffReassignment) {
                // Possible only if the cluster contains nodes without forceAffReassignment mode.
                assert firstEventCache().minimumNodeVersion().compareToIgnoreTimestamp(FORCE_AFF_REASSIGNMENT_SINCE) < 0
                    : firstEventCache().minimumNodeVersion();

                cctx.affinity().onBaselineTopologyChanged(this, crd);
            }

            if (CU.isPersistenceEnabled(kctx.config()) && !kctx.clientNode())
                kctx.state().onBaselineTopologyChanged(req.baselineTopology(), req.prevBaselineTopologyHistoryItem());
        }
        catch (Exception e) {
            U.error(log, "Failed to change baseline topology [nodeId=" + cctx.localNodeId() +
                ", client=" + kctx.clientNode() +
                ", topVer=" + initialVersion() + "]", e);

            exchangeLocE = e;
        }
        finally {
            cctx.exchange().exchangerBlockingSectionEnd();
        }
    }

    return kctx.clientNode() ? ExchangeType.CLIENT : ExchangeType.ALL;
}
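The repeated exchangerBlockingSectionBegin()/exchangerBlockingSectionEnd() pairs bracket potentially long operations so the exchange worker is not misreported as hung while it legitimately blocks. The begin/end-in-try/finally discipline generalizes to a small helper; below is a hedged sketch with a hypothetical BlockingSection interface (not an Ignite API) standing in for the exchange manager:

/** Hypothetical guard marking a section where blocking is expected. */
interface BlockingSection {
    void begin();

    void end();
}

final class BlockingSections {
    /** Runs the action inside a begin/end bracket, guaranteeing end() even on failure. */
    static void runBlocking(BlockingSection section, Runnable action) {
        section.begin();

        try {
            action.run();
        }
        finally {
            section.end(); // Always unwind, mirroring the try/finally pairs above.
        }
    }
}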