use of org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState in project ignite by apache.
the class GridDiscoveryManager method topologySnapshotMessage.
/**
 * Builds the topology snapshot summary line and passes it to the given logging closure. Additionally reports
 * a coordinator change and the baseline / auto-activation status when the corresponding conditions hold.
 *
 * @param clo Wrapper of logger.
 * @param topVer Topology version.
 * @param discoCache Discovery cache.
 * @param evtType Event type.
 * @param evtNode Event node.
 * @param srvNodesNum Server nodes number.
 * @param clientNodesNum Client nodes number.
 * @param totalCpus Total cpu number.
 * @param heap Heap size.
 * @param offheap Offheap size.
 * @param needNodesDetails Flag for additional alive nodes logging.
 */
private void topologySnapshotMessage(IgniteClosure<String, Void> clo, long topVer, DiscoCache discoCache, int evtType, ClusterNode evtNode, int srvNodesNum, int clientNodesNum, int totalCpus, double heap, double offheap, boolean needNodesDetails) {
    DiscoveryDataClusterState state = discoCache.state();

    SB summary = new SB(PREFIX);

    summary.a(" [");
    summary.a(discoOrdered ? "ver=" + topVer + ", " : "");
    summary.a("locNode=").a(U.id8(discoCache.localNode().id()));
    summary.a(", servers=").a(srvNodesNum);
    summary.a(", clients=").a(clientNodesNum);
    summary.a(", state=").a(state.active() ? "ACTIVE" : "INACTIVE");
    summary.a(", CPUs=").a(totalCpus);
    summary.a(", offheap=").a(offheap).a("GB");
    summary.a(", heap=").a(heap).a("GB");

    if ((evtType == EVT_NODE_JOINED || evtType == EVT_NODE_LEFT || evtType == EVT_NODE_FAILED) && needNodesDetails) {
        summary.a(", aliveNodes=[");

        boolean anyAlive = false;

        for (ClusterNode clusterNode : discoCache.allNodes()) {
            if (discoCache.alive(clusterNode.id())) {
                summary.a(nodeDescription(clusterNode)).a(", ");

                anyAlive = true;
            }
        }

        // Cut the trailing ", " only if something was appended; otherwise the cut would eat the "=[" opener.
        if (anyAlive)
            summary.setLength(summary.length() - 2);

        summary.a(']');
    }

    summary.a(']');

    clo.apply(summary.toString());

    ClusterNode currCrd = discoCache.oldestServerNode();

    // A departed server node with a lower order than the current oldest server node is reported as the previous coordinator.
    if ((evtType == EventType.EVT_NODE_FAILED || evtType == EventType.EVT_NODE_LEFT) && currCrd != null && currCrd.order() > evtNode.order() && !evtNode.isClient() && !evtNode.isDaemon())
        clo.apply("Coordinator changed [prev=" + evtNode + ", cur=" + currCrd + "]");

    BaselineTopology blt = state.baselineTopology();

    if (blt != null && discoCache.baselineNodes() != null) {
        int bltSize = discoCache.baselineNodes().size();
        int bltOnline = discoCache.aliveBaselineNodes().size();
        int bltOffline = bltSize - bltOnline;

        clo.apply(" ^-- Baseline [id=" + blt.id() + ", size=" + bltSize + ", online=" + bltOnline + ", offline=" + bltOffline + ']');

        ClusterState targetState = ctx.config().getClusterStateOnStart();

        // No explicit state on start configured: derive the target state from the auto-activation flag.
        if (targetState == null)
            targetState = ctx.config().isAutoActivationEnabled() ? ACTIVE : INACTIVE;

        if (!state.state().active() && targetState.active()) {
            String offlineConsistentIds = "";

            // Consistent IDs of the missing nodes are listed only when there are at most five of them.
            if (bltOffline > 0 && bltOffline <= 5) {
                Collection<BaselineNode> offlineNodes = new HashSet<>(discoCache.baselineNodes());

                offlineNodes.removeAll(discoCache.aliveBaselineNodes());

                offlineConsistentIds = ' ' + F.nodeConsistentIds(offlineNodes).toString();
            }

            if (bltOffline == 0) {
                if (evtType == EVT_NODE_JOINED && discoCache.baselineNode(evtNode))
                    clo.apply(" ^-- All baseline nodes are online, will start auto-activation");
            }
            else
                clo.apply(" ^-- " + bltOffline + " nodes left for auto-activation" + offlineConsistentIds);
        }
    }
}
use of org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState in project ignite by apache.
the class GridCachePartitionExchangeManager method onClusterStateChangeFinish.
/**
 * Records cluster activation, deactivation and state change events for the given exchange actions (each one
 * only if the event type is recordable) and, when the baseline is changing, a baseline changed event.
 * Event recording is submitted to executor services rather than performed in the calling thread.
 *
 * @param fut Exchange future (not used by this method).
 * @param exchActions Exchange actions, must not be {@code null}.
 * @param baselineChanging Whether a baseline changed event should be recorded.
 */
private void onClusterStateChangeFinish(IgniteInternalFuture<AffinityTopologyVersion> fut, ExchangeActions exchActions, boolean baselineChanging) {
    A.notNull(exchActions, "exchActions");

    GridEventStorageManager evtMngr = cctx.kernalContext().event();

    if (exchActions.activate() && evtMngr.isRecordable(EVT_CLUSTER_ACTIVATED) || exchActions.deactivate() && evtMngr.isRecordable(EVT_CLUSTER_DEACTIVATED) || exchActions.changedClusterState() && evtMngr.isRecordable(EVT_CLUSTER_STATE_CHANGED)) {
        List<Event> evts = new ArrayList<>(2);

        ClusterNode locNode = cctx.kernalContext().discovery().localNode();

        Collection<BaselineNode> bltNodes = cctx.kernalContext().cluster().get().currentBaselineTopology();

        // Tracks whether the baseline collection instance has already been handed to an event.
        boolean collectionUsed = false;

        if (exchActions.activate() && evtMngr.isRecordable(EVT_CLUSTER_ACTIVATED)) {
            assert !exchActions.deactivate() : exchActions;

            collectionUsed = true;

            evts.add(new ClusterActivationEvent(locNode, "Cluster activated.", EVT_CLUSTER_ACTIVATED, bltNodes));
        }

        if (exchActions.deactivate() && evtMngr.isRecordable(EVT_CLUSTER_DEACTIVATED)) {
            assert !exchActions.activate() : exchActions;

            collectionUsed = true;

            evts.add(new ClusterActivationEvent(locNode, "Cluster deactivated.", EVT_CLUSTER_DEACTIVATED, bltNodes));
        }

        if (exchActions.changedClusterState() && evtMngr.isRecordable(EVT_CLUSTER_STATE_CHANGED)) {
            StateChangeRequest req = exchActions.stateChangeRequest();

            // Give the state change event its own copy if another event already holds the collection.
            if (collectionUsed && bltNodes != null)
                bltNodes = new ArrayList<>(bltNodes);

            evts.add(new ClusterStateChangeEvent(req.prevState(), req.state(), bltNodes, locNode, "Cluster state changed."));
        }

        A.notEmpty(evts, "events " + exchActions);

        cctx.kernalContext().pools().getSystemExecutorService().submit(() -> evts.forEach(e -> cctx.kernalContext().event().record(e)));
    }

    GridKernalContext ctx = cctx.kernalContext();

    if (baselineChanging) {
        ctx.pools().getStripedExecutorService().execute(new Runnable() {
            @Override
            public void run() {
                if (ctx.event().isRecordable(EventType.EVT_BASELINE_CHANGED)) {
                    ctx.event().record(new BaselineChangedEvent(ctx.discovery().localNode(), "Baseline changed.", EventType.EVT_BASELINE_CHANGED, ctx.cluster().get().currentBaselineTopology()));
                }
            }
        });
    }
}
use of org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState in project ignite by apache.
the class IgniteSnapshotManager method createSnapshot.
/**
 * {@inheritDoc}
 */
@Override
public IgniteFuture<Void> createSnapshot(String name) {
    A.notNullOrEmpty(name, "Snapshot name cannot be null or empty.");
    A.ensure(U.alphanumericUnderscore(name), "Snapshot name must satisfy the following name pattern: a-zA-Z0-9_");

    try {
        cctx.kernalContext().security().authorize(ADMIN_SNAPSHOT);

        if (!IgniteFeatures.allNodesSupports(cctx.discovery().aliveServerNodes(), PERSISTENCE_CACHE_SNAPSHOT))
            throw new IgniteException("Not all nodes in the cluster support a snapshot operation.");

        if (!CU.isPersistenceEnabled(cctx.gridConfig())) {
            throw new IgniteException("Create snapshot request has been rejected. Snapshots on an in-memory " + "clusters are not allowed.");
        }

        // Read the cluster state once so that the activity check, the baseline check and the baseline
        // filter of the snapshot request below all work with the same state object.
        DiscoveryDataClusterState clusterState = cctx.kernalContext().state().clusterState();

        if (!clusterState.state().active())
            throw new IgniteException("Snapshot operation has been rejected. The cluster is inactive.");

        if (!clusterState.hasBaselineTopology())
            throw new IgniteException("Snapshot operation has been rejected. The baseline topology is not configured for cluster.");

        // On a client node the request is forwarded to the oldest alive server node.
        if (cctx.kernalContext().clientNode()) {
            ClusterNode crd = U.oldest(cctx.kernalContext().discovery().aliveServerNodes(), null);

            if (crd == null)
                throw new IgniteException("There is no alive server nodes in the cluster");

            return new IgniteSnapshotFutureImpl(cctx.kernalContext().closure().callAsyncNoFailover(BALANCE, new CreateSnapshotCallable(name), Collections.singletonList(crd), false, 0, true));
        }

        ClusterSnapshotFuture snpFut0;

        // Concurrent operation checks and registration of the new future are done under the same monitor.
        synchronized (snpOpMux) {
            if (clusterSnpFut != null && !clusterSnpFut.isDone()) {
                throw new IgniteException("Create snapshot request has been rejected. The previous snapshot operation was not completed.");
            }

            if (clusterSnpReq != null)
                throw new IgniteException("Create snapshot request has been rejected. Parallel snapshot processes are not allowed.");

            if (localSnapshotNames().contains(name)) {
                throw new IgniteException("Create snapshot request has been rejected. Snapshot with given name already exists on local node.");
            }

            if (isRestoring()) {
                throw new IgniteException("Snapshot operation has been rejected. Cache group restore operation is currently in progress.");
            }

            snpFut0 = new ClusterSnapshotFuture(UUID.randomUUID(), name);

            clusterSnpFut = snpFut0;
            lastSeenSnpFut = snpFut0;
        }

        // Names of persistent user cache groups, plus the metastorage cache name.
        List<String> grps = cctx.cache().persistentGroups().stream().filter(g -> cctx.cache().cacheType(g.cacheOrGroupName()) == CacheType.USER).map(CacheGroupDescriptor::cacheOrGroupName).collect(Collectors.toList());

        grps.add(METASTORAGE_CACHE_NAME);

        List<ClusterNode> srvNodes = cctx.discovery().serverNodes(AffinityTopologyVersion.NONE);

        snpFut0.listen(f -> {
            if (f.error() == null)
                recordSnapshotEvent(name, SNAPSHOT_FINISHED_MSG + grps, EVT_CLUSTER_SNAPSHOT_FINISHED);
            else
                recordSnapshotEvent(name, SNAPSHOT_FAILED_MSG + f.error().getMessage(), EVT_CLUSTER_SNAPSHOT_FAILED);
        });

        // The request carries IDs of those server nodes that belong to the baseline of the state read above.
        startSnpProc.start(snpFut0.rqId, new SnapshotOperationRequest(snpFut0.rqId, cctx.localNodeId(), name, grps, new HashSet<>(F.viewReadOnly(srvNodes, F.node2id(), (node) -> CU.baselineNode(node, clusterState)))));

        String msg = "Cluster-wide snapshot operation started [snpName=" + name + ", grps=" + grps + ']';

        recordSnapshotEvent(name, msg, EVT_CLUSTER_SNAPSHOT_STARTED);

        if (log.isInfoEnabled())
            log.info(msg);

        return new IgniteFutureImpl<>(snpFut0);
    }
    catch (Exception e) {
        recordSnapshotEvent(name, SNAPSHOT_FAILED_MSG + e.getMessage(), EVT_CLUSTER_SNAPSHOT_FAILED);

        U.error(log, SNAPSHOT_FAILED_MSG, e);

        lastSeenSnpFut = new ClusterSnapshotFuture(name, e);

        return new IgniteFinishedFutureImpl<>(e);
    }
}
use of org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState in project ignite by apache.
the class SnapshotRestoreProcess method start.
/**
 * Launches a cache group restore from the given snapshot. Preconditions are validated first; a violation is
 * reported through a restore-failed event and an already finished future. Otherwise the snapshot is checked
 * and, if the check yields no errors, the prepare phase of the restore is started.
 *
 * @param snpName Snapshot name.
 * @param cacheGrpNames Cache groups to be restored or {@code null} to restore all cache groups from the snapshot.
 * @return Future that will be completed when the restore operation is complete and the cache groups are started.
 */
public IgniteFuture<Void> start(String snpName, @Nullable Collection<String> cacheGrpNames) {
    IgniteSnapshotManager snapshotMgr = ctx.cache().context().snapshotMgr();

    ClusterSnapshotFuture restoreFut;

    try {
        if (ctx.clientNode())
            throw new IgniteException(OP_REJECT_MSG + "Client and daemon nodes can not perform this operation.");

        DiscoveryDataClusterState curState = ctx.state().clusterState();

        if (curState.state() != ClusterState.ACTIVE || curState.transition())
            throw new IgniteException(OP_REJECT_MSG + "The cluster should be active.");

        if (!curState.hasBaselineTopology())
            throw new IgniteException(OP_REJECT_MSG + "The baseline topology is not configured for cluster.");

        if (!IgniteFeatures.allNodesSupports(ctx.grid().cluster().nodes(), SNAPSHOT_RESTORE_CACHE_GROUP))
            throw new IgniteException(OP_REJECT_MSG + "Not all nodes in the cluster support restore operation.");

        if (snapshotMgr.isSnapshotCreating())
            throw new IgniteException(OP_REJECT_MSG + "A cluster snapshot operation is in progress.");

        // Only one restore may be registered at a time.
        synchronized (this) {
            if (restoringSnapshotName() != null)
                throw new IgniteException(OP_REJECT_MSG + "The previous snapshot restore operation was not completed.");

            restoreFut = new ClusterSnapshotFuture(UUID.randomUUID(), snpName);

            fut = restoreFut;
        }
    }
    catch (IgniteException rejected) {
        snapshotMgr.recordSnapshotEvent(snpName, OP_FAILED_MSG + ": " + rejected.getMessage(), EventType.EVT_CLUSTER_SNAPSHOT_RESTORE_FAILED);

        return new IgniteFinishedFutureImpl<>(rejected);
    }

    // Report the outcome of the whole operation as an event.
    restoreFut.listen(done -> {
        if (done.error() == null) {
            snapshotMgr.recordSnapshotEvent(snpName, OP_FINISHED_MSG + " [reqId=" + restoreFut.rqId + "].", EventType.EVT_CLUSTER_SNAPSHOT_RESTORE_FINISHED);
        }
        else {
            snapshotMgr.recordSnapshotEvent(snpName, OP_FAILED_MSG + ": " + done.error().getMessage() + " [reqId=" + restoreFut.rqId + "].", EventType.EVT_CLUSTER_SNAPSHOT_RESTORE_FAILED);
        }
    });

    String cachesInfo = cacheGrpNames == null ? "" : ", caches=" + cacheGrpNames;

    String startedMsg = "Cluster-wide snapshot restore operation started [reqId=" + restoreFut.rqId + ", snpName=" + snpName + cachesInfo + ']';

    if (log.isInfoEnabled())
        log.info(startedMsg);

    snapshotMgr.recordSnapshotEvent(snpName, startedMsg, EventType.EVT_CLUSTER_SNAPSHOT_RESTORE_STARTED);

    snapshotMgr.checkSnapshot(snpName, cacheGrpNames, true).listen(checkFut -> {
        // Any failure of the check, or an interruption of the operation, finishes the process with that error.
        if (checkFut.error() != null) {
            finishProcess(restoreFut.rqId, checkFut.error());

            return;
        }

        if (!F.isEmpty(checkFut.result().exceptions())) {
            finishProcess(restoreFut.rqId, F.first(checkFut.result().exceptions().values()));

            return;
        }

        if (restoreFut.interruptEx != null) {
            finishProcess(restoreFut.rqId, restoreFut.interruptEx);

            return;
        }

        Set<UUID> nodesWithData = new HashSet<>();
        Set<String> snapshotBaseline = null;

        Map<ClusterNode, List<SnapshotMetadata>> metasByNode = checkFut.result().metas();

        // Requested groups keyed by cache ID; entries are removed as the groups are found in the metadata.
        Map<Integer, String> notFoundGrps;

        if (cacheGrpNames == null)
            notFoundGrps = Collections.emptyMap();
        else
            notFoundGrps = cacheGrpNames.stream().collect(Collectors.toMap(CU::cacheId, grpName -> grpName));

        for (Map.Entry<ClusterNode, List<SnapshotMetadata>> nodeMetas : metasByNode.entrySet()) {
            nodesWithData.add(nodeMetas.getKey().id());

            for (SnapshotMetadata snpMeta : nodeMetas.getValue()) {
                assert snpMeta != null : nodeMetas.getKey().id();

                // Baseline nodes are taken from the first metadata encountered.
                if (snapshotBaseline == null)
                    snapshotBaseline = new HashSet<>(snpMeta.baselineNodes());

                notFoundGrps.keySet().removeAll(snpMeta.partitions().keySet());
            }
        }

        if (snapshotBaseline == null) {
            finishProcess(restoreFut.rqId, new IllegalArgumentException(OP_REJECT_MSG + "No snapshot data " + "has been found [groups=" + notFoundGrps.values() + ", snapshot=" + snpName + ']'));

            return;
        }

        if (!notFoundGrps.isEmpty()) {
            finishProcess(restoreFut.rqId, new IllegalArgumentException(OP_REJECT_MSG + "Cache group(s) was not " + "found in the snapshot [groups=" + notFoundGrps.values() + ", snapshot=" + snpName + ']'));

            return;
        }

        Collection<UUID> aliveBaselineIds = F.viewReadOnly(ctx.discovery().discoCache().aliveBaselineNodes(), F.node2id());

        SnapshotOperationRequest prepareReq = new SnapshotOperationRequest(restoreFut.rqId, F.first(nodesWithData), snpName, cacheGrpNames, new HashSet<>(aliveBaselineIds));

        prepareRestoreProc.start(prepareReq.requestId(), prepareReq);
    });

    return new IgniteFutureImpl<>(restoreFut);
}
use of org.apache.ignite.internal.processors.cluster.DiscoveryDataClusterState in project ignite by apache.
the class ClusterCachesInfo method onCacheChangeRequested.
/**
 * Handles a batch of dynamic cache change requests. If the cluster is not active or its state is in
 * transition, all requests of the batch are completed right away and no exchange is requested. The same
 * happens (with a topology error) when a node required for the cache start is not an alive baseline node.
 *
 * @param batch Cache change request.
 * @param topVer Topology version.
 * @return {@code True} if minor topology version should be increased.
 */
public boolean onCacheChangeRequested(DynamicCacheChangeBatch batch, AffinityTopologyVersion topVer) {
    DiscoveryDataClusterState clusterState = ctx.state().clusterState();

    // Guard: requests cannot be processed unless the cluster is active and not in transition.
    if (!clusterState.active() || clusterState.transition()) {
        IgniteCheckedException stateErr = new IgniteCheckedException("Failed to start/stop cache, cluster state change " + "is in progress.");

        for (DynamicCacheChangeRequest changeReq : batch.requests()) {
            if (!changeReq.template())
                ctx.cache().completeCacheStartFuture(changeReq, false, stateErr);
            else {
                ctx.cache().completeTemplateAddFuture(changeReq.startCacheConfiguration().getName(), changeReq.deploymentId());
            }
        }

        return false;
    }

    Set<IgniteUuid> restartIds = new HashSet<>(F.viewReadOnly(batch.requests(), DynamicCacheChangeRequest::restartId, r -> r.start() && r.restartId() != null));

    assert restartIds.size() <= 1 : batch.requests();

    Collection<UUID> requiredNodes = ctx.cache().context().snapshotMgr().cacheStartRequiredAliveNodes(F.first(restartIds));

    for (UUID requiredId : requiredNodes) {
        ClusterNode requiredNode = ctx.discovery().node(requiredId);

        boolean available = requiredNode != null && CU.baselineNode(requiredNode, clusterState) && ctx.discovery().alive(requiredNode);

        if (!available) {
            // The first unavailable required node fails every request of the batch.
            ClusterTopologyCheckedException topErr = new ClusterTopologyCheckedException("Required node has left the cluster [nodeId=" + requiredId + ']');

            for (DynamicCacheChangeRequest changeReq : batch.requests())
                ctx.cache().completeCacheStartFuture(changeReq, false, topErr);

            return false;
        }
    }

    ExchangeActions actions = new ExchangeActions();

    CacheChangeProcessResult procRes = processCacheChangeRequests(actions, batch.requests(), topVer, false);

    if (procRes.needExchange) {
        assert !actions.empty() : actions;

        batch.exchangeActions(actions);

        if (!requiredNodes.isEmpty())
            actions.cacheStartRequiredAliveNodes(requiredNodes);
    }

    return procRes.needExchange;
}
Aggregations