Usage of org.apache.ignite.internal.processors.metastorage.persistence.DistributedMetaStorageImpl in the Apache Ignite project.
Class IgniteKernal, method start.
/**
* Starts this kernal instance: moves the gateway to the STARTING state, acknowledges the
* configuration in the log, starts processors and managers in dependency order, starts the
* discovery manager last, and finally invokes {@code onKernalStart} on the started components.
* On any failure the error handler is notified, {@code stop(true)} is called and the error is rethrown.
*
* @param cfg Ignite configuration to use.
* @param errHnd Error handler to use for notification about startup problems.
* @param workerRegistry Worker registry.
* @param hnd Default uncaught exception handler used by thread pools.
* @param startTimer Time bag whose {@code finishGlobalStage} is called as each startup stage completes.
* @throws IgniteCheckedException Thrown in case of any errors.
*/
public void start(final IgniteConfiguration cfg, GridAbsClosure errHnd, WorkersRegistry workerRegistry, Thread.UncaughtExceptionHandler hnd, TimeBag startTimer) throws IgniteCheckedException {
// Install a gateway only if none has been set yet.
gw.compareAndSet(null, new GridKernalGatewayImpl(cfg.getIgniteInstanceName()));
// Local variable intentionally shadows the 'gw' field from here on.
GridKernalGateway gw = this.gw.get();
gw.writeLock();
try {
// Only a gateway in the STOPPED state may proceed to STARTING.
switch(gw.getState()) {
case STARTED:
{
U.warn(log, "Grid has already been started (ignored).");
return;
}
case STARTING:
{
U.warn(log, "Grid is already in process of being started (ignored).");
return;
}
case STOPPING:
{
throw new IgniteCheckedException("Grid is in process of being stopped");
}
case STOPPED:
{
break;
}
}
gw.setState(STARTING);
} finally {
gw.writeUnlock();
}
// NOTE(review): 'cfg' has already been dereferenced above (cfg.getIgniteInstanceName()),
// so this assertion cannot be the first thing to fail on a null configuration.
assert cfg != null;
// Make sure we got proper configuration.
validateCommon(cfg);
igniteInstanceName = cfg.getIgniteInstanceName();
this.cfg = cfg;
// Logger category is the class name, suffixed with '%<instance name>' when the instance is named.
log = (GridLoggerProxy) cfg.getGridLogger().getLogger(getClass().getName() + (igniteInstanceName != null ? '%' + igniteInstanceName : ""));
longJVMPauseDetector = new LongJVMPauseDetector(log);
longJVMPauseDetector.start();
RuntimeMXBean rtBean = ManagementFactory.getRuntimeMXBean();
// Ack various information.
ackAsciiLogo();
ackConfigUrl();
ackConfiguration(cfg);
ackDaemon();
ackOsInfo();
ackLanguageRuntime();
ackRemoteManagement();
ackLogger();
ackVmArguments(rtBean);
ackClassPaths(rtBean);
ackSystemProperties();
ackEnvironmentVariables();
ackMemoryConfiguration();
ackCacheConfiguration();
ackP2pConfiguration();
ackRebalanceConfiguration();
ackIPv4StackFlagIsSet();
ackWaitForBackupsOnShutdownPropertyIsUsed();
// Ack 3-rd party licenses location.
if (log.isInfoEnabled() && cfg.getIgniteHome() != null)
log.info("3-rd party licenses can be found at: " + cfg.getIgniteHome() + File.separatorChar + "libs" + File.separatorChar + "licenses");
// Check that user attributes do not conflict
// with internally reserved names.
for (String name : cfg.getUserAttributes().keySet()) if (name.startsWith(ATTR_PREFIX))
throw new IgniteCheckedException("User attribute has illegal name: '" + name + "'. Note that all names " + "starting with '" + ATTR_PREFIX + "' are reserved for internal use.");
// Ack local node user attributes.
logNodeUserAttributes();
// Ack configuration.
ackSpis();
// Use explicitly configured plugin providers if there are any, otherwise fall back to U.allPluginProviders().
List<PluginProvider> plugins = cfg.getPluginProviders() != null && cfg.getPluginProviders().length > 0 ? Arrays.asList(cfg.getPluginProviders()) : U.allPluginProviders();
// Spin out SPIs & managers.
try {
ctx = new GridKernalContextImpl(log, this, cfg, gw, plugins, MarshallerUtils.classNameFilter(this.getClass().getClassLoader()), workerRegistry, hnd, longJVMPauseDetector);
startProcessor(new DiagnosticProcessor(ctx));
mBeansMgr = new IgniteMBeansManager(this);
cfg.getMarshaller().setContext(ctx.marshallerContext());
startProcessor(new GridInternalSubscriptionProcessor(ctx));
ClusterProcessor clusterProc = new ClusterProcessor(ctx);
startProcessor(clusterProc);
U.onGridStart();
// Start and configure resource processor first as it contains resources used
// by all other managers and processors.
GridResourceProcessor rsrcProc = new GridResourceProcessor(ctx);
rsrcProc.setSpringContext(rsrcCtx);
scheduler = new IgniteSchedulerImpl(ctx);
startProcessor(rsrcProc);
// Inject resources into lifecycle beans.
if (!cfg.isDaemon() && cfg.getLifecycleBeans() != null) {
for (LifecycleBean bean : cfg.getLifecycleBeans()) {
if (bean != null)
rsrcProc.inject(bean);
}
}
// Lifecycle notification.
notifyLifecycleBeans(BEFORE_NODE_START);
// Starts lifecycle aware components.
U.startLifecycleAware(lifecycleAwares(cfg));
startProcessor(new IgnitePluginProcessor(ctx, cfg, plugins));
startProcessor(new FailureProcessor(ctx));
// Start security processors.
startProcessor(securityProcessor());
startProcessor(new PoolProcessor(ctx));
// Run background network diagnostics.
GridDiagnostic.runBackgroundCheck(igniteInstanceName, ctx.pools().getExecutorService(), log);
// Closure processor should be started before all others
// (except for resource processor), as many components can depend on it.
startProcessor(new GridClosureProcessor(ctx));
// Start some other processors (order & place is important).
startProcessor(new GridPortProcessor(ctx));
startProcessor(new GridJobMetricsProcessor(ctx));
// Timeout processor needs to be started before managers,
// as managers may depend on it.
startProcessor(new GridTimeoutProcessor(ctx));
// NOTE: that order matters as there are dependencies between managers.
// If the tracing manager fails to start with the flag 'false', a second instance is started with 'true'.
// NOTE(review): the flag presumably switches to a no-op tracing implementation - confirm against GridTracingManager.
try {
startManager(new GridTracingManager(ctx, false));
} catch (IgniteCheckedException e) {
startManager(new GridTracingManager(ctx, true));
}
startManager(new GridMetricManager(ctx));
startManager(new GridSystemViewManager(ctx));
startManager(new GridIoManager(ctx));
startManager(new GridCheckpointManager(ctx));
startManager(new GridEventStorageManager(ctx));
startManager(new GridDeploymentManager(ctx));
startManager(new GridLoadBalancerManager(ctx));
startManager(new GridFailoverManager(ctx));
startManager(new GridCollisionManager(ctx));
startManager(new GridIndexingManager(ctx));
ackSecurity();
// Assign discovery manager to context before other processors start so they
// are able to register custom event listener.
// The manager is only added here; it is started at the very end via startManager(discoMgr).
GridDiscoveryManager discoMgr = new GridDiscoveryManager(ctx);
ctx.add(discoMgr, false);
// Start the encryption manager after assigning the discovery manager to context, so it will be
// able to register custom event listener.
startManager(new GridEncryptionManager(ctx));
startProcessor(new PdsConsistentIdProcessor(ctx));
MaintenanceProcessor mntcProcessor = new MaintenanceProcessor(ctx);
startProcessor(mntcProcessor);
// In maintenance mode the cluster state on start is forced to INACTIVE and the configured
// discovery SPI is replaced with IsolatedDiscoverySpi, which requires a fresh discovery manager.
if (mntcProcessor.isMaintenanceMode()) {
if (log.isInfoEnabled()) {
log.info("Node is being started in maintenance mode. " + "Starting IsolatedDiscoverySpi instead of configured discovery SPI.");
}
cfg.setClusterStateOnStart(ClusterState.INACTIVE);
if (log.isInfoEnabled())
log.info("Overriding 'clusterStateOnStart' configuration to 'INACTIVE'.");
ctx.config().setDiscoverySpi(new IsolatedDiscoverySpi());
discoMgr = new GridDiscoveryManager(ctx);
// Reinitialized discovery manager won't have a valid consistentId on creation.
discoMgr.consistentId(ctx.pdsFolderResolver().resolveFolders().consistentId());
ctx.add(discoMgr, false);
}
// Start processors before the discovery manager (which is started further below), so they will
// be able to start receiving messages once discovery completes.
try {
startProcessor(COMPRESSION.createOptional(ctx));
startProcessor(new GridMarshallerMappingProcessor(ctx));
startProcessor(new MvccProcessorImpl(ctx));
startProcessor(createComponent(DiscoveryNodeValidationProcessor.class, ctx));
startProcessor(new GridAffinityProcessor(ctx));
startProcessor(createComponent(GridSegmentationProcessor.class, ctx));
startTimer.finishGlobalStage("Start managers");
startProcessor(createComponent(IgniteCacheObjectProcessor.class, ctx));
startTimer.finishGlobalStage("Configure binary metadata");
startProcessor(createComponent(IGridClusterStateProcessor.class, ctx));
startProcessor(new PerformanceStatisticsProcessor(ctx));
startProcessor(new GridCacheProcessor(ctx));
// With authentication enabled, the authentication processor wrapped by the security processor is started explicitly.
if (cfg.isAuthenticationEnabled()) {
IgniteSecurityProcessor sec = (IgniteSecurityProcessor) ctx.security();
((IgniteAuthenticationProcessor) sec.securityProcessor()).startProcessor();
}
startProcessor(new IndexProcessor(ctx));
startProcessor(new GridQueryProcessor(ctx));
startProcessor(new ClientListenerProcessor(ctx));
startProcessor(new IgniteServiceProcessor(ctx));
startProcessor(new GridTaskSessionProcessor(ctx));
startProcessor(new GridJobProcessor(ctx));
startProcessor(new GridTaskProcessor(ctx));
startProcessor((GridProcessor) SCHEDULE.createOptional(ctx));
startProcessor(createComponent(IgniteRestProcessor.class, ctx));
startProcessor(new DataStreamProcessor(ctx));
startProcessor(new GridContinuousProcessor(ctx));
startProcessor(new DataStructuresProcessor(ctx));
startProcessor(createComponent(PlatformProcessor.class, ctx));
startProcessor(new DistributedMetaStorageImpl(ctx));
startProcessor(new DistributedConfigurationProcessor(ctx));
startProcessor(new DurableBackgroundTasksProcessor(ctx));
startTimer.finishGlobalStage("Start processors");
// Start plugins.
for (PluginProvider provider : ctx.plugins().allProviders()) {
ctx.add(new GridPluginComponent(provider));
provider.start(ctx.plugins().pluginContextForProvider(provider));
startTimer.finishGlobalStage("Start '" + provider.name() + "' plugin");
}
// Start platform plugins.
if (ctx.config().getPlatformConfiguration() != null)
startProcessor(new PlatformPluginProcessor(ctx));
mBeansMgr.registerMBeansDuringInitPhase();
ctx.cluster().initDiagnosticListeners();
fillNodeAttributes(clusterProc.updateNotifierEnabled());
// Make the metastorage readable: notify database subscribers first, then the distributed metastorage.
ctx.cache().context().database().notifyMetaStorageSubscribersOnReadyForRead();
((DistributedMetaStorageImpl) ctx.distributedMetastorage()).inMemoryReadyForRead();
startTimer.finishGlobalStage("Init metastore");
ctx.cache().context().database().startMemoryRestore(ctx, startTimer);
ctx.recoveryMode(false);
startTimer.finishGlobalStage("Finish recovery");
} catch (Throwable e) {
U.error(log, "Exception during start processors, node will be stopped and close connections", e);
// Stop discovery spi to close tcp socket.
ctx.discovery().stop(true);
throw e;
}
// All components except Discovery are started, time to check if maintenance is still needed.
mntcProcessor.prepareAndExecuteMaintenance();
// The gateway is switched to STARTED and discovery is started under the same write lock.
gw.writeLock();
try {
gw.setState(STARTED);
// Start discovery manager last to make sure that grid is fully initialized.
startManager(discoMgr);
} finally {
gw.writeUnlock();
}
startTimer.finishGlobalStage("Join topology");
// Check whether UTF-8 is the default character encoding.
checkFileEncoding();
// Check whether physical RAM is not exceeded.
checkPhysicalRam();
// Suggest configuration optimizations.
suggestOptimizations(cfg);
// Suggest JVM optimizations.
ctx.performance().addAll(JvmConfigurationSuggestions.getSuggestions());
// Suggest Operation System optimizations.
ctx.performance().addAll(OsConfigurationSuggestions.getSuggestions());
DiscoveryLocalJoinData joinData = ctx.discovery().localJoin();
IgniteInternalFuture<Boolean> transitionWaitFut = joinData.transitionWaitFuture();
// Notify discovery manager the first to make sure that topology is discovered.
// Active flag is not used in managers, so it is safe to pass true.
ctx.discovery().onKernalStart(true);
// Notify IO manager the second so further components can send and receive messages.
// Must notify the IO manager before transition state await to make sure IO connection can be established.
ctx.io().onKernalStart(true);
// The 'active' flag passed to the remaining components comes from the transition future
// when one is present, otherwise directly from the local join data.
boolean active;
if (transitionWaitFut != null) {
if (log.isInfoEnabled()) {
log.info("Join cluster while cluster state transition is in progress, " + "waiting when transition finish.");
}
active = transitionWaitFut.get();
} else
active = joinData.active();
startTimer.finishGlobalStage("Await transition");
ctx.pools().registerMetrics();
registerMetrics();
ctx.cluster().registerMetrics();
// Register MBeans.
mBeansMgr.registerMBeansAfterNodeStarted();
// Set when a component asks for a reconnect during its onKernalStart callback.
boolean recon = false;
// Callbacks.
for (GridComponent comp : ctx) {
// Skip discovery manager.
if (comp instanceof GridDiscoveryManager)
continue;
// Skip IO manager.
if (comp instanceof GridIoManager)
continue;
// Skip plugin components; plugin providers get their own onIgniteStart() callback below.
if (comp instanceof GridPluginComponent)
continue;
if (!skipDaemon(comp)) {
try {
comp.onKernalStart(active);
} catch (IgniteNeedReconnectException e) {
ClusterNode locNode = ctx.discovery().localNode();
assert locNode.isClient();
if (!ctx.discovery().reconnectSupported())
throw new IgniteCheckedException("Client node in forceServerMode " + "is not allowed to reconnect to the cluster and will be stopped.");
if (log.isDebugEnabled())
log.debug("Failed to start node components on node start, will wait for reconnect: " + e);
recon = true;
}
}
}
// Invoke the onIgniteStart() callback on every plugin provider.
for (PluginProvider provider : ctx.plugins().allProviders()) provider.onIgniteStart();
if (recon)
reconnectState.waitFirstReconnect();
// Lifecycle bean notifications.
notifyLifecycleBeans(AFTER_NODE_START);
} catch (Throwable e) {
// Pick the log message by failure kind: SPI version check error, interruption, or anything else.
IgniteSpiVersionCheckException verCheckErr = X.cause(e, IgniteSpiVersionCheckException.class);
if (verCheckErr != null)
U.error(log, verCheckErr.getMessage());
else if (X.hasCause(e, InterruptedException.class, IgniteInterruptedCheckedException.class))
U.warn(log, "Grid startup routine has been interrupted (will rollback).");
else
U.error(log, "Got exception while starting (will rollback startup routine).", e);
// Notify the caller-supplied error handler, then stop the node.
errHnd.apply();
stop(true);
// Errors and IgniteCheckedException are rethrown as is; everything else is wrapped.
if (e instanceof Error)
throw e;
else if (e instanceof IgniteCheckedException)
throw (IgniteCheckedException) e;
else
throw new IgniteCheckedException(e);
}
// Mark start timestamp.
startTime = U.currentTimeMillis();
String intervalStr = IgniteSystemProperties.getString(IGNITE_STARVATION_CHECK_INTERVAL);
// Start starvation checker if enabled.
// It is disabled for daemon nodes and when the interval property is exactly "0";
// an empty or unset property falls back to DFLT_PERIODIC_STARVATION_CHECK_FREQ.
boolean starveCheck = !isDaemon() && !"0".equals(intervalStr);
if (starveCheck) {
final long interval = F.isEmpty(intervalStr) ? DFLT_PERIODIC_STARVATION_CHECK_FREQ : Long.parseLong(intervalStr);
starveTask = ctx.timeout().schedule(new Runnable() {
/**
* Completed task count of the public pool as seen on the previous run.
*/
private long lastCompletedCntPub;
/**
* Completed task count of the system pool as seen on the previous run.
*/
private long lastCompletedCntSys;
/**
* Completed task count of the query pool as seen on the previous run.
*/
private long lastCompletedCntQry;
@Override
public void run() {
// Only pools backed by a ThreadPoolExecutor can be inspected this way.
if (ctx.pools().getExecutorService() instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) ctx.pools().getExecutorService();
lastCompletedCntPub = checkPoolStarvation(exec, lastCompletedCntPub, "public");
}
if (ctx.pools().getSystemExecutorService() instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) ctx.pools().getSystemExecutorService();
lastCompletedCntSys = checkPoolStarvation(exec, lastCompletedCntSys, "system");
}
if (ctx.pools().getQueryExecutorService() instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) ctx.pools().getQueryExecutorService();
lastCompletedCntQry = checkPoolStarvation(exec, lastCompletedCntQry, "query");
}
// The striped pool performs its own starvation detection.
if (ctx.pools().getStripedExecutorService() != null)
ctx.pools().getStripedExecutorService().detectStarvation();
}
/**
* Logs a warning if the given pool looks starved and returns its current completed task count
* so the caller can store it for the next run.
*
* @param exec Thread pool executor to check.
* @param lastCompletedCnt Last completed tasks count.
* @param pool Pool name for message.
* @return Current completed tasks count.
*/
private long checkPoolStarvation(ThreadPoolExecutor exec, long lastCompletedCnt, String pool) {
long completedCnt = exec.getCompletedTaskCount();
// If all threads are active and no task has completed since the last check and there is
// at least one waiting request, then it is possible starvation.
if (exec.getPoolSize() == exec.getActiveCount() && completedCnt == lastCompletedCnt && !exec.getQueue().isEmpty())
LT.warn(log, "Possible thread pool starvation detected (no task completed in last " + interval + "ms, is " + pool + " thread pool size large enough?)");
return completedCnt;
}
}, interval, interval);
}
// Schedule periodic node metrics logging only when a positive frequency is configured.
long metricsLogFreq = cfg.getMetricsLogFrequency();
if (metricsLogFreq > 0) {
metricsLogTask = ctx.timeout().schedule(new Runnable() {
private final DecimalFormat dblFmt = doubleFormat();
@Override
public void run() {
ackNodeMetrics(dblFmt, ctx.pools().getExecutorService(), ctx.pools().getSystemExecutorService(), ctx.pools().getStripedExecutorService(), ctx.pools().customExecutors());
}
}, metricsLogFreq, metricsLogFreq);
}
// The suggestion is added with a flag telling whether assertions are currently disabled.
ctx.performance().add("Disable assertions (remove '-ea' from JVM options)", !U.assertionsEnabled());
ctx.performance().logSuggestions(log, igniteInstanceName);
U.quietAndInfo(log, "To start Console Management & Monitoring run ignitevisorcmd.{sh|bat}");
// Class path content is acknowledged only when IGNITE_QUIET is false (it is treated as true when unset).
if (!IgniteSystemProperties.getBoolean(IgniteSystemProperties.IGNITE_QUIET, true))
ackClassPathContent();
ackStart(rtBean);
// Non-daemon nodes acknowledge the topology as of the local join topology version.
if (!isDaemon())
ctx.discovery().ackTopology(ctx.discovery().localJoin().joinTopologyVersion().topologyVersion(), EventType.EVT_NODE_JOINED, localNode());
startTimer.finishGlobalStage("Await exchange");
}
Usage of org.apache.ignite.internal.processors.metastorage.persistence.DistributedMetaStorageImpl in the Apache Ignite project.
Class IgniteSnapshotManager, method initLocalSnapshotStartStage.
/**
 * Runs the local start stage of a snapshot operation. The request is validated against the local
 * node state, a snapshot task is registered when there is something to snapshot, and once that task
 * completes the snapshot metafile is written and the {@code CREATE} snapshot handlers are invoked.
 *
 * @param req Request on snapshot creation.
 * @return Future which will be completed when a snapshot has been started.
 */
private IgniteInternalFuture<SnapshotOperationResponse> initLocalSnapshotStartStage(SnapshotOperationRequest req) {
    // Client nodes take no part in this stage.
    if (cctx.kernalContext().clientNode())
        return new GridFinishedFuture<>();

    // Neither do nodes that are not in the baseline.
    if (!CU.baselineNode(cctx.localNode(), cctx.kernalContext().state().clusterState()))
        return new GridFinishedFuture<>();

    // NOTE(review): the original comment at this point says it is safe to set the new snapshot task
    // inside this method without synchronization; the reason is not visible here - confirm.
    if (clusterSnpReq != null) {
        String rejectMsg = "Snapshot operation has been rejected. " + "Another snapshot operation in progress [req=" + req + ", curr=" + clusterSnpReq + ']';

        return new GridFinishedFuture<>(new IgniteCheckedException(rejectMsg));
    }

    // Requested nodes that are no longer among the server nodes.
    Set<UUID> absentNodes = new HashSet<>(req.nodes());

    absentNodes.removeAll(F.viewReadOnly(cctx.discovery().serverNodes(AffinityTopologyVersion.NONE), F.node2id()));

    if (!absentNodes.isEmpty()) {
        String leftMsg = "Some of baseline nodes left the cluster " + "prior to snapshot operation start: " + absentNodes;

        return new GridFinishedFuture<>(new IgniteCheckedException(leftMsg));
    }

    // Encryption related restrictions are checked on non-client nodes only.
    if (!cctx.localNode().isClient()) {
        if (cctx.kernalContext().encryption().isMasterKeyChangeInProgress())
            return new GridFinishedFuture<>(new IgniteCheckedException("Snapshot operation has been rejected. Master " + "key changing process is not finished yet."));

        if (cctx.kernalContext().encryption().reencryptionInProgress())
            return new GridFinishedFuture<>(new IgniteCheckedException("Snapshot operation has been rejected. Caches " + "re-encryption process is not finished yet."));
    }

    List<Integer> reqGrpIds = new ArrayList<>(F.viewReadOnly(req.groups(), CU::cacheId));

    // Requested groups without a local descriptor; the metastorage ID is taken out and tracked separately.
    Set<Integer> missingGrps = new HashSet<>(reqGrpIds);

    missingGrps.removeAll(cctx.cache().cacheGroupDescriptors().keySet());

    boolean withMetaStorage = missingGrps.remove(METASTORAGE_CACHE_ID);

    if (!missingGrps.isEmpty()) {
        String missedMsg = "Some of requested cache groups doesn't exist " + "on the local node [missed=" + missingGrps + ", nodeId=" + cctx.localNodeId() + ']';

        return new GridFinishedFuture<>(new IgniteCheckedException(missedMsg));
    }

    Map<Integer, Set<Integer>> parts = new HashMap<>();

    // Cache group context may be 'null' on some nodes e.g. a node filter is set.
    for (Integer grpId : reqGrpIds) {
        if (cctx.cache().cacheGroup(grpId) != null)
            parts.put(grpId, null);
    }

    IgniteInternalFuture<?> snpTask;

    if (!parts.isEmpty() || withMetaStorage) {
        snpTask = registerSnapshotTask(req.snapshotName(), req.operationalNodeId(), parts, withMetaStorage, locSndrFactory.apply(req.snapshotName()));

        // Hand the task's 'started' future to the distributed metastorage via suspend().
        if (withMetaStorage && snpTask instanceof SnapshotFutureTask) {
            DistributedMetaStorageImpl distrMetaStorage = (DistributedMetaStorageImpl) cctx.kernalContext().distributedMetastorage();

            distrMetaStorage.suspend(((SnapshotFutureTask) snpTask).started());
        }

        clusterSnpReq = req;
    }
    else
        snpTask = new GridFinishedFuture<>(Collections.emptySet());

    return snpTask.chain(fut -> {
        Throwable taskErr = fut.error();

        if (taskErr != null)
            throw F.wrap(taskErr);

        try {
            // Consistent IDs of all nodes named in the request.
            Set<String> baselineIds = req.nodes().stream()
                .map(nodeId -> cctx.discovery().node(nodeId).consistentId().toString())
                .collect(Collectors.toSet());

            File metaFile = new File(snapshotLocalDir(req.snapshotName()), snapshotMetaFileName(cctx.localNode().consistentId().toString()));

            if (metaFile.exists())
                throw new GridClosureException(new IgniteException("Snapshot metafile must not exist: " + metaFile.getAbsolutePath()));

            metaFile.getParentFile().mkdirs();

            SnapshotMetadata snpMeta = new SnapshotMetadata(req.requestId(), req.snapshotName(), cctx.localNode().consistentId().toString(), pdsSettings.folderName(), cctx.gridConfig().getDataStorageConfiguration().getPageSize(), reqGrpIds, baselineIds, (Set<GroupPartitionId>) fut.result());

            try (OutputStream metaOut = new BufferedOutputStream(new FileOutputStream(metaFile))) {
                U.marshal(marsh, snpMeta, metaOut);

                log.info("Snapshot metafile has been created: " + metaFile.getAbsolutePath());
            }

            SnapshotHandlerContext hndCtx = new SnapshotHandlerContext(snpMeta, req.groups(), cctx.localNode());

            return new SnapshotOperationResponse(handlers.invokeAll(SnapshotHandlerType.CREATE, hndCtx));
        }
        catch (IOException | IgniteCheckedException e) {
            throw F.wrap(e);
        }
    });
}
Aggregations