use of org.apache.ignite.internal.maintenance.MaintenanceProcessor in project ignite by apache.
the class MaintenanceRegistrySimpleTest method testMaintenanceTaskReplacement.
/**
* {@link MaintenanceTask} could be replaced with new parameters after registration, old task is deleted.
*
* @throws IgniteCheckedException If initialization failed.
*/
@Test
public void testMaintenanceTaskReplacement() throws IgniteCheckedException {
String name0 = "taskName0";
String descr = "description";
String oldParams = "oldParams";
String newParams = "newParams";
MaintenanceProcessor proc = new MaintenanceProcessor(initContext(true));
proc.start();
assertFalse(proc.isMaintenanceMode());
proc.registerMaintenanceTask(new MaintenanceTask(name0, descr, oldParams));
proc.registerMaintenanceTask(new MaintenanceTask(name0, descr, newParams));
proc.stop(false);
proc.start();
assertTrue(proc.isMaintenanceMode());
MaintenanceTask task = proc.activeMaintenanceTask(name0);
assertNotNull(task);
assertEquals(newParams, task.parameters());
}
use of org.apache.ignite.internal.maintenance.MaintenanceProcessor in project gridgain by gridgain.
the class IgniteKernal method start.
/**
* @param cfg Ignite configuration to use.
* @param errHnd Error handler to use for notification about startup problems.
* @param workerRegistry Worker registry.
* @param hnd Default uncaught exception handler used by thread pools.
* @throws IgniteCheckedException Thrown in case of any errors.
*/
public void start(final IgniteConfiguration cfg, GridAbsClosure errHnd, WorkersRegistry workerRegistry, Thread.UncaughtExceptionHandler hnd, TimeBag startTimer) throws IgniteCheckedException {
gw.compareAndSet(null, new GridKernalGatewayImpl(cfg.getIgniteInstanceName()));
GridKernalGateway gw = this.gw.get();
gw.writeLock();
try {
switch(gw.getState()) {
case STARTED:
{
U.warn(log, "Grid has already been started (ignored).");
return;
}
case STARTING:
{
U.warn(log, "Grid is already in process of being started (ignored).");
return;
}
case STOPPING:
{
throw new IgniteCheckedException("Grid is in process of being stopped");
}
case STOPPED:
{
break;
}
}
gw.setState(STARTING);
} finally {
gw.writeUnlock();
}
assert cfg != null;
// Make sure we got proper configuration.
validateCommon(cfg);
igniteInstanceName = cfg.getIgniteInstanceName();
this.cfg = cfg;
log = (GridLoggerProxy) cfg.getGridLogger().getLogger(getClass().getName() + (igniteInstanceName != null ? '%' + igniteInstanceName : ""));
longJVMPauseDetector = new LongJVMPauseDetector(log);
longJVMPauseDetector.start();
RuntimeMXBean rtBean = ManagementFactory.getRuntimeMXBean();
// Ack various information.
ackAsciiLogo();
ackConfigUrl();
ackConfiguration(cfg);
ackDaemon();
ackOsInfo();
ackLanguageRuntime();
ackRemoteManagement();
ackLogger();
ackVmArguments(rtBean);
ackClassPaths(rtBean);
ackSystemProperties();
ackEnvironmentVariables();
ackMemoryConfiguration();
ackCacheConfiguration();
ackP2pConfiguration();
ackRebalanceConfiguration();
ackIPv4StackFlagIsSet();
ackWaitForBackupsOnShutdownPropertyIsUsed();
// Ack 3-rd party licenses location.
if (log.isInfoEnabled() && cfg.getIgniteHome() != null)
log.info("3-rd party licenses can be found at: " + cfg.getIgniteHome() + File.separatorChar + "libs" + File.separatorChar + "licenses");
// with internally reserved names.
for (String name : cfg.getUserAttributes().keySet()) if (name.startsWith(ATTR_PREFIX))
throw new IgniteCheckedException("User attribute has illegal name: '" + name + "'. Note that all names " + "starting with '" + ATTR_PREFIX + "' are reserved for internal use.");
// Ack local node user attributes.
logNodeUserAttributes();
// Ack configuration.
ackSpis();
List<PluginProvider> plugins = cfg.getPluginProviders() != null && cfg.getPluginProviders().length > 0 ? Arrays.asList(cfg.getPluginProviders()) : U.allPluginProviders();
// Spin out SPIs & managers.
try {
ctx = new GridKernalContextImpl(log, this, cfg, gw, plugins, MarshallerUtils.classNameFilter(this.getClass().getClassLoader()), workerRegistry, hnd, longJVMPauseDetector);
startProcessor(new DiagnosticProcessor(ctx));
mBeansMgr = new IgniteMBeansManager(this);
cfg.getMarshaller().setContext(ctx.marshallerContext());
startProcessor(new GridInternalSubscriptionProcessor(ctx));
ClusterProcessor clusterProc = new ClusterProcessor(ctx);
startProcessor(clusterProc);
U.onGridStart();
// Start and configure resource processor first as it contains resources used
// by all other managers and processors.
GridResourceProcessor rsrcProc = new GridResourceProcessor(ctx);
rsrcProc.setSpringContext(rsrcCtx);
scheduler = new IgniteSchedulerImpl(ctx);
startProcessor(rsrcProc);
// Inject resources into lifecycle beans.
if (!cfg.isDaemon() && cfg.getLifecycleBeans() != null) {
for (LifecycleBean bean : cfg.getLifecycleBeans()) {
if (bean != null)
rsrcProc.inject(bean);
}
}
// Lifecycle notification.
notifyLifecycleBeans(BEFORE_NODE_START);
// Starts lifecycle aware components.
U.startLifecycleAware(lifecycleAwares(cfg));
startProcessor(new IgnitePluginProcessor(ctx, cfg, plugins));
startProcessor(new FailureProcessor(ctx));
startProcessor(new PoolProcessor(ctx));
// Run background network diagnostics.
GridDiagnostic.runBackgroundCheck(igniteInstanceName, ctx.pools().getExecutorService(), log);
// Closure processor should be started before all others
// (except for resource processor), as many components can depend on it.
startProcessor(new GridClosureProcessor(ctx));
// Start some other processors (order & place is important).
startProcessor(new GridPortProcessor(ctx));
startProcessor(new GridJobMetricsProcessor(ctx));
// Timeout processor needs to be started before managers,
// as managers may depend on it.
startProcessor(new GridTimeoutProcessor(ctx));
// Start security processors.
startProcessor(securityProcessor());
// NOTE: that order matters as there are dependencies between managers.
try {
startManager(new GridTracingManager(ctx, false));
} catch (IgniteCheckedException e) {
startManager(new GridTracingManager(ctx, true));
}
startManager(new GridMetricManager(ctx));
startManager(new GridSystemViewManager(ctx));
startManager(new GridIoManager(ctx));
startManager(new GridCheckpointManager(ctx));
startManager(new GridEventStorageManager(ctx));
startManager(new GridDeploymentManager(ctx));
startManager(new GridLoadBalancerManager(ctx));
startManager(new GridFailoverManager(ctx));
startManager(new GridCollisionManager(ctx));
startManager(new GridIndexingManager(ctx));
ackSecurity();
// Assign discovery manager to context before other processors start so they
// are able to register custom event listener.
GridDiscoveryManager discoMgr = new GridDiscoveryManager(ctx);
ctx.add(discoMgr, false);
// Start the encryption manager after assigning the discovery manager to context, so it will be
// able to register custom event listener.
startManager(new GridEncryptionManager(ctx));
startProcessor(new PdsConsistentIdProcessor(ctx));
MaintenanceProcessor mntcProcessor = new MaintenanceProcessor(ctx);
startProcessor(mntcProcessor);
if (mntcProcessor.isMaintenanceMode()) {
if (log.isInfoEnabled()) {
log.info("Node is being started in maintenance mode. " + "Starting IsolatedDiscoverySpi instead of configured discovery SPI.");
}
cfg.setClusterStateOnStart(ClusterState.INACTIVE);
if (log.isInfoEnabled())
log.info("Overriding 'clusterStateOnStart' configuration to 'INACTIVE'.");
ctx.config().setDiscoverySpi(new IsolatedDiscoverySpi());
discoMgr = new GridDiscoveryManager(ctx);
// Reinitialized discovery manager won't have a valid consistentId on creation.
discoMgr.consistentId(ctx.pdsFolderResolver().resolveFolders().consistentId());
ctx.add(discoMgr, false);
}
// be able to start receiving messages once discovery completes.
try {
startProcessor(COMPRESSION.createOptional(ctx));
startProcessor(new GridMarshallerMappingProcessor(ctx));
startProcessor(new MvccProcessorImpl(ctx));
startProcessor(createComponent(DiscoveryNodeValidationProcessor.class, ctx));
startProcessor(new GridAffinityProcessor(ctx));
startProcessor(createComponent(GridSegmentationProcessor.class, ctx));
startTimer.finishGlobalStage("Start managers");
startProcessor(createComponent(IgniteCacheObjectProcessor.class, ctx));
startTimer.finishGlobalStage("Configure binary metadata");
startProcessor(createComponent(IGridClusterStateProcessor.class, ctx));
startProcessor(new IgniteAuthenticationProcessor(ctx));
startProcessor(new GridCacheProcessor(ctx));
startProcessor(new GridQueryProcessor(ctx));
startProcessor(new ClientListenerProcessor(ctx));
startProcessor(createServiceProcessor());
startProcessor(new GridTaskSessionProcessor(ctx));
startProcessor(new GridJobProcessor(ctx));
startProcessor(new GridTaskProcessor(ctx));
startProcessor(SCHEDULE.createOptional(ctx));
startProcessor(new GridRestProcessor(ctx));
startProcessor(new DataStreamProcessor(ctx));
startProcessor(new GridContinuousProcessor(ctx));
startProcessor(new DataStructuresProcessor(ctx));
startProcessor(createComponent(PlatformProcessor.class, ctx));
if (isFeatureEnabled(IGNITE_DISTRIBUTED_META_STORAGE_FEATURE))
startProcessor(new DistributedMetaStorageImpl(ctx));
startProcessor(new DistributedConfigurationProcessor(ctx));
startProcessor(new DurableBackgroundTasksProcessor(ctx));
// Start transactional data replication processor.
startProcessor(createComponent(TransactionalDrProcessor.class, ctx));
startTimer.finishGlobalStage("Start processors");
// Start plugins.
for (PluginProvider provider : ctx.plugins().allProviders()) {
ctx.add(new GridPluginComponent(provider));
provider.start(ctx.plugins().pluginContextForProvider(provider));
startTimer.finishGlobalStage("Start '" + provider.name() + "' plugin");
}
// Start platform plugins.
if (ctx.config().getPlatformConfiguration() != null)
startProcessor(new PlatformPluginProcessor(ctx));
mBeansMgr.registerMBeansDuringInitPhase();
ctx.cluster().initDiagnosticListeners();
fillNodeAttributes(clusterProc.updateNotifierEnabled());
ctx.cache().context().database().notifyMetaStorageSubscribersOnReadyForRead();
if (isFeatureEnabled(IGNITE_DISTRIBUTED_META_STORAGE_FEATURE))
((DistributedMetaStorageImpl) ctx.distributedMetastorage()).inMemoryReadyForRead();
startTimer.finishGlobalStage("Init metastore");
ctx.cache().context().database().startMemoryRestore(ctx, startTimer);
ctx.recoveryMode(false);
startTimer.finishGlobalStage("Finish recovery");
} catch (Throwable e) {
U.error(log, "Exception during start processors, node will be stopped and close connections", e);
// Stop discovery spi to close tcp socket.
ctx.discovery().stop(true);
throw e;
}
// All components exept Discovery are started, time to check if maintenance is still needed.
mntcProcessor.prepareAndExecuteMaintenance();
gw.writeLock();
try {
gw.setState(STARTED);
// Start discovery manager last to make sure that grid is fully initialized.
startManager(discoMgr);
} finally {
gw.writeUnlock();
}
startTimer.finishGlobalStage("Join topology");
// Check whether UTF-8 is the default character encoding.
checkFileEncoding();
// Check whether physical RAM is not exceeded.
checkPhysicalRam();
// Suggest configuration optimizations.
suggestOptimizations(cfg);
// Suggest JVM optimizations.
ctx.performance().addAll(JvmConfigurationSuggestions.getSuggestions());
// Suggest Operation System optimizations.
ctx.performance().addAll(OsConfigurationSuggestions.getSuggestions());
DiscoveryLocalJoinData joinData = ctx.discovery().localJoin();
IgniteInternalFuture<Boolean> transitionWaitFut = joinData.transitionWaitFuture();
// Notify discovery manager the first to make sure that topology is discovered.
// Active flag is not used in managers, so it is safe to pass true.
ctx.discovery().onKernalStart(true);
// Notify IO manager the second so further components can send and receive messages.
// Must notify the IO manager before transition state await to make sure IO connection can be established.
ctx.io().onKernalStart(true);
boolean active;
if (transitionWaitFut != null) {
if (log.isInfoEnabled()) {
log.info("Join cluster while cluster state transition is in progress, " + "waiting when transition finish.");
}
active = transitionWaitFut.get();
} else
active = joinData.active();
startTimer.finishGlobalStage("Await transition");
ctx.pools().registerMetrics();
registerMetrics();
ctx.cluster().registerMetrics();
// Register MBeans.
mBeansMgr.registerMBeansAfterNodeStarted();
boolean recon = false;
// Callbacks.
for (GridComponent comp : ctx) {
// Skip discovery manager.
if (comp instanceof GridDiscoveryManager)
continue;
// Skip IO manager.
if (comp instanceof GridIoManager)
continue;
if (comp instanceof GridPluginComponent)
continue;
if (!skipDaemon(comp)) {
try {
comp.onKernalStart(active);
} catch (IgniteNeedReconnectException e) {
ClusterNode locNode = ctx.discovery().localNode();
assert locNode.isClient();
if (!ctx.discovery().reconnectSupported())
throw new IgniteCheckedException("Client node in forceServerMode " + "is not allowed to reconnect to the cluster and will be stopped.");
if (log.isDebugEnabled())
log.debug("Failed to start node components on node start, will wait for reconnect: " + e);
recon = true;
}
}
}
// Start plugins.
for (PluginProvider provider : ctx.plugins().allProviders()) provider.onIgniteStart();
if (recon)
reconnectState.waitFirstReconnect();
// Lifecycle bean notifications.
notifyLifecycleBeans(AFTER_NODE_START);
} catch (Throwable e) {
IgniteSpiVersionCheckException verCheckErr = X.cause(e, IgniteSpiVersionCheckException.class);
if (verCheckErr != null)
U.error(log, verCheckErr.getMessage());
else if (X.hasCause(e, InterruptedException.class, IgniteInterruptedCheckedException.class))
U.warn(log, "Grid startup routine has been interrupted (will rollback).");
else
U.error(log, "Got exception while starting (will rollback startup routine).", e);
errHnd.apply();
stop(true);
if (e instanceof Error)
throw e;
else if (e instanceof IgniteCheckedException)
throw (IgniteCheckedException) e;
else
throw new IgniteCheckedException(e);
}
// Mark start timestamp.
startTime = U.currentTimeMillis();
String intervalStr = IgniteSystemProperties.getString(IGNITE_STARVATION_CHECK_INTERVAL);
// Start starvation checker if enabled.
boolean starveCheck = !isDaemon() && !"0".equals(intervalStr);
if (starveCheck) {
final long interval = F.isEmpty(intervalStr) ? DFLT_PERIODIC_STARVATION_CHECK_FREQ : Long.parseLong(intervalStr);
starveTask = ctx.timeout().schedule(new Runnable() {
/**
* Last completed task count.
*/
private long lastCompletedCntPub;
/**
* Last completed task count.
*/
private long lastCompletedCntSys;
/**
* Last completed task count.
*/
private long lastCompletedCntQry;
@Override
public void run() {
if (ctx.pools().getExecutorService() instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) ctx.pools().getExecutorService();
lastCompletedCntPub = checkPoolStarvation(exec, lastCompletedCntPub, "public");
}
if (ctx.pools().getSystemExecutorService() instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) ctx.pools().getSystemExecutorService();
lastCompletedCntSys = checkPoolStarvation(exec, lastCompletedCntSys, "system");
}
if (ctx.pools().getQueryExecutorService() instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) ctx.pools().getQueryExecutorService();
lastCompletedCntQry = checkPoolStarvation(exec, lastCompletedCntQry, "query");
}
if (ctx.pools().getStripedExecutorService() != null)
ctx.pools().getStripedExecutorService().detectStarvation();
}
/**
* @param exec Thread pool executor to check.
* @param lastCompletedCnt Last completed tasks count.
* @param pool Pool name for message.
* @return Current completed tasks count.
*/
private long checkPoolStarvation(ThreadPoolExecutor exec, long lastCompletedCnt, String pool) {
long completedCnt = exec.getCompletedTaskCount();
// at least one waiting request, then it is possible starvation.
if (exec.getPoolSize() == exec.getActiveCount() && completedCnt == lastCompletedCnt && !exec.getQueue().isEmpty())
LT.warn(log, "Possible thread pool starvation detected (no task completed in last " + interval + "ms, is " + pool + " thread pool size large enough?)");
return completedCnt;
}
}, interval, interval);
}
long metricsLogFreq = cfg.getMetricsLogFrequency();
if (metricsLogFreq > 0) {
metricsLogTask = ctx.timeout().schedule(new Runnable() {
private final DecimalFormat dblFmt = doubleFormat();
@Override
public void run() {
ackNodeMetrics(dblFmt, ctx.pools().getExecutorService(), ctx.pools().getSystemExecutorService(), ctx.pools().getStripedExecutorService(), ctx.pools().customExecutors());
}
}, metricsLogFreq, metricsLogFreq);
}
ctx.performance().add("Disable assertions (remove '-ea' from JVM options)", !U.assertionsEnabled());
ctx.performance().logSuggestions(log, igniteInstanceName);
U.quietAndInfo(log, "To start Console Management & Monitoring run ignitevisorcmd.{sh|bat}");
if (!IgniteSystemProperties.getBoolean(IgniteSystemProperties.IGNITE_QUIET, true))
ackClassPathContent();
ackStart(rtBean);
if (!isDaemon())
ctx.discovery().ackTopology(ctx.discovery().localJoin().joinTopologyVersion().topologyVersion(), EventType.EVT_NODE_JOINED, localNode());
startTimer.finishGlobalStage("Await exchange");
}
use of org.apache.ignite.internal.maintenance.MaintenanceProcessor in project gridgain by gridgain.
the class MaintenanceRegistrySimpleTest method testMaintenanceTaskReplacement.
/**
* {@link MaintenanceTask} could be replaced with new parameters after registration, old task is deleted.
*
* @throws IgniteCheckedException If initialization failed.
*/
@Test
public void testMaintenanceTaskReplacement() throws IgniteCheckedException {
String name0 = "taskName0";
String descr = "description";
String oldParams = "oldParams";
String newParams = "newParams";
MaintenanceProcessor proc = new MaintenanceProcessor(initContext(true));
proc.start();
assertFalse(proc.isMaintenanceMode());
proc.registerMaintenanceTask(new MaintenanceTask(name0, descr, oldParams));
proc.registerMaintenanceTask(new MaintenanceTask(name0, descr, newParams));
proc.stop(false);
proc.start();
assertTrue(proc.isMaintenanceMode());
MaintenanceTask task = proc.activeMaintenanceTask(name0);
assertNotNull(task);
assertEquals(newParams, task.parameters());
}
use of org.apache.ignite.internal.maintenance.MaintenanceProcessor in project gridgain by gridgain.
the class MaintenanceRegistrySimpleTest method testDeleteMaintenanceTask.
/**
* Registered {@link MaintenanceTask} can be deleted before node entered Maintenance Mode (before node restart).
*
* @throws IgniteCheckedException If initialization failed.
*/
@Test
public void testDeleteMaintenanceTask() throws IgniteCheckedException {
String name = "name0";
MaintenanceTask task = new MaintenanceTask(name, "description", null);
MaintenanceProcessor proc = new MaintenanceProcessor(initContext(true));
proc.start();
proc.registerMaintenanceTask(task);
assertFalse(proc.isMaintenanceMode());
proc.unregisterMaintenanceTask(name);
proc.stop(false);
proc.start();
assertNull(proc.activeMaintenanceTask(name));
assertFalse(proc.isMaintenanceMode());
}
use of org.apache.ignite.internal.maintenance.MaintenanceProcessor in project gridgain by gridgain.
the class MaintenanceRegistrySimpleTest method testMaintenanceTaskUpdate.
/**
* {@link MaintenanceTask} could be updated using remapping function.
*
* @throws Exception If failed.
*/
@Test
public void testMaintenanceTaskUpdate() throws Exception {
String name0 = "taskName0";
String descr = "description";
String oldParams = "oldParams";
String newParams = "newParams";
MaintenanceProcessor proc = new MaintenanceProcessor(initContext(true));
proc.start();
assertFalse(proc.isMaintenanceMode());
MaintenanceTask oldTask = new MaintenanceTask(name0, descr, oldParams);
assertNull(proc.registerMaintenanceTask(oldTask));
MaintenanceTask newTask = new MaintenanceTask(name0, descr, newParams);
proc.registerMaintenanceTask(newTask, prevTask -> new MaintenanceTask(newTask.name(), newTask.description(), prevTask.parameters() + newTask.parameters()));
proc.stop(false);
proc.start();
assertTrue(proc.isMaintenanceMode());
MaintenanceTask task = proc.activeMaintenanceTask(name0);
assertNotNull(task);
assertEquals(oldParams + newParams, task.parameters());
}
Aggregations