use of org.apache.ignite.cluster.ClusterMetrics in project ignite by apache.
the class AdaptiveJobCountLoadProbe method getLoad.
/**
 * {@inheritDoc}
 *
 * <p>The load is the sum of active and waiting jobs taken from the node metrics. When averaging
 * is enabled and the averaged sum is positive, the averaged sum is returned; otherwise the sum of
 * the current counters is returned, with a negative sum reported as zero.
 * The {@code jobsSentSinceLastUpdate} argument is not consulted by this implementation.
 */
@Override
public double getLoad(ClusterNode node, int jobsSentSinceLastUpdate) {
    final ClusterMetrics m = node.metrics();

    if (useAvg) {
        final double avgJobs = m.getAverageActiveJobs() + m.getAverageWaitingJobs();

        // A non-positive average falls through to the current counters below.
        if (avgJobs > 0)
            return avgJobs;
    }

    final double curJobs = m.getCurrentActiveJobs() + m.getCurrentWaitingJobs();

    if (curJobs < 0)
        return 0;

    return curJobs;
}
use of org.apache.ignite.cluster.ClusterMetrics in project ignite by apache.
the class SqlSystemViewsSelfTest method testNodesViews.
/**
 * Test nodes system view.
 *
 * <p>Starts one server, one client and one daemon node and then checks, through SQL, the
 * {@code NODES}, {@code NODE_ATTRIBUTES} and {@code NODE_METRICS} system views: column types,
 * row counts, lookups by {@code NODE_ID}, joins and sub-queries. Finally jobs are broadcast to the
 * server and client nodes and every column of {@code NODE_METRICS} is compared with the
 * {@link ClusterMetrics} of the corresponding node.
 *
 * @throws Exception If failed.
 */
@Test
public void testNodesViews() throws Exception {
    Ignite igniteSrv = startGrid(getTestIgniteInstanceName(), getConfiguration().setMetricsUpdateFrequency(500L));
    Ignite igniteCli = startClientGrid(getTestIgniteInstanceName(1), getConfiguration().setMetricsUpdateFrequency(500L));

    startGrid(getTestIgniteInstanceName(2), getConfiguration().setMetricsUpdateFrequency(500L).setDaemon(true));

    UUID nodeId0 = igniteSrv.cluster().localNode().id();

    awaitPartitionMapExchange();

    List<List<?>> resAll = execSql("SELECT NODE_ID, CONSISTENT_ID, VERSION, IS_CLIENT, IS_DAEMON, " + "NODE_ORDER, ADDRESSES, HOSTNAMES FROM " + systemSchemaName() + ".NODES");

    assertColumnTypes(resAll.get(0), UUID.class, String.class, String.class, Boolean.class, Boolean.class, Long.class, String.class, String.class);

    assertEquals(3, resAll.size());

    List<List<?>> resSrv = execSql("SELECT NODE_ID, NODE_ORDER FROM " + systemSchemaName() + ".NODES WHERE IS_CLIENT = FALSE AND IS_DAEMON = FALSE");

    assertEquals(1, resSrv.size());
    assertEquals(nodeId0, resSrv.get(0).get(0));
    assertEquals(1L, resSrv.get(0).get(1));

    List<List<?>> resCli = execSql("SELECT NODE_ID, NODE_ORDER FROM " + systemSchemaName() + ".NODES WHERE IS_CLIENT = TRUE");

    assertEquals(1, resCli.size());
    assertEquals(nodeId(1), resCli.get(0).get(0));
    assertEquals(2L, resCli.get(0).get(1));

    List<List<?>> resDaemon = execSql("SELECT NODE_ID, NODE_ORDER FROM " + systemSchemaName() + ".NODES WHERE IS_DAEMON = TRUE");

    assertEquals(1, resDaemon.size());
    assertEquals(nodeId(2), resDaemon.get(0).get(0));
    assertEquals(3L, resDaemon.get(0).get(1));

    // Check index on ID column.
    assertEquals(0, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODES WHERE NODE_ID = '-'").size());
    assertEquals(1, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODES WHERE NODE_ID = ?", nodeId0).size());
    assertEquals(1, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODES WHERE NODE_ID = ?", nodeId(2)).size());

    // Check index on ID column with disjunction.
    assertEquals(3, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODES WHERE NODE_ID = ? " + "OR node_order=1 OR node_order=2 OR node_order=3", nodeId0).size());

    // Check quick-count.
    assertEquals(3L, execSql("SELECT COUNT(*) FROM " + systemSchemaName() + ".NODES").get(0).get(0));

    // Check joins
    assertEquals(nodeId0, execSql("SELECT N1.NODE_ID FROM " + systemSchemaName() + ".NODES N1 JOIN " + systemSchemaName() + ".NODES N2 ON N1.NODE_ORDER = N2.NODE_ORDER JOIN " + systemSchemaName() + ".NODES N3 ON N2.NODE_ID = N3.NODE_ID WHERE N3.NODE_ORDER = 1").get(0).get(0));

    // Check sub-query
    assertEquals(nodeId0, execSql("SELECT N1.NODE_ID FROM " + systemSchemaName() + ".NODES N1 " + "WHERE NOT EXISTS (SELECT 1 FROM " + systemSchemaName() + ".NODES N2 WHERE N2.NODE_ID = N1.NODE_ID AND N2.NODE_ORDER <> 1)").get(0).get(0));

    // Check node attributes view
    String cliAttrName = IgniteNodeAttributes.ATTR_CLIENT_MODE;

    assertColumnTypes(execSql("SELECT NODE_ID, NAME, VALUE FROM " + systemSchemaName() + ".NODE_ATTRIBUTES").get(0), UUID.class, String.class, String.class);

    assertEquals(1, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODE_ATTRIBUTES WHERE NAME = ? AND VALUE = 'true'", cliAttrName).size());
    assertEquals(3, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODE_ATTRIBUTES WHERE NAME = ?", cliAttrName).size());
    assertEquals(1, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODE_ATTRIBUTES WHERE NODE_ID = ? AND NAME = ? AND VALUE = 'true'", nodeId(1), cliAttrName).size());
    assertEquals(0, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODE_ATTRIBUTES WHERE NODE_ID = '-' AND NAME = ?", cliAttrName).size());
    assertEquals(0, execSql("SELECT NODE_ID FROM " + systemSchemaName() + ".NODE_ATTRIBUTES WHERE NODE_ID = ? AND NAME = '-'", nodeId(1)).size());

    // Check node metrics view.
    String sqlAllMetrics = "SELECT NODE_ID, LAST_UPDATE_TIME, "
        + "MAX_ACTIVE_JOBS, CUR_ACTIVE_JOBS, AVG_ACTIVE_JOBS, "
        + "MAX_WAITING_JOBS, CUR_WAITING_JOBS, AVG_WAITING_JOBS, "
        + "MAX_REJECTED_JOBS, CUR_REJECTED_JOBS, AVG_REJECTED_JOBS, TOTAL_REJECTED_JOBS, "
        + "MAX_CANCELED_JOBS, CUR_CANCELED_JOBS, AVG_CANCELED_JOBS, TOTAL_CANCELED_JOBS, "
        + "MAX_JOBS_WAIT_TIME, CUR_JOBS_WAIT_TIME, AVG_JOBS_WAIT_TIME, "
        + "MAX_JOBS_EXECUTE_TIME, CUR_JOBS_EXECUTE_TIME, AVG_JOBS_EXECUTE_TIME, TOTAL_JOBS_EXECUTE_TIME, "
        + "TOTAL_EXECUTED_JOBS, TOTAL_EXECUTED_TASKS, "
        + "TOTAL_BUSY_TIME, TOTAL_IDLE_TIME, CUR_IDLE_TIME, BUSY_TIME_PERCENTAGE, IDLE_TIME_PERCENTAGE, "
        + "TOTAL_CPU, CUR_CPU_LOAD, AVG_CPU_LOAD, CUR_GC_CPU_LOAD, "
        + "HEAP_MEMORY_INIT, HEAP_MEMORY_USED, HEAP_MEMORY_COMMITED, HEAP_MEMORY_MAX, HEAP_MEMORY_TOTAL, "
        + "NONHEAP_MEMORY_INIT, NONHEAP_MEMORY_USED, NONHEAP_MEMORY_COMMITED, NONHEAP_MEMORY_MAX, NONHEAP_MEMORY_TOTAL, "
        + "UPTIME, JVM_START_TIME, NODE_START_TIME, LAST_DATA_VERSION, "
        + "CUR_THREAD_COUNT, MAX_THREAD_COUNT, TOTAL_THREAD_COUNT, CUR_DAEMON_THREAD_COUNT, "
        + "SENT_MESSAGES_COUNT, SENT_BYTES_COUNT, RECEIVED_MESSAGES_COUNT, RECEIVED_BYTES_COUNT, "
        + "OUTBOUND_MESSAGES_QUEUE FROM " + systemSchemaName() + ".NODE_METRICS";

    List<List<?>> resMetrics = execSql(sqlAllMetrics);

    // Expected types follow the column order of the query above, one comment per column group.
    assertColumnTypes(resMetrics.get(0),
        // Node ID and last update time.
        UUID.class, Timestamp.class,
        // Active jobs.
        Integer.class, Integer.class, Float.class,
        // Waiting jobs.
        Integer.class, Integer.class, Float.class,
        // Rejected jobs.
        Integer.class, Integer.class, Float.class, Integer.class,
        // Canceled jobs.
        Integer.class, Integer.class, Float.class, Integer.class,
        // Jobs wait time.
        Long.class, Long.class, Long.class,
        // Jobs execute time.
        Long.class, Long.class, Long.class, Long.class,
        // Executed jobs/task.
        Integer.class, Integer.class,
        // Busy/idle time.
        Long.class, Long.class, Long.class, Float.class, Float.class,
        // CPU.
        Integer.class, Double.class, Double.class, Double.class,
        // Heap memory.
        Long.class, Long.class, Long.class, Long.class, Long.class,
        // Nonheap memory.
        Long.class, Long.class, Long.class, Long.class, Long.class,
        // Uptime.
        Long.class, Timestamp.class, Timestamp.class, Long.class,
        // Threads.
        Integer.class, Integer.class, Long.class, Integer.class,
        // Sent/received messages.
        Integer.class, Long.class, Integer.class, Long.class,
        // Outbound message queue.
        Integer.class);

    // The metrics view must contain one row per started node (server, client and daemon).
    assertEquals(3, resMetrics.size());

    // Check join with nodes.
    assertEquals(3, execSql("SELECT NM.LAST_UPDATE_TIME FROM " + systemSchemaName() + ".NODES N " + "JOIN " + systemSchemaName() + ".NODE_METRICS NM ON N.NODE_ID = NM.NODE_ID").size());

    // Check index on NODE_ID column.
    assertEquals(1, execSql("SELECT LAST_UPDATE_TIME FROM " + systemSchemaName() + ".NODE_METRICS WHERE NODE_ID = ?", nodeId(1)).size());

    // Check malformed value for indexed column.
    assertEquals(0, execSql("SELECT LAST_UPDATE_TIME FROM " + systemSchemaName() + ".NODE_METRICS WHERE NODE_ID = ?", "-").size());

    // Check quick-count.
    assertEquals(3L, execSql("SELECT COUNT(*) FROM " + systemSchemaName() + ".NODE_METRICS").get(0).get(0));

    // Broadcast jobs to server and client nodes to get non zero metric values.
    for (int i = 0; i < 100; i++) {
        IgniteFuture<Void> fut = igniteSrv.compute(igniteSrv.cluster().forNodeId(nodeId0, nodeId(1))).broadcastAsync(new IgniteRunnable() {
            @Override
            public void run() {
                Random rnd = new Random();

                try {
                    doSleep(rnd.nextInt(100));
                } catch (Throwable ignore) {
                    // No-op.
                }
            }
        });

        // Cancel every tenth broadcast.
        if (i % 10 == 0)
            fut.cancel();
    }

    // Wait for three metrics update periods before comparing values.
    doSleep(igniteSrv.configuration().getMetricsUpdateFrequency() * 3L);

    for (Ignite grid : G.allGrids()) {
        UUID nodeId = grid.cluster().localNode().id();

        // Metrics for node must be collected from another node to avoid race and get consistent metrics snapshot.
        Ignite ignite = F.eq(nodeId, nodeId0) ? igniteCli : igniteSrv;

        for (int i = 0; i < METRICS_CHECK_ATTEMPTS; i++) {
            ClusterMetrics metrics = ignite.cluster().node(nodeId).metrics();

            assertTrue(metrics instanceof ClusterMetricsSnapshot);

            resMetrics = execSql(ignite, sqlAllMetrics + " WHERE NODE_ID = ?", nodeId);

            log.info("Check metrics for node " + grid.name() + ", attempt " + (i + 1));

            // Compare values only when both reads refer to the same metrics update; otherwise retry.
            if (metrics.getLastUpdateTime() == ((Timestamp) resMetrics.get(0).get(1)).getTime()) {
                assertEquals(metrics.getMaximumActiveJobs(), resMetrics.get(0).get(2));
                assertEquals(metrics.getCurrentActiveJobs(), resMetrics.get(0).get(3));
                assertEquals(metrics.getAverageActiveJobs(), resMetrics.get(0).get(4));
                assertEquals(metrics.getMaximumWaitingJobs(), resMetrics.get(0).get(5));
                assertEquals(metrics.getCurrentWaitingJobs(), resMetrics.get(0).get(6));
                assertEquals(metrics.getAverageWaitingJobs(), resMetrics.get(0).get(7));
                assertEquals(metrics.getMaximumRejectedJobs(), resMetrics.get(0).get(8));
                assertEquals(metrics.getCurrentRejectedJobs(), resMetrics.get(0).get(9));
                assertEquals(metrics.getAverageRejectedJobs(), resMetrics.get(0).get(10));
                assertEquals(metrics.getTotalRejectedJobs(), resMetrics.get(0).get(11));
                assertEquals(metrics.getMaximumCancelledJobs(), resMetrics.get(0).get(12));
                assertEquals(metrics.getCurrentCancelledJobs(), resMetrics.get(0).get(13));
                assertEquals(metrics.getAverageCancelledJobs(), resMetrics.get(0).get(14));
                assertEquals(metrics.getTotalCancelledJobs(), resMetrics.get(0).get(15));
                assertEquals(metrics.getMaximumJobWaitTime(), resMetrics.get(0).get(16));
                assertEquals(metrics.getCurrentJobWaitTime(), resMetrics.get(0).get(17));
                assertEquals((long) metrics.getAverageJobWaitTime(), resMetrics.get(0).get(18));
                assertEquals(metrics.getMaximumJobExecuteTime(), resMetrics.get(0).get(19));
                assertEquals(metrics.getCurrentJobExecuteTime(), resMetrics.get(0).get(20));
                assertEquals((long) metrics.getAverageJobExecuteTime(), resMetrics.get(0).get(21));
                assertEquals(metrics.getTotalJobsExecutionTime(), resMetrics.get(0).get(22));
                assertEquals(metrics.getTotalExecutedJobs(), resMetrics.get(0).get(23));
                assertEquals(metrics.getTotalExecutedTasks(), resMetrics.get(0).get(24));
                assertEquals(metrics.getTotalBusyTime(), resMetrics.get(0).get(25));
                assertEquals(metrics.getTotalIdleTime(), resMetrics.get(0).get(26));
                assertEquals(metrics.getCurrentIdleTime(), resMetrics.get(0).get(27));
                assertEquals(metrics.getBusyTimePercentage(), resMetrics.get(0).get(28));
                assertEquals(metrics.getIdleTimePercentage(), resMetrics.get(0).get(29));
                assertEquals(metrics.getTotalCpus(), resMetrics.get(0).get(30));
                assertEquals(metrics.getCurrentCpuLoad(), resMetrics.get(0).get(31));
                assertEquals(metrics.getAverageCpuLoad(), resMetrics.get(0).get(32));
                assertEquals(metrics.getCurrentGcCpuLoad(), resMetrics.get(0).get(33));
                assertEquals(metrics.getHeapMemoryInitialized(), resMetrics.get(0).get(34));
                assertEquals(metrics.getHeapMemoryUsed(), resMetrics.get(0).get(35));
                assertEquals(metrics.getHeapMemoryCommitted(), resMetrics.get(0).get(36));
                assertEquals(metrics.getHeapMemoryMaximum(), resMetrics.get(0).get(37));
                assertEquals(metrics.getHeapMemoryTotal(), resMetrics.get(0).get(38));
                assertEquals(metrics.getNonHeapMemoryInitialized(), resMetrics.get(0).get(39));
                assertEquals(metrics.getNonHeapMemoryUsed(), resMetrics.get(0).get(40));
                assertEquals(metrics.getNonHeapMemoryCommitted(), resMetrics.get(0).get(41));
                assertEquals(metrics.getNonHeapMemoryMaximum(), resMetrics.get(0).get(42));
                assertEquals(metrics.getNonHeapMemoryTotal(), resMetrics.get(0).get(43));
                assertEquals(metrics.getUpTime(), resMetrics.get(0).get(44));
                assertEquals(metrics.getStartTime(), ((Timestamp) resMetrics.get(0).get(45)).getTime());
                assertEquals(metrics.getNodeStartTime(), ((Timestamp) resMetrics.get(0).get(46)).getTime());
                assertEquals(metrics.getLastDataVersion(), resMetrics.get(0).get(47));
                assertEquals(metrics.getCurrentThreadCount(), resMetrics.get(0).get(48));
                assertEquals(metrics.getMaximumThreadCount(), resMetrics.get(0).get(49));
                assertEquals(metrics.getTotalStartedThreadCount(), resMetrics.get(0).get(50));
                assertEquals(metrics.getCurrentDaemonThreadCount(), resMetrics.get(0).get(51));
                assertEquals(metrics.getSentMessagesCount(), resMetrics.get(0).get(52));
                assertEquals(metrics.getSentBytesCount(), resMetrics.get(0).get(53));
                assertEquals(metrics.getReceivedMessagesCount(), resMetrics.get(0).get(54));
                assertEquals(metrics.getReceivedBytesCount(), resMetrics.get(0).get(55));
                assertEquals(metrics.getOutboundMessagesQueueSize(), resMetrics.get(0).get(56));

                break;
            } else {
                log.info("Metrics was updated in background, will retry check");

                if (i == METRICS_CHECK_ATTEMPTS - 1)
                    fail("Failed to check metrics, attempts limit reached (" + METRICS_CHECK_ATTEMPTS + ')');
            }
        }
    }
}
use of org.apache.ignite.cluster.ClusterMetrics in project ignite by apache.
the class ClusterMetricsSnapshotSerializeSelfTest method testMetricsSize.
/**
 * Checks that serializing metrics at offset zero ends exactly at
 * {@code ClusterMetricsSnapshot.METRICS_SIZE} and that the same buffer deserializes
 * into a non-null metrics object.
 */
@Test
public void testMetricsSize() {
    final int expSize = ClusterMetricsSnapshot.METRICS_SIZE;

    byte[] buf = new byte[expSize];

    // Serialization: the returned offset must equal the full metrics size.
    int endOff = ClusterMetricsSnapshot.serialize(buf, 0, createMetrics());

    assert endOff == expSize;

    // Deserialization: reading back from the same buffer must yield an object.
    ClusterMetrics restored = ClusterMetricsSnapshot.deserialize(buf, 0);

    assert restored != null;
}
use of org.apache.ignite.cluster.ClusterMetrics in project ignite by apache.
the class IgniteKernal method start.
/**
* Starts the kernal. The method moves the gateway into the STARTING state, logs ("acks") the
* environment and configuration, creates the kernal context, starts processors and managers in a
* fixed order, starts the discovery manager last, invokes {@code onKernalStart} callbacks on the
* components, registers MBeans and finally schedules the periodic background tasks (starvation
* check, metrics logging and long-running operations dump).
* <p>
* If anything fails while components are being started, {@code errHnd} is invoked, the node is
* stopped via {@code stop(true)} and the failure is rethrown.
*
* @param cfg Configuration to use.
* @param utilityCachePool Utility cache pool.
* @param execSvc Executor service.
* @param svcExecSvc Services executor service.
* @param sysExecSvc System executor service.
* @param stripedExecSvc Striped executor.
* @param p2pExecSvc P2P executor service.
* @param mgmtExecSvc Management executor service.
* @param igfsExecSvc IGFS executor service.
* @param dataStreamExecSvc data stream executor service.
* @param restExecSvc REST executor service.
* @param affExecSvc Affinity executor service.
* @param idxExecSvc Indexing executor service.
* @param callbackExecSvc Callback executor service.
* @param qryExecSvc Query executor service.
* @param schemaExecSvc Schema executor service.
* @param customExecSvcs Custom named executors.
* @param errHnd Error handler to use for notification about startup problems.
* @throws IgniteCheckedException Thrown in case of any errors.
*/
@SuppressWarnings({ "CatchGenericClass", "unchecked" })
public void start(final IgniteConfiguration cfg, ExecutorService utilityCachePool, final ExecutorService execSvc, final ExecutorService svcExecSvc, final ExecutorService sysExecSvc, final StripedExecutor stripedExecSvc, ExecutorService p2pExecSvc, ExecutorService mgmtExecSvc, ExecutorService igfsExecSvc, StripedExecutor dataStreamExecSvc, ExecutorService restExecSvc, ExecutorService affExecSvc, @Nullable ExecutorService idxExecSvc, IgniteStripedThreadPoolExecutor callbackExecSvc, ExecutorService qryExecSvc, ExecutorService schemaExecSvc, @Nullable final Map<String, ? extends ExecutorService> customExecSvcs, GridAbsClosure errHnd) throws IgniteCheckedException {
// Install a gateway only if none is set yet; an existing gateway is kept.
gw.compareAndSet(null, new GridKernalGatewayImpl(cfg.getIgniteInstanceName()));
GridKernalGateway gw = this.gw.get();
// State check and transition to STARTING are done under the gateway write lock.
gw.writeLock();
try {
switch(gw.getState()) {
case STARTED:
{
U.warn(log, "Grid has already been started (ignored).");
return;
}
case STARTING:
{
U.warn(log, "Grid is already in process of being started (ignored).");
return;
}
case STOPPING:
{
throw new IgniteCheckedException("Grid is in process of being stopped");
}
case STOPPED:
{
// Only a stopped grid proceeds with the start routine.
break;
}
}
gw.setState(STARTING);
} finally {
gw.writeUnlock();
}
assert cfg != null;
// Make sure we got proper configuration.
validateCommon(cfg);
igniteInstanceName = cfg.getIgniteInstanceName();
this.cfg = cfg;
// Logger category is the class name, suffixed with '%' and the instance name when one is set.
log = (GridLoggerProxy) cfg.getGridLogger().getLogger(getClass().getName() + (igniteInstanceName != null ? '%' + igniteInstanceName : ""));
RuntimeMXBean rtBean = ManagementFactory.getRuntimeMXBean();
// Ack various information.
ackAsciiLogo();
ackConfigUrl();
ackConfiguration(cfg);
ackDaemon();
ackOsInfo();
ackLanguageRuntime();
ackRemoteManagement();
ackLogger();
ackVmArguments(rtBean);
ackClassPaths(rtBean);
ackSystemProperties();
ackEnvironmentVariables();
ackMemoryConfiguration();
ackCacheConfiguration();
ackP2pConfiguration();
ackRebalanceConfiguration();
// Run background network diagnostics.
GridDiagnostic.runBackgroundCheck(igniteInstanceName, execSvc, log);
// Ack 3-rd party licenses location.
if (log.isInfoEnabled() && cfg.getIgniteHome() != null)
log.info("3-rd party licenses can be found at: " + cfg.getIgniteHome() + File.separatorChar + "libs" + File.separatorChar + "licenses");
// Check that user attributes are not conflicting
// with internally reserved names.
for (String name : cfg.getUserAttributes().keySet()) if (name.startsWith(ATTR_PREFIX))
throw new IgniteCheckedException("User attribute has illegal name: '" + name + "'. Note that all names " + "starting with '" + ATTR_PREFIX + "' are reserved for internal use.");
// Ack local node user attributes.
logNodeUserAttributes();
// Ack configuration.
ackSpis();
List<PluginProvider> plugins = U.allPluginProviders();
// Spin out SPIs & managers.
try {
ctx = new GridKernalContextImpl(log, this, cfg, gw, utilityCachePool, execSvc, svcExecSvc, sysExecSvc, stripedExecSvc, p2pExecSvc, mgmtExecSvc, igfsExecSvc, dataStreamExecSvc, restExecSvc, affExecSvc, idxExecSvc, callbackExecSvc, qryExecSvc, schemaExecSvc, customExecSvcs, plugins, classNameFilter());
cfg.getMarshaller().setContext(ctx.marshallerContext());
GridInternalSubscriptionProcessor subscriptionProc = new GridInternalSubscriptionProcessor(ctx);
startProcessor(subscriptionProc);
ClusterProcessor clusterProc = new ClusterProcessor(ctx);
startProcessor(clusterProc);
U.onGridStart();
// Start and configure resource processor first as it contains resources used
// by all other managers and processors.
GridResourceProcessor rsrcProc = new GridResourceProcessor(ctx);
rsrcProc.setSpringContext(rsrcCtx);
scheduler = new IgniteSchedulerImpl(ctx);
startProcessor(rsrcProc);
// Inject resources into lifecycle beans.
if (!cfg.isDaemon() && cfg.getLifecycleBeans() != null) {
for (LifecycleBean bean : cfg.getLifecycleBeans()) {
if (bean != null)
rsrcProc.inject(bean);
}
}
// Lifecycle notification.
notifyLifecycleBeans(BEFORE_NODE_START);
// Starts lifecycle aware components.
U.startLifecycleAware(lifecycleAwares(cfg));
addHelper(IGFS_HELPER.create(F.isEmpty(cfg.getFileSystemConfiguration())));
addHelper(HADOOP_HELPER.createIfInClassPath(ctx, false));
startProcessor(new IgnitePluginProcessor(ctx, cfg, plugins));
startProcessor(new PoolProcessor(ctx));
// Closure processor should be started before all others
// (except for resource processor), as many components can depend on it.
startProcessor(new GridClosureProcessor(ctx));
// Start some other processors (order & place is important).
startProcessor(new GridPortProcessor(ctx));
startProcessor(new GridJobMetricsProcessor(ctx));
// Timeout processor needs to be started before managers,
// as managers may depend on it.
startProcessor(new GridTimeoutProcessor(ctx));
// Start security processors.
startProcessor(createComponent(GridSecurityProcessor.class, ctx));
// Start SPI managers.
// NOTE: that order matters as there are dependencies between managers.
startManager(new GridIoManager(ctx));
startManager(new GridCheckpointManager(ctx));
startManager(new GridEventStorageManager(ctx));
startManager(new GridDeploymentManager(ctx));
startManager(new GridLoadBalancerManager(ctx));
startManager(new GridFailoverManager(ctx));
startManager(new GridCollisionManager(ctx));
startManager(new GridIndexingManager(ctx));
ackSecurity();
// Assign discovery manager to context before other processors start so they
// are able to register custom event listener.
final GridManager discoMgr = new GridDiscoveryManager(ctx);
ctx.add(discoMgr, false);
// Start processors before discovery manager, so they will
// be able to start receiving messages once discovery completes.
try {
startProcessor(new PdsConsistentIdProcessor(ctx));
startProcessor(createComponent(DiscoveryNodeValidationProcessor.class, ctx));
startProcessor(new GridAffinityProcessor(ctx));
startProcessor(createComponent(GridSegmentationProcessor.class, ctx));
startProcessor(createComponent(IgniteCacheObjectProcessor.class, ctx));
startProcessor(createComponent(IGridClusterStateProcessor.class, ctx));
startProcessor(new IgniteAuthenticationProcessor(ctx));
startProcessor(new GridCacheProcessor(ctx));
startProcessor(new GridQueryProcessor(ctx));
startProcessor(new ClientListenerProcessor(ctx));
startProcessor(new GridServiceProcessor(ctx));
startProcessor(new GridTaskSessionProcessor(ctx));
startProcessor(new GridJobProcessor(ctx));
startProcessor(new GridTaskProcessor(ctx));
startProcessor((GridProcessor) SCHEDULE.createOptional(ctx));
startProcessor(new GridRestProcessor(ctx));
startProcessor(new DataStreamProcessor(ctx));
startProcessor((GridProcessor) IGFS.create(ctx, F.isEmpty(cfg.getFileSystemConfiguration())));
startProcessor(new GridContinuousProcessor(ctx));
startProcessor(createHadoopComponent());
startProcessor(new DataStructuresProcessor(ctx));
startProcessor(createComponent(PlatformProcessor.class, ctx));
startProcessor(new GridMarshallerMappingProcessor(ctx));
// Start plugins.
for (PluginProvider provider : ctx.plugins().allProviders()) {
ctx.add(new GridPluginComponent(provider));
provider.start(ctx.plugins().pluginContextForProvider(provider));
}
// Start platform plugins.
if (ctx.config().getPlatformConfiguration() != null)
startProcessor(new PlatformPluginProcessor(ctx));
ctx.cluster().initDiagnosticListeners();
fillNodeAttributes(clusterProc.updateNotifierEnabled());
} catch (Throwable e) {
U.error(log, "Exception during start processors, node will be stopped and close connections", e);
// Stop discovery spi to close tcp socket.
ctx.discovery().stop(true);
throw e;
}
// The STARTED state is set and discovery is started under the gateway write lock.
gw.writeLock();
try {
gw.setState(STARTED);
// Start discovery manager last to make sure that grid is fully initialized.
startManager(discoMgr);
} finally {
gw.writeUnlock();
}
// Check whether physical RAM is not exceeded.
checkPhysicalRam();
// Suggest configuration optimizations.
suggestOptimizations(cfg);
// Suggest JVM optimizations.
ctx.performance().addAll(JvmConfigurationSuggestions.getSuggestions());
// Suggest Operation System optimizations.
ctx.performance().addAll(OsConfigurationSuggestions.getSuggestions());
DiscoveryLocalJoinData joinData = ctx.discovery().localJoin();
IgniteInternalFuture<Boolean> transitionWaitFut = joinData.transitionWaitFuture();
boolean active;
// When a transition future is present, its result is used as the active flag;
// otherwise the flag comes from the local join data.
if (transitionWaitFut != null) {
if (log.isInfoEnabled()) {
log.info("Join cluster while cluster state transition is in progress, " + "waiting when transition finish.");
}
active = transitionWaitFut.get();
} else
active = joinData.active();
// Notify discovery manager the first to make sure that topology is discovered.
ctx.discovery().onKernalStart(active);
// Notify IO manager the second so further components can send and receive messages.
ctx.io().onKernalStart(active);
boolean recon = false;
// Callbacks.
for (GridComponent comp : ctx) {
// Skip discovery manager.
if (comp instanceof GridDiscoveryManager)
continue;
// Skip IO manager.
if (comp instanceof GridIoManager)
continue;
// Skip plugin components; plugin providers are notified after this loop.
if (comp instanceof GridPluginComponent)
continue;
if (!skipDaemon(comp)) {
try {
comp.onKernalStart(active);
} catch (IgniteNeedReconnectException e) {
assert ctx.discovery().reconnectSupported();
if (log.isDebugEnabled())
log.debug("Failed to start node components on node start, will wait for reconnect: " + e);
// Remember the failure; the first reconnect is awaited once all callbacks have run.
recon = true;
}
}
}
// Notify plugin providers that Ignite has started.
for (PluginProvider provider : ctx.plugins().allProviders()) provider.onIgniteStart();
if (recon)
reconnectState.waitFirstReconnect();
// Register MBeans.
mBeansMgr.registerAllMBeans(utilityCachePool, execSvc, svcExecSvc, sysExecSvc, stripedExecSvc, p2pExecSvc, mgmtExecSvc, igfsExecSvc, dataStreamExecSvc, restExecSvc, affExecSvc, idxExecSvc, callbackExecSvc, qryExecSvc, schemaExecSvc, customExecSvcs);
// Lifecycle bean notifications.
notifyLifecycleBeans(AFTER_NODE_START);
} catch (Throwable e) {
// Log according to the failure cause, notify the error handler, stop the node and rethrow.
IgniteSpiVersionCheckException verCheckErr = X.cause(e, IgniteSpiVersionCheckException.class);
if (verCheckErr != null)
U.error(log, verCheckErr.getMessage());
else if (X.hasCause(e, InterruptedException.class, IgniteInterruptedCheckedException.class))
U.warn(log, "Grid startup routine has been interrupted (will rollback).");
else
U.error(log, "Got exception while starting (will rollback startup routine).", e);
errHnd.apply();
stop(true);
// Errors and IgniteCheckedException are rethrown as is; anything else is wrapped.
if (e instanceof Error)
throw e;
else if (e instanceof IgniteCheckedException)
throw (IgniteCheckedException) e;
else
throw new IgniteCheckedException(e);
}
// Mark start timestamp.
startTime = U.currentTimeMillis();
String intervalStr = IgniteSystemProperties.getString(IGNITE_STARVATION_CHECK_INTERVAL);
// Start starvation checker if enabled.
// It is disabled on daemon nodes and when the interval property equals "0".
boolean starveCheck = !isDaemon() && !"0".equals(intervalStr);
if (starveCheck) {
// An empty property value falls back to the default check frequency.
final long interval = F.isEmpty(intervalStr) ? PERIODIC_STARVATION_CHECK_FREQ : Long.parseLong(intervalStr);
starveTask = ctx.timeout().schedule(new Runnable() {
/**
* Last completed task count of the public pool.
*/
private long lastCompletedCntPub;
/**
* Last completed task count of the system pool.
*/
private long lastCompletedCntSys;
@Override
public void run() {
if (execSvc instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) execSvc;
lastCompletedCntPub = checkPoolStarvation(exec, lastCompletedCntPub, "public");
}
if (sysExecSvc instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) sysExecSvc;
lastCompletedCntSys = checkPoolStarvation(exec, lastCompletedCntSys, "system");
}
if (stripedExecSvc != null)
stripedExecSvc.checkStarvation();
}
/**
* Logs a warning when the given pool looks starved.
*
* @param exec Thread pool executor to check.
* @param lastCompletedCnt Last completed tasks count.
* @param pool Pool name for message.
* @return Current completed tasks count.
*/
private long checkPoolStarvation(ThreadPoolExecutor exec, long lastCompletedCnt, String pool) {
long completedCnt = exec.getCompletedTaskCount();
// If all threads are active and no task has completed since last time and there is
// at least one waiting request, then it is possible starvation.
if (exec.getPoolSize() == exec.getActiveCount() && completedCnt == lastCompletedCnt && !exec.getQueue().isEmpty())
LT.warn(log, "Possible thread pool starvation detected (no task completed in last " + interval + "ms, is " + pool + " thread pool size large enough?)");
return completedCnt;
}
}, interval, interval);
}
long metricsLogFreq = cfg.getMetricsLogFrequency();
// Periodic metrics logging is scheduled only for a positive frequency.
if (metricsLogFreq > 0) {
metricsLogTask = ctx.timeout().schedule(new Runnable() {
private final DecimalFormat dblFmt = new DecimalFormat("#.##");
@Override
public void run() {
if (log.isInfoEnabled()) {
try {
ClusterMetrics m = cluster().localNode().metrics();
// CPU and GC loads are multiplied by 100 to be printed as percentages.
double cpuLoadPct = m.getCurrentCpuLoad() * 100;
double avgCpuLoadPct = m.getAverageCpuLoad() * 100;
double gcPct = m.getCurrentGcCpuLoad() * 100;
// Heap params
long heapUsed = m.getHeapMemoryUsed();
long heapMax = m.getHeapMemoryMaximum();
long heapUsedInMBytes = heapUsed / 1024 / 1024;
long heapCommInMBytes = m.getHeapMemoryCommitted() / 1024 / 1024;
// Free percentage is reported as -1 when the maximum is not positive.
double freeHeapPct = heapMax > 0 ? ((double) ((heapMax - heapUsed) * 100)) / heapMax : -1;
// Non heap params
long nonHeapUsed = m.getNonHeapMemoryUsed();
long nonHeapMax = m.getNonHeapMemoryMaximum();
long nonHeapUsedInMBytes = nonHeapUsed / 1024 / 1024;
long nonHeapCommInMBytes = m.getNonHeapMemoryCommitted() / 1024 / 1024;
double freeNonHeapPct = nonHeapMax > 0 ? ((double) ((nonHeapMax - nonHeapUsed) * 100)) / nonHeapMax : -1;
// Topology figures stay at zero if cluster metrics cannot be obtained.
int hosts = 0;
int nodes = 0;
int cpus = 0;
try {
ClusterMetrics metrics = cluster().metrics();
Collection<ClusterNode> nodes0 = cluster().nodes();
hosts = U.neighborhood(nodes0).size();
nodes = metrics.getTotalNodes();
cpus = metrics.getTotalCpus();
} catch (IgniteException ignore) {
// No-op.
}
// Sum of loaded pages over all data regions.
int loadedPages = 0;
Collection<DataRegion> policies = ctx.cache().context().database().dataRegions();
if (!F.isEmpty(policies)) {
for (DataRegion memPlc : policies) loadedPages += memPlc.pageMemory().loadedPages();
}
String id = U.id8(localNode().id());
String msg = NL + "Metrics for local node (to disable set 'metricsLogFrequency' to 0)" + NL + " ^-- Node [id=" + id + (name() != null ? ", name=" + name() : "") + ", uptime=" + getUpTimeFormatted() + "]" + NL + " ^-- H/N/C [hosts=" + hosts + ", nodes=" + nodes + ", CPUs=" + cpus + "]" + NL + " ^-- CPU [cur=" + dblFmt.format(cpuLoadPct) + "%, avg=" + dblFmt.format(avgCpuLoadPct) + "%, GC=" + dblFmt.format(gcPct) + "%]" + NL + " ^-- PageMemory [pages=" + loadedPages + "]" + NL + " ^-- Heap [used=" + dblFmt.format(heapUsedInMBytes) + "MB, free=" + dblFmt.format(freeHeapPct) + "%, comm=" + dblFmt.format(heapCommInMBytes) + "MB]" + NL + " ^-- Non heap [used=" + dblFmt.format(nonHeapUsedInMBytes) + "MB, free=" + dblFmt.format(freeNonHeapPct) + "%, comm=" + dblFmt.format(nonHeapCommInMBytes) + "MB]" + NL + " ^-- Outbound messages queue [size=" + m.getOutboundMessagesQueueSize() + "]" + NL + " ^-- " + createExecutorDescription("Public thread pool", execSvc) + NL + " ^-- " + createExecutorDescription("System thread pool", sysExecSvc);
// Append one line per custom executor, if any were configured.
if (customExecSvcs != null) {
StringBuilder customSvcsMsg = new StringBuilder();
for (Map.Entry<String, ? extends ExecutorService> entry : customExecSvcs.entrySet()) {
customSvcsMsg.append(NL).append(" ^-- ").append(createExecutorDescription(entry.getKey(), entry.getValue()));
}
msg = msg + customSvcsMsg;
}
if (log.isInfoEnabled())
log.info(msg);
ctx.cache().context().database().dumpStatistics(log);
} catch (IgniteClientDisconnectedException ignore) {
// No-op.
}
}
}
}, metricsLogFreq, metricsLogFreq);
}
// Timeout for the long-running operations dump; the default is 60_000 and a non-positive value disables the task.
final long longOpDumpTimeout = IgniteSystemProperties.getLong(IgniteSystemProperties.IGNITE_LONG_OPERATIONS_DUMP_TIMEOUT, 60_000);
if (longOpDumpTimeout > 0) {
longOpDumpTask = ctx.timeout().schedule(new Runnable() {
@Override
public void run() {
// Re-read the context field on every run and skip the dump when it is null.
GridKernalContext ctx = IgniteKernal.this.ctx;
if (ctx != null)
ctx.cache().context().exchange().dumpLongRunningOperations(longOpDumpTimeout);
}
}, longOpDumpTimeout, longOpDumpTimeout);
}
ctx.performance().add("Disable assertions (remove '-ea' from JVM options)", !U.assertionsEnabled());
ctx.performance().logSuggestions(log, igniteInstanceName);
U.quietAndInfo(log, "To start Console Management & Monitoring run ignitevisorcmd.{sh|bat}");
ackStart(rtBean);
// Topology is acked for non-daemon nodes only.
if (!isDaemon())
ctx.discovery().ackTopology(localNode().order());
}
use of org.apache.ignite.cluster.ClusterMetrics in project ignite by apache.
the class TcpDiscoveryNode method writeExternal.
/**
 * {@inheritDoc}
 *
 * <p>Fields are written in a fixed order: id, attributes, addresses, host names, discovery port,
 * serialized metrics, a legacy zero counter, order, internal order, version and client router
 * node id.
 */
@Override
public void writeExternal(ObjectOutput out) throws IOException {
    U.writeUuid(out, id);
    U.writeMap(out, attrs);
    U.writeCollection(out, addrs);
    U.writeCollection(out, hostNames);

    out.writeInt(discPort);

    // Cluster metrics: the field is read once into a local; null is passed on when it is unset.
    final ClusterMetrics snapshot = this.metrics;
    final byte[] metricsBytes = snapshot == null ? null : ClusterMetricsSnapshot.serialize(snapshot);

    U.writeByteArray(out, metricsBytes);

    // Legacy: Number of cache metrics
    out.writeInt(0);

    out.writeLong(order);
    out.writeLong(intOrder);
    out.writeObject(ver);

    U.writeUuid(out, clientRouterNodeId);
}
Aggregations