Use of com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats in project apex-core by Apache.
The class StreamingContainer, method heartbeatLoop.
/**
 * Periodically reports this container's state over the umbilical until {@code exitHeartbeatLoop}
 * is set or the thread is interrupted while waiting on {@code heartbeatTrigger}.
 *
 * <p>Each iteration waits up to {@code heartbeatIntervalMillis} on {@code heartbeatTrigger},
 * builds a {@code ContainerHeartbeat} (JVM name, buffer server address, free memory, GC totals),
 * attaches one {@code OperatorHeartbeat} per entry in {@code nodes}, sends it through
 * {@code umbilical.processHeartbeat} and hands the response to {@code processHeartbeatResponse}.
 * While the response reports pending requests, the heartbeat is rebuilt and resent after a
 * wait of up to 500 ms.
 *
 * @throws Exception propagated from token renewal, the umbilical calls or response processing
 */
public void heartbeatLoop() throws Exception {
  logger.debug("Entering heartbeat loop (interval is {} ms)", this.heartbeatIntervalMillis);
  umbilical.log(containerId, "[" + containerId + "] Entering heartbeat loop..");
  final YarnConfiguration conf = new YarnConfiguration();
  if (UserGroupInformation.isSecurityEnabled()) {
    tokenRenewer = new TokenRenewer(containerContext, false, conf, containerId);
  }
  // Stack trace captured after a response asked for it; sent with the next heartbeat.
  String stackTrace = null;
  while (!exitHeartbeatLoop) {
    if (tokenRenewer != null) {
      tokenRenewer.checkAndRenew();
    }
    synchronized (this.heartbeatTrigger) {
      try {
        this.heartbeatTrigger.wait(heartbeatIntervalMillis);
      } catch (InterruptedException e1) {
        logger.warn("Interrupted in heartbeat loop, exiting..");
        break;
      }
    }
    long currentTime = System.currentTimeMillis();
    ContainerHeartbeat msg = new ContainerHeartbeat();
    msg.jvmName = jvmName;
    if (this.bufferServerAddress != null) {
      msg.bufferServerHost = this.bufferServerAddress.getHostName();
      msg.bufferServerPort = this.bufferServerAddress.getPort();
      if (bufferServer != null && !eventloop.isActive()) {
        logger.warn("Requesting restart due to terminated event loop");
        msg.restartRequested = true;
      }
    }
    // Runtime.freeMemory() is in bytes; report whole megabytes.
    msg.memoryMBFree = ((int)(Runtime.getRuntime().freeMemory() / (1024 * 1024)));
    // Sum collection time and count over all garbage collector beans of this JVM.
    garbageCollectorMXBeans = ManagementFactory.getGarbageCollectorMXBeans();
    for (GarbageCollectorMXBean bean : garbageCollectorMXBeans) {
      msg.gcCollectionTime += bean.getCollectionTime();
      msg.gcCollectionCount += bean.getCollectionCount();
    }
    ContainerHeartbeatResponse rsp;
    do {
      ContainerStats stats = new ContainerStats(containerId);
      // gather heartbeat info for all operators
      for (Map.Entry<Integer, Node<?>> e : nodes.entrySet()) {
        OperatorHeartbeat hb = new OperatorHeartbeat();
        hb.setNodeId(e.getKey());
        hb.setGeneratedTms(currentTime);
        hb.setIntervalMs(heartbeatIntervalMillis);
        BlockingQueue<StatsListener.OperatorResponse> commandResponse = e.getValue().commandResponse;
        if (!commandResponse.isEmpty()) {
          ArrayList<StatsListener.OperatorResponse> response = new ArrayList<>();
          // Move every queued response in one call. An index loop bounded by the shrinking
          // size() while polling would stop roughly halfway and leave responses behind;
          // drainTo also never adds null if the queue empties in the meantime.
          commandResponse.drainTo(response);
          hb.requestResponse = response;
        }
        OperatorContext context = e.getValue().context;
        context.drainStats(hb.getOperatorStatsContainer());
        // No thread, or a thread that has not terminated, is reported as ACTIVE; a terminated
        // thread is FAILED when the node is listed in failedNodes and SHUTDOWN otherwise.
        if (context.getThread() == null || context.getThread().getState() != Thread.State.TERMINATED) {
          hb.setState(DeployState.ACTIVE);
        } else if (failedNodes.contains(hb.nodeId)) {
          hb.setState(DeployState.FAILED);
        } else {
          logger.debug("Reporting SHUTDOWN state because thread is {} and failedNodes is {}", context.getThread(), failedNodes);
          hb.setState(DeployState.SHUTDOWN);
        }
        stats.addNodeStats(hb);
      }
      /*
       * Container stats published for whoever is interested in listening.
       * Currently interested candidates are TupleRecorderCollection and BufferServerStatsSubscriber
       */
      eventBus.publish(new ContainerStatsEvent(stats));
      msg.setContainerStats(stats);
      // heartbeat call and follow-up processing
      // logger.debug("Sending heartbeat for {} operators.", msg.getContainerStats().size());
      msg.sentTms = System.currentTimeMillis();
      msg.stackTrace = stackTrace;
      rsp = umbilical.processHeartbeat(msg);
      if (rsp.stackTraceRequired) {
        stackTrace = StramUtils.getStackTrace().toString();
      } else {
        stackTrace = null;
      }
      processHeartbeatResponse(rsp);
      if (rsp.hasPendingRequests) {
        logger.info("Waiting for pending request.");
        synchronized (this.heartbeatTrigger) {
          try {
            this.heartbeatTrigger.wait(500);
          } catch (InterruptedException ie) {
            logger.warn("Interrupted in heartbeat loop", ie);
            // NOTE(review): this only leaves the do/while; the outer loop keeps running
            // until exitHeartbeatLoop is set or a later wait is interrupted - confirm intended.
            break;
          }
        }
      }
    } while (rsp.hasPendingRequests);
  }
  logger.debug("[{}] Exiting heartbeat loop", containerId);
  umbilical.log(containerId, "[" + containerId + "] Exiting heartbeat loop..");
}
Aggregations