Search in sources :

Example 1 with ContainerStatsEvent

use of com.datatorrent.stram.api.ContainerEvent.ContainerStatsEvent in project apex-core by apache.

the class StreamingContainer method heartbeatLoop.

public void heartbeatLoop() throws Exception {
    logger.debug("Entering heartbeat loop (interval is {} ms)", this.heartbeatIntervalMillis);
    umbilical.log(containerId, "[" + containerId + "] Entering heartbeat loop..");
    final YarnConfiguration conf = new YarnConfiguration();
    if (UserGroupInformation.isSecurityEnabled()) {
        tokenRenewer = new TokenRenewer(containerContext, false, conf, containerId);
    }
    String stackTrace = null;
    while (!exitHeartbeatLoop) {
        if (tokenRenewer != null) {
            tokenRenewer.checkAndRenew();
        }
        synchronized (this.heartbeatTrigger) {
            try {
                this.heartbeatTrigger.wait(heartbeatIntervalMillis);
            } catch (InterruptedException e1) {
                logger.warn("Interrupted in heartbeat loop, exiting..");
                break;
            }
        }
        long currentTime = System.currentTimeMillis();
        ContainerHeartbeat msg = new ContainerHeartbeat();
        msg.jvmName = jvmName;
        if (this.bufferServerAddress != null) {
            msg.bufferServerHost = this.bufferServerAddress.getHostName();
            msg.bufferServerPort = this.bufferServerAddress.getPort();
            if (bufferServer != null && !eventloop.isActive()) {
                logger.warn("Requesting restart due to terminated event loop");
                msg.restartRequested = true;
            }
        }
        msg.memoryMBFree = ((int) (Runtime.getRuntime().freeMemory() / (1024 * 1024)));
        garbageCollectorMXBeans = ManagementFactory.getGarbageCollectorMXBeans();
        for (GarbageCollectorMXBean bean : garbageCollectorMXBeans) {
            msg.gcCollectionTime += bean.getCollectionTime();
            msg.gcCollectionCount += bean.getCollectionCount();
        }
        ContainerHeartbeatResponse rsp;
        do {
            ContainerStats stats = new ContainerStats(containerId);
            // gather heartbeat info for all operators
            for (Map.Entry<Integer, Node<?>> e : nodes.entrySet()) {
                OperatorHeartbeat hb = new OperatorHeartbeat();
                hb.setNodeId(e.getKey());
                hb.setGeneratedTms(currentTime);
                hb.setIntervalMs(heartbeatIntervalMillis);
                if (e.getValue().commandResponse.size() > 0) {
                    BlockingQueue<StatsListener.OperatorResponse> commandResponse = e.getValue().commandResponse;
                    ArrayList<StatsListener.OperatorResponse> response = new ArrayList<>();
                    for (int i = 0; i < commandResponse.size(); i++) {
                        response.add(commandResponse.poll());
                    }
                    hb.requestResponse = response;
                }
                OperatorContext context = e.getValue().context;
                context.drainStats(hb.getOperatorStatsContainer());
                if (context.getThread() == null || context.getThread().getState() != Thread.State.TERMINATED) {
                    hb.setState(DeployState.ACTIVE);
                } else if (failedNodes.contains(hb.nodeId)) {
                    hb.setState(DeployState.FAILED);
                } else {
                    logger.debug("Reporting SHUTDOWN state because thread is {} and failedNodes is {}", context.getThread(), failedNodes);
                    hb.setState(DeployState.SHUTDOWN);
                }
                stats.addNodeStats(hb);
            }
            /**
             * Container stats published for whoever is interested in listening.
             * Currently interested candidates are TupleRecorderCollection and BufferServerStatsSubscriber
             */
            eventBus.publish(new ContainerStatsEvent(stats));
            msg.setContainerStats(stats);
            // heartbeat call and follow-up processing
            // logger.debug("Sending heartbeat for {} operators.", msg.getContainerStats().size());
            msg.sentTms = System.currentTimeMillis();
            msg.stackTrace = stackTrace;
            rsp = umbilical.processHeartbeat(msg);
            if (rsp.stackTraceRequired) {
                stackTrace = StramUtils.getStackTrace().toString();
            } else {
                stackTrace = null;
            }
            processHeartbeatResponse(rsp);
            if (rsp.hasPendingRequests) {
                logger.info("Waiting for pending request.");
                synchronized (this.heartbeatTrigger) {
                    try {
                        this.heartbeatTrigger.wait(500);
                    } catch (InterruptedException ie) {
                        logger.warn("Interrupted in heartbeat loop", ie);
                        break;
                    }
                }
            }
        } while (rsp.hasPendingRequests);
    }
    logger.debug("[{}] Exiting heartbeat loop", containerId);
    umbilical.log(containerId, "[" + containerId + "] Exiting heartbeat loop..");
}
Also used : ContainerStats(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats) OperatorHeartbeat(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat) ContainerStatsEvent(com.datatorrent.stram.api.ContainerEvent.ContainerStatsEvent) GarbageCollectorMXBean(java.lang.management.GarbageCollectorMXBean) ArrayList(java.util.ArrayList) ContainerHeartbeatResponse(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeatResponse) Checkpoint(com.datatorrent.stram.api.Checkpoint) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) TokenRenewer(org.apache.apex.engine.security.TokenRenewer) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ContainerHeartbeat(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeat)

Aggregations

Checkpoint (com.datatorrent.stram.api.Checkpoint)1 ContainerStatsEvent (com.datatorrent.stram.api.ContainerEvent.ContainerStatsEvent)1 ContainerHeartbeat (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeat)1 ContainerHeartbeatResponse (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeatResponse)1 ContainerStats (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats)1 OperatorHeartbeat (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat)1 GarbageCollectorMXBean (java.lang.management.GarbageCollectorMXBean)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 TokenRenewer (org.apache.apex.engine.security.TokenRenewer)1 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)1