Use of com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat in project apex-core by Apache.
The class StreamingContainer, method heartbeatLoop.
/**
 * Runs the container heartbeat loop until {@code exitHeartbeatLoop} is set or the thread is
 * interrupted while waiting for the next heartbeat interval.
 *
 * <p>Each iteration:
 * <ol>
 *   <li>refreshes security tokens through {@code StramUserLogin.refreshTokens} when security is
 *       enabled, a key tab file is configured and the current expiry time has been reached;</li>
 *   <li>waits on {@code heartbeatTrigger} for at most {@code heartbeatIntervalMillis};</li>
 *   <li>builds a {@link ContainerHeartbeat} with JVM name, buffer server address, free memory and
 *       accumulated garbage collection time/count;</li>
 *   <li>collects one {@link OperatorHeartbeat} per entry of {@code nodes}, publishes the resulting
 *       {@link ContainerStats} on {@code eventBus} and sends the heartbeat through
 *       {@code umbilical.processHeartbeat};</li>
 *   <li>hands the response to {@code processHeartbeatResponse} and repeats the send step (after a
 *       wait of up to 500 ms) for as long as the response reports pending requests.</li>
 * </ol>
 *
 * @throws Exception whatever the token refresh, the umbilical calls or the response processing throw
 */
public void heartbeatLoop() throws Exception {
  logger.debug("Entering heartbeat loop (interval is {} ms)", this.heartbeatIntervalMillis);
  umbilical.log(containerId, "[" + containerId + "] Entering heartbeat loop..");
  final YarnConfiguration conf = new YarnConfiguration();
  long tokenLifeTime = (long)(containerContext.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * containerContext.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME));
  // Starting at "now" makes the first loop iteration eligible for a token refresh.
  long expiryTime = System.currentTimeMillis();
  final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
  // Stack trace collected on request of the previous response; sent with the next heartbeat.
  String stackTrace = null;
  for (Token<?> token : credentials.getAllTokens()) {
    logger.debug("token: {}", token);
  }
  String principal = containerContext.getValue(LogicalPlan.PRINCIPAL);
  String hdfsKeyTabFile = containerContext.getValue(LogicalPlan.KEY_TAB_FILE);
  while (!exitHeartbeatLoop) {
    if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime && hdfsKeyTabFile != null) {
      expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, FileUtils.getTempDirectoryPath(), containerId, conf, principal, hdfsKeyTabFile, credentials, null, false);
    }
    synchronized (this.heartbeatTrigger) {
      try {
        this.heartbeatTrigger.wait(heartbeatIntervalMillis);
      } catch (InterruptedException e1) {
        logger.warn("Interrupted in heartbeat loop, exiting..");
        break;
      }
    }
    long currentTime = System.currentTimeMillis();
    ContainerHeartbeat msg = new ContainerHeartbeat();
    msg.jvmName = jvmName;
    if (this.bufferServerAddress != null) {
      msg.bufferServerHost = this.bufferServerAddress.getHostName();
      msg.bufferServerPort = this.bufferServerAddress.getPort();
      if (bufferServer != null && !eventloop.isActive()) {
        logger.warn("Requesting restart due to terminated event loop");
        msg.restartRequested = true;
      }
    }
    msg.memoryMBFree = ((int)(Runtime.getRuntime().freeMemory() / (1024 * 1024)));
    garbageCollectorMXBeans = ManagementFactory.getGarbageCollectorMXBeans();
    for (GarbageCollectorMXBean bean : garbageCollectorMXBeans) {
      msg.gcCollectionTime += bean.getCollectionTime();
      msg.gcCollectionCount += bean.getCollectionCount();
    }
    ContainerHeartbeatResponse rsp;
    do {
      ContainerStats stats = new ContainerStats(containerId);
      // gather heartbeat info for all operators
      for (Map.Entry<Integer, Node<?>> e : nodes.entrySet()) {
        OperatorHeartbeat hb = new OperatorHeartbeat();
        hb.setNodeId(e.getKey());
        hb.setGeneratedTms(currentTime);
        hb.setIntervalMs(heartbeatIntervalMillis);
        BlockingQueue<StatsListener.OperatorResponse> commandResponse = e.getValue().commandResponse;
        if (!commandResponse.isEmpty()) {
          ArrayList<StatsListener.OperatorResponse> response = new ArrayList<>();
          // drainTo moves every queued response. An index loop bounded by size() stops halfway,
          // because each poll() shrinks the queue while the index keeps growing.
          commandResponse.drainTo(response);
          hb.requestResponse = response;
        }
        OperatorContext context = e.getValue().context;
        context.drainStats(hb.getOperatorStatsContainer());
        if (context.getThread() == null || context.getThread().getState() != Thread.State.TERMINATED) {
          hb.setState(DeployState.ACTIVE);
        } else if (failedNodes.contains(hb.nodeId)) {
          hb.setState(DeployState.FAILED);
        } else {
          logger.debug("Reporting SHUTDOWN state because thread is {} and failedNodes is {}", context.getThread(), failedNodes);
          hb.setState(DeployState.SHUTDOWN);
        }
        stats.addNodeStats(hb);
      }
      /**
       * Container stats published for whoever is interested in listening.
       * Currently interested candidates are TupleRecorderCollection and BufferServerStatsSubscriber
       */
      eventBus.publish(new ContainerStatsEvent(stats));
      msg.setContainerStats(stats);
      // heartbeat call and follow-up processing
      //logger.debug("Sending heartbeat for {} operators.", msg.getContainerStats().size());
      msg.sentTms = System.currentTimeMillis();
      msg.stackTrace = stackTrace;
      rsp = umbilical.processHeartbeat(msg);
      if (rsp.stackTraceRequired) {
        stackTrace = StramUtils.getStackTrace().toString();
      } else {
        stackTrace = null;
      }
      processHeartbeatResponse(rsp);
      if (rsp.hasPendingRequests) {
        logger.info("Waiting for pending request.");
        synchronized (this.heartbeatTrigger) {
          try {
            this.heartbeatTrigger.wait(500);
          } catch (InterruptedException ie) {
            logger.warn("Interrupted in heartbeat loop", ie);
            // NOTE(review): this break leaves only the do/while; the outer heartbeat loop keeps
            // running and the interrupt status is not restored - confirm this is intended.
            break;
          }
        }
      }
    } while (rsp.hasPendingRequests);
  }
  logger.debug("[{}] Exiting heartbeat loop", containerId);
  umbilical.log(containerId, "[" + containerId + "] Exiting heartbeat loop..");
}
Use of com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat in project apex-core by Apache.
The class StreamingContainerManagerTest, method testProcessHeartbeat.
/**
 * Feeds container heartbeats for a single input operator into a
 * {@link StreamingContainerManager} and checks the resulting container state, operator state
 * and operator statistics after each heartbeat.
 */
@Test
public void testProcessHeartbeat() throws Exception {
  // --- build a one-operator DAG with a stats listener and an in-memory storage agent ---
  final TestGeneratorInputOperator generator = dag.addOperator("o1", TestGeneratorInputOperator.class);
  dag.setOperatorAttribute(generator, OperatorContext.STATS_LISTENERS, Arrays.<StatsListener>asList(new PartitioningTest.PartitionLoadWatch()));
  dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  final StreamingContainerManager manager = new StreamingContainerManager(dag);
  final PhysicalPlan physicalPlan = manager.getPhysicalPlan();
  Assert.assertEquals("number required containers", 1, physicalPlan.getContainers().size());
  final PTOperator ptOperator = physicalPlan.getOperators(dag.getMeta(generator)).get(0);

  // --- hand a container to the manager ---
  final String containerId = "container1";
  final ContainerResource resource = new ContainerResource(0, containerId, "localhost", 512, 0, null);
  final InetSocketAddress bufferServerAddress = InetSocketAddress.createUnresolved("localhost", 0);
  final StreamingContainerAgent agent = manager.assignContainer(resource, bufferServerAddress);
  Assert.assertNotNull(agent);
  Assert.assertEquals(PTContainer.State.ALLOCATED, ptOperator.getContainer().getState());
  Assert.assertEquals(PTOperator.State.PENDING_DEPLOY, ptOperator.getState());

  // --- empty container heartbeat: the response must carry the deploy request ---
  final ContainerStats containerStats = new ContainerStats(containerId);
  final ContainerHeartbeat containerHeartbeat = new ContainerHeartbeat();
  containerHeartbeat.setContainerStats(containerStats);

  final ContainerHeartbeatResponse deployResponse = manager.processHeartbeat(containerHeartbeat);
  Assert.assertNotNull(deployResponse.deployRequest);
  Assert.assertEquals("" + deployResponse.deployRequest, 1, deployResponse.deployRequest.size());
  Assert.assertEquals(PTContainer.State.ACTIVE, ptOperator.getContainer().getState());
  Assert.assertEquals("state " + ptOperator, PTOperator.State.PENDING_DEPLOY, ptOperator.getState());

  // --- first operator heartbeat: window 3, one tuple emitted on the output port ---
  final OperatorHeartbeat operatorHeartbeat = new OperatorHeartbeat();
  operatorHeartbeat.setNodeId(ptOperator.getId());
  operatorHeartbeat.setState(OperatorHeartbeat.DeployState.ACTIVE);

  final OperatorStats firstWindow = new OperatorStats();
  firstWindow.checkpoint = new Checkpoint(2, 0, 0);
  firstWindow.windowId = 3;
  firstWindow.outputPorts = Lists.newArrayList();
  final PortStats firstPort = new PortStats(TestGeneratorInputOperator.OUTPUT_PORT);
  firstPort.bufferServerBytes = 101;
  firstPort.tupleCount = 1;
  firstWindow.outputPorts.add(firstPort);
  operatorHeartbeat.windowStats = Lists.newArrayList(firstWindow);
  containerStats.operators.add(operatorHeartbeat);

  // processing this heartbeat activates the operator
  manager.processHeartbeat(containerHeartbeat);
  Assert.assertEquals(PTContainer.State.ACTIVE, ptOperator.getContainer().getState());
  Assert.assertEquals("state " + ptOperator, PTOperator.State.ACTIVE, ptOperator.getState());
  Assert.assertEquals("tuples " + ptOperator, 1, ptOperator.stats.totalTuplesEmitted.get());
  Assert.assertEquals("tuples " + ptOperator, 0, ptOperator.stats.totalTuplesProcessed.get());
  Assert.assertEquals("window " + ptOperator, 3, ptOperator.stats.currentWindowId.get());
  Assert.assertEquals("port stats", 1, ptOperator.stats.outputPortStatusList.size());
  final PortStatus outputPortStatus = ptOperator.stats.outputPortStatusList.get(TestGeneratorInputOperator.OUTPUT_PORT);
  Assert.assertNotNull("port stats", outputPortStatus);
  Assert.assertEquals("port stats", 1, outputPortStatus.totalTuples);

  // --- second operator heartbeat: window 4, one more tuple ---
  final OperatorStats secondWindow = new OperatorStats();
  secondWindow.checkpoint = new Checkpoint(2, 0, 0);
  secondWindow.windowId = 4;
  secondWindow.outputPorts = Lists.newArrayList();
  final PortStats secondPort = new PortStats(TestGeneratorInputOperator.OUTPUT_PORT);
  secondPort.bufferServerBytes = 1;
  secondPort.tupleCount = 1;
  secondWindow.outputPorts.add(secondPort);
  operatorHeartbeat.windowStats = Lists.newArrayList(secondWindow);
  containerStats.operators.clear();
  containerStats.operators.add(operatorHeartbeat);

  manager.processHeartbeat(containerHeartbeat);
  Assert.assertEquals("tuples " + ptOperator, 2, ptOperator.stats.totalTuplesEmitted.get());
  Assert.assertEquals("window " + ptOperator, 4, ptOperator.stats.currentWindowId.get());
  Assert.assertEquals("statsQueue " + ptOperator, 2, ptOperator.stats.listenerStats.size());

  // --- after processEvents the listener queue is empty and lastWindowedStats holds two entries ---
  manager.processEvents();
  Assert.assertEquals("statsQueue " + ptOperator, 0, ptOperator.stats.listenerStats.size());
  Assert.assertEquals("lastStats " + ptOperator, 2, ptOperator.stats.lastWindowedStats.size());
}
Aggregations