Use of com.datatorrent.stram.plan.logical.requests.CreateStreamRequest in the apex-core project by Apache.
From the class StramRecoveryTest, method testContainerManager.
/**
 * Round-trips the state of a {@link StreamingContainerManager} through the file system
 * recovery handler while the execution layer is replaced by {@code MockContainer}.
 * <p>
 * The scenario runs in four phases: initial plan creation (snapshot file must exist),
 * a first restore from that snapshot, a logical plan modification on the restored manager,
 * and a second restore that must reflect the modified plan, the last operator state and
 * a checkpoint written after the last heartbeat.
 *
 * @param agent storage agent installed on the DAG as {@code OperatorContext.STORAGE_AGENT}
 * @throws Exception propagated from the manager, recovery handler or checkpoint calls
 */
private void testContainerManager(StorageAgent agent) throws Exception {
  dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
  StatsListeningOperator statsOperator = dag.addOperator("o1", StatsListeningOperator.class);

  // ---- phase 1: initial manager; a snapshot file is expected in the recovery directory ----
  FSRecoveryHandler initialHandler = new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false));
  StreamingContainerManager manager = StreamingContainerManager.getInstance(initialHandler, dag, false);
  File snapshotFile = new File(initialHandler.getDir(), FSRecoveryHandler.FILE_SNAPSHOT);
  Assert.assertTrue("snapshot file " + snapshotFile, snapshotFile.exists());

  PhysicalPlan physicalPlan = manager.getPhysicalPlan();
  assertEquals("number required containers", 1, physicalPlan.getContainers().size());
  PTOperator physicalO1 = physicalPlan.getOperators(dag.getMeta(statsOperator)).get(0);

  // The reference is not used until later; the constructor is invoked for the changes
  // it makes to the container.
  @SuppressWarnings("UnusedAssignment")
  MockContainer mockContainer = new MockContainer(manager, physicalO1.getContainer());
  PTContainer firstContainer = physicalO1.getContainer();
  Assert.assertNotNull(firstContainer.bufferServerAddress);
  assertEquals(PTContainer.State.ACTIVE, firstContainer.getState());
  assertEquals("state " + physicalO1, PTOperator.State.PENDING_DEPLOY, physicalO1.getState());

  // ---- phase 2: restore from the initial snapshot plus the log ----
  dag = StramTestSupport.createDAG(testMeta);
  FSRecoveryHandler firstRestoreHandler = new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false));
  manager = StreamingContainerManager.getInstance(firstRestoreHandler, dag, false);
  dag = manager.getLogicalPlan();
  physicalPlan = manager.getPhysicalPlan();
  physicalO1 = physicalPlan.getOperators(dag.getOperatorMeta("o1")).get(0);
  assertEquals("post restore state " + physicalO1, PTOperator.State.PENDING_DEPLOY, physicalO1.getState());
  statsOperator = (StatsListeningOperator) physicalO1.getOperatorMeta().getOperator();
  assertEquals("containerId", firstContainer.getExternalId(), physicalO1.getContainer().getExternalId());
  assertEquals("stats listener", 1, physicalO1.statsListeners.size());
  // stats are not written to the log, so no stats callback is expected after this restore
  assertEquals("number stats calls", 0, statsOperator.processStatsCnt);
  PTContainer restoredContainer = physicalO1.getContainer();
  assertEquals("post restore 1", PTContainer.State.ALLOCATED, restoredContainer.getState());
  assertEquals("post restore 1", firstContainer.bufferServerAddress, restoredContainer.bufferServerAddress);

  StreamingContainerAgent containerAgent = manager.getContainerAgent(firstContainer.getExternalId());
  Assert.assertNotNull("allocated container restored " + firstContainer, containerAgent);
  assertEquals("memory usage allocated container",
      (int) OperatorContext.MEMORY_MB.defaultValue,
      containerAgent.container.getAllocatedMemoryMB());

  // YARN-1490: emulate a container that was terminated while the AM was recovering
  manager.scheduleContainerRestart(firstContainer.getExternalId());
  assertEquals("memory usage of failed container", 0, containerAgent.container.getAllocatedMemoryMB());

  // Bring the operator to ACTIVE via a checkpoint at window 3 and a matching heartbeat.
  Checkpoint initialCheckpoint = new Checkpoint(3, 0, 0);
  mockContainer = new MockContainer(manager, physicalO1.getContainer());
  checkpoint(manager, physicalO1, initialCheckpoint);
  mockContainer.stats(physicalO1.getId())
      .deployState(OperatorHeartbeat.DeployState.ACTIVE)
      .currentWindowId(3)
      .checkpointWindowId(3);
  mockContainer.sendHeartbeat();
  assertEquals("state " + physicalO1, PTOperator.State.ACTIVE, physicalO1.getState());

  // ---- phase 3: modifying the logical plan is what triggers a new snapshot ----
  CreateOperatorRequest createO2 = new CreateOperatorRequest();
  createO2.setOperatorFQCN(GenericTestOperator.class.getName());
  createO2.setOperatorName("o2");

  CreateStreamRequest connectO1ToO2 = new CreateStreamRequest();
  connectO1ToO2.setSourceOperatorName("o1");
  connectO1ToO2.setSourceOperatorPortName("outport");
  connectO1ToO2.setSinkOperatorName("o2");
  connectO1ToO2.setSinkOperatorPortName("inport1");

  FutureTask<?> modification = manager.logicalPlanModification(Lists.newArrayList(createO2, connectO1ToO2));
  // Drive the manager until the queued modification has been processed.
  while (!modification.isDone()) {
    manager.monitorHeartbeat(false);
  }
  // get() rethrows a failure of the modification task, if there was one
  Assert.assertNull(modification.get());
  Assert.assertSame("dag references", dag, manager.getLogicalPlan());
  assertEquals("number operators after plan modification", 2, dag.getAllOperators().size());

  // Changing the operator state causes a journal write.
  physicalO1.setState(PTOperator.State.INACTIVE);

  // This checkpoint is written while the AM is considered down;
  // the next restore has to discover it.
  Checkpoint checkpointWhileDown = new Checkpoint(10, 0, 0);
  checkpoint(manager, physicalO1, checkpointWhileDown);

  // ---- phase 4: second restore must reflect plan change, state change and both checkpoints ----
  dag = StramTestSupport.createDAG(testMeta);
  FSRecoveryHandler secondRestoreHandler = new FSRecoveryHandler(dag.assertAppPath(), new Configuration(false));
  manager = StreamingContainerManager.getInstance(secondRestoreHandler, dag, false);
  Assert.assertNotSame("dag references", dag, manager.getLogicalPlan());
  assertEquals("number operators after restore", 2, manager.getLogicalPlan().getAllOperators().size());

  dag = manager.getLogicalPlan();
  physicalPlan = manager.getPhysicalPlan();
  physicalO1 = physicalPlan.getOperators(dag.getOperatorMeta("o1")).get(0);
  assertEquals("post restore state " + physicalO1, PTOperator.State.INACTIVE, physicalO1.getState());
  statsOperator = (StatsListeningOperator) physicalO1.getOperatorMeta().getOperator();
  assertEquals("stats listener", 1, physicalO1.statsListeners.size());
  assertEquals("number stats calls post restore", 1, statsOperator.processStatsCnt);
  restoredContainer = physicalO1.getContainer();
  assertEquals("post restore 1", PTContainer.State.ACTIVE, restoredContainer.getState());
  assertEquals("post restore 1", firstContainer.bufferServerAddress, restoredContainer.bufferServerAddress);

  // the checkpoint written while the AM was down must have been detected
  assertEquals("checkpoints after recovery",
      Lists.newArrayList(initialCheckpoint, checkpointWhileDown),
      physicalO1.checkpoints);
}
Aggregations