Use of com.datatorrent.stram.engine.OperatorContext in the apex-core project by Apache.
From the class StramLocalClusterTest, method testRecovery.
/**
 * Exercises container-failure recovery for a two-operator DAG ({@code o1 -> o2}) running in a
 * {@link StramLocalCluster} whose windows are advanced manually through {@code wclock}.
 *
 * <p>The test feeds one tuple, waits for the first checkpoint to be reported through heartbeats,
 * fails the container hosting {@code o1}, and then verifies that:
 * <ul>
 * <li>{@code o1} is activated in a different container,</li>
 * <li>{@code o2} is re-deployed as a new instance inside its existing container with the
 * previously received tuple still present,</li>
 * <li>data flows again after recovery, and</li>
 * <li>only the checkpoint for window 3 remains after {@code monitorHeartbeat(false)}.</li>
 * </ul>
 *
 * @throws Exception if any cluster, heartbeat or wait operation fails
 */
@Test
@SuppressWarnings("SleepWhileInLoop")
public void testRecovery() throws Exception {
  AsyncFSStorageAgent agent = new AsyncFSStorageAgent(testMeta.getPath(), null);
  agent.setSyncCheckpoint(true);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);

  TestGeneratorInputOperator node1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
  // data will be added externally from test
  node1.setMaxTuples(0);
  GenericTestOperator node2 = dag.addOperator("o2", GenericTestOperator.class);
  dag.addStream("o1o2", node1.outport, node2.inport1);
  dag.validate();
  dag.getAttributes().put(LogicalPlan.CHECKPOINT_WINDOW_COUNT, 2);

  // the window generator runs off this manual clock, so windows advance only on wclock.tick()
  final ManualScheduledExecutorService wclock = new ManualScheduledExecutorService(1);
  MockComponentFactory mcf = new MockComponentFactory() {
    @Override
    public WindowGenerator setupWindowGenerator() {
      WindowGenerator wingen = StramTestSupport.setupWindowGenerator(wclock);
      wingen.setCheckpointCount(2, 0);
      return wingen;
    }
  };

  StramLocalCluster localCluster = new StramLocalCluster(dag, mcf);
  localCluster.setPerContainerBufferServer(true);
  // driven by test
  localCluster.setHeartbeatMonitoringEnabled(false);
  localCluster.runAsync();
  try {
    PTOperator ptNode1 = localCluster.findByLogicalNode(dag.getMeta(node1));
    PTOperator ptNode2 = localCluster.findByLogicalNode(dag.getMeta(node2));

    LocalStreamingContainer c0 = StramTestSupport.waitForActivation(localCluster, ptNode1);
    Map<Integer, Node<?>> nodeMap = c0.getNodes();
    Assert.assertEquals("number operators", 1, nodeMap.size());
    TestGeneratorInputOperator n1 = (TestGeneratorInputOperator) nodeMap.get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(n1);

    LocalStreamingContainer c2 = StramTestSupport.waitForActivation(localCluster, ptNode2);
    Map<Integer, Node<?>> c2NodeMap = c2.getNodes();
    Assert.assertEquals("number operators downstream", 1, c2NodeMap.size());
    GenericTestOperator n2 = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull(n2);

    // input data
    String tuple1 = "tuple1";
    n1.addTuple(tuple1);

    OperatorContext n1Context = c0.getNodeContext(ptNode1.getId());
    Assert.assertEquals("initial window id", -1, n1Context.getLastProcessedWindowId());
    // checkpoint window
    wclock.tick(1);
    wclock.tick(1);
    Assert.assertEquals("current window", 2, wclock.getCurrentTimeMillis());

    OperatorContext o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("context ", o2Context);

    StramTestSupport.waitForWindowComplete(o2Context, 1);
    Assert.assertEquals("o2 received ", tuple1, n2.inport1Tuple);

    wclock.tick(1);
    Assert.assertEquals("current window", 3, wclock.getCurrentTimeMillis());
    // checkpoint between window 1 and 2
    StramTestSupport.waitForWindowComplete(o2Context, 2);

    // propagate checkpoints to master
    c0.triggerHeartbeat();
    // wait for heartbeat cycle to complete
    c0.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode1, 1, ptNode1.getRecentCheckpoint().windowId);
    c2.triggerHeartbeat();
    // yield without using yield for heartbeat cycle
    Thread.sleep(1);
    c2.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode2, 1, ptNode2.getRecentCheckpoint().windowId);

    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode2.checkpoints);

    //
    // simulate container failure (operator o1)
    //
    localCluster.failContainer(c0);

    // replacement container starts empty
    // operators will deploy after downstream operator was removed
    LocalStreamingContainer c0Replaced = StramTestSupport.waitForActivation(localCluster, ptNode1);
    c0Replaced.triggerHeartbeat();
    // next heartbeat after setup
    c0Replaced.waitForHeartbeat(5000);

    Assert.assertNotSame("old container", c0, c0Replaced);
    // compare the ids by value: a reference comparison would also pass for two equal ids
    Assert.assertFalse("old container", c0.getContainerId().equals(c0Replaced.getContainerId()));

    // verify change in downstream container
    LOG.debug("triggering c2 heartbeat processing");
    StreamingContainerAgent c2Agent = localCluster.getContainerAgent(c2);

    // wait for downstream re-deploy to complete
    long startTms = System.currentTimeMillis();
    while (c2Agent.hasPendingWork() && StramTestSupport.DEFAULT_TIMEOUT_MILLIS > System.currentTimeMillis() - startTms) {
      Thread.sleep(200);
      c2.triggerHeartbeat();
      LOG.debug("Waiting for {} to complete pending work.", c2.getContainerId());
    }

    Assert.assertEquals(c2.getContainerId() + " operators after redeploy " + c2.getNodes(), 1, c2.getNodes().size());
    // verify downstream operator re-deployed in existing container
    Assert.assertEquals("active " + ptNode2, c2, StramTestSupport.waitForActivation(localCluster, ptNode2));
    GenericTestOperator o2Recovered = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull("redeployed " + ptNode2, o2Recovered);
    Assert.assertNotSame("new instance " + ptNode2, n2, o2Recovered);
    Assert.assertEquals("restored state " + ptNode2, tuple1, o2Recovered.inport1Tuple);

    TestGeneratorInputOperator o1Recovered = (TestGeneratorInputOperator) c0Replaced.getNodes().get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(o1Recovered);

    OperatorContext o1RecoveredContext = c0Replaced.getNodeContext(ptNode1.getId());
    Assert.assertNotNull("active " + ptNode1, o1RecoveredContext);
    wclock.tick(1);
    Assert.assertEquals("current window", 4, wclock.getCurrentTimeMillis());

    // refresh context after operator re-deploy
    o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("active " + ptNode2, o2Context);

    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 3);
    StramTestSupport.waitForWindowComplete(o2Context, 3);
    // checkpoint window
    wclock.tick(1);
    Assert.assertEquals("current window", 5, wclock.getCurrentTimeMillis());

    String tuple2 = "tuple2";
    o1Recovered.addTuple(tuple2);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 4);
    StramTestSupport.waitForWindowComplete(o2Context, 4);

    // check data flow after recovery
    Assert.assertEquals("retrieved tuple (after recovery) " + ptNode2, tuple2, o2Recovered.inport1Tuple);

    // propagate checkpoints to master
    c0Replaced.triggerHeartbeat();
    c0Replaced.waitForHeartbeat(5000);
    c2.triggerHeartbeat();
    c2.waitForHeartbeat(5000);

    // checkpoint purging
    localCluster.dnmgr.monitorHeartbeat(false);
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode2.checkpoints);
  } finally {
    // always request shutdown so a failed assertion above does not skip it
    localCluster.shutdown();
  }
}
Use of com.datatorrent.stram.engine.OperatorContext in the apex-core project by Apache.
From the class InlineStreamTest, method test.
/**
 * Sends one window containing the integers {@code 0 .. totalTupleCount - 1} through two
 * pass-through operators connected by an {@link InlineStream} and checks that the sink attached
 * to the second operator sees them in consecutive order.
 *
 * <p>The sink records the most recent payload in {@code prev} and signals the test thread either
 * when it observes a gap in the sequence or when the last expected value arrives. After the
 * assertions the stream is deactivated, both nodes are shut down and the operators torn down;
 * this cleanup runs even when an assertion fails.
 *
 * @throws Exception if the test thread is interrupted or a component fails
 */
@Test
@SuppressWarnings("SleepWhileInLoop")
public void test() throws Exception {
  final int totalTupleCount = 5000;

  final PassThroughNode<Object> operator1 = new PassThroughNode<>();
  final GenericNode node1 = new GenericNode(operator1, new OperatorContext(1, "operator1", new DefaultAttributeMap(), null));
  node1.setId(1);
  operator1.setup(node1.context);

  final PassThroughNode<Object> operator2 = new PassThroughNode<>();
  final GenericNode node2 = new GenericNode(operator2, new OperatorContext(2, "operator2", new DefaultAttributeMap(), null));
  node2.setId(2);
  operator2.setup(node2.context);

  StreamContext streamContext = new StreamContext("node1->node2");
  final InlineStream stream = new InlineStream(1024);
  stream.setup(streamContext);

  node1.connectOutputPort("output", stream);
  node2.connectInputPort("input", stream.getReservoir());

  prev = null;
  // A latch remembers a signal raised before the test thread starts waiting and is immune to
  // spurious wakeups, unlike a bare wait()/notify() pair.
  final java.util.concurrent.CountDownLatch sinkSignal = new java.util.concurrent.CountDownLatch(1);
  Sink<Object> sink = new Sink<Object>() {
    @Override
    public void put(Object payload) {
      // control tuples (begin/end window) are not part of the sequence
      if (payload instanceof Tuple) {
        return;
      }

      // wake the test thread early on an out-of-order payload so it can fail fast
      if (prev != null && Integer.parseInt(payload.toString()) - Integer.parseInt(prev.toString()) != 1) {
        sinkSignal.countDown();
      }
      prev = payload;

      // last expected value received
      if (Integer.parseInt(prev.toString()) == totalTupleCount - 1) {
        sinkSignal.countDown();
      }
    }

    @Override
    public int getCount(boolean reset) {
      return 0;
    }
  };
  node2.connectOutputPort("output", sink);

  AbstractReservoir reservoir1 = AbstractReservoir.newReservoir("input", 1024 * 5);
  node1.connectInputPort("input", reservoir1);

  Map<Integer, Node<?>> activeNodes = new ConcurrentHashMap<>();
  launchNodeThread(node1, activeNodes);
  launchNodeThread(node2, activeNodes);
  stream.activate(streamContext);

  try {
    reservoir1.put(StramTestSupport.generateBeginWindowTuple("irrelevant", 0));
    for (int i = 0; i < totalTupleCount; i++) {
      reservoir1.put(i);
    }
    reservoir1.put(StramTestSupport.generateEndWindowTuple("irrelevant", 0));

    // same 200 ms budget as before; on timeout the assertions below report the failure
    if (!sinkSignal.await(200, java.util.concurrent.TimeUnit.MILLISECONDS)) {
      logger.debug("sink did not signal within 200 ms");
    }

    Assert.assertNotNull(prev);
    Assert.assertEquals("processing complete", totalTupleCount, Integer.parseInt(prev.toString()) + 1);
    Assert.assertEquals("active operators", 2, activeNodes.size());

    WaitCondition c = new WaitCondition() {
      @Override
      public boolean isComplete() {
        final SweepableReservoir reservoir = stream.getReservoir();
        logger.debug("stream {} empty {}, size {}", stream, reservoir.isEmpty(), reservoir.size(false));
        return reservoir.isEmpty();
      }
    };
    Assert.assertTrue("operator should finish processing all events within 1 second", StramTestSupport.awaitCompletion(c, 1000));
  } finally {
    // release stream, node threads and operators even if an assertion above failed
    stream.deactivate();
    for (Node<?> node : activeNodes.values()) {
      node.shutdown();
    }

    // give the node threads up to 10 x 20 ms to deregister themselves
    for (int i = 0; i < 10; i++) {
      Thread.sleep(20);
      if (activeNodes.isEmpty()) {
        break;
      }
    }

    stream.teardown();
    operator2.teardown();
    operator1.teardown();
  }

  Assert.assertEquals("active operators", 0, activeNodes.size());
}
Aggregations