Search in sources :

Example 1 with OperatorContext

use of com.datatorrent.stram.engine.OperatorContext in project apex-core by apache.

the class StramLocalClusterTest method testRecovery.

@Test
@SuppressWarnings("SleepWhileInLoop")
public void testRecovery() throws Exception {
    AsyncFSStorageAgent agent = new AsyncFSStorageAgent(testMeta.getPath(), null);
    agent.setSyncCheckpoint(true);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
    TestGeneratorInputOperator node1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    // data will be added externally from test
    node1.setMaxTuples(0);
    GenericTestOperator node2 = dag.addOperator("o2", GenericTestOperator.class);
    dag.addStream("o1o2", node1.outport, node2.inport1);
    dag.validate();
    dag.getAttributes().put(LogicalPlan.CHECKPOINT_WINDOW_COUNT, 2);
    final ManualScheduledExecutorService wclock = new ManualScheduledExecutorService(1);
    MockComponentFactory mcf = new MockComponentFactory() {

        @Override
        public WindowGenerator setupWindowGenerator() {
            WindowGenerator wingen = StramTestSupport.setupWindowGenerator(wclock);
            wingen.setCheckpointCount(2, 0);
            return wingen;
        }
    };
    StramLocalCluster localCluster = new StramLocalCluster(dag, mcf);
    localCluster.setPerContainerBufferServer(true);
    // driven by test
    localCluster.setHeartbeatMonitoringEnabled(false);
    localCluster.runAsync();
    PTOperator ptNode1 = localCluster.findByLogicalNode(dag.getMeta(node1));
    PTOperator ptNode2 = localCluster.findByLogicalNode(dag.getMeta(node2));
    LocalStreamingContainer c0 = StramTestSupport.waitForActivation(localCluster, ptNode1);
    Map<Integer, Node<?>> nodeMap = c0.getNodes();
    Assert.assertEquals("number operators", 1, nodeMap.size());
    TestGeneratorInputOperator n1 = (TestGeneratorInputOperator) nodeMap.get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(n1);
    LocalStreamingContainer c2 = StramTestSupport.waitForActivation(localCluster, ptNode2);
    Map<Integer, Node<?>> c2NodeMap = c2.getNodes();
    Assert.assertEquals("number operators downstream", 1, c2NodeMap.size());
    GenericTestOperator n2 = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull(n2);
    // input data
    String tuple1 = "tuple1";
    n1.addTuple(tuple1);
    OperatorContext n1Context = c0.getNodeContext(ptNode1.getId());
    Assert.assertEquals("initial window id", -1, n1Context.getLastProcessedWindowId());
    // checkpoint window
    wclock.tick(1);
    wclock.tick(1);
    Assert.assertEquals("current window", 2, wclock.getCurrentTimeMillis());
    OperatorContext o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("context ", o2Context);
    StramTestSupport.waitForWindowComplete(o2Context, 1);
    Assert.assertEquals("o2 received ", tuple1, n2.inport1Tuple);
    wclock.tick(1);
    Assert.assertEquals("current window", 3, wclock.getCurrentTimeMillis());
    // checkpoint between window 1 and 2
    StramTestSupport.waitForWindowComplete(o2Context, 2);
    // propagate checkpoints to master
    c0.triggerHeartbeat();
    // wait for heartbeat cycle to complete
    c0.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode1, 1, ptNode1.getRecentCheckpoint().windowId);
    c2.triggerHeartbeat();
    //Thread.yield();
    // yield without using yield for heartbeat cycle
    Thread.sleep(1);
    c2.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode2, 1, ptNode2.getRecentCheckpoint().windowId);
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode2.checkpoints);
    //
    // simulate container failure (operator o1)
    //
    localCluster.failContainer(c0);
    // replacement container starts empty
    // operators will deploy after downstream operator was removed
    LocalStreamingContainer c0Replaced = StramTestSupport.waitForActivation(localCluster, ptNode1);
    c0Replaced.triggerHeartbeat();
    // next heartbeat after setup
    c0Replaced.waitForHeartbeat(5000);
    Assert.assertNotSame("old container", c0, c0Replaced);
    Assert.assertNotSame("old container", c0.getContainerId(), c0Replaced.getContainerId());
    // verify change in downstream container
    LOG.debug("triggering c2 heartbeat processing");
    StreamingContainerAgent c2Agent = localCluster.getContainerAgent(c2);
    // wait for downstream re-deploy to complete
    long startTms = System.currentTimeMillis();
    while (c2Agent.hasPendingWork() && StramTestSupport.DEFAULT_TIMEOUT_MILLIS > System.currentTimeMillis() - startTms) {
        Thread.sleep(200);
        c2.triggerHeartbeat();
        LOG.debug("Waiting for {} to complete pending work.", c2.getContainerId());
    }
    Assert.assertEquals(c2.getContainerId() + " operators after redeploy " + c2.getNodes(), 1, c2.getNodes().size());
    // verify downstream operator re-deployed in existing container
    Assert.assertEquals("active " + ptNode2, c2, StramTestSupport.waitForActivation(localCluster, ptNode2));
    GenericTestOperator o2Recovered = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull("redeployed " + ptNode2, o2Recovered);
    Assert.assertNotSame("new instance " + ptNode2, n2, o2Recovered);
    Assert.assertEquals("restored state " + ptNode2, tuple1, o2Recovered.inport1Tuple);
    TestGeneratorInputOperator o1Recovered = (TestGeneratorInputOperator) c0Replaced.getNodes().get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(o1Recovered);
    OperatorContext o1RecoveredContext = c0Replaced.getNodeContext(ptNode1.getId());
    Assert.assertNotNull("active " + ptNode1, o1RecoveredContext);
    wclock.tick(1);
    Assert.assertEquals("current window", 4, wclock.getCurrentTimeMillis());
    // refresh context after operator re-deploy
    o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("active " + ptNode2, o2Context);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 3);
    StramTestSupport.waitForWindowComplete(o2Context, 3);
    // checkpoint window
    wclock.tick(1);
    Assert.assertEquals("current window", 5, wclock.getCurrentTimeMillis());
    String tuple2 = "tuple2";
    o1Recovered.addTuple(tuple2);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 4);
    StramTestSupport.waitForWindowComplete(o2Context, 4);
    // check data flow after recovery
    Assert.assertEquals("retrieved tuple (after recovery) " + ptNode2, tuple2, o2Recovered.inport1Tuple);
    // propagate checkpoints to master
    c0Replaced.triggerHeartbeat();
    c0Replaced.waitForHeartbeat(5000);
    c2.triggerHeartbeat();
    c2.waitForHeartbeat(5000);
    // purge checkpoints
    // checkpoint purging
    localCluster.dnmgr.monitorHeartbeat(false);
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode2.checkpoints);
    localCluster.shutdown();
}
Also used : PTOperator(com.datatorrent.stram.plan.physical.PTOperator) LocalStreamingContainer(com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer) Node(com.datatorrent.stram.engine.Node) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) Checkpoint(com.datatorrent.stram.api.Checkpoint) MockComponentFactory(com.datatorrent.stram.StramLocalCluster.MockComponentFactory) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) WindowGenerator(com.datatorrent.stram.engine.WindowGenerator) OperatorContext(com.datatorrent.stram.engine.OperatorContext) ManualScheduledExecutorService(com.datatorrent.stram.support.ManualScheduledExecutorService) Test(org.junit.Test)

Example 2 with OperatorContext

use of com.datatorrent.stram.engine.OperatorContext in project apex-core by apache.

the class InlineStreamTest method test.

@Test
@SuppressWarnings("SleepWhileInLoop")
public void test() throws Exception {
    final int totalTupleCount = 5000;
    final PassThroughNode<Object> operator1 = new PassThroughNode<>();
    final GenericNode node1 = new GenericNode(operator1, new OperatorContext(1, "operator1", new DefaultAttributeMap(), null));
    node1.setId(1);
    operator1.setup(node1.context);
    final PassThroughNode<Object> operator2 = new PassThroughNode<>();
    final GenericNode node2 = new GenericNode(operator2, new OperatorContext(2, "operator2", new DefaultAttributeMap(), null));
    node2.setId(2);
    operator2.setup(node2.context);
    StreamContext streamContext = new StreamContext("node1->node2");
    final InlineStream stream = new InlineStream(1024);
    stream.setup(streamContext);
    node1.connectOutputPort("output", stream);
    node2.connectInputPort("input", stream.getReservoir());
    prev = null;
    Sink<Object> sink = new Sink<Object>() {

        @Override
        public void put(Object payload) {
            if (payload instanceof Tuple) {
                return;
            }
            if (prev == null) {
                prev = payload;
            } else {
                if (Integer.valueOf(payload.toString()) - Integer.valueOf(prev.toString()) != 1) {
                    synchronized (InlineStreamTest.this) {
                        InlineStreamTest.this.notify();
                    }
                }
                prev = payload;
            }
            if (Integer.valueOf(prev.toString()) == totalTupleCount - 1) {
                synchronized (InlineStreamTest.this) {
                    InlineStreamTest.this.notify();
                }
            }
        }

        @Override
        public int getCount(boolean reset) {
            return 0;
        }
    };
    node2.connectOutputPort("output", sink);
    AbstractReservoir reservoir1 = AbstractReservoir.newReservoir("input", 1024 * 5);
    node1.connectInputPort("input", reservoir1);
    Map<Integer, Node<?>> activeNodes = new ConcurrentHashMap<>();
    launchNodeThread(node1, activeNodes);
    launchNodeThread(node2, activeNodes);
    stream.activate(streamContext);
    reservoir1.put(StramTestSupport.generateBeginWindowTuple("irrelevant", 0));
    for (int i = 0; i < totalTupleCount; i++) {
        reservoir1.put(i);
    }
    reservoir1.put(StramTestSupport.generateEndWindowTuple("irrelevant", 0));
    synchronized (this) {
        this.wait(200);
    }
    Assert.assertNotNull(prev);
    Assert.assertEquals("processing complete", totalTupleCount, Integer.valueOf(prev.toString()) + 1);
    Assert.assertEquals("active operators", 2, activeNodes.size());
    WaitCondition c = new WaitCondition() {

        @Override
        public boolean isComplete() {
            final SweepableReservoir reservoir = stream.getReservoir();
            logger.debug("stream {} empty {}, size {}", stream, reservoir.isEmpty(), reservoir.size(false));
            return reservoir.isEmpty();
        }
    };
    Assert.assertTrue("operator should finish processing all events within 1 second", StramTestSupport.awaitCompletion(c, 1000));
    stream.deactivate();
    for (Node<?> node : activeNodes.values()) {
        node.shutdown();
    }
    for (int i = 0; i < 10; i++) {
        Thread.sleep(20);
        if (activeNodes.isEmpty()) {
            break;
        }
    }
    stream.teardown();
    operator2.teardown();
    operator1.teardown();
    Assert.assertEquals("active operators", 0, activeNodes.size());
}
Also used : WaitCondition(com.datatorrent.stram.support.StramTestSupport.WaitCondition) AbstractReservoir(com.datatorrent.stram.engine.AbstractReservoir) SweepableReservoir(com.datatorrent.stram.engine.SweepableReservoir) StreamContext(com.datatorrent.stram.engine.StreamContext) Node(com.datatorrent.stram.engine.Node) GenericNode(com.datatorrent.stram.engine.GenericNode) GenericNode(com.datatorrent.stram.engine.GenericNode) DefaultAttributeMap(com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Sink(com.datatorrent.api.Sink) OperatorContext(com.datatorrent.stram.engine.OperatorContext) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Tuple(com.datatorrent.stram.tuple.Tuple) Test(org.junit.Test)

Aggregations

Node (com.datatorrent.stram.engine.Node)2 OperatorContext (com.datatorrent.stram.engine.OperatorContext)2 Test (org.junit.Test)2 DefaultAttributeMap (com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap)1 Sink (com.datatorrent.api.Sink)1 AsyncFSStorageAgent (com.datatorrent.common.util.AsyncFSStorageAgent)1 LocalStreamingContainer (com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer)1 MockComponentFactory (com.datatorrent.stram.StramLocalCluster.MockComponentFactory)1 Checkpoint (com.datatorrent.stram.api.Checkpoint)1 AbstractReservoir (com.datatorrent.stram.engine.AbstractReservoir)1 GenericNode (com.datatorrent.stram.engine.GenericNode)1 GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator)1 StreamContext (com.datatorrent.stram.engine.StreamContext)1 SweepableReservoir (com.datatorrent.stram.engine.SweepableReservoir)1 TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator)1 WindowGenerator (com.datatorrent.stram.engine.WindowGenerator)1 PTOperator (com.datatorrent.stram.plan.physical.PTOperator)1 ManualScheduledExecutorService (com.datatorrent.stram.support.ManualScheduledExecutorService)1 WaitCondition (com.datatorrent.stram.support.StramTestSupport.WaitCondition)1 Tuple (com.datatorrent.stram.tuple.Tuple)1