Search in sources :

Example 1 with LocalStreamingContainer

use of com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer in project apex-core by apache.

The class StramLocalClusterTest, method testRecovery.

/**
 * Exercises checkpointing and container recovery on a local cluster.
 *
 * <p>The DAG has two operators, {@code o1 -> o2}, with a checkpoint window count of 2.
 * Windows are advanced by hand through a {@link ManualScheduledExecutorService} and
 * heartbeats are triggered explicitly, because heartbeat monitoring is disabled.
 * The test proceeds in these phases:
 * <ol>
 *   <li>deploy both operators and push one tuple through the stream;</li>
 *   <li>advance the clock and verify that both operators report a checkpoint for window 1;</li>
 *   <li>fail the container hosting {@code o1} and verify that a replacement container is
 *       started while {@code o2} is redeployed inside its existing container with its
 *       state restored;</li>
 *   <li>push a second tuple to verify data flow after recovery, then verify that older
 *       checkpoints are purged, leaving only the checkpoint for window 3.</li>
 * </ol>
 *
 * <p>Statement order matters throughout: each clock tick and heartbeat is a
 * synchronization point for the assertions that follow it.
 *
 * @throws Exception if cluster setup, waiting or any assertion helper fails
 */
@Test
@SuppressWarnings("SleepWhileInLoop")
public void testRecovery() throws Exception {
    // --- plan setup: synchronous checkpoints into the test directory ---
    AsyncFSStorageAgent agent = new AsyncFSStorageAgent(testMeta.getPath(), null);
    agent.setSyncCheckpoint(true);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
    TestGeneratorInputOperator node1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    // data will be added externally from test
    node1.setMaxTuples(0);
    GenericTestOperator node2 = dag.addOperator("o2", GenericTestOperator.class);
    dag.addStream("o1o2", node1.outport, node2.inport1);
    dag.validate();
    dag.getAttributes().put(LogicalPlan.CHECKPOINT_WINDOW_COUNT, 2);
    // manual clock: windows only advance when the test calls tick()
    final ManualScheduledExecutorService wclock = new ManualScheduledExecutorService(1);
    MockComponentFactory mcf = new MockComponentFactory() {

        @Override
        public WindowGenerator setupWindowGenerator() {
            WindowGenerator wingen = StramTestSupport.setupWindowGenerator(wclock);
            wingen.setCheckpointCount(2, 0);
            return wingen;
        }
    };
    StramLocalCluster localCluster = new StramLocalCluster(dag, mcf);
    localCluster.setPerContainerBufferServer(true);
    // driven by test
    localCluster.setHeartbeatMonitoringEnabled(false);
    localCluster.runAsync();

    // --- initial deployment: one operator per container ---
    PTOperator ptNode1 = localCluster.findByLogicalNode(dag.getMeta(node1));
    PTOperator ptNode2 = localCluster.findByLogicalNode(dag.getMeta(node2));
    LocalStreamingContainer c0 = StramTestSupport.waitForActivation(localCluster, ptNode1);
    Map<Integer, Node<?>> nodeMap = c0.getNodes();
    Assert.assertEquals("number operators", 1, nodeMap.size());
    TestGeneratorInputOperator n1 = (TestGeneratorInputOperator) nodeMap.get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(n1);
    LocalStreamingContainer c2 = StramTestSupport.waitForActivation(localCluster, ptNode2);
    Map<Integer, Node<?>> c2NodeMap = c2.getNodes();
    Assert.assertEquals("number operators downstream", 1, c2NodeMap.size());
    GenericTestOperator n2 = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull(n2);

    // --- input data ---
    String tuple1 = "tuple1";
    n1.addTuple(tuple1);
    OperatorContext n1Context = c0.getNodeContext(ptNode1.getId());
    Assert.assertEquals("initial window id", -1, n1Context.getLastProcessedWindowId());
    // checkpoint window
    wclock.tick(1);
    wclock.tick(1);
    Assert.assertEquals("current window", 2, wclock.getCurrentTimeMillis());
    OperatorContext o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("context ", o2Context);
    StramTestSupport.waitForWindowComplete(o2Context, 1);
    Assert.assertEquals("o2 received ", tuple1, n2.inport1Tuple);
    wclock.tick(1);
    Assert.assertEquals("current window", 3, wclock.getCurrentTimeMillis());
    // checkpoint between window 1 and 2
    StramTestSupport.waitForWindowComplete(o2Context, 2);

    // --- propagate checkpoints to master ---
    c0.triggerHeartbeat();
    // wait for heartbeat cycle to complete
    c0.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode1, 1, ptNode1.getRecentCheckpoint().windowId);
    c2.triggerHeartbeat();
    // Thread.yield();
    // yield without using yield for heartbeat cycle
    Thread.sleep(1);
    c2.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode2, 1, ptNode2.getRecentCheckpoint().windowId);
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode2.checkpoints);

    // --- simulate container failure (operator o1) ---
    localCluster.failContainer(c0);
    // replacement container starts empty
    // operators will deploy after downstream operator was removed
    LocalStreamingContainer c0Replaced = StramTestSupport.waitForActivation(localCluster, ptNode1);
    c0Replaced.triggerHeartbeat();
    // next heartbeat after setup
    c0Replaced.waitForHeartbeat(5000);
    Assert.assertNotSame("old container", c0, c0Replaced);
    // compare the ids by value: a reference check would also pass for two distinct
    // objects holding the same id and therefore could never detect a reused id
    Assert.assertFalse("old container", c0.getContainerId().equals(c0Replaced.getContainerId()));

    // --- verify change in downstream container ---
    LOG.debug("triggering c2 heartbeat processing");
    StreamingContainerAgent c2Agent = localCluster.getContainerAgent(c2);
    // wait for downstream re-deploy to complete
    long startTms = System.currentTimeMillis();
    while (c2Agent.hasPendingWork() && StramTestSupport.DEFAULT_TIMEOUT_MILLIS > System.currentTimeMillis() - startTms) {
        Thread.sleep(200);
        c2.triggerHeartbeat();
        LOG.debug("Waiting for {} to complete pending work.", c2.getContainerId());
    }
    Assert.assertEquals(c2.getContainerId() + " operators after redeploy " + c2.getNodes(), 1, c2.getNodes().size());
    // verify downstream operator re-deployed in existing container
    Assert.assertEquals("active " + ptNode2, c2, StramTestSupport.waitForActivation(localCluster, ptNode2));
    GenericTestOperator o2Recovered = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull("redeployed " + ptNode2, o2Recovered);
    Assert.assertNotSame("new instance " + ptNode2, n2, o2Recovered);
    Assert.assertEquals("restored state " + ptNode2, tuple1, o2Recovered.inport1Tuple);
    TestGeneratorInputOperator o1Recovered = (TestGeneratorInputOperator) c0Replaced.getNodes().get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(o1Recovered);
    OperatorContext o1RecoveredContext = c0Replaced.getNodeContext(ptNode1.getId());
    Assert.assertNotNull("active " + ptNode1, o1RecoveredContext);
    wclock.tick(1);
    Assert.assertEquals("current window", 4, wclock.getCurrentTimeMillis());
    // refresh context after operator re-deploy
    o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("active " + ptNode2, o2Context);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 3);
    StramTestSupport.waitForWindowComplete(o2Context, 3);
    // checkpoint window
    wclock.tick(1);
    Assert.assertEquals("current window", 5, wclock.getCurrentTimeMillis());
    String tuple2 = "tuple2";
    o1Recovered.addTuple(tuple2);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 4);
    StramTestSupport.waitForWindowComplete(o2Context, 4);
    // check data flow after recovery
    Assert.assertEquals("retrieved tuple (after recovery) " + ptNode2, tuple2, o2Recovered.inport1Tuple);

    // --- propagate checkpoints to master ---
    c0Replaced.triggerHeartbeat();
    c0Replaced.waitForHeartbeat(5000);
    c2.triggerHeartbeat();
    c2.waitForHeartbeat(5000);
    // checkpoint purging: only the checkpoint for window 3 is expected to remain
    localCluster.dnmgr.monitorHeartbeat(false);
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode2.checkpoints);
    localCluster.shutdown();
}
Also used : PTOperator(com.datatorrent.stram.plan.physical.PTOperator) LocalStreamingContainer(com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer) Node(com.datatorrent.stram.engine.Node) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) Checkpoint(com.datatorrent.stram.api.Checkpoint) MockComponentFactory(com.datatorrent.stram.StramLocalCluster.MockComponentFactory) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) WindowGenerator(com.datatorrent.stram.engine.WindowGenerator) OperatorContext(com.datatorrent.stram.engine.OperatorContext) ManualScheduledExecutorService(com.datatorrent.stram.support.ManualScheduledExecutorService) Test(org.junit.Test)

Example 2 with LocalStreamingContainer

use of com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer in project apex-core by apache.

The class StramTestSupport, method waitForActivation.

/**
 * Polls until the given physical operator is in state {@code ACTIVE} and the local
 * cluster can resolve the container it runs in, then returns that container.
 * The operator is checked every 500 ms; if it is still not available once
 * {@code DEFAULT_TIMEOUT_MILLIS} has elapsed, the test is failed via {@code Assert.fail}.
 *
 * @param localCluster the local cluster used to look up the operator's container
 * @param operator the physical operator whose activation is awaited
 * @return the container the active operator is deployed in
 * @throws InterruptedException if the calling thread is interrupted while sleeping between polls
 */
@SuppressWarnings("SleepWhileInLoop")
public static LocalStreamingContainer waitForActivation(StramLocalCluster localCluster, PTOperator operator) throws InterruptedException {
    final long deadlineMillis = System.currentTimeMillis() + DEFAULT_TIMEOUT_MILLIS;
    while (System.currentTimeMillis() < deadlineMillis) {
        // the container is only looked up once the operator reports ACTIVE
        LocalStreamingContainer deployedIn = (operator.getState() == PTOperator.State.ACTIVE)
            ? localCluster.getContainer(operator)
            : null;
        if (deployedIn != null) {
            return deployedIn;
        }
        LOG.debug("Waiting for {}({}) in container {}", new Object[] { operator, operator.getState(), operator.getContainer() });
        Thread.sleep(500);
    }
    Assert.fail("timeout waiting for operator deployment " + operator);
    // not reached when Assert.fail throws; present to satisfy the compiler
    return null;
}
Also used : LocalStreamingContainer(com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer)

Example 3 with LocalStreamingContainer

use of com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer in project apex-core by apache.

The class PartitioningTest, method testDynamicDefaultPartitioning.

/**
 * Exercises dynamic repartitioning of an operator that starts with two partitions.
 *
 * <p>The DAG is {@code input -> partitionedCollector (2 partitions) -> singleCollector}.
 * After the initial deployment the test registers a load indication for the first
 * partition via {@code PartitionLoadWatch}, processes plan events until something
 * changes, and then verifies that:
 * <ul>
 *   <li>three partitions exist and the containers of the original partitions are still in the plan;</li>
 *   <li>each container's required memory equals the sum of its operators' memory figures;</li>
 *   <li>a tuple matching each partition's key is received by exactly that partition;</li>
 *   <li>the single downstream collector receives the tuples from all partitions.</li>
 * </ul>
 *
 * <p>Every wait on tuple arrival is bounded by {@code StramTestSupport.DEFAULT_TIMEOUT_MILLIS}
 * so that a lost tuple fails the test instead of hanging it.
 *
 * @throws Exception if cluster setup, waiting or any assertion helper fails
 */
// @Ignore
@Test
@SuppressWarnings("SleepWhileInLoop")
public void testDynamicDefaultPartitioning() throws Exception {
    // --- plan setup ---
    LogicalPlan dag = new LogicalPlan();
    dag.setAttribute(LogicalPlan.CONTAINERS_MAX_COUNT, 5);
    File checkpointDir = new File(TEST_OUTPUT_DIR, "testDynamicDefaultPartitioning");
    dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, new AsyncFSStorageAgent(checkpointDir.getPath(), null));
    // received tuples are kept in a static map; clear what earlier tests left behind
    CollectorOperator.receivedTuples.clear();
    TestInputOperator<Integer> input = dag.addOperator("input", new TestInputOperator<Integer>());
    input.blockEndStream = true;
    CollectorOperator collector = dag.addOperator("partitionedCollector", new CollectorOperator());
    collector.prefix = "" + System.identityHashCode(collector);
    dag.setOperatorAttribute(collector, OperatorContext.PARTITIONER, new StatelessPartitioner<CollectorOperator>(2));
    dag.setOperatorAttribute(collector, OperatorContext.STATS_LISTENERS, Arrays.asList(new StatsListener[] { new PartitionLoadWatch() }));
    dag.addStream("fromInput", input.output, collector.input);
    CollectorOperator singleCollector = dag.addOperator("singleCollector", new CollectorOperator());
    singleCollector.prefix = "" + System.identityHashCode(singleCollector);
    dag.addStream("toSingleCollector", collector.output, singleCollector.input);
    StramLocalCluster lc = new StramLocalCluster(dag);
    lc.setHeartbeatMonitoringEnabled(false);
    lc.runAsync();

    // --- initial deployment: two partitions ---
    List<PTOperator> partitions = assertNumberPartitions(2, lc, dag.getMeta(collector));
    Set<PTContainer> containers = Sets.newHashSet();
    for (PTOperator oper : partitions) {
        containers.add(oper.getContainer());
    }
    // assertEquals reports the actual container count when the expectation is not met
    Assert.assertEquals("Number of containers are 4", 4, lc.dnmgr.getPhysicalPlan().getContainers().size());

    // --- trigger the split of the first partition ---
    PTOperator splitPartition = partitions.get(0);
    PartitionLoadWatch.put(splitPartition, 1);
    LOG.debug("Triggered split for {}", splitPartition);
    int count = 0;
    long startMillis = System.currentTimeMillis();
    while (count == 0 && startMillis > System.currentTimeMillis() - StramTestSupport.DEFAULT_TIMEOUT_MILLIS) {
        // yield
        sleep(20);
        count += lc.dnmgr.processEvents();
    }
    partitions = assertNumberPartitions(3, lc, dag.getMeta(collector));
    Assert.assertTrue("container reused", lc.dnmgr.getPhysicalPlan().getContainers().containsAll(containers));
    // check deployment
    for (PTOperator p : partitions) {
        StramTestSupport.waitForActivation(lc, p);
    }
    PartitionLoadWatch.remove(splitPartition);

    // --- container memory must equal the sum over its operators ---
    for (PTContainer container : lc.dnmgr.getPhysicalPlan().getContainers()) {
        int memory = 0;
        for (PTOperator operator : container.getOperators()) {
            memory += operator.getBufferServerMemory();
            memory += operator.getOperatorMeta().getValue(OperatorContext.MEMORY_MB);
        }
        Assert.assertEquals("memory", memory, container.getRequiredMemoryMB());
    }

    // --- locate the deployed input operator ---
    PTOperator planInput = lc.findByLogicalNode(dag.getMeta(input));
    LocalStreamingContainer c = StramTestSupport.waitForActivation(lc, planInput);
    Map<Integer, Node<?>> nodeMap = c.getNodes();
    Assert.assertEquals("number operators " + nodeMap, 1, nodeMap.size());
    @SuppressWarnings({ "unchecked" }) TestInputOperator<Integer> inputDeployed = (TestInputOperator<Integer>) nodeMap.get(planInput.getId()).getOperator();
    Assert.assertNotNull("" + nodeMap, inputDeployed);

    // add tuple that matches the partition key and check that each partition receives it
    ArrayList<Integer> inputTuples = new ArrayList<>();
    LOG.debug("Number of partitions {}", partitions.size());
    for (PTOperator p : partitions) {
        // default partitioning has one port mapping with a single partition key
        LOG.debug("Partition key map size: {}", p.getPartitionKeys().size());
        inputTuples.add(p.getPartitionKeys().values().iterator().next().partitions.iterator().next());
    }
    inputDeployed.testTuples = Collections.synchronizedList(new ArrayList<List<Integer>>());
    inputDeployed.testTuples.add(inputTuples);
    for (PTOperator p : partitions) {
        Integer expectedTuple = p.getPartitionKeys().values().iterator().next().partitions.iterator().next();
        List<Object> receivedTuples;
        int i = 0;
        long waitStartMillis = System.currentTimeMillis();
        while ((receivedTuples = CollectorOperator.receivedTuples.get(collector.prefix + p.getId())) == null || receivedTuples.isEmpty()) {
            // bounded wait: fail instead of hanging when the tuple never arrives
            if (System.currentTimeMillis() - waitStartMillis > StramTestSupport.DEFAULT_TIMEOUT_MILLIS) {
                Assert.fail("timeout waiting for tuple: " + p);
            }
            if (i++ % 100 == 0) {
                LOG.debug("Waiting for tuple: {}", p);
            }
            sleep(10);
        }
        Assert.assertEquals("received " + p, Arrays.asList(expectedTuple), receivedTuples);
    }

    // single output operator to receive tuple from each partition
    List<PTOperator> operators = lc.getPlanOperators(dag.getMeta(singleCollector));
    Assert.assertEquals("number output operator instances " + operators, 1, operators.size());
    // ensure redeploy
    StramTestSupport.waitForActivation(lc, operators.get(0));
    List<Object> receivedTuples;
    long outputWaitStartMillis = System.currentTimeMillis();
    while ((receivedTuples = CollectorOperator.receivedTuples.get(singleCollector.prefix + operators.get(0).getId())) == null || receivedTuples.size() < inputTuples.size()) {
        // bounded wait: fail instead of hanging when tuples are missing downstream
        if (System.currentTimeMillis() - outputWaitStartMillis > StramTestSupport.DEFAULT_TIMEOUT_MILLIS) {
            Assert.fail("timeout waiting for tuple: " + operators.get(0) + " expected: " + inputTuples + " received: " + receivedTuples);
        }
        LOG.debug("Waiting for tuple: {} expected: {} received: {}", new Object[] { operators.get(0), inputTuples, receivedTuples });
        sleep(20);
    }
    Assert.assertEquals("output tuples " + receivedTuples, Sets.newHashSet(inputTuples), Sets.newHashSet(receivedTuples));
    lc.shutdown();
}
Also used : Node(com.datatorrent.stram.engine.Node) ArrayList(java.util.ArrayList) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) PTContainer(com.datatorrent.stram.plan.physical.PTContainer) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) LocalStreamingContainer(com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer) StatsListener(com.datatorrent.api.StatsListener) Checkpoint(com.datatorrent.stram.api.Checkpoint) LogicalPlan(com.datatorrent.stram.plan.logical.LogicalPlan) File(java.io.File) Test(org.junit.Test)

Aggregations

LocalStreamingContainer (com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer)3 AsyncFSStorageAgent (com.datatorrent.common.util.AsyncFSStorageAgent)2 Checkpoint (com.datatorrent.stram.api.Checkpoint)2 Node (com.datatorrent.stram.engine.Node)2 PTOperator (com.datatorrent.stram.plan.physical.PTOperator)2 Test (org.junit.Test)2 StatsListener (com.datatorrent.api.StatsListener)1 MockComponentFactory (com.datatorrent.stram.StramLocalCluster.MockComponentFactory)1 GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator)1 OperatorContext (com.datatorrent.stram.engine.OperatorContext)1 TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator)1 WindowGenerator (com.datatorrent.stram.engine.WindowGenerator)1 LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan)1 PTContainer (com.datatorrent.stram.plan.physical.PTContainer)1 ManualScheduledExecutorService (com.datatorrent.stram.support.ManualScheduledExecutorService)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1