Search in sources :

Example 21 with PTOperator

use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.

the class StreamingContainerManager method scheduleContainerRestart.

/**
   * Kicks off recovery for a container that was terminated. Stram calls this
   * when a container was killed externally or dropped out after a heartbeat
   * timeout. <br>
   * The container is marked as killed and its allocation details are cleared.
   * Recovery then resolves the affected operators (those inside the container
   * plus everything downstream, with their respective recovery checkpoint
   * states). Dependent operators are undeployed and their buffer server
   * connections reset before they are redeployed to the recovery checkpoint.
   *
   * @param containerId id used to look up the agent of the terminated container
   */
public void scheduleContainerRestart(String containerId) {
    StreamingContainerAgent agent = this.getContainerAgent(containerId);
    if (agent == null) {
        // the container is no longer used
        return;
    }
    if (agent.isShutdownRequested()) {
        // the container was released by us
        return;
    }

    PTContainer failed = agent.container;
    LOG.info("Initiating recovery for {}@{}", containerId, failed.host);

    // mark the container as gone and drop everything tied to its old allocation
    failed.setState(PTContainer.State.KILLED);
    failed.bufferServerAddress = null;
    failed.setResourceRequestPriority(-1);
    failed.setAllocatedMemoryMB(0);
    failed.setAllocatedVCores(0);

    // resolve dependencies: collect every operator affected by the failure
    UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock, false, getCheckpointGroups());
    for (PTOperator hosted : failed.getOperators()) {
        updateRecoveryCheckpoints(hosted, ctx, false);
    }
    includeLocalUpstreamOperators(ctx);

    // redeploy cycle for all affected operators
    LOG.info("Affected operators {}", ctx.visited);
    deploy(
        Collections.<PTContainer>emptySet(),
        ctx.visited,
        Sets.newHashSet(failed),
        ctx.visited);
}
Also used : PTOperator(com.datatorrent.stram.plan.physical.PTOperator)

Example 22 with PTOperator

use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.

the class PartitioningTest method testDefaultPartitioning.

/**
 * Runs a local cluster with the DAG input -> collector -> merged, where the
 * collector is partitioned two ways by a {@code StatelessPartitioner}, and
 * checks the number of physical collector instances and the tuples recorded
 * per instance and by the merged operator.
 */
@Test
public void testDefaultPartitioning() throws Exception {
    LogicalPlan dag = new LogicalPlan();
    File checkpointDir = new File(TEST_OUTPUT_DIR, "testDefaultPartitioning");
    AsyncFSStorageAgent storageAgent = new AsyncFSStorageAgent(checkpointDir.getPath(), null);
    dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, storageAgent);

    Integer[][] testData = { { 4, 5 } };
    CollectorOperator.receivedTuples.clear();

    // source operator replays the rows of testData
    TestInputOperator<Integer> input = dag.addOperator("input", new TestInputOperator<Integer>());
    input.testTuples = new ArrayList<>();
    for (int row = 0; row < testData.length; row++) {
        input.testTuples.add(new ArrayList<>(Arrays.asList(testData[row])));
    }

    // collector is split into two partitions
    CollectorOperator collector = dag.addOperator("collector", new CollectorOperator());
    collector.prefix = String.valueOf(System.identityHashCode(collector));
    dag.getMeta(collector).getAttributes().put(OperatorContext.PARTITIONER, new StatelessPartitioner<CollectorOperator>(2));
    dag.addStream("fromInput", input.output, collector.input);

    // single downstream operator receiving the output of both partitions
    CollectorOperator merged = dag.addOperator("merged", new CollectorOperator());
    merged.prefix = String.valueOf(System.identityHashCode(merged));
    dag.addStream("toMerged", collector.output, merged.input);

    StramLocalCluster cluster = new StramLocalCluster(dag);
    cluster.setHeartbeatMonitoringEnabled(false);
    // terminates on end of stream
    cluster.run();

    List<PTOperator> partitions = cluster.getPlanOperators(dag.getMeta(collector));
    Assert.assertEquals("number operator instances " + partitions, 2, partitions.size());

    // one entry for each partition + merged output
    Assert.assertEquals("received tuples " + CollectorOperator.receivedTuples, 3, CollectorOperator.receivedTuples.size());
    // the check for the first partition was already disabled in the original test:
    //Assert.assertEquals("received tuples " + operators.get(0), Arrays.asList(4), CollectorOperator.receivedTuples.get(collector.prefix + operators.get(0).getId()));
    PTOperator secondPartition = partitions.get(1);
    Assert.assertEquals("received tuples " + secondPartition, Arrays.asList(5), CollectorOperator.receivedTuples.get(collector.prefix + secondPartition.getId()));

    PTOperator pmerged = cluster.findByLogicalNode(dag.getMeta(merged));
    List<Object> mergedTuples = CollectorOperator.receivedTuples.get(merged.prefix + pmerged.getId());
    Assert.assertNotNull("merged tuples " + pmerged, mergedTuples);
    Assert.assertEquals("merged tuples " + pmerged, Sets.newHashSet(testData[0]), Sets.newHashSet(mergedTuples));
}
Also used : PTOperator(com.datatorrent.stram.plan.physical.PTOperator) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) LogicalPlan(com.datatorrent.stram.plan.logical.LogicalPlan) File(java.io.File) Test(org.junit.Test)

Example 23 with PTOperator

use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.

the class StramLocalClusterTest method testRecovery.

/**
 * Exercises container failure recovery on a local cluster with the two
 * operator DAG o1 -> o2.
 * <p>
 * Window generation is driven by a manual clock ({@code wclock}) and heartbeat
 * monitoring is disabled, so the test itself advances windows and triggers
 * heartbeats. The test
 * <ol>
 * <li>feeds one tuple and waits for both operators to report a checkpoint for window 1,</li>
 * <li>fails the container of o1 and waits for its replacement,</li>
 * <li>checks that o2 was redeployed inside its existing container as a new
 * instance holding the previously received tuple,</li>
 * <li>feeds a second tuple to verify data flow after recovery, and</li>
 * <li>checks the checkpoint lists after {@code monitorHeartbeat(false)}.</li>
 * </ol>
 */
@Test
@SuppressWarnings("SleepWhileInLoop")
public void testRecovery() throws Exception {
    // synchronous checkpointing is requested on the storage agent used by the DAG
    AsyncFSStorageAgent agent = new AsyncFSStorageAgent(testMeta.getPath(), null);
    agent.setSyncCheckpoint(true);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
    TestGeneratorInputOperator node1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    // data will be added externally from test
    node1.setMaxTuples(0);
    GenericTestOperator node2 = dag.addOperator("o2", GenericTestOperator.class);
    dag.addStream("o1o2", node1.outport, node2.inport1);
    dag.validate();
    dag.getAttributes().put(LogicalPlan.CHECKPOINT_WINDOW_COUNT, 2);
    // manual clock: time (and with it the window id asserted below) only moves on wclock.tick()
    final ManualScheduledExecutorService wclock = new ManualScheduledExecutorService(1);
    // component factory that hands containers a window generator bound to the manual clock
    MockComponentFactory mcf = new MockComponentFactory() {

        @Override
        public WindowGenerator setupWindowGenerator() {
            WindowGenerator wingen = StramTestSupport.setupWindowGenerator(wclock);
            wingen.setCheckpointCount(2, 0);
            return wingen;
        }
    };
    StramLocalCluster localCluster = new StramLocalCluster(dag, mcf);
    localCluster.setPerContainerBufferServer(true);
    // driven by test
    localCluster.setHeartbeatMonitoringEnabled(false);
    localCluster.runAsync();
    PTOperator ptNode1 = localCluster.findByLogicalNode(dag.getMeta(node1));
    PTOperator ptNode2 = localCluster.findByLogicalNode(dag.getMeta(node2));
    // wait until o1 is active and verify its container hosts exactly that one operator
    LocalStreamingContainer c0 = StramTestSupport.waitForActivation(localCluster, ptNode1);
    Map<Integer, Node<?>> nodeMap = c0.getNodes();
    Assert.assertEquals("number operators", 1, nodeMap.size());
    TestGeneratorInputOperator n1 = (TestGeneratorInputOperator) nodeMap.get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(n1);
    // same check for the downstream operator o2 and its container
    LocalStreamingContainer c2 = StramTestSupport.waitForActivation(localCluster, ptNode2);
    Map<Integer, Node<?>> c2NodeMap = c2.getNodes();
    Assert.assertEquals("number operators downstream", 1, c2NodeMap.size());
    GenericTestOperator n2 = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull(n2);
    // input data
    String tuple1 = "tuple1";
    n1.addTuple(tuple1);
    OperatorContext n1Context = c0.getNodeContext(ptNode1.getId());
    Assert.assertEquals("initial window id", -1, n1Context.getLastProcessedWindowId());
    // checkpoint window
    wclock.tick(1);
    wclock.tick(1);
    Assert.assertEquals("current window", 2, wclock.getCurrentTimeMillis());
    OperatorContext o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("context ", o2Context);
    StramTestSupport.waitForWindowComplete(o2Context, 1);
    Assert.assertEquals("o2 received ", tuple1, n2.inport1Tuple);
    wclock.tick(1);
    Assert.assertEquals("current window", 3, wclock.getCurrentTimeMillis());
    // checkpoint between window 1 and 2
    StramTestSupport.waitForWindowComplete(o2Context, 2);
    // propagate checkpoints to master
    c0.triggerHeartbeat();
    // wait for heartbeat cycle to complete
    c0.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode1, 1, ptNode1.getRecentCheckpoint().windowId);
    c2.triggerHeartbeat();
    //Thread.yield();
    // yield without using yield for heartbeat cycle
    Thread.sleep(1);
    c2.waitForHeartbeat(5000);
    Assert.assertEquals("checkpoint " + ptNode2, 1, ptNode2.getRecentCheckpoint().windowId);
    // both operators are expected to hold exactly one checkpoint, for window 1
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(1L, 0, 0) }), ptNode2.checkpoints);
    //
    // simulate container failure (operator o1)
    //
    localCluster.failContainer(c0);
    // replacement container starts empty
    // operators will deploy after downstream operator was removed
    LocalStreamingContainer c0Replaced = StramTestSupport.waitForActivation(localCluster, ptNode1);
    c0Replaced.triggerHeartbeat();
    // next heartbeat after setup
    c0Replaced.waitForHeartbeat(5000);
    Assert.assertNotSame("old container", c0, c0Replaced);
    // NOTE(review): assertNotSame compares object identity only; if getContainerId() returns
    // a String, two equal ids held in distinct objects would still pass - confirm whether a
    // value comparison was intended here.
    Assert.assertNotSame("old container", c0.getContainerId(), c0Replaced.getContainerId());
    // verify change in downstream container
    LOG.debug("triggering c2 heartbeat processing");
    StreamingContainerAgent c2Agent = localCluster.getContainerAgent(c2);
    // wait for downstream re-deploy to complete
    // (polls every 200 ms and gives up silently after DEFAULT_TIMEOUT_MILLIS; the assertions
    // that follow are what detect an incomplete redeploy)
    long startTms = System.currentTimeMillis();
    while (c2Agent.hasPendingWork() && StramTestSupport.DEFAULT_TIMEOUT_MILLIS > System.currentTimeMillis() - startTms) {
        Thread.sleep(200);
        c2.triggerHeartbeat();
        LOG.debug("Waiting for {} to complete pending work.", c2.getContainerId());
    }
    Assert.assertEquals(c2.getContainerId() + " operators after redeploy " + c2.getNodes(), 1, c2.getNodes().size());
    // verify downstream operator re-deployed in existing container
    Assert.assertEquals("active " + ptNode2, c2, StramTestSupport.waitForActivation(localCluster, ptNode2));
    // looked up through the map reference obtained before the failure
    GenericTestOperator o2Recovered = (GenericTestOperator) c2NodeMap.get(localCluster.findByLogicalNode(dag.getMeta(node2)).getId()).getOperator();
    Assert.assertNotNull("redeployed " + ptNode2, o2Recovered);
    Assert.assertNotSame("new instance " + ptNode2, n2, o2Recovered);
    // the new o2 instance must carry the tuple received before the failure
    Assert.assertEquals("restored state " + ptNode2, tuple1, o2Recovered.inport1Tuple);
    TestGeneratorInputOperator o1Recovered = (TestGeneratorInputOperator) c0Replaced.getNodes().get(ptNode1.getId()).getOperator();
    Assert.assertNotNull(o1Recovered);
    OperatorContext o1RecoveredContext = c0Replaced.getNodeContext(ptNode1.getId());
    Assert.assertNotNull("active " + ptNode1, o1RecoveredContext);
    wclock.tick(1);
    Assert.assertEquals("current window", 4, wclock.getCurrentTimeMillis());
    // refresh context after operator re-deploy
    o2Context = c2.getNodeContext(ptNode2.getId());
    Assert.assertNotNull("active " + ptNode2, o2Context);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 3);
    StramTestSupport.waitForWindowComplete(o2Context, 3);
    // checkpoint window
    wclock.tick(1);
    Assert.assertEquals("current window", 5, wclock.getCurrentTimeMillis());
    String tuple2 = "tuple2";
    o1Recovered.addTuple(tuple2);
    StramTestSupport.waitForWindowComplete(o1RecoveredContext, 4);
    StramTestSupport.waitForWindowComplete(o2Context, 4);
    // check data flow after recovery
    Assert.assertEquals("retrieved tuple (after recovery) " + ptNode2, tuple2, o2Recovered.inport1Tuple);
    // propagate checkpoints to master
    c0Replaced.triggerHeartbeat();
    c0Replaced.waitForHeartbeat(5000);
    c2.triggerHeartbeat();
    c2.waitForHeartbeat(5000);
    // purge checkpoints
    // checkpoint purging
    localCluster.dnmgr.monitorHeartbeat(false);
    // after purging only the checkpoint for window 3 is expected to remain on each operator
    Assert.assertEquals("checkpoints " + ptNode1, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode1.checkpoints);
    Assert.assertEquals("checkpoints " + ptNode2, Arrays.asList(new Checkpoint[] { new Checkpoint(3L, 0, 0) }), ptNode2.checkpoints);
    localCluster.shutdown();
}
Also used : PTOperator(com.datatorrent.stram.plan.physical.PTOperator) LocalStreamingContainer(com.datatorrent.stram.StramLocalCluster.LocalStreamingContainer) Node(com.datatorrent.stram.engine.Node) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) Checkpoint(com.datatorrent.stram.api.Checkpoint) MockComponentFactory(com.datatorrent.stram.StramLocalCluster.MockComponentFactory) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) WindowGenerator(com.datatorrent.stram.engine.WindowGenerator) OperatorContext(com.datatorrent.stram.engine.OperatorContext) ManualScheduledExecutorService(com.datatorrent.stram.support.ManualScheduledExecutorService) Test(org.junit.Test)

Example 24 with PTOperator

use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.

the class StramRecoveryTest method testPhysicalPlanSerialization.

/**
 * Builds a physical plan for the DAG o1 -> o2 (three partitions) -> o3, runs it
 * through Java serialization and verifies that every partition of o2 can
 * still be loaded from the deserialized plan with its partition keys and
 * stats in place.
 *
 * @param agent storage agent installed as the DAG's STORAGE_AGENT attribute
 * @throws Exception if serialization, deserialization or the reflective
 *         access to the plan's {@code ctx} field fails
 */
private void testPhysicalPlanSerialization(StorageAgent agent) throws Exception {
    GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
    PartitioningTestOperator o2 = dag.addOperator("o2", PartitioningTestOperator.class);
    o2.setPartitionCount(3);
    GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class);
    dag.addStream("o1.outport1", o1.outport1, o2.inport1, o2.inportWithCodec);
    dag.addStream("mergeStream", o2.outport1, o3.inport1);
    dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2);
    TestPlanContext ctx = new TestPlanContext();
    dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
    PhysicalPlan plan = new PhysicalPlan(dag, ctx);

    // logical plan is only written to report its serialized size
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    LogicalPlan.write(dag, bos);
    LOG.debug("logicalPlan size: " + bos.size());

    // serialize the physical plan; closing the object stream flushes anything it
    // still buffers before the backing byte array is read
    bos = new ByteArrayOutputStream();
    try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
        oos.writeObject(plan);
    }
    byte[] serializedPlan = bos.toByteArray();
    LOG.debug("physicalPlan size: " + serializedPlan.length);

    // read the plan back and continue with the deserialized copy only
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(serializedPlan))) {
        plan = (PhysicalPlan) ois.readObject();
    }
    dag = plan.getLogicalPlan();

    // re-attach the plan context through reflection
    // (presumably the ctx field is not restored by deserialization - confirm in PhysicalPlan)
    Field f = PhysicalPlan.class.getDeclaredField("ctx");
    f.setAccessible(true);
    try {
        f.set(plan, ctx);
    } finally {
        // restore the accessibility flag even when the assignment fails
        f.setAccessible(false);
    }

    OperatorMeta o2Meta = dag.getOperatorMeta("o2");
    List<PTOperator> o2Partitions = plan.getOperators(o2Meta);
    assertEquals(3, o2Partitions.size());
    for (PTOperator o : o2Partitions) {
        Assert.assertNotNull("partition null " + o, o.getPartitionKeys());
        assertEquals("partition keys " + o + " " + o.getPartitionKeys(), 2, o.getPartitionKeys().size());
        // each partition must load as its own operator instance carrying its partition keys
        PartitioningTestOperator partitionedInstance = (PartitioningTestOperator) plan.loadOperator(o);
        assertEquals("instance per partition", o.getPartitionKeys().values().toString(), partitionedInstance.pks);
        Assert.assertNotNull("partition stats null " + o, o.stats);
    }
}
Also used : PartitioningTestOperator(com.datatorrent.stram.plan.physical.PhysicalPlanTest.PartitioningTestOperator) Field(java.lang.reflect.Field) PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) OperatorMeta(com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) TestPlanContext(com.datatorrent.stram.plan.TestPlanContext) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) ObjectInputStream(java.io.ObjectInputStream)

Example 25 with PTOperator

use of com.datatorrent.stram.plan.physical.PTOperator in project apex-core by apache.

the class StramRecoveryTest method testWriteAheadLog.

/**
 * Verifies the write-ahead journal of {@code StreamingContainerManager}.
 * <p>
 * The journal is pointed at an in-memory stream that counts {@code flush()}
 * calls and records {@code close()}. The test then checks that
 * <ul>
 * <li>operator state changes are flushed to the journal while a stream is set
 * and are not recorded once it was closed,</li>
 * <li>replaying the journal restores the operator state, the container state
 * and operator property changes.</li>
 * </ul>
 * The assertions depend on the exact number of flushes, so the statement order
 * matters.
 */
@Test
public void testWriteAheadLog() throws Exception {
    final MutableInt flushCount = new MutableInt();
    final MutableBoolean isClosed = new MutableBoolean(false);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, new FSStorageAgent(testMeta.getPath(), null));
    TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    StreamingContainerManager scm = new StreamingContainerManager(dag);
    PhysicalPlan plan = scm.getPhysicalPlan();
    Journal j = scm.getJournal();
    // in-memory journal target that tracks flush() and close() calls
    ByteArrayOutputStream bos = new ByteArrayOutputStream() {

        @Override
        public void flush() throws IOException {
            super.flush();
            flushCount.increment();
        }

        @Override
        public void close() throws IOException {
            super.close();
            isClosed.setValue(true);
        }
    };
    j.setOutputStream(new DataOutputStream(bos));
    PTOperator o1p1 = plan.getOperators(dag.getMeta(o1)).get(0);
    assertEquals(PTOperator.State.PENDING_DEPLOY, o1p1.getState());
    // creating the mock container is expected to produce the first journal flush
    String externalId = new MockContainer(scm, o1p1.getContainer()).container.getExternalId();
    assertEquals("flush count", 1, flushCount.intValue());
    // operator state change while the journal stream is open: one more flush
    o1p1.setState(PTOperator.State.ACTIVE);
    assertEquals(PTOperator.State.ACTIVE, o1p1.getState());
    assertEquals("flush count", 2, flushCount.intValue());
    assertEquals("is closed", false, isClosed.booleanValue());
    // this will close the stream. There are 2 calls to flush() during the close() - one in Kryo Output and one
    // in FilterOutputStream
    j.setOutputStream(null);
    assertEquals("flush count", 4, flushCount.intValue());
    assertEquals("is closed", true, isClosed.booleanValue());
    // output stream is closed, so state will be changed without recording it in the journal
    o1p1.setState(PTOperator.State.INACTIVE);
    assertEquals(PTOperator.State.INACTIVE, o1p1.getState());
    assertEquals("flush count", 4, flushCount.intValue());
    // replay brings back the last recorded state (ACTIVE), not the unrecorded INACTIVE
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
    j.replay(new DataInputStream(bis));
    assertEquals(PTOperator.State.ACTIVE, o1p1.getState());
    // container state round trip: set the values, journal them, clear some, replay
    InetSocketAddress addr1 = InetSocketAddress.createUnresolved("host1", 1);
    PTContainer c1 = plan.getContainers().get(0);
    c1.setState(PTContainer.State.ALLOCATED);
    c1.host = "host1";
    c1.bufferServerAddress = addr1;
    c1.setAllocatedMemoryMB(2);
    c1.setRequiredMemoryMB(1);
    c1.setAllocatedVCores(3);
    c1.setRequiredVCores(4);
    j.setOutputStream(new DataOutputStream(bos));
    j.write(c1.getSetContainerState());
    // NOTE(review): setExternalId(null) is called twice below; the second call looks redundant.
    // NOTE(review): the memory and vcore values are not reset before the replay, so the
    // assertions on them further down may not prove that the journal restored them.
    c1.setExternalId(null);
    c1.setState(PTContainer.State.NEW);
    c1.setExternalId(null);
    c1.host = null;
    c1.bufferServerAddress = null;
    bis = new ByteArrayInputStream(bos.toByteArray());
    j.replay(new DataInputStream(bis));
    assertEquals(externalId, c1.getExternalId());
    assertEquals(PTContainer.State.ALLOCATED, c1.getState());
    assertEquals("host1", c1.host);
    assertEquals(addr1, c1.bufferServerAddress);
    assertEquals(1, c1.getRequiredMemoryMB());
    assertEquals(2, c1.getAllocatedMemoryMB());
    assertEquals(3, c1.getAllocatedVCores());
    assertEquals(4, c1.getRequiredVCores());
    // operator property written directly to the journal: replay overrides the locally set value
    j.write(scm.getSetOperatorProperty("o1", "maxTuples", "100"));
    o1.setMaxTuples(10);
    j.setOutputStream(null);
    bis = new ByteArrayInputStream(bos.toByteArray());
    j.replay(new DataInputStream(bis));
    assertEquals(100, o1.getMaxTuples());
    // operator property set through the manager: applied immediately and restored by replay
    j.setOutputStream(new DataOutputStream(bos));
    scm.setOperatorProperty("o1", "maxTuples", "10");
    assertEquals(10, o1.getMaxTuples());
    o1.setMaxTuples(100);
    assertEquals(100, o1.getMaxTuples());
    j.setOutputStream(null);
    bis = new ByteArrayInputStream(bos.toByteArray());
    j.replay(new DataInputStream(bis));
    assertEquals(10, o1.getMaxTuples());
    // physical operator property change with the journal open; no assertion follows this call
    j.setOutputStream(new DataOutputStream(bos));
    scm.setPhysicalOperatorProperty(o1p1.getId(), "maxTuples", "50");
}
Also used : PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) DataOutputStream(java.io.DataOutputStream) InetSocketAddress(java.net.InetSocketAddress) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) FSStorageAgent(com.datatorrent.common.util.FSStorageAgent) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) MutableInt(org.apache.commons.lang.mutable.MutableInt) PTContainer(com.datatorrent.stram.plan.physical.PTContainer) Test(org.junit.Test)

Aggregations

PTOperator (com.datatorrent.stram.plan.physical.PTOperator)84 Test (org.junit.Test)39 PhysicalPlan (com.datatorrent.stram.plan.physical.PhysicalPlan)38 GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator)36 PTContainer (com.datatorrent.stram.plan.physical.PTContainer)34 Checkpoint (com.datatorrent.stram.api.Checkpoint)23 LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan)22 MemoryStorageAgent (com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent)16 OperatorDeployInfo (com.datatorrent.stram.api.OperatorDeployInfo)15 OperatorMeta (com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta)15 PhysicalPlanTest (com.datatorrent.stram.plan.physical.PhysicalPlanTest)14 TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator)11 ArrayList (java.util.ArrayList)10 HashMap (java.util.HashMap)10 AsyncFSStorageAgent (com.datatorrent.common.util.AsyncFSStorageAgent)9 StramTestSupport (com.datatorrent.stram.support.StramTestSupport)9 Map (java.util.Map)9 TestPlanContext (com.datatorrent.stram.plan.TestPlanContext)7 Operator (com.datatorrent.api.Operator)6 StatsListener (com.datatorrent.api.StatsListener)6