Search in sources :

Example 16 with Checkpoint

use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.

the class CheckpointTest method testUpdateCheckpointsRecovery.

/**
 * Checks recovery checkpoint computation in recovery mode for the DAG
 * {@code o1 -> o2SL -> (o3SL, o4)}, where o2SL and o3SL are added as
 * {@code StatelessOperator} instances.
 *
 * <p>Only the leaf o4 receives a checkpoint (window 2) while the mock clock
 * reads 3. After a single update started at o1 the test expects:
 * o1 still at the initial checkpoint, o2SL at the leaf checkpoint, o3SL at a
 * checkpoint whose window id equals the clock time, and every physical
 * operator recorded as visited.
 */
@Test
public void testUpdateCheckpointsRecovery() {
    final MockClock mockClock = new MockClock();
    dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
    dag.setAttribute(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS, 1);

    // Logical plan: o1 -> o2SL -> {o3SL, o4}
    GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
    StatelessOperator o2SL = dag.addOperator("o2SL", StatelessOperator.class);
    StatelessOperator o3SL = dag.addOperator("o3SL", StatelessOperator.class);
    GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);
    dag.addStream("o1.outport1", o1.outport1, o2SL.inport1);
    dag.addStream("o2SL.outport1", o2SL.outport1, o3SL.inport1, o4.inport1);

    final StreamingContainerManager manager = new StreamingContainerManager(dag, mockClock);
    final PhysicalPlan physicalPlan = manager.getPhysicalPlan();

    // Freshly planned operators carry no checkpoints at all.
    for (PTOperator ptOperator : physicalPlan.getAllOperators().values()) {
        Assert.assertEquals("activation windowId " + ptOperator, Checkpoint.INITIAL_CHECKPOINT, ptOperator.getRecoveryCheckpoint());
        Assert.assertEquals("checkpoints " + ptOperator, Collections.emptyList(), ptOperator.checkpoints);
    }

    final PTOperator o1p1 = physicalPlan.getOperators(dag.getMeta(o1)).get(0);
    final PTOperator o2SLp1 = physicalPlan.getOperators(dag.getMeta(o2SL)).get(0);
    final PTOperator o3SLp1 = physicalPlan.getOperators(dag.getMeta(o3SL)).get(0);
    final PTOperator o4p1 = physicalPlan.getOperators(dag.getMeta(o4)).get(0);

    // Checkpoint only the leaf o4; the clock is one window ahead of it.
    final Checkpoint leafCheckpoint = new Checkpoint(2L, 0, 0);
    mockClock.time = 3;
    o4p1.checkpoints.add(leafCheckpoint);

    // Second constructor argument is 'true' here, unlike the other tests which use the single-arg form.
    final UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(
        mockClock, true, Collections.<OperatorMeta, Set<OperatorMeta>>emptyMap());
    manager.updateRecoveryCheckpoints(o1p1, ctx, false);

    Assert.assertEquals("initial checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
    Assert.assertEquals("initial checkpoint " + o2SLp1, leafCheckpoint, o2SLp1.getRecoveryCheckpoint());
    Assert.assertEquals("initial checkpoint " + o3SLp1, new Checkpoint(mockClock.getTime(), 0, 0), o3SLp1.getRecoveryCheckpoint());
    Assert.assertEquals("number dependencies " + ctx.visited, physicalPlan.getAllOperators().size(), ctx.visited.size());
}
Also used : PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) Checkpoint(com.datatorrent.stram.api.Checkpoint) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) OperatorMeta(com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) MemoryStorageAgent(com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent) UpdateCheckpointsContext(com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext) Test(org.junit.Test)

Example 17 with Checkpoint

use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.

the class CheckpointTest method testUpdateRecoveryCheckpoint.

/**
 * Walks the recovery checkpoint of a linear DAG {@code o1 -> o2 -> o3SL}
 * through several stages:
 * <ol>
 *   <li>no checkpoints anywhere: everything stays at the initial checkpoint;</li>
 *   <li>checkpoints added while operators are still PENDING_DEPLOY: no change;</li>
 *   <li>operators switched to ACTIVE: recovery checkpoints advance;</li>
 *   <li>further checkpoints: recovery checkpoint moves and older entries are dropped;</li>
 *   <li>{@code addCheckpoint} keeps the list sorted and ignores duplicates.</li>
 * </ol>
 *
 * @throws Exception declared by the original signature; propagated from the code under test
 */
@Test
public void testUpdateRecoveryCheckpoint() throws Exception {
    Clock clock = new SystemClock();
    dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
    GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
    GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
    GenericTestOperator o3SL = dag.addOperator("o3SL", StatelessOperator.class);
    dag.addStream("o1.output1", o1.outport1, o2.inport1);
    dag.addStream("o2.output1", o2.outport1, o3SL.inport1);
    StreamingContainerManager dnm = new StreamingContainerManager(dag);
    PhysicalPlan plan = dnm.getPhysicalPlan();
    for (PTOperator oper : plan.getAllOperators().values()) {
        Assert.assertEquals("activation windowId " + oper, Checkpoint.INITIAL_CHECKPOINT, oper.getRecoveryCheckpoint());
        Assert.assertEquals("checkpoints " + oper, Collections.emptyList(), oper.checkpoints);
    }
    List<PTOperator> nodes1 = plan.getOperators(dag.getMeta(o1));
    Assert.assertNotNull(nodes1);
    Assert.assertEquals(1, nodes1.size());
    PTOperator o1p1 = nodes1.get(0);
    PTOperator o2p1 = plan.getOperators(dag.getMeta(o2)).get(0);
    PTOperator o3SLp1 = plan.getOperators(dag.getMeta(o3SL)).get(0);
    // recovery checkpoint won't update in deploy state
    for (PTOperator oper : plan.getAllOperators().values()) {
        Assert.assertEquals("", PTOperator.State.PENDING_DEPLOY, oper.getState());
    }
    dnm.updateRecoveryCheckpoints(o2p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("no checkpoints " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());
    UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock);
    dnm.updateRecoveryCheckpoints(o1p1, ctx, false);
    Assert.assertEquals("no checkpoints " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
    // starting from o1 reaches all three operators of the chain
    Assert.assertEquals("number dependencies " + ctx.visited, 3, ctx.visited.size());
    // adding checkpoints to upstream only does not move recovery checkpoint
    Checkpoint cp3 = new Checkpoint(3L, 0, 0);
    Checkpoint cp5 = new Checkpoint(5L, 0, 0);
    Checkpoint cp4 = new Checkpoint(4L, 0, 0);
    o1p1.checkpoints.add(cp3);
    o1p1.checkpoints.add(cp5);
    dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
    o2p1.checkpoints.add(new Checkpoint(3L, 0, 0));
    dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoint " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());
    // set leaf operator checkpoint
    dnm.addCheckpoint(o3SLp1, cp5);
    dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoint " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());
    // set all operators as active to enable recovery window id update
    for (PTOperator oper : plan.getAllOperators().values()) {
        oper.setState(PTOperator.State.ACTIVE);
    }
    dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("checkpoint " + o1p1, cp3, o1p1.getRecoveryCheckpoint());
    // Fixed: this assertion previously re-checked o1p1 although its message names o2p1.
    // o2p1's only checkpoint so far is window 3, so cp3 is the expected value.
    Assert.assertEquals("checkpoint " + o2p1, cp3, o2p1.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoint " + o3SLp1, cp5, o3SLp1.getRecoveryCheckpoint());
    Assert.assertNull("checkpoint null for stateless operator " + o3SLp1, o3SLp1.stats.checkpointStats);
    o2p1.checkpoints.add(cp4);
    dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("checkpoint " + o1p1, cp3, o1p1.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoint " + o2p1, cp4, o2p1.getRecoveryCheckpoint());
    // insert cp4 between cp3 and cp5 so the list stays ordered by window id
    o1p1.checkpoints.add(1, cp4);
    // expected value first, so a failure report labels the two sides correctly
    Assert.assertEquals(getCheckpoints(3L, 4L, 5L), o1p1.checkpoints);
    dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
    Assert.assertEquals("checkpoint " + o1p1, cp4, o1p1.getRecoveryCheckpoint());
    // the checkpoint older than the new recovery checkpoint is expected to be gone
    Assert.assertEquals(getCheckpoints(4L, 5L), o1p1.checkpoints);
    // out of sequence windowIds should be sorted
    dnm.addCheckpoint(o2p1, new Checkpoint(2L, 0, 0));
    Assert.assertEquals("add first", getCheckpoints(2L, 4L), o2p1.checkpoints);
    dnm.addCheckpoint(o2p1, new Checkpoint(3L, 0, 0));
    Assert.assertEquals("add middle", getCheckpoints(2L, 3L, 4L), o2p1.checkpoints);
    dnm.addCheckpoint(o2p1, new Checkpoint(4L, 0, 0));
    Assert.assertEquals("ignore duplicate", getCheckpoints(2L, 3L, 4L), o2p1.checkpoints);
    dnm.addCheckpoint(o2p1, new Checkpoint(5L, 0, 0));
    Assert.assertEquals("add latest", getCheckpoints(2L, 3L, 4L, 5L), o2p1.checkpoints);
}
Also used : PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) Checkpoint(com.datatorrent.stram.api.Checkpoint) SystemClock(org.apache.hadoop.yarn.util.SystemClock) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) MemoryStorageAgent(com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent) Clock(org.apache.hadoop.yarn.util.Clock) SystemClock(org.apache.hadoop.yarn.util.SystemClock) UpdateCheckpointsContext(com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext) Test(org.junit.Test)

Example 18 with Checkpoint

use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.

the class CheckpointTest method testUpdateRecoveryCheckpointWithCycle.

/**
 * Exercises recovery checkpoint updates on a DAG containing a loop closed by
 * a delay operator: {@code o1 -> o2 -> o3 -> o4 -> d -> o2}, with o3 split
 * into two partitions.
 *
 * <p>Every physical operator is activated and given checkpoint 1. The input
 * operator o1 then receives checkpoint 2 and the test expects its recovery
 * checkpoint to remain at checkpoint 1.
 *
 * @throws Exception declared by the original signature; propagated from the code under test
 */
@Test
public void testUpdateRecoveryCheckpointWithCycle() throws Exception {
    Clock clock = new SystemClock();
    dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
    // Simulate a DAG with a loop which has a unifier operator
    TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
    GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class);
    GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);
    DefaultDelayOperator d = dag.addOperator("d", DefaultDelayOperator.class);
    dag.addStream("o1.output1", o1.outport, o2.inport1);
    dag.addStream("o2.output1", o2.outport1, o3.inport1);
    dag.addStream("o3.output1", o3.outport1, o4.inport1);
    dag.addStream("o4.output1", o4.outport1, d.input);
    // the delay operator's output feeds back into o2, closing the loop
    dag.addStream("d.output", d.output, o2.inport2);
    dag.setOperatorAttribute(o3, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<Operator>(2));
    dag.validate();
    StreamingContainerManager dnm = new StreamingContainerManager(dag);
    PhysicalPlan plan = dnm.getPhysicalPlan();
    for (PTOperator oper : plan.getAllOperators().values()) {
        Assert.assertEquals("Initial activation windowId" + oper, Checkpoint.INITIAL_CHECKPOINT, oper.getRecoveryCheckpoint());
        Assert.assertEquals("Checkpoints empty" + oper, Collections.emptyList(), oper.checkpoints);
    }
    Checkpoint cp1 = new Checkpoint(1L, 0, 0);
    Checkpoint cp2 = new Checkpoint(2L, 0, 0);
    Map<OperatorMeta, Set<OperatorMeta>> checkpointGroups = dnm.getCheckpointGroups();
    Map<Integer, PTOperator> allOperators = plan.getAllOperators();
    // activate each operator, give it checkpoint 1 and recompute from that operator
    for (PTOperator operator : allOperators.values()) {
        operator.setState(PTOperator.State.ACTIVE);
        operator.checkpoints.add(cp1);
        dnm.updateRecoveryCheckpoints(operator, new UpdateCheckpointsContext(clock, false, checkpointGroups), false);
    }
    List<PTOperator> physicalO1 = plan.getOperators(dag.getOperatorMeta("o1"));
    physicalO1.get(0).checkpoints.add(cp2);
    dnm.updateRecoveryCheckpoints(physicalO1.get(0), new UpdateCheckpointsContext(clock, false, checkpointGroups), false);
    // Fixed argument order: expected value (cp1) first, actual second.
    Assert.assertEquals("Recovery checkpoint updated ", cp1, physicalO1.get(0).getRecoveryCheckpoint());
}
Also used : GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) Operator(com.datatorrent.api.Operator) DefaultDelayOperator(com.datatorrent.common.util.DefaultDelayOperator) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) BaseOperator(com.datatorrent.common.util.BaseOperator) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) InputOperator(com.datatorrent.api.InputOperator) PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) Set(java.util.Set) SystemClock(org.apache.hadoop.yarn.util.SystemClock) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) OperatorMeta(com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) Clock(org.apache.hadoop.yarn.util.Clock) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Checkpoint(com.datatorrent.stram.api.Checkpoint) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) MemoryStorageAgent(com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent) DefaultDelayOperator(com.datatorrent.common.util.DefaultDelayOperator) UpdateCheckpointsContext(com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext) Test(org.junit.Test)

Example 19 with Checkpoint

use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.

the class CheckpointTest method testBeforeCheckpointNotification.

/**
 * Runs {@code o1 -> o2} on a local cluster with a checkpoint every window
 * (50 ms windows), then loads every state the storage agent saved for o1 and
 * checks, in ascending window id order, that {@code checkpointState} counts
 * 0, 1, 2, ...
 *
 * <p>NOTE(review): presumably {@code MockInputOperator} bumps
 * {@code checkpointState} in its before-checkpoint callback; confirm against
 * that class, which is not visible here.
 *
 * @throws IOException as declared by the original signature
 * @throws ClassNotFoundException as declared by the original signature
 */
@Test
public void testBeforeCheckpointNotification() throws IOException, ClassNotFoundException {
    final FSStorageAgent agent = new FSStorageAgent(testMeta.getPath(), null);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
    dag.setAttribute(LogicalPlan.CHECKPOINT_WINDOW_COUNT, 1);
    dag.setAttribute(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS, 50);

    MockInputOperator o1 = dag.addOperator("o1", new MockInputOperator());
    GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
    dag.setOperatorAttribute(o2, OperatorContext.STATELESS, true);
    dag.addStream("o1.outport", o1.outport, o2.inport1);

    // Run the application with heartbeat monitoring switched off.
    final StramLocalCluster cluster = new StramLocalCluster(dag);
    cluster.setHeartbeatMonitoringEnabled(false);
    cluster.run();

    final StreamingContainerManager manager = cluster.dnmgr;
    final PhysicalPlan physicalPlan = manager.getPhysicalPlan();
    final List<PTOperator> inputPartitions = physicalPlan.getOperators(dag.getMeta(o1));
    Assert.assertEquals("Number partitions", 1, inputPartitions.size());
    final PTOperator inputOperator = inputPartitions.get(0);

    // Sort the saved window ids so states are inspected oldest first.
    final long[] savedWindowIds = agent.getWindowIds(inputOperator.getId());
    Arrays.sort(savedWindowIds);
    for (int position = 0; position < savedWindowIds.length; position++) {
        Object restored = agent.load(inputOperator.getId(), savedWindowIds[position]);
        Assert.assertEquals("Checkpointed state class", MockInputOperator.class, restored.getClass());
        // The n-th saved state is expected to carry the value n (starting at 0).
        Assert.assertEquals("Checkpoint state", position, ((MockInputOperator) restored).checkpointState);
    }
}
Also used : PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) FSStorageAgent(com.datatorrent.common.util.FSStorageAgent) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) Checkpoint(com.datatorrent.stram.api.Checkpoint) Test(org.junit.Test)

Example 20 with Checkpoint

use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.

the class GenericNodeTest method testCheckpointDistance.

/**
 * Runs a {@code GenericNode} wrapping a {@code CheckpointDistanceOperator}
 * for {@code maxWindows} windows and checks, window by window, the recorded
 * distance to the next checkpoint.
 *
 * <p>The expected checkpoint windows are computed up front: starting from 0,
 * each next checkpoint is the following multiple of {@code dagCheckPoint},
 * rounded up to a multiple of {@code opCheckPoint}.
 *
 * <p>The node runs on its own thread; it is shut down and joined in a
 * {@code finally} block so the thread does not outlive a failed assertion.
 *
 * @param dagCheckPoint checkpoint window count set at DAG level
 * @param opCheckPoint checkpoint window count set at operator level
 * @throws InterruptedException if interrupted while sleeping or joining the node thread
 */
private void testCheckpointDistance(int dagCheckPoint, int opCheckPoint) throws InterruptedException {
    int windowWidth = 50;
    long sleeptime = 25L;
    int maxWindows = 60;
    // Adding some extra time for the windows to finish
    long maxSleep = (long) windowWidth * maxWindows + 5000;
    ScheduledExecutorService executorService = new ScheduledThreadPoolExecutor(1, "default");
    final WindowGenerator windowGenerator = new WindowGenerator(executorService, 1024);
    windowGenerator.setWindowWidth(windowWidth);
    windowGenerator.setFirstWindow(executorService.getCurrentTimeMillis());
    windowGenerator.setCheckpointCount(dagCheckPoint, 0);
    CheckpointDistanceOperator go = new CheckpointDistanceOperator();
    go.maxWindows = maxWindows;
    // Expected checkpoint windows: next multiple of dagCheckPoint, rounded up to a multiple of opCheckPoint.
    List<Integer> checkpoints = new ArrayList<>();
    int window = 0;
    while (window < maxWindows) {
        window = (int) Math.ceil((double) (window + 1) / dagCheckPoint) * dagCheckPoint;
        window = (int) Math.ceil((double) window / opCheckPoint) * opCheckPoint;
        checkpoints.add(window);
    }
    final StreamContext stcontext = new StreamContext("s1");
    DefaultAttributeMap attrMap = new DefaultAttributeMap();
    attrMap.put(Context.DAGContext.CHECKPOINT_WINDOW_COUNT, dagCheckPoint);
    attrMap.put(Context.OperatorContext.CHECKPOINT_WINDOW_COUNT, opCheckPoint);
    final OperatorContext context = new com.datatorrent.stram.engine.OperatorContext(0, "operator", attrMap, null);
    final GenericNode gn = new GenericNode(go, context);
    gn.setId(1);
    gn.connectInputPort("ip1", windowGenerator.acquireReservoir("ip1", 1024));
    gn.connectInputPort("ip2", windowGenerator.acquireReservoir("ip2", 1024));
    gn.connectOutputPort("op", Sink.BLACKHOLE);
    Thread t = new Thread() {

        @Override
        public void run() {
            gn.setup(context);
            windowGenerator.activate(stcontext);
            gn.activate();
            gn.run();
            windowGenerator.deactivate();
            gn.deactivate();
            gn.teardown();
        }
    };
    t.start();
    try {
        // Poll until the operator has seen all windows or the time budget is used up.
        long interval = 0;
        do {
            Thread.sleep(sleeptime);
            interval += sleeptime;
        } while ((go.numWindows < maxWindows) && (interval < maxSleep));
        Assert.assertEquals("Number distances", maxWindows, go.numWindows);
        int chkindex = 0;
        int nextCheckpoint = checkpoints.get(chkindex++);
        for (int i = 0; i < maxWindows; ++i) {
            // Window i (0-based) lies past the current checkpoint: move to the next one.
            if ((i + 1) > nextCheckpoint) {
                nextCheckpoint = checkpoints.get(chkindex++);
            }
            Assert.assertEquals("Windows from checkpoint for " + i, nextCheckpoint - i, (int) go.distances.get(i));
        }
    } finally {
        // Always stop the node and wait for its thread, including when an assertion above failed.
        // NOTE(review): assumes shutdown() makes gn.run() return at any point in the node's life - confirm.
        gn.shutdown();
        t.join();
    }
}
Also used : ScheduledExecutorService(com.datatorrent.common.util.ScheduledExecutorService) ScheduledThreadPoolExecutor(com.datatorrent.common.util.ScheduledThreadPoolExecutor) ArrayList(java.util.ArrayList) Checkpoint(com.datatorrent.stram.api.Checkpoint) DefaultAttributeMap(com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean)

Aggregations

Checkpoint (com.datatorrent.stram.api.Checkpoint)28 PTOperator (com.datatorrent.stram.plan.physical.PTOperator)15 GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator)9 PhysicalPlan (com.datatorrent.stram.plan.physical.PhysicalPlan)9 Test (org.junit.Test)9 ContainerStats (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats)6 OperatorMeta (com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta)6 MemoryStorageAgent (com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent)6 OperatorStats (com.datatorrent.api.Stats.OperatorStats)5 UpdateCheckpointsContext (com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext)5 HashMap (java.util.HashMap)5 Map (java.util.Map)5 OperatorHeartbeat (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat)4 TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator)4 IOException (java.io.IOException)4 LinkedHashMap (java.util.LinkedHashMap)4 Operator (com.datatorrent.api.Operator)3 StatsListener (com.datatorrent.api.StatsListener)3 AsyncFSStorageAgent (com.datatorrent.common.util.AsyncFSStorageAgent)3 ContainerHeartbeatResponse (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeatResponse)3