Use of com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext in the apex-core project by Apache.
Class CheckpointTest, method testUpdateRecoveryCheckpoint.
/**
 * Exercises {@code StreamingContainerManager.updateRecoveryCheckpoints} on a linear
 * o1 -> o2 -> o3SL plan and checks each operator's recovery checkpoint after every
 * change to the operators' checkpoint lists or states. The last section checks that
 * {@code addCheckpoint} keeps the checkpoint list ordered by window id and ignores
 * a duplicate window id.
 */
@Test
public void testUpdateRecoveryCheckpoint() throws Exception {
  Clock clock = new SystemClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
  GenericTestOperator o3SL = dag.addOperator("o3SL", StatelessOperator.class);
  dag.addStream("o1.output1", o1.outport1, o2.inport1);
  dag.addStream("o2.output1", o2.outport1, o3SL.inport1);
  StreamingContainerManager dnm = new StreamingContainerManager(dag);
  PhysicalPlan plan = dnm.getPhysicalPlan();

  // every operator starts at the initial checkpoint with an empty checkpoint list
  for (PTOperator oper : plan.getAllOperators().values()) {
    Assert.assertEquals("activation windowId " + oper, Checkpoint.INITIAL_CHECKPOINT, oper.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoints " + oper, Collections.emptyList(), oper.checkpoints);
  }

  List<PTOperator> nodes1 = plan.getOperators(dag.getMeta(o1));
  Assert.assertNotNull(nodes1);
  Assert.assertEquals(1, nodes1.size());
  PTOperator o1p1 = nodes1.get(0);
  PTOperator o2p1 = plan.getOperators(dag.getMeta(o2)).get(0);
  PTOperator o3SLp1 = plan.getOperators(dag.getMeta(o3SL)).get(0);

  // recovery checkpoint won't update in deploy state
  for (PTOperator oper : plan.getAllOperators().values()) {
    Assert.assertEquals("", PTOperator.State.PENDING_DEPLOY, oper.getState());
  }
  dnm.updateRecoveryCheckpoints(o2p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("no checkpoints " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());

  UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock);
  dnm.updateRecoveryCheckpoints(o1p1, ctx, false);
  Assert.assertEquals("no checkpoints " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  // starting from o1, the update is expected to visit all three operators
  Assert.assertEquals("number dependencies " + ctx.visited, 3, ctx.visited.size());

  // adding checkpoints to upstream only does not move recovery checkpoint
  Checkpoint cp3 = new Checkpoint(3L, 0, 0);
  Checkpoint cp5 = new Checkpoint(5L, 0, 0);
  Checkpoint cp4 = new Checkpoint(4L, 0, 0);
  o1p1.checkpoints.add(cp3);
  o1p1.checkpoints.add(cp5);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());

  o2p1.checkpoints.add(new Checkpoint(3L, 0, 0));
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());

  // set leaf operator checkpoint
  dnm.addCheckpoint(o3SLp1, cp5);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());

  // set all operators as active to enable recovery window id update
  for (PTOperator oper : plan.getAllOperators().values()) {
    oper.setState(PTOperator.State.ACTIVE);
  }
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, cp3, o1p1.getRecoveryCheckpoint());
  // verify o2 itself (the original re-checked o1 here under an o2 message)
  Assert.assertEquals("checkpoint " + o2p1, cp3, o2p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o3SLp1, cp5, o3SLp1.getRecoveryCheckpoint());
  Assert.assertNull("checkpoint null for stateless operator " + o3SLp1, o3SLp1.stats.checkpointStats);

  o2p1.checkpoints.add(cp4);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, cp3, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o2p1, cp4, o2p1.getRecoveryCheckpoint());

  // insert cp4 between cp3 and cp5; after the update the list is expected to start at cp4
  o1p1.checkpoints.add(1, cp4);
  Assert.assertEquals(getCheckpoints(3L, 4L, 5L), o1p1.checkpoints);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, cp4, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals(getCheckpoints(4L, 5L), o1p1.checkpoints);

  // out of sequence windowIds should be sorted
  dnm.addCheckpoint(o2p1, new Checkpoint(2L, 0, 0));
  Assert.assertEquals("add first", getCheckpoints(2L, 4L), o2p1.checkpoints);
  dnm.addCheckpoint(o2p1, new Checkpoint(3L, 0, 0));
  Assert.assertEquals("add middle", getCheckpoints(2L, 3L, 4L), o2p1.checkpoints);
  dnm.addCheckpoint(o2p1, new Checkpoint(4L, 0, 0));
  Assert.assertEquals("ignore duplicate", getCheckpoints(2L, 3L, 4L), o2p1.checkpoints);
  dnm.addCheckpoint(o2p1, new Checkpoint(5L, 0, 0));
  Assert.assertEquals("add latest", getCheckpoints(2L, 3L, 4L, 5L), o2p1.checkpoints);
}
Use of com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext in the apex-core project by Apache.
Class CheckpointTest, method testUpdateCheckpointsProcessingTimeout.
/**
 * Checks the {@code blocked} set that {@code updateRecoveryCheckpoints} fills in on the
 * context for an o1 -> o2 plan, as the mock clock, the operators' current window ids and
 * o2's recovery checkpoint are changed between runs.
 */
@Test
public void testUpdateCheckpointsProcessingTimeout() {
  MockClock mockClock = new MockClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  GenericTestOperator upstream = dag.addOperator("o1", GenericTestOperator.class);
  GenericTestOperator downstream = dag.addOperator("o2", GenericTestOperator.class);
  dag.addStream("o1.outport1", upstream.outport1, downstream.inport1);

  StreamingContainerManager scm = new StreamingContainerManager(dag);
  PhysicalPlan physicalPlan = scm.getPhysicalPlan();
  // recovery window id updates require every operator to be active
  for (PTOperator pt : physicalPlan.getAllOperators().values()) {
    pt.setState(PTOperator.State.ACTIVE);
  }

  List<PTOperator> upstreamPartitions = physicalPlan.getOperators(dag.getMeta(upstream));
  Assert.assertNotNull(upstreamPartitions);
  Assert.assertEquals(1, upstreamPartitions.size());
  PTOperator o1p1 = upstreamPartitions.get(0);

  List<PTOperator> downstreamPartitions = physicalPlan.getOperators(dag.getMeta(downstream));
  Assert.assertNotNull(downstreamPartitions);
  Assert.assertEquals(1, downstreamPartitions.size());
  PTOperator o2p1 = downstreamPartitions.get(0);

  UpdateCheckpointsContext result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertTrue("no blocked operators", result.blocked.isEmpty());

  // o1 reports window 1 at time 1, then the clock jumps just past o1's processing timeout
  o1p1.stats.statsRevs.checkout();
  o1p1.stats.currentWindowId.set(1);
  o1p1.stats.lastWindowIdChangeTms = 1;
  o1p1.stats.statsRevs.commit();
  mockClock.time = o1p1.stats.windowProcessingTimeoutMillis + 1;

  result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertEquals("o2 blocked", Sets.newHashSet(o2p1), result.blocked);

  // assign future activation window (state-less or at-most-once).
  Checkpoint savedO2Checkpoint = o2p1.getRecoveryCheckpoint();
  long futureWindowId = o1p1.getRecoveryCheckpoint().windowId + 1;
  o2p1.setRecoveryCheckpoint(new Checkpoint(futureWindowId, savedO2Checkpoint.applicationWindowCount, savedO2Checkpoint.checkpointWindowCount));
  result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertEquals("no operators blocked (o2 activation window ahead)", Sets.newHashSet(), result.blocked);

  // reset to blocked
  o2p1.setRecoveryCheckpoint(savedO2Checkpoint);
  result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertEquals("o2 blocked", Sets.newHashSet(o2p1), result.blocked);

  // one more tick: both operators are expected in the blocked set
  mockClock.time++;
  result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertEquals("operators blocked", Sets.newHashSet(o1p1, o2p1), result.blocked);

  // o2 catches up to o1's current window: only o1 is expected to remain blocked
  o2p1.stats.statsRevs.checkout();
  o2p1.stats.currentWindowId.set(o1p1.stats.getCurrentWindowId());
  o2p1.stats.statsRevs.commit();
  result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertEquals("operators blocked", Sets.newHashSet(o1p1), result.blocked);

  // step the clock back by one tick: nothing is expected to be blocked
  mockClock.time--;
  result = runCheckpointUpdate(scm, o1p1, mockClock);
  Assert.assertEquals("operators blocked", Sets.newHashSet(), result.blocked);
}

/** Runs one recovery checkpoint update from {@code root} with a fresh context and returns that context. */
private UpdateCheckpointsContext runCheckpointUpdate(StreamingContainerManager scm, PTOperator root, MockClock mockClock) {
  UpdateCheckpointsContext context = new UpdateCheckpointsContext(mockClock);
  scm.updateRecoveryCheckpoints(root, context, false);
  return context;
}
Use of com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext in the apex-core project by Apache.
Class CheckpointTest, method testUpdateRecoveryCheckpointWithCycle.
/**
 * Builds a DAG containing the loop o2 -> o3 -> o4 -> d -> o2, fed by input operator o1,
 * with o3 given a {@code StatelessPartitioner} constructed with 2. Gives every physical
 * operator checkpoint cp1, then adds cp2 to o1 only and checks that o1's recovery
 * checkpoint is still cp1 after the update.
 */
@Test
public void testUpdateRecoveryCheckpointWithCycle() throws Exception {
  Clock clock = new SystemClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  // Simulate a DAG with a loop which has a unifier operator
  TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
  GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
  GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class);
  GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);
  DefaultDelayOperator d = dag.addOperator("d", DefaultDelayOperator.class);
  dag.addStream("o1.output1", o1.outport, o2.inport1);
  dag.addStream("o2.output1", o2.outport1, o3.inport1);
  dag.addStream("o3.output1", o3.outport1, o4.inport1);
  dag.addStream("o4.output1", o4.outport1, d.input);
  dag.addStream("d.output", d.output, o2.inport2);
  dag.setOperatorAttribute(o3, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<Operator>(2));
  dag.validate();
  StreamingContainerManager dnm = new StreamingContainerManager(dag);
  PhysicalPlan plan = dnm.getPhysicalPlan();

  // every operator starts at the initial checkpoint with an empty checkpoint list
  for (PTOperator oper : plan.getAllOperators().values()) {
    Assert.assertEquals("Initial activation windowId" + oper, Checkpoint.INITIAL_CHECKPOINT, oper.getRecoveryCheckpoint());
    Assert.assertEquals("Checkpoints empty" + oper, Collections.emptyList(), oper.checkpoints);
  }

  Checkpoint cp1 = new Checkpoint(1L, 0, 0);
  Checkpoint cp2 = new Checkpoint(2L, 0, 0);
  Map<OperatorMeta, Set<OperatorMeta>> checkpointGroups = dnm.getCheckpointGroups();
  Map<Integer, PTOperator> allOperators = plan.getAllOperators();

  // activate each operator, give it cp1 and run an update from it
  for (PTOperator operator : allOperators.values()) {
    operator.setState(PTOperator.State.ACTIVE);
    operator.checkpoints.add(cp1);
    dnm.updateRecoveryCheckpoints(operator, new UpdateCheckpointsContext(clock, false, checkpointGroups), false);
  }

  // only o1 receives cp2; its recovery checkpoint is expected to stay at cp1
  List<PTOperator> physicalO1 = plan.getOperators(dag.getOperatorMeta("o1"));
  PTOperator o1p1 = physicalO1.get(0);
  o1p1.checkpoints.add(cp2);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock, false, checkpointGroups), false);
  // expected value goes first so a failure message reports the values the right way round
  Assert.assertEquals("Recovery checkpoint updated ", cp1, o1p1.getRecoveryCheckpoint());
}
Use of com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext in the apex-core project by Apache.
Class CheckpointTest, method testUpdateCheckpointsRecovery.
/**
 * Runs {@code updateRecoveryCheckpoints} with the context's second constructor argument
 * set to {@code true} on the plan o1 -> o2SL -> {o3SL, o4}, where only o4 has a checkpoint,
 * and checks the recovery checkpoint each operator ends up with.
 */
@Test
public void testUpdateCheckpointsRecovery() {
  MockClock mockClock = new MockClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  dag.setAttribute(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS, 1);

  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  StatelessOperator o2SL = dag.addOperator("o2SL", StatelessOperator.class);
  StatelessOperator o3SL = dag.addOperator("o3SL", StatelessOperator.class);
  GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);
  dag.addStream("o1.outport1", o1.outport1, o2SL.inport1);
  dag.addStream("o2SL.outport1", o2SL.outport1, o3SL.inport1, o4.inport1);

  StreamingContainerManager scm = new StreamingContainerManager(dag, mockClock);
  PhysicalPlan physicalPlan = scm.getPhysicalPlan();
  // every operator starts at the initial checkpoint with an empty checkpoint list
  for (PTOperator pt : physicalPlan.getAllOperators().values()) {
    Assert.assertEquals("activation windowId " + pt, Checkpoint.INITIAL_CHECKPOINT, pt.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoints " + pt, Collections.emptyList(), pt.checkpoints);
  }

  PTOperator o1p1 = physicalPlan.getOperators(dag.getMeta(o1)).get(0);
  PTOperator o2SLp1 = physicalPlan.getOperators(dag.getMeta(o2SL)).get(0);
  PTOperator o3SLp1 = physicalPlan.getOperators(dag.getMeta(o3SL)).get(0);
  PTOperator o4p1 = physicalPlan.getOperators(dag.getMeta(o4)).get(0);

  // only the leaf o4 gets a checkpoint; the clock is set before it is added
  Checkpoint leafCheckpoint = new Checkpoint(2L, 0, 0);
  mockClock.time = 3;
  o4p1.checkpoints.add(leafCheckpoint);

  // run the update from o1 with an empty checkpoint group map
  UpdateCheckpointsContext context =
      new UpdateCheckpointsContext(mockClock, true, Collections.<OperatorMeta, Set<OperatorMeta>>emptyMap());
  scm.updateRecoveryCheckpoints(o1p1, context, false);

  Assert.assertEquals("initial checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("initial checkpoint " + o2SLp1, leafCheckpoint, o2SLp1.getRecoveryCheckpoint());
  Checkpoint clockCheckpoint = new Checkpoint(mockClock.getTime(), 0, 0);
  Assert.assertEquals("initial checkpoint " + o3SLp1, clockCheckpoint, o3SLp1.getRecoveryCheckpoint());
  Assert.assertEquals("number dependencies " + context.visited, physicalPlan.getAllOperators().size(), context.visited.size());
}
Use of com.datatorrent.stram.StreamingContainerManager.UpdateCheckpointsContext in the apex-core project by Apache.
Class DelayOperatorTest, method testCheckpointUpdate.
/**
 * Builds the DAG A -> B -> C -> D with a feedback path C -> opDelay -> B, supplies a
 * checkpoint group made of B, C and opDelay, and checks the recovery checkpoints of
 * A, B, C and D after running {@code updateRecoveryCheckpoints} from B.
 */
@Test
public void testCheckpointUpdate() {
  LogicalPlan dag = StramTestSupport.createDAG(testMeta);
  TestGeneratorInputOperator opA = dag.addOperator("A", TestGeneratorInputOperator.class);
  GenericTestOperator opB = dag.addOperator("B", GenericTestOperator.class);
  GenericTestOperator opC = dag.addOperator("C", GenericTestOperator.class);
  GenericTestOperator opD = dag.addOperator("D", GenericTestOperator.class);
  DefaultDelayOperator<Object> opDelay = dag.addOperator("opDelay", new DefaultDelayOperator<>());
  dag.addStream("AtoB", opA.outport, opB.inport1);
  dag.addStream("BtoC", opB.outport1, opC.inport1);
  dag.addStream("CtoD", opC.outport1, opD.inport1);
  dag.addStream("CtoDelay", opC.outport2, opDelay.input);
  dag.addStream("DelayToB", opDelay.output, opB.inport2);
  dag.validate();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  StreamingContainerManager scm = new StreamingContainerManager(dag);
  PhysicalPlan plan = scm.getPhysicalPlan();
  // set all operators as active to enable recovery window id update
  for (PTOperator oper : plan.getAllOperators().values()) {
    oper.setState(PTOperator.State.ACTIVE);
  }
  Clock clock = new SystemClock();
  PTOperator opA1 = plan.getOperators(dag.getMeta(opA)).get(0);
  PTOperator opB1 = plan.getOperators(dag.getMeta(opB)).get(0);
  PTOperator opC1 = plan.getOperators(dag.getMeta(opC)).get(0);
  PTOperator opDelay1 = plan.getOperators(dag.getMeta(opDelay)).get(0);
  PTOperator opD1 = plan.getOperators(dag.getMeta(opD)).get(0);

  // cp3 is the only checkpoint that B, C and opDelay all have
  Checkpoint cp3 = new Checkpoint(3L, 0, 0);
  Checkpoint cp5 = new Checkpoint(5L, 0, 0);
  Checkpoint cp4 = new Checkpoint(4L, 0, 0);
  opB1.checkpoints.add(cp3);
  opC1.checkpoints.add(cp3);
  opC1.checkpoints.add(cp4);
  opDelay1.checkpoints.add(cp3);
  opDelay1.checkpoints.add(cp5);
  opD1.checkpoints.add(cp5);

  // construct grouping that would be supplied through LogicalPlan
  Set<OperatorMeta> stronglyConnected = Sets.newHashSet(dag.getMeta(opB), dag.getMeta(opC), dag.getMeta(opDelay));
  Map<OperatorMeta, Set<OperatorMeta>> groups = new HashMap<>();
  for (OperatorMeta om : stronglyConnected) {
    groups.put(om, stronglyConnected);
  }

  UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock, false, groups);
  scm.updateRecoveryCheckpoints(opB1, ctx, false);

  Assert.assertEquals("checkpoint " + opA1, Checkpoint.INITIAL_CHECKPOINT, opA1.getRecoveryCheckpoint());
  // verify B itself (the original re-checked C here under a B message)
  Assert.assertEquals("checkpoint " + opB1, cp3, opB1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + opC1, cp3, opC1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + opD1, cp5, opD1.getRecoveryCheckpoint());
}
Aggregations