use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.
the class CheckpointTest method testUpdateCheckpointsRecovery.
/**
 * Runs a recovery checkpoint update over the plan o1 -> o2SL -> (o3SL, o4)
 * when only the physical instance of o4 has a checkpoint (window 2) and the
 * mock clock reads 3.
 *
 * <p>Asserts that o1 stays at {@code Checkpoint.INITIAL_CHECKPOINT}, that o2SL
 * reports the o4 checkpoint, that o3SL reports a checkpoint built from the
 * clock time, and that the update context visited every operator of the plan.
 */
@Test
public void testUpdateCheckpointsRecovery() {
  final MockClock mockClock = new MockClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  dag.setAttribute(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS, 1);

  // Logical operators, added in the same order as their names suggest.
  final GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  final StatelessOperator o2SL = dag.addOperator("o2SL", StatelessOperator.class);
  final StatelessOperator o3SL = dag.addOperator("o3SL", StatelessOperator.class);
  final GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);

  dag.addStream("o1.outport1", o1.outport1, o2SL.inport1);
  dag.addStream("o2SL.outport1", o2SL.outport1, o3SL.inport1, o4.inport1);

  final StreamingContainerManager manager = new StreamingContainerManager(dag, mockClock);
  final PhysicalPlan physicalPlan = manager.getPhysicalPlan();

  // Freshly planned operators carry the initial checkpoint and an empty checkpoint list.
  for (PTOperator planned : physicalPlan.getAllOperators().values()) {
    Assert.assertEquals("activation windowId " + planned, Checkpoint.INITIAL_CHECKPOINT, planned.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoints " + planned, Collections.emptyList(), planned.checkpoints);
  }

  final PTOperator o1Physical = physicalPlan.getOperators(dag.getMeta(o1)).get(0);
  final PTOperator o2SLPhysical = physicalPlan.getOperators(dag.getMeta(o2SL)).get(0);
  final PTOperator o3SLPhysical = physicalPlan.getOperators(dag.getMeta(o3SL)).get(0);
  final PTOperator o4Physical = physicalPlan.getOperators(dag.getMeta(o4)).get(0);

  // Only the o4 instance receives a checkpoint; the clock is moved to 3 beforehand.
  final Checkpoint leafCheckpoint = new Checkpoint(2L, 0, 0);
  mockClock.time = 3;
  o4Physical.checkpoints.add(leafCheckpoint);

  final UpdateCheckpointsContext updateContext =
      new UpdateCheckpointsContext(mockClock, true, Collections.<OperatorMeta, Set<OperatorMeta>>emptyMap());
  manager.updateRecoveryCheckpoints(o1Physical, updateContext, false);

  Assert.assertEquals("initial checkpoint " + o1Physical, Checkpoint.INITIAL_CHECKPOINT, o1Physical.getRecoveryCheckpoint());
  Assert.assertEquals("initial checkpoint " + o2SLPhysical, leafCheckpoint, o2SLPhysical.getRecoveryCheckpoint());
  Assert.assertEquals("initial checkpoint " + o3SLPhysical, new Checkpoint(mockClock.getTime(), 0, 0), o3SLPhysical.getRecoveryCheckpoint());
  Assert.assertEquals("number dependencies " + updateContext.visited, physicalPlan.getAllOperators().size(), updateContext.visited.size());
}
use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.
the class CheckpointTest method testUpdateRecoveryCheckpoint.
/**
 * Exercises {@code updateRecoveryCheckpoints} and {@code addCheckpoint} on the
 * linear plan o1 -> o2 -> o3SL.
 *
 * <p>The test walks through these stages:
 * <ol>
 *   <li>all operators start at {@code Checkpoint.INITIAL_CHECKPOINT} with no checkpoints;</li>
 *   <li>while operators are in {@code PENDING_DEPLOY}, adding checkpoints does not
 *       move any recovery checkpoint;</li>
 *   <li>after all operators are set to {@code ACTIVE}, the recovery checkpoints of
 *       o1, o2 and o3SL are asserted after each added checkpoint;</li>
 *   <li>{@code addCheckpoint} keeps the checkpoint list ordered and ignores duplicates.</li>
 * </ol>
 *
 * @throws Exception declared by the original signature; no checked exception is
 *     thrown by the statements visible in this method
 */
@Test
public void testUpdateRecoveryCheckpoint() throws Exception {
  Clock clock = new SystemClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
  GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
  GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
  GenericTestOperator o3SL = dag.addOperator("o3SL", StatelessOperator.class);
  dag.addStream("o1.output1", o1.outport1, o2.inport1);
  dag.addStream("o2.output1", o2.outport1, o3SL.inport1);
  StreamingContainerManager dnm = new StreamingContainerManager(dag);
  PhysicalPlan plan = dnm.getPhysicalPlan();
  for (PTOperator oper : plan.getAllOperators().values()) {
    Assert.assertEquals("activation windowId " + oper, Checkpoint.INITIAL_CHECKPOINT, oper.getRecoveryCheckpoint());
    Assert.assertEquals("checkpoints " + oper, Collections.emptyList(), oper.checkpoints);
  }

  List<PTOperator> nodes1 = plan.getOperators(dag.getMeta(o1));
  Assert.assertNotNull(nodes1);
  Assert.assertEquals(1, nodes1.size());
  PTOperator o1p1 = nodes1.get(0);
  PTOperator o2p1 = plan.getOperators(dag.getMeta(o2)).get(0);
  PTOperator o3SLp1 = plan.getOperators(dag.getMeta(o3SL)).get(0);

  // recovery checkpoint won't update in deploy state
  for (PTOperator oper : plan.getAllOperators().values()) {
    Assert.assertEquals("", PTOperator.State.PENDING_DEPLOY, oper.getState());
  }
  dnm.updateRecoveryCheckpoints(o2p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("no checkpoints " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());
  UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock);
  dnm.updateRecoveryCheckpoints(o1p1, ctx, false);
  Assert.assertEquals("no checkpoints " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("number dependencies " + ctx.visited, 3, ctx.visited.size());

  // adding checkpoints to upstream only does not move recovery checkpoint
  Checkpoint cp3 = new Checkpoint(3L, 0, 0);
  Checkpoint cp5 = new Checkpoint(5L, 0, 0);
  Checkpoint cp4 = new Checkpoint(4L, 0, 0);
  o1p1.checkpoints.add(cp3);
  o1p1.checkpoints.add(cp5);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  o2p1.checkpoints.add(new Checkpoint(3L, 0, 0));
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());

  // set leaf operator checkpoint
  dnm.addCheckpoint(o3SLp1, cp5);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, Checkpoint.INITIAL_CHECKPOINT, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o2p1, Checkpoint.INITIAL_CHECKPOINT, o2p1.getRecoveryCheckpoint());

  // set all operators as active to enable recovery window id update
  for (PTOperator oper : plan.getAllOperators().values()) {
    oper.setState(PTOperator.State.ACTIVE);
  }
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, cp3, o1p1.getRecoveryCheckpoint());
  // Verify o2 itself here; its only checkpoint so far is the one for window 3.
  Assert.assertEquals("checkpoint " + o2p1, cp3, o2p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o3SLp1, cp5, o3SLp1.getRecoveryCheckpoint());
  Assert.assertNull("checkpoint null for stateless operator " + o3SLp1, o3SLp1.stats.checkpointStats);

  o2p1.checkpoints.add(cp4);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, cp3, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals("checkpoint " + o2p1, cp4, o2p1.getRecoveryCheckpoint());

  // Insert window 4 between the existing 3 and 5 of o1; after the update the
  // list is asserted to no longer contain window 3.
  o1p1.checkpoints.add(1, cp4);
  Assert.assertEquals(getCheckpoints(3L, 4L, 5L), o1p1.checkpoints);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock), false);
  Assert.assertEquals("checkpoint " + o1p1, cp4, o1p1.getRecoveryCheckpoint());
  Assert.assertEquals(getCheckpoints(4L, 5L), o1p1.checkpoints);

  // out of sequence windowIds should be sorted
  dnm.addCheckpoint(o2p1, new Checkpoint(2L, 0, 0));
  Assert.assertEquals("add first", getCheckpoints(2L, 4L), o2p1.checkpoints);
  dnm.addCheckpoint(o2p1, new Checkpoint(3L, 0, 0));
  Assert.assertEquals("add middle", getCheckpoints(2L, 3L, 4L), o2p1.checkpoints);
  dnm.addCheckpoint(o2p1, new Checkpoint(4L, 0, 0));
  Assert.assertEquals("ignore duplicate", getCheckpoints(2L, 3L, 4L), o2p1.checkpoints);
  dnm.addCheckpoint(o2p1, new Checkpoint(5L, 0, 0));
  Assert.assertEquals("add latest", getCheckpoints(2L, 3L, 4L, 5L), o2p1.checkpoints);
}
use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.
the class CheckpointTest method testUpdateRecoveryCheckpointWithCycle.
/**
 * Checks the recovery checkpoint update on a DAG that contains a loop closed
 * by a {@code DefaultDelayOperator}: o1 -> o2 -> o3 -> o4 -> d -> o2, with o3
 * configured with a {@code StatelessPartitioner} of 2.
 *
 * <p>Every physical operator is activated and given checkpoint 1. Then only
 * the o1 instance receives checkpoint 2, and the test asserts that o1's
 * recovery checkpoint is still checkpoint 1 after the update.
 *
 * @throws Exception declared by the original signature
 */
@Test
public void testUpdateRecoveryCheckpointWithCycle() throws Exception {
  Clock clock = new SystemClock();
  dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

  // Simulate a DAG with a loop which has a unifier operator
  TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
  GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
  GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class);
  GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);
  DefaultDelayOperator d = dag.addOperator("d", DefaultDelayOperator.class);
  dag.addStream("o1.output1", o1.outport, o2.inport1);
  dag.addStream("o2.output1", o2.outport1, o3.inport1);
  dag.addStream("o3.output1", o3.outport1, o4.inport1);
  dag.addStream("o4.output1", o4.outport1, d.input);
  dag.addStream("d.output", d.output, o2.inport2);
  dag.setOperatorAttribute(o3, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<Operator>(2));
  dag.validate();

  StreamingContainerManager dnm = new StreamingContainerManager(dag);
  PhysicalPlan plan = dnm.getPhysicalPlan();
  for (PTOperator oper : plan.getAllOperators().values()) {
    Assert.assertEquals("Initial activation windowId" + oper, Checkpoint.INITIAL_CHECKPOINT, oper.getRecoveryCheckpoint());
    Assert.assertEquals("Checkpoints empty" + oper, Collections.emptyList(), oper.checkpoints);
  }

  Checkpoint cp1 = new Checkpoint(1L, 0, 0);
  Checkpoint cp2 = new Checkpoint(2L, 0, 0);
  Map<OperatorMeta, Set<OperatorMeta>> checkpointGroups = dnm.getCheckpointGroups();
  Map<Integer, PTOperator> allOperators = plan.getAllOperators();

  // Activate each operator, give it checkpoint 1 and run an update from it.
  for (PTOperator operator : allOperators.values()) {
    operator.setState(PTOperator.State.ACTIVE);
    operator.checkpoints.add(cp1);
    dnm.updateRecoveryCheckpoints(operator, new UpdateCheckpointsContext(clock, false, checkpointGroups), false);
  }

  // Only o1 advances to checkpoint 2.
  List<PTOperator> physicalO1 = plan.getOperators(dag.getOperatorMeta("o1"));
  PTOperator o1p1 = physicalO1.get(0);
  o1p1.checkpoints.add(cp2);
  dnm.updateRecoveryCheckpoints(o1p1, new UpdateCheckpointsContext(clock, false, checkpointGroups), false);

  // Expected value first, so a failure reports cp1 as expected and the operator's value as actual.
  Assert.assertEquals("Recovery checkpoint updated ", cp1, o1p1.getRecoveryCheckpoint());
}
use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.
the class CheckpointTest method testBeforeCheckpointNotification.
/**
 * Runs a two-operator application (a {@code MockInputOperator} feeding a
 * stateless {@code GenericTestOperator}) in a {@code StramLocalCluster} with a
 * file-system storage agent, then loads every checkpoint stored for the input
 * operator in ascending window id order.
 *
 * <p>Each loaded state must be a {@code MockInputOperator}, and its
 * {@code checkpointState} must equal the zero-based position of the
 * checkpoint in the sorted window id list.
 *
 * @throws IOException declared by the original signature
 * @throws ClassNotFoundException declared by the original signature
 */
@Test
public void testBeforeCheckpointNotification() throws IOException, ClassNotFoundException {
  final FSStorageAgent agent = new FSStorageAgent(testMeta.getPath(), null);
  dag.setAttribute(OperatorContext.STORAGE_AGENT, agent);
  dag.setAttribute(LogicalPlan.CHECKPOINT_WINDOW_COUNT, 1);
  dag.setAttribute(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS, 50);

  final MockInputOperator input = dag.addOperator("o1", new MockInputOperator());
  final GenericTestOperator downstream = dag.addOperator("o2", GenericTestOperator.class);
  dag.setOperatorAttribute(downstream, OperatorContext.STATELESS, true);
  dag.addStream("o1.outport", input.outport, downstream.inport1);

  final StramLocalCluster cluster = new StramLocalCluster(dag);
  cluster.setHeartbeatMonitoringEnabled(false);
  cluster.run();

  final StreamingContainerManager manager = cluster.dnmgr;
  final PhysicalPlan physicalPlan = manager.getPhysicalPlan();
  final List<PTOperator> inputPartitions = physicalPlan.getOperators(dag.getMeta(input));
  Assert.assertEquals("Number partitions", 1, inputPartitions.size());
  final PTOperator inputPhysical = inputPartitions.get(0);

  final long[] savedWindowIds = agent.getWindowIds(inputPhysical.getId());
  Arrays.sort(savedWindowIds);

  // The position in the sorted id list doubles as the expected checkpoint state.
  for (int position = 0; position < savedWindowIds.length; position++) {
    final Object restored = agent.load(inputPhysical.getId(), savedWindowIds[position]);
    Assert.assertEquals("Checkpointed state class", MockInputOperator.class, restored.getClass());
    Assert.assertEquals("Checkpoint state", position, ((MockInputOperator) restored).checkpointState);
  }
}
use of com.datatorrent.stram.api.Checkpoint in project apex-core by apache.
the class GenericNodeTest method testCheckpointDistance.
/**
 * Runs a {@code GenericNode} wrapping a {@code CheckpointDistanceOperator}
 * against a {@code WindowGenerator} and verifies, for each of the first
 * {@code maxWindows} windows, the distance to the next checkpoint recorded by
 * the operator.
 *
 * <p>The expected checkpoint windows are precomputed: starting from 0, the
 * next checkpoint is the following window rounded up to a multiple of
 * {@code dagCheckPoint}, then rounded up to a multiple of {@code opCheckPoint}.
 *
 * <p>The node runs on its own thread. It is shut down and joined in a
 * {@code finally} block so that a failed assertion does not leave that thread
 * running.
 *
 * @param dagCheckPoint checkpoint window count set at DAG level
 *     ({@code Context.DAGContext.CHECKPOINT_WINDOW_COUNT}) and on the window generator
 * @param opCheckPoint checkpoint window count set at operator level
 *     ({@code Context.OperatorContext.CHECKPOINT_WINDOW_COUNT})
 * @throws InterruptedException if the calling thread is interrupted while
 *     sleeping or while joining the node thread
 */
private void testCheckpointDistance(int dagCheckPoint, int opCheckPoint) throws InterruptedException {
  int windowWidth = 50;
  long sleeptime = 25L;
  int maxWindows = 60;
  // Adding some extra time for the windows to finish
  long maxSleep = windowWidth * maxWindows + 5000;

  ScheduledExecutorService executorService = new ScheduledThreadPoolExecutor(1, "default");
  final WindowGenerator windowGenerator = new WindowGenerator(executorService, 1024);
  windowGenerator.setWindowWidth(windowWidth);
  windowGenerator.setFirstWindow(executorService.getCurrentTimeMillis());
  windowGenerator.setCheckpointCount(dagCheckPoint, 0);

  CheckpointDistanceOperator go = new CheckpointDistanceOperator();
  go.maxWindows = maxWindows;

  // Precompute the windows at which a checkpoint is expected.
  List<Integer> checkpoints = new ArrayList<>();
  int window = 0;
  while (window < maxWindows) {
    // Next multiple of dagCheckPoint strictly after the current window ...
    window = (int) Math.ceil((double) (window + 1) / dagCheckPoint) * dagCheckPoint;
    // ... then rounded up to a multiple of opCheckPoint.
    window = (int) Math.ceil((double) window / opCheckPoint) * opCheckPoint;
    checkpoints.add(window);
  }

  final StreamContext stcontext = new StreamContext("s1");
  DefaultAttributeMap attrMap = new DefaultAttributeMap();
  attrMap.put(Context.DAGContext.CHECKPOINT_WINDOW_COUNT, dagCheckPoint);
  attrMap.put(Context.OperatorContext.CHECKPOINT_WINDOW_COUNT, opCheckPoint);
  final OperatorContext context = new com.datatorrent.stram.engine.OperatorContext(0, "operator", attrMap, null);
  final GenericNode gn = new GenericNode(go, context);
  gn.setId(1);
  gn.connectInputPort("ip1", windowGenerator.acquireReservoir("ip1", 1024));
  gn.connectInputPort("ip2", windowGenerator.acquireReservoir("ip2", 1024));
  gn.connectOutputPort("op", Sink.BLACKHOLE);

  // Set once the node has been activated; it is not read within this method.
  final AtomicBoolean ab = new AtomicBoolean(false);
  Thread t = new Thread() {
    @Override
    public void run() {
      gn.setup(context);
      windowGenerator.activate(stcontext);
      gn.activate();
      ab.set(true);
      gn.run();
      windowGenerator.deactivate();
      gn.deactivate();
      gn.teardown();
    }
  };
  t.start();

  try {
    // Poll until the operator has seen maxWindows windows or the time budget is used up.
    long interval = 0;
    do {
      Thread.sleep(sleeptime);
      interval += sleeptime;
    } while ((go.numWindows < maxWindows) && (interval < maxSleep));

    Assert.assertEquals("Number distances", maxWindows, go.numWindows);

    int chkindex = 0;
    int nextCheckpoint = checkpoints.get(chkindex++);
    for (int i = 0; i < maxWindows; ++i) {
      // Move to the following expected checkpoint once window i + 1 lies beyond the current one.
      if ((i + 1) > nextCheckpoint) {
        nextCheckpoint = checkpoints.get(chkindex++);
      }
      Assert.assertEquals("Windows from checkpoint for " + i, nextCheckpoint - i, (int) go.distances.get(i));
    }
  } finally {
    // Always stop the node and wait for its thread, even when an assertion above failed.
    gn.shutdown();
    t.join();
  }
}
Aggregations