Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.
From the class RescalePartitionerTest, method testExecutionGraphGeneration.
/**
 * Builds a {@code source(2) -> rescale -> flatMap(4) -> rescale -> sink(2)} job, attaches it to an
 * execution graph and checks how the rescale edges connect the parallel subtasks: every map
 * subtask reads exactly one source partition (each source partition being read by two mappers),
 * and every sink subtask reads two map partitions that no other sink subtask reads.
 */
@Test
public void testExecutionGraphGeneration() throws Exception {
    final StreamExecutionEnvironment streamEnv =
            StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setParallelism(4);

    // no-op parallel source, explicitly limited to two subtasks
    DataStream<String> sourceStream =
            streamEnv
                    .addSource(
                            new ParallelSourceFunction<String>() {
                                private static final long serialVersionUID = 7772338606389180774L;

                                @Override
                                public void run(SourceContext<String> ctx) throws Exception {}

                                @Override
                                public void cancel() {}
                            })
                    .setParallelism(2);

    // no-op flat map that inherits the environment parallelism of four
    DataStream<Tuple2<String, Integer>> mappedStream =
            sourceStream
                    .rescale()
                    .flatMap(
                            new FlatMapFunction<String, Tuple2<String, Integer>>() {
                                private static final long serialVersionUID = -5255930322161596829L;

                                @Override
                                public void flatMap(
                                        String value, Collector<Tuple2<String, Integer>> out)
                                        throws Exception {}
                            });

    mappedStream.rescale().print().setParallelism(2);

    // translate to a job graph and pick the three vertices in topological order
    JobGraph jobGraph = streamEnv.getStreamGraph().getJobGraph();
    List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

    JobVertex sourceVertex = sortedVertices.get(0);
    JobVertex mapVertex = sortedVertices.get(1);
    JobVertex sinkVertex = sortedVertices.get(2);

    assertEquals(2, sourceVertex.getParallelism());
    assertEquals(4, mapVertex.getParallelism());
    assertEquals(2, sinkVertex.getParallelism());

    ExecutionGraph executionGraph =
            TestingDefaultExecutionGraphBuilder.newBuilder()
                    .setVertexParallelismStore(
                            SchedulerBase.computeVertexParallelismStore(jobGraph))
                    .build();

    try {
        executionGraph.attachJobGraph(sortedVertices);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Building ExecutionGraph failed: " + e.getMessage());
    }

    ExecutionJobVertex execSourceVertex = executionGraph.getJobVertex(sourceVertex.getID());
    ExecutionJobVertex execMapVertex = executionGraph.getJobVertex(mapVertex.getID());
    ExecutionJobVertex execSinkVertex = executionGraph.getJobVertex(sinkVertex.getID());

    assertEquals(0, execSourceVertex.getInputs().size());

    assertEquals(1, execMapVertex.getInputs().size());
    assertEquals(4, execMapVertex.getParallelism());

    // source -> map: count how many map subtasks consume each source partition number;
    // each of the two source partitions must show up exactly twice
    Map<Integer, Integer> consumersPerSourcePartition = new HashMap<>();
    for (ExecutionVertex mapTask : execMapVertex.getTaskVertices()) {
        assertEquals(1, mapTask.getNumberOfInputs());
        assertEquals(1, mapTask.getConsumedPartitionGroup(0).size());

        IntermediateResultPartitionID sourcePartitionId =
                mapTask.getConsumedPartitionGroup(0).getFirst();
        IntermediateResultPartition sourcePartition =
                mapTask.getExecutionGraphAccessor().getResultPartitionOrThrow(sourcePartitionId);
        assertEquals(sourceVertex.getID(), sourcePartition.getProducer().getJobvertexId());

        consumersPerSourcePartition.merge(
                sourcePartitionId.getPartitionNumber(), 1, Integer::sum);
    }

    assertEquals(2, consumersPerSourcePartition.size());
    for (int consumerCount : consumersPerSourcePartition.values()) {
        assertEquals(2, consumerCount);
    }

    assertEquals(1, execSinkVertex.getInputs().size());
    assertEquals(2, execSinkVertex.getParallelism());

    ExecutionVertex[] sinkTasks = execSinkVertex.getTaskVertices();
    InternalExecutionGraphAccessor graphAccessor = execSinkVertex.getGraph();

    // map -> sink: every sink subtask consumes two map partitions, and no map partition
    // number may be seen by more than one sink subtask
    Set<Integer> seenMapPartitions = new HashSet<>();
    for (ExecutionVertex sinkTask : sinkTasks) {
        assertEquals(1, sinkTask.getNumberOfInputs());
        assertEquals(2, sinkTask.getConsumedPartitionGroup(0).size());

        for (IntermediateResultPartitionID mapPartitionId :
                sinkTask.getConsumedPartitionGroup(0)) {
            IntermediateResultPartition mapPartition =
                    graphAccessor.getResultPartitionOrThrow(mapPartitionId);
            assertEquals(mapVertex.getID(), mapPartition.getProducer().getJobvertexId());

            int mapPartitionNumber = mapPartition.getPartitionNumber();
            assertFalse(seenMapPartitions.contains(mapPartitionNumber));
            seenMapPartitions.add(mapPartitionNumber);
        }
    }

    assertEquals(4, seenMapPartitions.size());
}
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.
From the class CheckpointCoordinatorTest, method testTriggerAndConfirmSimpleCheckpoint.
/**
 * Triggers a checkpoint on a graph with two job vertices, acknowledges it from both tasks (one of
 * them twice) and checks that the pending checkpoint turns into the single retained completed
 * checkpoint. A second checkpoint is then triggered and acknowledged without state, and the test
 * checks that it replaces the first one as the only retained checkpoint.
 */
@Test
public void testTriggerAndConfirmSimpleCheckpoint() throws Exception {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
// gateway that records the trigger / completion notifications queried further below
CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway = new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2).setTaskManagerGateway(gateway).build();
// only the first subtask of each job vertex is used by this test
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
CheckpointCoordinator checkpointCoordinator = getCheckpointCoordinator(graph);
// initial state: nothing pending, nothing retained, nothing scheduled
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());
// trigger the first checkpoint. this should succeed
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
// run the work queued on the manually triggered executor, then surface any trigger failure
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
// validate that we have a pending checkpoint
assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
// one scheduled task is now active (presumably the checkpoint canceller, see the check after
// completion below)
assertEquals(1, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());
long checkpointId = checkpointCoordinator.getPendingCheckpoints().entrySet().iterator().next().getKey();
PendingCheckpoint checkpoint = checkpointCoordinator.getPendingCheckpoints().get(checkpointId);
// the pending checkpoint belongs to this job and has not been acknowledged by any task yet
assertNotNull(checkpoint);
assertEquals(checkpointId, checkpoint.getCheckpointId());
assertEquals(graph.getJobID(), checkpoint.getJobId());
assertEquals(2, checkpoint.getNumberOfNonAcknowledgedTasks());
assertEquals(0, checkpoint.getNumberOfAcknowledgedTasks());
assertEquals(0, checkpoint.getOperatorStates().size());
assertFalse(checkpoint.isDisposed());
assertFalse(checkpoint.areTasksFullyAcknowledged());
// check that the vertices received the trigger checkpoint message
for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
assertEquals(checkpointId, gateway.getOnlyTriggeredCheckpoint(attemptId).checkpointId);
}
// build one mocked subtask state per task, keyed by the first operator ID of its job vertex
OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
OperatorSubtaskState subtaskState1 = mock(OperatorSubtaskState.class);
OperatorSubtaskState subtaskState2 = mock(OperatorSubtaskState.class);
TaskStateSnapshot taskOperatorSubtaskStates1 = new TaskStateSnapshot(singletonMap(opID1, subtaskState1));
TaskStateSnapshot taskOperatorSubtaskStates2 = new TaskStateSnapshot(singletonMap(opID2, subtaskState2));
// acknowledge from one of the tasks
AcknowledgeCheckpoint acknowledgeCheckpoint1 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates2);
checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
assertEquals(1, checkpoint.getNumberOfAcknowledgedTasks());
assertEquals(1, checkpoint.getNumberOfNonAcknowledgedTasks());
assertFalse(checkpoint.isDisposed());
assertFalse(checkpoint.areTasksFullyAcknowledged());
verify(subtaskState2, times(1)).registerSharedStates(any(SharedStateRegistry.class), eq(checkpointId));
// acknowledge the same task again (should not matter)
checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
assertFalse(checkpoint.isDisposed());
assertFalse(checkpoint.areTasksFullyAcknowledged());
// the duplicate acknowledge is expected to invoke registerSharedStates on the mock a second
// time, even though the pending checkpoint is still not fully acknowledged
verify(subtaskState2, times(2)).registerSharedStates(any(SharedStateRegistry.class), eq(checkpointId));
// acknowledge the other task.
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates1), TASK_MANAGER_LOCATION_INFO);
// the checkpoint is internally converted to a successful checkpoint and the
// pending checkpoint object is disposed
assertTrue(checkpoint.isDisposed());
// now we should have a completed checkpoint
assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
// the canceler should be removed now
assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());
// validate that the subtasks states have registered their shared states.
{
verify(subtaskState1, times(1)).registerSharedStates(any(SharedStateRegistry.class), eq(checkpointId));
verify(subtaskState2, times(2)).registerSharedStates(any(SharedStateRegistry.class), eq(checkpointId));
}
// validate that the relevant tasks got a confirmation message
for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
assertEquals(checkpointId, gateway.getOnlyNotifiedCompletedCheckpoint(attemptId).checkpointId);
}
// the retained checkpoint carries the same ID and one operator state per job vertex
CompletedCheckpoint success = checkpointCoordinator.getSuccessfulCheckpoints().get(0);
assertEquals(graph.getJobID(), success.getJobId());
assertEquals(checkpoint.getCheckpointId(), success.getCheckpointID());
assertEquals(2, success.getOperatorStates().size());
// ---------------
// trigger another checkpoint and see that this one replaces the other checkpoint
// ---------------
// reset the recording gateway so the "only triggered / only notified" lookups below refer to
// the second checkpoint
gateway.resetCount();
checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
long checkpointIdNew = checkpointCoordinator.getPendingCheckpoints().entrySet().iterator().next().getKey();
// acknowledge from both tasks using the three-argument constructor, i.e. without passing
// metrics or a state snapshot
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointIdNew), TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointIdNew), TASK_MANAGER_LOCATION_INFO);
// still exactly one retained checkpoint: the new one has taken the place of the first
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());
CompletedCheckpoint successNew = checkpointCoordinator.getSuccessfulCheckpoints().get(0);
assertEquals(graph.getJobID(), successNew.getJobId());
assertEquals(checkpointIdNew, successNew.getCheckpointID());
assertEquals(2, successNew.getOperatorStates().size());
// both operator states must satisfy hasNoSubState (helper defined elsewhere in the test class)
assertTrue(successNew.getOperatorStates().values().stream().allMatch(this::hasNoSubState));
// validate that the relevant tasks got a confirmation message
for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
assertEquals(checkpointIdNew, gateway.getOnlyTriggeredCheckpoint(attemptId).checkpointId);
assertEquals(checkpointIdNew, gateway.getOnlyNotifiedCompletedCheckpoint(attemptId).checkpointId);
}
checkpointCoordinator.shutdown();
}
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.
From the class CheckpointCoordinatorTest, method testReportLatestCompletedCheckpointIdWithAbort.
/**
 * Completes one checkpoint, then lets a second one be declined, and checks that the abort
 * notification sent to the task carries the ID of the previously completed checkpoint as the
 * latest completed checkpoint ID.
 */
@Test
public void testReportLatestCompletedCheckpointIdWithAbort() throws Exception {
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .setTransitToRunning(false)
                    .build();
    final ExecutionVertex vertex = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];

    // stays at -1 until an abort notification reaches the gateway below
    final AtomicLong latestCompletedIdSeenOnAbort = new AtomicLong(-1);
    final SimpleAckingTaskManagerGateway abortRecordingGateway =
            new SimpleAckingTaskManagerGateway() {
                @Override
                public void notifyCheckpointAborted(
                        ExecutionAttemptID executionAttemptID,
                        JobID jobId,
                        long checkpointId,
                        long latestCompletedCheckpointId,
                        long timestamp) {
                    latestCompletedIdSeenOnAbort.set(latestCompletedCheckpointId);
                }
            };
    final LogicalSlot recordingSlot =
            new TestingLogicalSlotBuilder()
                    .setTaskManagerGateway(abortRecordingGateway)
                    .createTestingLogicalSlot();

    // the graph was built without transitioning to running, so wire the slot and switch the
    // attempt to RUNNING by hand
    ExecutionGraphTestUtils.setVertexResource(vertex, recordingSlot);
    vertex.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);

    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setAllowCheckpointsAfterTasksFinished(true)
                    .build();

    // first checkpoint: trigger and acknowledge, so that it completes successfully
    final CompletableFuture<CompletedCheckpoint> completedFuture =
            coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    final long completedCheckpointId =
            coordinator.getPendingCheckpoints().keySet().iterator().next();
    final ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
    coordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    executionGraph.getJobID(),
                    attemptId,
                    completedCheckpointId,
                    new CheckpointMetrics(),
                    new TaskStateSnapshot()),
            "localhost");
    assertTrue(completedFuture.isDone());
    assertFalse(completedFuture.isCompletedExceptionally());

    // second checkpoint: trigger and decline, so that it gets aborted
    final CompletableFuture<CompletedCheckpoint> abortedFuture =
            coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    final long abortedCheckpointId =
            coordinator.getPendingCheckpoints().keySet().iterator().next();
    coordinator.receiveDeclineMessage(
            new DeclineCheckpoint(
                    executionGraph.getJobID(),
                    attemptId,
                    abortedCheckpointId,
                    new CheckpointException(CHECKPOINT_EXPIRED)),
            "localhost");
    assertTrue(abortedFuture.isCompletedExceptionally());

    // the abort notification must have reported the first checkpoint as latest completed
    assertEquals(completedCheckpointId, latestCompletedIdSeenOnAbort.get());
}
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.
From the class CheckpointCoordinatorTest, method testCheckpointTriggeredAfterSomeTasksFinishedIfAllowed.
/**
 * Marks three of the six subtasks as finished before triggering a checkpoint with
 * "checkpoints after tasks finished" allowed, and checks that the checkpoint becomes pending and
 * that the stats tracker already counts the three finished subtasks as acknowledged.
 */
@Test
public void testCheckpointTriggeredAfterSomeTasksFinishedIfAllowed() throws Exception {
    final JobVertexID firstVertexId = new JobVertexID();
    final JobVertexID secondVertexId = new JobVertexID();

    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(firstVertexId, 3, 256)
                    .addJobVertex(secondVertexId, 3, 256)
                    .build();
    final ExecutionJobVertex firstJobVertex = executionGraph.getJobVertex(firstVertexId);
    final ExecutionJobVertex secondJobVertex = executionGraph.getJobVertex(secondVertexId);

    // subtasks 0 and 1 of the first vertex and subtask 1 of the second vertex finish up front
    final ExecutionVertex[] finishedTasks = {
        firstJobVertex.getTaskVertices()[0],
        firstJobVertex.getTaskVertices()[1],
        secondJobVertex.getTaskVertices()[1]
    };
    for (ExecutionVertex finishedTask : finishedTasks) {
        finishedTask.getCurrentExecutionAttempt().markFinished();
    }

    final CheckpointStatsTracker tracker =
            new CheckpointStatsTracker(Integer.MAX_VALUE, new UnregisteredMetricsGroup());
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setAllowCheckpointsAfterTasksFinished(true)
                    .setCheckpointStatsTracker(tracker)
                    .build();

    // before triggering there is neither a pending nor a retained checkpoint
    assertEquals(0, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // checkpointing with finished tasks is allowed, so the trigger must not fail; the future
    // stays incomplete because the remaining tasks have not acknowledged yet
    final CompletableFuture<CompletedCheckpoint> triggerFuture =
            coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertFalse(triggerFuture.isDone());
    assertFalse(triggerFuture.isCompletedExceptionally());

    // exactly one checkpoint is pending now
    assertEquals(1, coordinator.getNumberOfPendingCheckpoints());
    final PendingCheckpoint pending =
            coordinator.getPendingCheckpoints().values().iterator().next();

    final AbstractCheckpointStats stats =
            tracker.createSnapshot()
                    .getHistory()
                    .getCheckpointById(pending.getCheckpointID());
    assertEquals(3, stats.getNumberOfAcknowledgedSubtasks());

    // each finished subtask is automatically marked as acknowledged and therefore has stats
    for (ExecutionVertex finishedTask : finishedTasks) {
        assertNotNull(
                stats.getTaskStateStats(finishedTask.getJobvertexId())
                        .getSubtaskStats()[finishedTask.getParallelSubtaskIndex()]);
    }
}
Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.
From the class CheckpointCoordinatorTest, method testBaseLocationsNotInitialized.
/**
 * Builds a checkpoint coordinator with a file-system checkpoint storage and a checkpoint interval
 * of {@code Long.MAX_VALUE}, and checks that no job-specific directory has been created under
 * the checkpoint base directory.
 */
@Test
public void testBaseLocationsNotInitialized() throws Exception {
    final File baseDir = tmpFolder.newFolder();
    final JobVertexID vertexId = new JobVertexID();

    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .setTransitToRunning(false)
                    .build();

    final CheckpointCoordinatorConfiguration maxIntervalConfig =
            CheckpointCoordinatorConfiguration.builder()
                    .setCheckpointInterval(Long.MAX_VALUE)
                    .build();

    // only constructing the coordinator matters here; it is never asked to checkpoint
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointCoordinatorConfiguration(maxIntervalConfig)
                    .setCheckpointStorage(new FsStateBackend(baseDir.toURI()))
                    .build();

    final Path jobDir =
            new Path(baseDir.getAbsolutePath(), executionGraph.getJobID().toString());
    final FileSystem fileSystem = FileSystem.get(baseDir.toURI());

    // directory will not be created if checkpointing is disabled
    Assert.assertFalse(fileSystem.exists(jobDir));
}
Aggregations