Use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.
The class DefaultSchedulerCheckpointCoordinatorTest, method testClosingSchedulerSuspendsExecutionGraphAndShutsDownCheckpointCoordinator:
/**
 * Tests that the checkpoint coordinator is shut down if the execution graph is suspended.
 */
@Test
public void testClosingSchedulerSuspendsExecutionGraphAndShutsDownCheckpointCoordinator() throws Exception {
    final CompletableFuture<JobStatus> counterShutdownFuture = new CompletableFuture<>();
    CheckpointIDCounter counter =
            TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(counterShutdownFuture);
    final CompletableFuture<JobStatus> storeShutdownFuture = new CompletableFuture<>();
    CompletedCheckpointStore store =
            TestingCompletedCheckpointStore.createStoreWithShutdownCheckAndNoCompletedCheckpoints(storeShutdownFuture);

    final SchedulerBase scheduler = createSchedulerAndEnableCheckpointing(counter, store);
    final ExecutionGraph graph = scheduler.getExecutionGraph();
    final CheckpointCoordinator checkpointCoordinator = graph.getCheckpointCoordinator();

    assertThat(checkpointCoordinator, Matchers.notNullValue());
    assertThat(checkpointCoordinator.isShutdown(), is(false));

    scheduler.closeAsync().get();

    assertThat(graph.getState(), is(JobStatus.SUSPENDED));
    assertThat(checkpointCoordinator.isShutdown(), is(true));
    assertThat(counterShutdownFuture.get(), is(JobStatus.SUSPENDED));
    assertThat(storeShutdownFuture.get(), is(JobStatus.SUSPENDED));
}
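A note on how the shutdown assertions work: the testing counter completes counterShutdownFuture with whatever JobStatus the coordinator passes to shutdown(). A minimal illustrative counter along those lines is sketched below; the class name is hypothetical, and in newer Flink releases shutdown() returns a CompletableFuture<Void> instead of void.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.flink.api.common.JobStatus;
import org.apache.flink.runtime.checkpoint.CheckpointIDCounter;

// Illustration only (hypothetical class): reports the JobStatus passed to
// shutdown() through a future, which is what the factory method
// createStoreWithShutdownCheckAndNoStartAction configures on the testing counter.
class ShutdownCheckingCheckpointIDCounter implements CheckpointIDCounter {

    private final CompletableFuture<JobStatus> shutdownFuture;
    private final AtomicLong counter = new AtomicLong(1);

    ShutdownCheckingCheckpointIDCounter(CompletableFuture<JobStatus> shutdownFuture) {
        this.shutdownFuture = shutdownFuture;
    }

    @Override
    public void start() {
        // no start action, matching the "NoStartAction" factory variant
    }

    @Override
    public void shutdown(JobStatus jobStatus) {
        // the tests later assert counterShutdownFuture.get() == SUSPENDED / FAILED
        shutdownFuture.complete(jobStatus);
    }

    @Override
    public long getAndIncrement() {
        return counter.getAndIncrement();
    }

    @Override
    public long get() {
        return counter.get();
    }

    @Override
    public void setCount(long newId) {
        counter.set(newId);
    }
}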
Use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.
The class DefaultSchedulerCheckpointCoordinatorTest, method testClosingSchedulerShutsDownCheckpointCoordinatorOnFailedExecutionGraph:
/**
 * Tests that the checkpoint coordinator is shut down if the execution graph has failed.
 */
@Test
public void testClosingSchedulerShutsDownCheckpointCoordinatorOnFailedExecutionGraph() throws Exception {
    final CompletableFuture<JobStatus> counterShutdownFuture = new CompletableFuture<>();
    CheckpointIDCounter counter =
            TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(counterShutdownFuture);
    final CompletableFuture<JobStatus> storeShutdownFuture = new CompletableFuture<>();
    CompletedCheckpointStore store =
            TestingCompletedCheckpointStore.createStoreWithShutdownCheckAndNoCompletedCheckpoints(storeShutdownFuture);

    final SchedulerBase scheduler = createSchedulerAndEnableCheckpointing(counter, store);
    final ExecutionGraph graph = scheduler.getExecutionGraph();
    final CheckpointCoordinator checkpointCoordinator = graph.getCheckpointCoordinator();

    assertThat(checkpointCoordinator, Matchers.notNullValue());
    assertThat(checkpointCoordinator.isShutdown(), is(false));

    graph.failJob(new Exception("Test Exception"), System.currentTimeMillis());
    scheduler.closeAsync().get();

    assertThat(checkpointCoordinator.isShutdown(), is(true));
    assertThat(counterShutdownFuture.get(), is(JobStatus.FAILED));
    assertThat(storeShutdownFuture.get(), is(JobStatus.FAILED));
}
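Both checkpoint coordinator tests rely on a createSchedulerAndEnableCheckpointing helper that the snippets do not show. The following is a hedged reconstruction, not the verbatim Flink helper: JobGraphTestUtils.streamingJobGraph, SchedulerTestingUtils.enableCheckpointing, setCheckpointRecoveryFactory, and TestingCheckpointRecoveryFactory are assumed to be available as in Flink's scheduler test utilities, and the exact builder methods differ between releases.

// Hedged reconstruction of the omitted helper (a method of the test class).
private SchedulerBase createSchedulerAndEnableCheckpointing(
        CheckpointIDCounter counter, CompletedCheckpointStore store) throws Exception {
    final JobVertex jobVertex = new JobVertex("MockVertex");
    jobVertex.setInvokableClass(NoOpInvokable.class);

    // enable checkpointing on the job graph so the scheduler creates a CheckpointCoordinator;
    // streamingJobGraph(...) and enableCheckpointing(...) are assumed test utilities
    final JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(jobVertex);
    SchedulerTestingUtils.enableCheckpointing(jobGraph);

    return SchedulerTestingUtils.newSchedulerBuilder(
                    jobGraph, ComponentMainThreadExecutorServiceAdapter.forMainThread())
            // assumed hook for injecting the counter and store; in Flink this goes
            // through a CheckpointRecoveryFactory such as TestingCheckpointRecoveryFactory
            .setCheckpointRecoveryFactory(new TestingCheckpointRecoveryFactory(store, counter))
            .build();
}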
Use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.
The class DefaultExecutionGraphDeploymentTest, method testNoResourceAvailableFailure:
/**
 * Tests that a blocking batch job fails if there are not enough resources left to schedule the
 * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
 * swallow the fail exception when scheduling a consumer task.
 */
@Test
public void testNoResourceAvailableFailure() throws Exception {
    JobVertex v1 = new JobVertex("source");
    JobVertex v2 = new JobVertex("sink");

    int dop1 = 2;
    int dop2 = 2;
    v1.setParallelism(dop1);
    v2.setParallelism(dop2);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);
    v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);

    final JobGraph graph = JobGraphTestUtils.batchJobGraph(v1, v2);

    DirectScheduledExecutorService directExecutor = new DirectScheduledExecutorService();

    // execution graph that executes actions synchronously
    final SchedulerBase scheduler =
            SchedulerTestingUtils.newSchedulerBuilder(
                            graph, ComponentMainThreadExecutorServiceAdapter.forMainThread())
                    .setExecutionSlotAllocatorFactory(
                            SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(
                                    TestingPhysicalSlotProvider.createWithLimitedAmountOfPhysicalSlots(1)))
                    .setFutureExecutor(directExecutor)
                    .setBlobWriter(blobWriter)
                    .build();
    final ExecutionGraph eg = scheduler.getExecutionGraph();

    checkJobOffloaded((DefaultExecutionGraph) eg);

    // schedule, this triggers mock deployment
    scheduler.startScheduling();

    ExecutionAttemptID attemptID =
            eg.getJobVertex(v1.getID())
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    scheduler.updateTaskExecutionState(new TaskExecutionState(attemptID, ExecutionState.RUNNING));
    scheduler.updateTaskExecutionState(
            new TaskExecutionState(attemptID, ExecutionState.FINISHED, null));

    assertEquals(JobStatus.FAILED, eg.getState());
}
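checkJobOffloaded is a hook of the surrounding test class that the snippet omits. A plausible sketch is shown below, assuming ExecutionGraph#getJobInformationOrBlobKey and that this base test runs with a VoidBlobWriter so the job information stays inline (a BlobServer-backed subclass would assert the blob-key side instead); both assumptions are reconstructions, not confirmed by the snippet.

// Sketch only: assumes getJobInformationOrBlobKey() returning an
// Either<SerializedValue<JobInformation>, PermanentBlobKey>. With a VoidBlobWriter
// the job information stays inline (left); a real BlobServer offloads it (right).
protected void checkJobOffloaded(DefaultExecutionGraph eg) throws Exception {
    Either<SerializedValue<JobInformation>, PermanentBlobKey> jobInformation =
            eg.getJobInformationOrBlobKey();
    assertTrue(jobInformation.isLeft());
}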
Use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.
The class DefaultExecutionGraphDeploymentTest, method testRegistrationOfExecutionsCanceled:
@Test
public void testRegistrationOfExecutionsCanceled() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();

        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);

        SchedulerBase scheduler = setupScheduler(v1, 19, v2, 37);

        Collection<Execution> executions =
                new ArrayList<>(scheduler.getExecutionGraph().getRegisteredExecutions().values());
        for (Execution e : executions) {
            e.cancel();
            e.completeCancelling();
        }

        assertEquals(0, scheduler.getExecutionGraph().getRegisteredExecutions().size());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
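setupScheduler(v1, 19, v2, 37) is another omitted helper of the test class. A rough sketch follows, reconstructed from the builder calls in testNoResourceAvailableFailure above; the final assertion on the number of registered executions is an assumption about what the helper guarantees.

// Hedged reconstruction; mirrors the builder usage shown in
// testNoResourceAvailableFailure. blobWriter is a field of the test class.
private SchedulerBase setupScheduler(JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    v1.setParallelism(dop1);
    v2.setParallelism(dop2);
    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    // scheduler that executes actions synchronously on a direct executor
    final SchedulerBase scheduler =
            SchedulerTestingUtils.newSchedulerBuilder(
                            JobGraphTestUtils.streamingJobGraph(v1, v2),
                            ComponentMainThreadExecutorServiceAdapter.forMainThread())
                    .setFutureExecutor(new DirectScheduledExecutorService())
                    .setBlobWriter(blobWriter)
                    .build();

    // deploying registers one Execution per subtask: dop1 + dop2 in total
    scheduler.startScheduling();
    assertEquals(dop1 + dop2, scheduler.getExecutionGraph().getRegisteredExecutions().size());
    return scheduler;
}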
Use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.
The class ExecutionGraphPartitionReleaseTest, method testStrategyNotifiedOfUnFinishedVertices:
@Test
public void testStrategyNotifiedOfUnFinishedVertices() throws Exception {
    // setup a pipeline of 2 failover regions (f1 -> f2), where
    // f1 is just a source
    // f2 consists of 3 operators (o1, o2, o3), where o1 consumes f1, and o2/o3 consume o1
    final JobVertex sourceVertex = ExecutionGraphTestUtils.createNoOpVertex("source", 1);
    final JobVertex operator1Vertex = ExecutionGraphTestUtils.createNoOpVertex("operator1", 1);
    final JobVertex operator2Vertex = ExecutionGraphTestUtils.createNoOpVertex("operator2", 1);
    final JobVertex operator3Vertex = ExecutionGraphTestUtils.createNoOpVertex("operator3", 1);

    operator1Vertex.connectNewDataSetAsInput(
            sourceVertex, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
    operator2Vertex.connectNewDataSetAsInput(
            operator1Vertex, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    operator3Vertex.connectNewDataSetAsInput(
            operator1Vertex, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    // setup partition tracker to intercept partition release calls
    final TestingJobMasterPartitionTracker partitionTracker = new TestingJobMasterPartitionTracker();
    final Queue<ResultPartitionID> releasedPartitions = new ArrayDeque<>();
    partitionTracker.setStopTrackingAndReleasePartitionsConsumer(
            partitionIds -> releasedPartitions.add(partitionIds.iterator().next()));

    final SchedulerBase scheduler =
            createScheduler(partitionTracker, sourceVertex, operator1Vertex, operator2Vertex, operator3Vertex);
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

    mainThreadExecutor.execute(() -> {
        final Execution sourceExecution = getCurrentExecution(sourceVertex, executionGraph);
        // finish the source; this should not result in any release calls since the
        // consumer o1 was not finished
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(sourceExecution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });

    mainThreadExecutor.execute(() -> {
        final Execution operator1Execution = getCurrentExecution(operator1Vertex, executionGraph);
        // finish o1; this should not result in any release calls since not all
        // operators of the pipelined region (o2/o3) are finished
        for (final IntermediateResultPartitionID partitionId :
                operator1Execution.getVertex().getProducedPartitions().keySet()) {
            scheduler.notifyPartitionDataAvailable(
                    new ResultPartitionID(partitionId, operator1Execution.getAttemptId()));
        }
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(operator1Execution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });

    mainThreadExecutor.execute(() -> {
        final Execution operator2Execution = getCurrentExecution(operator2Vertex, executionGraph);
        // finish o2; this should not result in any release calls since o3 was not finished
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(operator2Execution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });

    mainThreadExecutor.execute(() -> {
        final Execution operator2Execution = getCurrentExecution(operator2Vertex, executionGraph);
        // reset o2
        operator2Execution.getVertex().resetForNewExecution();
        assertThat(releasedPartitions, empty());
    });

    mainThreadExecutor.execute(() -> {
        final Execution operator3Execution = getCurrentExecution(operator3Vertex, executionGraph);
        // finish o3; this should not result in any release calls since o2 was reset
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(operator3Execution.getAttemptId(), ExecutionState.FINISHED));
        assertThat(releasedPartitions, empty());
    });
}
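getCurrentExecution is a small helper of the test class; its likely shape follows directly from the attempt lookup used in testNoResourceAvailableFailure above (index 0 suffices because every vertex here has parallelism 1):

// Reconstructed helper; resolves the single current execution attempt of a vertex.
private static Execution getCurrentExecution(
        final JobVertex jobVertex, final ExecutionGraph executionGraph) {
    return executionGraph
            .getJobVertex(jobVertex.getID())
            .getTaskVertices()[0]
            .getCurrentExecutionAttempt();
}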