Search in sources :

Example 41 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class DefaultCheckpointPlanCalculatorTest method runSingleTest.

/**
 * Runs one checkpoint-plan scenario: builds an execution graph from the given declarations,
 * derives the expected running / finished / to-trigger tasks, computes the checkpoint plan and
 * hands everything to {@code checkCheckpointPlan} for verification.
 *
 * @param vertexDeclarations declarations used to build the job vertices of the graph
 * @param edgeDeclarations declarations used to connect the job vertices
 * @param expectedToTriggerTaskDeclarations tasks the computed plan is expected to trigger
 * @param expectedFinishedTaskDeclarations tasks the computed plan is expected to report as
 *     finished
 * @throws Exception if building the graph or computing the plan fails
 */
private void runSingleTest(List<VertexDeclaration> vertexDeclarations, List<EdgeDeclaration> edgeDeclarations, List<TaskDeclaration> expectedToTriggerTaskDeclarations, List<TaskDeclaration> expectedFinishedTaskDeclarations) throws Exception {
    ExecutionGraph executionGraph = createExecutionGraph(vertexDeclarations, edgeDeclarations);
    DefaultCheckpointPlanCalculator calculator = createCheckpointPlanCalculator(executionGraph);

    // For every vertex with finished subtasks, the remaining subtasks are expected to be running;
    // a vertex whose finished subtask count equals its parallelism is expected to be fully finished.
    List<TaskDeclaration> runningDeclarations = new ArrayList<>();
    List<ExecutionJobVertex> fullyFinishedJobVertices = new ArrayList<>();
    for (TaskDeclaration finished : expectedFinishedTaskDeclarations) {
        ExecutionJobVertex vertex = chooseJobVertex(executionGraph, finished.vertexIndex);
        runningDeclarations.add(
                new TaskDeclaration(
                        finished.vertexIndex,
                        minus(range(0, vertex.getParallelism()), finished.subtaskIndices)));
        if (finished.subtaskIndices.size() == vertex.getParallelism()) {
            fullyFinishedJobVertices.add(vertex);
        }
    }

    List<ExecutionVertex> runningTasks =
            chooseTasks(executionGraph, runningDeclarations.toArray(new TaskDeclaration[0]));

    List<ExecutionVertex> finishedVertices =
            chooseTasks(executionGraph, expectedFinishedTaskDeclarations.toArray(new TaskDeclaration[0]));
    List<Execution> finishedTasks =
            finishedVertices.stream()
                    .map(ExecutionVertex::getCurrentExecutionAttempt)
                    .collect(Collectors.toList());

    List<ExecutionVertex> toTriggerTasks =
            chooseTasks(executionGraph, expectedToTriggerTaskDeclarations.toArray(new TaskDeclaration[0]));

    // Compute the checkpoint plan. Per the original note, the isUnalignedCheckpoint flag is not
    // expected to influence the result in this scenario.
    CheckpointPlan plan = calculator.calculateCheckpointPlan().get();
    checkCheckpointPlan(toTriggerTasks, runningTasks, finishedTasks, fullyFinishedJobVertices, plan);
}
Also used : Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ArrayList(java.util.ArrayList) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex)

Example 42 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class DefaultCheckpointPlanCalculatorTest method createExecutionGraph.

/**
 * Builds a started execution graph from the given declarations, moves every execution attempt
 * to {@code RUNNING} and then marks the declared finished subtasks as finished.
 *
 * @param vertexDeclarations one declaration per job vertex (parallelism and finished subtasks)
 * @param edgeDeclarations connections between job vertices, referenced by vertex index
 * @return the prepared execution graph
 * @throws Exception if the graph cannot be created
 */
private ExecutionGraph createExecutionGraph(List<VertexDeclaration> vertexDeclarations, List<EdgeDeclaration> edgeDeclarations) throws Exception {
    final int vertexCount = vertexDeclarations.size();

    // One job vertex per declaration, named by its index.
    JobVertex[] vertices = new JobVertex[vertexCount];
    for (int idx = 0; idx < vertexCount; idx++) {
        VertexDeclaration declaration = vertexDeclarations.get(idx);
        vertices[idx] = ExecutionGraphTestUtils.createJobVertex(vertexName(idx), declaration.parallelism, NoOpInvokable.class);
    }

    // Wire the vertices together with pipelined result partitions.
    for (EdgeDeclaration edge : edgeDeclarations) {
        JobVertex consumer = vertices[edge.target];
        JobVertex producer = vertices[edge.source];
        consumer.connectNewDataSetAsInput(producer, edge.distributionPattern, ResultPartitionType.PIPELINED);
    }

    ExecutionGraph executionGraph = ExecutionGraphTestUtils.createSimpleTestGraph(vertices);
    executionGraph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
    executionGraph.transitionToRunning();
    for (ExecutionVertex vertex : executionGraph.getAllExecutionVertices()) {
        vertex.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);
    }

    // Finish the subtasks that the declarations list as already finished.
    for (int idx = 0; idx < vertexCount; idx++) {
        JobVertexID vertexId = vertices[idx].getID();
        for (int subtaskIndex : vertexDeclarations.get(idx).finishedSubtaskIndices) {
            executionGraph.getJobVertex(vertexId).getTaskVertices()[subtaskIndex].getCurrentExecutionAttempt().markFinished();
        }
    }
    return executionGraph;
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph)

Example 43 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class DefaultSchedulerCheckpointCoordinatorTest method testClosingSchedulerSuspendsExecutionGraphAndShutsDownCheckpointCoordinator.

/**
 * Verifies that closing the scheduler leaves the execution graph in {@code SUSPENDED} and shuts
 * down the checkpoint coordinator, with the checkpoint ID counter and the completed checkpoint
 * store both observing {@code SUSPENDED} on shutdown.
 */
@Test
public void testClosingSchedulerSuspendsExecutionGraphAndShutsDownCheckpointCoordinator() throws Exception {
    // Futures completed with the job status that the counter / store are shut down with.
    final CompletableFuture<JobStatus> idCounterShutdown = new CompletableFuture<>();
    final CompletableFuture<JobStatus> checkpointStoreShutdown = new CompletableFuture<>();

    CheckpointIDCounter idCounter = TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(idCounterShutdown);
    CompletedCheckpointStore checkpointStore = TestingCompletedCheckpointStore.createStoreWithShutdownCheckAndNoCompletedCheckpoints(checkpointStoreShutdown);

    final SchedulerBase schedulerUnderTest = createSchedulerAndEnableCheckpointing(idCounter, checkpointStore);
    final ExecutionGraph executionGraph = schedulerUnderTest.getExecutionGraph();
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    // Precondition: a coordinator exists and is still active.
    assertThat(coordinator, Matchers.notNullValue());
    assertThat(coordinator.isShutdown(), is(false));

    schedulerUnderTest.closeAsync().get();

    assertThat(executionGraph.getState(), is(JobStatus.SUSPENDED));
    assertThat(coordinator.isShutdown(), is(true));
    assertThat(idCounterShutdown.get(), is(JobStatus.SUSPENDED));
    assertThat(checkpointStoreShutdown.get(), is(JobStatus.SUSPENDED));
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Example 44 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class DefaultSchedulerCheckpointCoordinatorTest method testClosingSchedulerShutsDownCheckpointCoordinatorOnFailedExecutionGraph.

/**
 * Verifies that closing the scheduler after the execution graph has been failed shuts down the
 * checkpoint coordinator, with the checkpoint ID counter and the completed checkpoint store both
 * observing {@code FAILED} on shutdown.
 */
@Test
public void testClosingSchedulerShutsDownCheckpointCoordinatorOnFailedExecutionGraph() throws Exception {
    // Futures completed with the job status that the counter / store are shut down with.
    final CompletableFuture<JobStatus> idCounterShutdown = new CompletableFuture<>();
    final CompletableFuture<JobStatus> checkpointStoreShutdown = new CompletableFuture<>();

    CheckpointIDCounter idCounter = TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(idCounterShutdown);
    CompletedCheckpointStore checkpointStore = TestingCompletedCheckpointStore.createStoreWithShutdownCheckAndNoCompletedCheckpoints(checkpointStoreShutdown);

    final SchedulerBase schedulerUnderTest = createSchedulerAndEnableCheckpointing(idCounter, checkpointStore);
    final ExecutionGraph executionGraph = schedulerUnderTest.getExecutionGraph();
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    // Precondition: a coordinator exists and is still active.
    assertThat(coordinator, Matchers.notNullValue());
    assertThat(coordinator.isShutdown(), is(false));

    // Fail the job first, then close the scheduler.
    executionGraph.failJob(new Exception("Test Exception"), System.currentTimeMillis());
    schedulerUnderTest.closeAsync().get();

    assertThat(coordinator.isShutdown(), is(true));
    assertThat(idCounterShutdown.get(), is(JobStatus.FAILED));
    assertThat(checkpointStoreShutdown.get(), is(JobStatus.FAILED));
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Example 45 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class FailoverStrategyCheckpointCoordinatorTest method testAbortPendingCheckpointsWithTriggerValidation.

/**
 * Tests that {@link CheckpointCoordinator#abortPendingCheckpoints(CheckpointException)}, when
 * called on job failover, handles the case where {@code currentPeriodicTrigger} is null.
 *
 * <p>The test fills the coordinator up to a randomly chosen {@code maxConcurrentCheckpoints}
 * (between 1 and 10), attempts one more trigger beyond that limit, aborts all pending
 * checkpoints, and then asserts that the periodic trigger is available and no checkpoints are
 * pending.
 */
@Test
public void testAbortPendingCheckpointsWithTriggerValidation() throws Exception {
    // Random limit in [1, 10] so the test covers different concurrency settings across runs.
    final int maxConcurrentCheckpoints = ThreadLocalRandom.current().nextInt(10) + 1;
    // Build the graph without transitioning to running; that is done explicitly below.
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(new JobVertexID()).setTransitToRunning(false).build();
    CheckpointCoordinatorConfiguration checkpointCoordinatorConfiguration = new CheckpointCoordinatorConfiguration(Integer.MAX_VALUE, Integer.MAX_VALUE, 0, maxConcurrentCheckpoints, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0, 0);
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinator(graph.getJobID(), checkpointCoordinatorConfiguration, Collections.emptyList(), new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), new CheckpointsCleaner(), manualThreadExecutor, mock(CheckpointFailureManager.class), new DefaultCheckpointPlanCalculator(graph.getJobID(), new ExecutionGraphCheckpointPlanCalculatorContext(graph), graph.getVerticesTopologically(), false), new ExecutionAttemptMappingProvider(graph.getAllExecutionVertices()), mock(CheckpointStatsTracker.class));
    // Switch the current executions to RUNNING so that checkpoints can be triggered.
    graph.transitionToRunning();
    graph.getAllExecutionVertices().forEach(task -> task.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING));
    checkpointCoordinator.startCheckpointScheduler();
    assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable());
    // Trigger the periodic scheduling first, then run the remaining queued work.
    // NOTE(review): the original comment said that not all scheduled tasks can be triggered
    // because a cancellation is also scheduled, yet triggerAll() is called right after;
    // confirm that triggerAll() does not run the scheduled cancellation.
    manualThreadExecutor.triggerPeriodicScheduledTasks();
    manualThreadExecutor.triggerAll();
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    // Trigger further checkpoints until the concurrency limit is reached; the periodic trigger
    // is expected to stay available throughout.
    for (int i = 1; i < maxConcurrentCheckpoints; i++) {
        checkpointCoordinator.triggerCheckpoint(false);
        manualThreadExecutor.triggerAll();
        assertEquals(i + 1, checkpointCoordinator.getNumberOfPendingCheckpoints());
        assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable());
    }
    // Only a limited number of concurrent checkpoints is supported. Once a checkpoint is
    // triggered beyond that limit, currentPeriodicTrigger is expected to be set to null,
    // while the number of pending checkpoints stays at the limit.
    checkpointCoordinator.triggerCheckpoint(false);
    manualThreadExecutor.triggerAll();
    assertEquals(maxConcurrentCheckpoints, checkpointCoordinator.getNumberOfPendingCheckpoints());
    checkpointCoordinator.abortPendingCheckpoints(new CheckpointException(CheckpointFailureReason.JOB_FAILOVER_REGION));
    // After aborting the checkpoints, currentPeriodicTrigger must be available again and no
    // checkpoints may remain pending.
    assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable());
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExecutionGraphCheckpointPlanCalculatorContext(org.apache.flink.runtime.executiongraph.ExecutionGraphCheckpointPlanCalculatorContext) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Aggregations

ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)120 Test (org.junit.Test)96 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)77 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)53 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)40 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)31 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)24 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)24 HashMap (java.util.HashMap)20 CompletableFuture (java.util.concurrent.CompletableFuture)19 JobID (org.apache.flink.api.common.JobID)19 ArrayList (java.util.ArrayList)17 HashSet (java.util.HashSet)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)17 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)17 ExecutionException (java.util.concurrent.ExecutionException)13 Executor (java.util.concurrent.Executor)13 IOException (java.io.IOException)12