Search in sources :

Example 1 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CoordinatorShutdownTest method testCoordinatorShutsDownOnSuccess.

@Test
public void testCoordinatorShutsDownOnSuccess() {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();
        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(BlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());
        JobGraph testGraph = new JobGraph("test job", vertex);
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(vertexIdList, vertexIdList, vertexIdList, 5000, 60000, 0L, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true));
        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage = new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);
        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);
        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture = jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph) ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);
        BlockingInvokable.unblock();
        graph.waitUntilFinished();
        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 2 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class ExecutionGraphCheckpointCoordinatorTest method testShutdownCheckpointCoordinator.

/**
	 * Tests that a shut down checkpoint coordinator calls shutdown on
	 * the store and counter.
	 */
@Test
public void testShutdownCheckpointCoordinator() throws Exception {
    CheckpointIDCounter counter = mock(CheckpointIDCounter.class);
    CompletedCheckpointStore store = mock(CompletedCheckpointStore.class);
    ExecutionGraph graph = createExecutionGraphAndEnableCheckpointing(counter, store);
    graph.fail(new Exception("Test Exception"));
    verify(counter, times(1)).shutdown(JobStatus.FAILED);
    verify(store, times(1)).shutdown(JobStatus.FAILED);
}
Also used : ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 3 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class ExecutionGraphCheckpointCoordinatorTest method createExecutionGraphAndEnableCheckpointing.

private ExecutionGraph createExecutionGraphAndEnableCheckpointing(CheckpointIDCounter counter, CompletedCheckpointStore store) throws Exception {
    ExecutionGraph executionGraph = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new JobID(), "test", new Configuration(), new SerializedValue<>(new ExecutionConfig()), Time.days(1L), new NoRestartStrategy(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), new Scheduler(TestingUtils.defaultExecutionContext()), ClassLoader.getSystemClassLoader(), new UnregisteredMetricsGroup());
    executionGraph.enableCheckpointing(100, 100, 100, 1, ExternalizedCheckpointSettings.none(), Collections.<ExecutionJobVertex>emptyList(), Collections.<ExecutionJobVertex>emptyList(), Collections.<ExecutionJobVertex>emptyList(), counter, store, null, null, CheckpointStatsTrackerTest.createTestTracker());
    JobVertex jobVertex = new JobVertex("MockVertex");
    jobVertex.setInvokableClass(AbstractInvokable.class);
    executionGraph.attachJobGraph(Collections.singletonList(jobVertex));
    return executionGraph;
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobID(org.apache.flink.api.common.JobID)

Example 4 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class BackPressureStatsTrackerITCase method testBackPressuredProducer.

/**
	 * Tests a simple fake-back pressured task. Back pressure is assumed when
	 * sampled stack traces are in blocking buffer requests.
	 */
@Test
public void testBackPressuredProducer() throws Exception {
    new JavaTestKit(testActorSystem) {

        {
            final FiniteDuration deadline = new FiniteDuration(60, TimeUnit.SECONDS);
            // The JobGraph
            final JobGraph jobGraph = new JobGraph();
            final int parallelism = 4;
            final JobVertex task = new JobVertex("Task");
            task.setInvokableClass(BackPressuredTask.class);
            task.setParallelism(parallelism);
            jobGraph.addVertex(task);
            ActorGateway jobManger = null;
            ActorGateway taskManager = null;
            //
            // 1) Consume all buffers at first (no buffers for the test task)
            //
            testBufferPool = networkBufferPool.createBufferPool(1, Integer.MAX_VALUE);
            final List<Buffer> buffers = new ArrayList<>();
            while (true) {
                Buffer buffer = testBufferPool.requestBuffer();
                if (buffer != null) {
                    buffers.add(buffer);
                } else {
                    break;
                }
            }
            try {
                jobManger = TestingUtils.createJobManager(testActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new Configuration());
                final Configuration config = new Configuration();
                config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, parallelism);
                taskManager = TestingUtils.createTaskManager(testActorSystem, jobManger, config, true, true);
                final ActorGateway jm = jobManger;
                new Within(deadline) {

                    @Override
                    protected void run() {
                        try {
                            ActorGateway testActor = new AkkaActorGateway(getTestActor(), null);
                            // Submit the job and wait until it is running
                            JobClient.submitJobDetached(jm, config, jobGraph, deadline, ClassLoader.getSystemClassLoader());
                            jm.tell(new WaitForAllVerticesToBeRunning(jobGraph.getJobID()), testActor);
                            expectMsgEquals(new AllVerticesRunning(jobGraph.getJobID()));
                            // Get the ExecutionGraph
                            jm.tell(new RequestExecutionGraph(jobGraph.getJobID()), testActor);
                            ExecutionGraphFound executionGraphResponse = expectMsgClass(ExecutionGraphFound.class);
                            ExecutionGraph executionGraph = (ExecutionGraph) executionGraphResponse.executionGraph();
                            ExecutionJobVertex vertex = executionGraph.getJobVertex(task.getID());
                            StackTraceSampleCoordinator coordinator = new StackTraceSampleCoordinator(testActorSystem.dispatcher(), 60000);
                            // Verify back pressure (clean up interval can be ignored)
                            BackPressureStatsTracker statsTracker = new BackPressureStatsTracker(coordinator, 100 * 1000, 20, Time.milliseconds(10L));
                            int numAttempts = 10;
                            int nextSampleId = 0;
                            // the buffer.
                            for (int attempt = 0; attempt < numAttempts; attempt++) {
                                try {
                                    OperatorBackPressureStats stats = triggerStatsSample(statsTracker, vertex);
                                    assertEquals(nextSampleId + attempt, stats.getSampleId());
                                    assertEquals(parallelism, stats.getNumberOfSubTasks());
                                    assertEquals(1.0, stats.getMaxBackPressureRatio(), 0.0);
                                    for (int i = 0; i < parallelism; i++) {
                                        assertEquals(1.0, stats.getBackPressureRatio(i), 0.0);
                                    }
                                    nextSampleId = stats.getSampleId() + 1;
                                    break;
                                } catch (Throwable t) {
                                    if (attempt == numAttempts - 1) {
                                        throw t;
                                    } else {
                                        Thread.sleep(500);
                                    }
                                }
                            }
                            //
                            for (Buffer buf : buffers) {
                                buf.recycle();
                            }
                            // grab them and then immediately release them.
                            while (testBufferPool.getNumberOfAvailableMemorySegments() < 100) {
                                Thread.sleep(100);
                            }
                            // Verify that no task is back pressured any more.
                            for (int attempt = 0; attempt < numAttempts; attempt++) {
                                try {
                                    OperatorBackPressureStats stats = triggerStatsSample(statsTracker, vertex);
                                    assertEquals(nextSampleId + attempt, stats.getSampleId());
                                    assertEquals(parallelism, stats.getNumberOfSubTasks());
                                    // Verify that no task is back pressured
                                    for (int i = 0; i < parallelism; i++) {
                                        assertEquals(0.0, stats.getBackPressureRatio(i), 0.0);
                                    }
                                    break;
                                } catch (Throwable t) {
                                    if (attempt == numAttempts - 1) {
                                        throw t;
                                    } else {
                                        Thread.sleep(500);
                                    }
                                }
                            }
                            // Shut down
                            jm.tell(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()), testActor);
                            // Cancel job
                            jm.tell(new JobManagerMessages.CancelJob(jobGraph.getJobID()));
                            // Response to removal notification
                            expectMsgEquals(true);
                            //
                            // 3) Trigger stats for archived job
                            //
                            statsTracker.invalidateOperatorStatsCache();
                            assertFalse("Unexpected trigger", statsTracker.triggerStackTraceSample(vertex));
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } finally {
                TestingUtils.stopActor(jobManger);
                TestingUtils.stopActor(taskManager);
                for (Buffer buf : buffers) {
                    buf.recycle();
                }
                testBufferPool.lazyDestroy();
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) ArrayList(java.util.ArrayList) AllVerticesRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.AllVerticesRunning) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ExecutionGraphFound(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound) Buffer(org.apache.flink.runtime.io.network.buffer.Buffer) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 5 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class JobManagerTest method testRequestPartitionStateUnregisteredExecution.

/**
	 * Tests the JobManager response when the execution is not registered with
	 * the ExecutionGraph.
	 */
@Test
public void testRequestPartitionStateUnregisteredExecution() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just finish
                        sender.setInvokableClass(NoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobVertex sender2 = new JobVertex("Blocking Sender");
                        sender2.setParallelism(1);
                        // just block
                        sender2.setInvokableClass(BlockingNoOpInvokable.class);
                        sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
                        ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
                        ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
                        ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        while (vertex.getExecutionState() != ExecutionState.FINISHED) {
                            Thread.sleep(1);
                        }
                        IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // Producer finished, request state
                        Object request = new RequestPartitionProducerState(jid, rid, partitionId);
                        Future<ExecutionState> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                        assertEquals(ExecutionState.FINISHED, Await.result(producerStateFuture, getRemainingTime()));
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ExecutionGraphFound(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)120 Test (org.junit.Test)96 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)77 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)53 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)40 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)31 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)24 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)24 HashMap (java.util.HashMap)20 CompletableFuture (java.util.concurrent.CompletableFuture)19 JobID (org.apache.flink.api.common.JobID)19 ArrayList (java.util.ArrayList)17 HashSet (java.util.HashSet)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)17 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)17 ExecutionException (java.util.concurrent.ExecutionException)13 Executor (java.util.concurrent.Executor)13 IOException (java.io.IOException)12