Search in sources :

Example 36 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class JobManagerTest method testRequestPartitionState.

/**
	 * Tests responses to partition state requests.
	 */
@Test
public void testRequestPartitionState() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Blocking test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        // This is the mock execution ID of the task requesting the state of the partition
                        final ExecutionAttemptID receiver = new ExecutionAttemptID();
                        // Request the execution graph to get the runtime info
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        final ExecutionGraph eg = (ExecutionGraph) expectMsgClass(ExecutionGraphFound.class).executionGraph();
                        final ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        final IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // - The test ----------------------------------------------------------------------
                        // 1. All execution states
                        RequestPartitionProducerState request = new RequestPartitionProducerState(jid, rid, partitionId);
                        for (ExecutionState state : ExecutionState.values()) {
                            ExecutionGraphTestUtils.setVertexState(vertex, state);
                            Future<ExecutionState> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                            ExecutionState resp = Await.result(futurePartitionState, getRemainingTime());
                            assertEquals(state, resp);
                        }
                        // 2. Non-existing execution
                        request = new RequestPartitionProducerState(jid, rid, new ResultPartitionID());
                        Future<?> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected RuntimeException");
                        } catch (RuntimeException e) {
                            assertEquals(IllegalArgumentException.class, e.getCause().getClass());
                        }
                        // 3. Non-existing job
                        request = new RequestPartitionProducerState(new JobID(), rid, new ResultPartitionID());
                        futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected IllegalArgumentException");
                        } catch (IllegalArgumentException ignored) {
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 37 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class JobManagerTest method testRequestPartitionStateMoreRecentExecutionAttempt.

/**
	 * Tests the JobManager response when the execution is not registered with
	 * the ExecutionGraph anymore and a new execution attempt is available.
	 */
@Test
public void testRequestPartitionStateMoreRecentExecutionAttempt() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just finish
                        sender.setInvokableClass(NoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobVertex sender2 = new JobVertex("Blocking Sender");
                        sender2.setParallelism(1);
                        // just block
                        sender2.setInvokableClass(BlockingNoOpInvokable.class);
                        sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobManagerMessages.JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(TestingJobManagerMessages.AllVerticesRunning.class);
                        Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
                        ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
                        ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
                        ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        while (vertex.getExecutionState() != ExecutionState.FINISHED) {
                            Thread.sleep(1);
                        }
                        IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // Reset execution => new execution attempt
                        vertex.resetForNewExecution();
                        // Producer finished, request state
                        Object request = new JobManagerMessages.RequestPartitionProducerState(jid, rid, partitionId);
                        Future<?> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(producerStateFuture, getRemainingTime());
                            fail("Did not fail with expected Exception");
                        } catch (PartitionProducerDisposedException ignored) {
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ExecutionGraphFound(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 38 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class SchedulerTestUtils method getDummyTask.

public static Execution getDummyTask() {
    ExecutionVertex vertex = mock(ExecutionVertex.class);
    when(vertex.getJobId()).thenReturn(new JobID());
    when(vertex.toString()).thenReturn("TEST-VERTEX");
    Execution execution = mock(Execution.class);
    when(execution.getVertex()).thenReturn(vertex);
    return execution;
}
Also used : Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobID(org.apache.flink.api.common.JobID)

Example 39 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class SchedulerTestUtils method getTestVertexWithLocation.

public static Execution getTestVertexWithLocation(JobVertexID jid, int taskIndex, int numTasks, TaskManagerLocation... locations) {
    ExecutionVertex vertex = mock(ExecutionVertex.class);
    when(vertex.getPreferredLocationsBasedOnInputs()).thenReturn(Arrays.asList(locations));
    when(vertex.getJobId()).thenReturn(new JobID());
    when(vertex.getJobvertexId()).thenReturn(jid);
    when(vertex.getParallelSubtaskIndex()).thenReturn(taskIndex);
    when(vertex.getTotalNumberOfParallelSubtasks()).thenReturn(numTasks);
    when(vertex.getMaxParallelism()).thenReturn(numTasks);
    when(vertex.toString()).thenReturn("TEST-VERTEX");
    Execution execution = mock(Execution.class);
    when(execution.getVertex()).thenReturn(vertex);
    return execution;
}
Also used : Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobID(org.apache.flink.api.common.JobID)

Example 40 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class BackPressureStatsTrackerTest method testTriggerStackTraceSample.

/** Tests simple statistics with fake stack traces. */
@Test
@SuppressWarnings("unchecked")
public void testTriggerStackTraceSample() throws Exception {
    CompletableFuture<StackTraceSample> sampleFuture = new FlinkCompletableFuture<>();
    StackTraceSampleCoordinator sampleCoordinator = mock(StackTraceSampleCoordinator.class);
    when(sampleCoordinator.triggerStackTraceSample(any(ExecutionVertex[].class), anyInt(), any(Time.class), anyInt())).thenReturn(sampleFuture);
    ExecutionGraph graph = mock(ExecutionGraph.class);
    when(graph.getState()).thenReturn(JobStatus.RUNNING);
    // Same Thread execution context
    when(graph.getFutureExecutor()).thenReturn(new Executor() {

        @Override
        public void execute(Runnable runnable) {
            runnable.run();
        }
    });
    ExecutionVertex[] taskVertices = new ExecutionVertex[4];
    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobId()).thenReturn(new JobID());
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    when(jobVertex.getGraph()).thenReturn(graph);
    when(jobVertex.getTaskVertices()).thenReturn(taskVertices);
    taskVertices[0] = mockExecutionVertex(jobVertex, 0);
    taskVertices[1] = mockExecutionVertex(jobVertex, 1);
    taskVertices[2] = mockExecutionVertex(jobVertex, 2);
    taskVertices[3] = mockExecutionVertex(jobVertex, 3);
    int numSamples = 100;
    Time delayBetweenSamples = Time.milliseconds(100L);
    BackPressureStatsTracker tracker = new BackPressureStatsTracker(sampleCoordinator, 9999, numSamples, delayBetweenSamples);
    // Trigger
    assertTrue("Failed to trigger", tracker.triggerStackTraceSample(jobVertex));
    verify(sampleCoordinator).triggerStackTraceSample(eq(taskVertices), eq(numSamples), eq(delayBetweenSamples), eq(BackPressureStatsTracker.MAX_STACK_TRACE_DEPTH));
    // Trigger again for pending request, should not fire
    assertFalse("Unexpected trigger", tracker.triggerStackTraceSample(jobVertex));
    assertTrue(tracker.getOperatorBackPressureStats(jobVertex).isEmpty());
    verify(sampleCoordinator).triggerStackTraceSample(eq(taskVertices), eq(numSamples), eq(delayBetweenSamples), eq(BackPressureStatsTracker.MAX_STACK_TRACE_DEPTH));
    assertTrue(tracker.getOperatorBackPressureStats(jobVertex).isEmpty());
    // Complete the future
    Map<ExecutionAttemptID, List<StackTraceElement[]>> traces = new HashMap<>();
    for (ExecutionVertex vertex : taskVertices) {
        List<StackTraceElement[]> taskTraces = new ArrayList<>();
        for (int i = 0; i < taskVertices.length; i++) {
            // Traces until sub task index are back pressured
            taskTraces.add(createStackTrace(i <= vertex.getParallelSubtaskIndex()));
        }
        traces.put(vertex.getCurrentExecutionAttempt().getAttemptId(), taskTraces);
    }
    int sampleId = 1231;
    int endTime = 841;
    StackTraceSample sample = new StackTraceSample(sampleId, 0, endTime, traces);
    // Succeed the promise
    sampleFuture.complete(sample);
    assertTrue(tracker.getOperatorBackPressureStats(jobVertex).isDefined());
    OperatorBackPressureStats stats = tracker.getOperatorBackPressureStats(jobVertex).get();
    // Verify the stats
    assertEquals(sampleId, stats.getSampleId());
    assertEquals(endTime, stats.getEndTimestamp());
    assertEquals(taskVertices.length, stats.getNumberOfSubTasks());
    for (int i = 0; i < taskVertices.length; i++) {
        double ratio = stats.getBackPressureRatio(i);
        // Traces until sub task index are back pressured
        assertEquals((i + 1) / ((double) 4), ratio, 0.0);
    }
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) Time(org.apache.flink.api.common.time.Time) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Executor(java.util.concurrent.Executor) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)65 Test (org.junit.Test)47 JobID (org.apache.flink.api.common.JobID)42 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)41 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)23 IOException (java.io.IOException)15 Execution (org.apache.flink.runtime.executiongraph.Execution)15 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)15 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)12 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)12 HashMap (java.util.HashMap)10 ArrayList (java.util.ArrayList)8 TriggerStackTraceSample (org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample)8 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)7 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)5 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)5 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)5 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)5 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)5 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)5