Search in sources :

Example 6 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class StackTraceSampleCoordinatorTest method testCancelStackTraceSample.

/** Tests cancelling of a pending sample. */
@Test
public void testCancelStackTraceSample() throws Exception {
    ExecutionVertex[] vertices = new ExecutionVertex[] { mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true) };
    Future<StackTraceSample> sampleFuture = coord.triggerStackTraceSample(vertices, 1, Time.milliseconds(100L), 0);
    assertFalse(sampleFuture.isDone());
    // Cancel
    coord.cancelStackTraceSample(0, null);
    // Verify completed
    assertTrue(sampleFuture.isDone());
    // Verify no more pending samples
    assertEquals(0, coord.getNumberOfPendingSamples());
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TriggerStackTraceSample(org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Test(org.junit.Test)

Example 7 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class StackTraceSampleCoordinatorTest method testTriggerStackTraceSample.

/** Tests simple trigger and collect of stack trace samples. */
@Test
public void testTriggerStackTraceSample() throws Exception {
    ExecutionVertex[] vertices = new ExecutionVertex[] { mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true), mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true), mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true), mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true) };
    int numSamples = 1;
    Time delayBetweenSamples = Time.milliseconds(100L);
    int maxStackTraceDepth = 0;
    Future<StackTraceSample> sampleFuture = coord.triggerStackTraceSample(vertices, numSamples, delayBetweenSamples, maxStackTraceDepth);
    // Verify messages have been sent
    for (ExecutionVertex vertex : vertices) {
        ExecutionAttemptID expectedExecutionId = vertex.getCurrentExecutionAttempt().getAttemptId();
        TriggerStackTraceSample expectedMsg = new TriggerStackTraceSample(0, expectedExecutionId, numSamples, delayBetweenSamples, maxStackTraceDepth);
        verify(vertex.getCurrentExecutionAttempt()).requestStackTraceSample(eq(0), eq(numSamples), eq(delayBetweenSamples), eq(maxStackTraceDepth), any(Time.class));
    }
    assertFalse(sampleFuture.isDone());
    StackTraceElement[] stackTraceSample = Thread.currentThread().getStackTrace();
    List<StackTraceElement[]> traces = new ArrayList<>();
    traces.add(stackTraceSample);
    traces.add(stackTraceSample);
    traces.add(stackTraceSample);
    // Collect stack traces
    for (int i = 0; i < vertices.length; i++) {
        ExecutionAttemptID executionId = vertices[i].getCurrentExecutionAttempt().getAttemptId();
        coord.collectStackTraces(0, executionId, traces);
        if (i == vertices.length - 1) {
            assertTrue(sampleFuture.isDone());
        } else {
            assertFalse(sampleFuture.isDone());
        }
    }
    // Verify completed stack trace sample
    StackTraceSample sample = sampleFuture.get();
    assertEquals(0, sample.getSampleId());
    assertTrue(sample.getEndTime() >= sample.getStartTime());
    Map<ExecutionAttemptID, List<StackTraceElement[]>> tracesByTask = sample.getStackTraces();
    for (ExecutionVertex vertex : vertices) {
        ExecutionAttemptID executionId = vertex.getCurrentExecutionAttempt().getAttemptId();
        List<StackTraceElement[]> sampleTraces = tracesByTask.get(executionId);
        assertNotNull("Task not found", sampleTraces);
        assertTrue(traces.equals(sampleTraces));
    }
    // Verify no more pending sample
    assertEquals(0, coord.getNumberOfPendingSamples());
    // Verify no error on late collect
    coord.collectStackTraces(0, vertices[0].getCurrentExecutionAttempt().getAttemptId(), traces);
}
Also used : TriggerStackTraceSample(org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TriggerStackTraceSample(org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample) ArrayList(java.util.ArrayList) Time(org.apache.flink.api.common.time.Time) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 8 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class StackTraceSampleCoordinatorTest method testTriggerStackTraceSampleTimeout.

/** Tests that samples time out if they don't finish in time. */
@Test(timeout = 1000L)
public void testTriggerStackTraceSampleTimeout() throws Exception {
    int timeout = 100;
    coord = new StackTraceSampleCoordinator(system.dispatcher(), timeout);
    final ScheduledExecutorService scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
    try {
        ExecutionVertex[] vertices = new ExecutionVertex[] { mockExecutionVertexWithTimeout(new ExecutionAttemptID(), ExecutionState.RUNNING, scheduledExecutorService, timeout) };
        Future<StackTraceSample> sampleFuture = coord.triggerStackTraceSample(vertices, 1, Time.milliseconds(100L), 0);
        // Wait for the timeout
        Thread.sleep(timeout * 2);
        boolean success = false;
        for (int i = 0; i < 10; i++) {
            if (sampleFuture.isDone()) {
                success = true;
                break;
            }
            Thread.sleep(timeout);
        }
        assertTrue("Sample did not time out", success);
        try {
            sampleFuture.get();
            fail("Expected exception.");
        } catch (ExecutionException e) {
            assertTrue(e.getCause().getCause().getMessage().contains("Timeout"));
        }
        // Collect after the timeout (should be ignored)
        ExecutionAttemptID executionId = vertices[0].getCurrentExecutionAttempt().getAttemptId();
        coord.collectStackTraces(0, executionId, new ArrayList<StackTraceElement[]>());
    } finally {
        scheduledExecutorService.shutdownNow();
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) TriggerStackTraceSample(org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample) ExecutionException(java.util.concurrent.ExecutionException) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Test(org.junit.Test)

Example 9 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class TaskExecutor method updateTaskExecutionState.

private void updateTaskExecutionState(final UUID jobMasterLeaderId, final JobMasterGateway jobMasterGateway, final TaskExecutionState taskExecutionState) {
    final ExecutionAttemptID executionAttemptID = taskExecutionState.getID();
    Future<Acknowledge> futureAcknowledge = jobMasterGateway.updateTaskExecutionState(jobMasterLeaderId, taskExecutionState);
    futureAcknowledge.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {

        @Override
        public Void apply(Throwable value) {
            failTask(executionAttemptID, value);
            return null;
        }
    }, getMainThreadExecutor());
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge)

Example 10 with ExecutionAttemptID

use of org.apache.flink.runtime.executiongraph.ExecutionAttemptID in project flink by apache.

the class JobManagerTest method testRequestPartitionState.

/**
	 * Tests responses to partition state requests.
	 */
@Test
public void testRequestPartitionState() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Blocking test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        // This is the mock execution ID of the task requesting the state of the partition
                        final ExecutionAttemptID receiver = new ExecutionAttemptID();
                        // Request the execution graph to get the runtime info
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        final ExecutionGraph eg = (ExecutionGraph) expectMsgClass(ExecutionGraphFound.class).executionGraph();
                        final ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        final IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // - The test ----------------------------------------------------------------------
                        // 1. All execution states
                        RequestPartitionProducerState request = new RequestPartitionProducerState(jid, rid, partitionId);
                        for (ExecutionState state : ExecutionState.values()) {
                            ExecutionGraphTestUtils.setVertexState(vertex, state);
                            Future<ExecutionState> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                            ExecutionState resp = Await.result(futurePartitionState, getRemainingTime());
                            assertEquals(state, resp);
                        }
                        // 2. Non-existing execution
                        request = new RequestPartitionProducerState(jid, rid, new ResultPartitionID());
                        Future<?> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected RuntimeException");
                        } catch (RuntimeException e) {
                            assertEquals(IllegalArgumentException.class, e.getCause().getClass());
                        }
                        // 3. Non-existing job
                        request = new RequestPartitionProducerState(new JobID(), rid, new ResultPartitionID());
                        futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected IllegalArgumentException");
                        } catch (IllegalArgumentException ignored) {
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)233 Test (org.junit.Test)176 JobID (org.apache.flink.api.common.JobID)111 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)92 Configuration (org.apache.flink.configuration.Configuration)56 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)56 IOException (java.io.IOException)51 CompletableFuture (java.util.concurrent.CompletableFuture)43 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)38 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)38 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)36 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)35 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)34 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)34 ExecutionException (java.util.concurrent.ExecutionException)29 ArrayList (java.util.ArrayList)27 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)27 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)27 ExecutionState (org.apache.flink.runtime.execution.ExecutionState)26