Search in sources :

Example 1 with BlockingNoOpInvokable

Use of org.apache.flink.runtime.testtasks.BlockingNoOpInvokable in the Apache Flink project.

Class TaskManagerTest, method testTriggerStackTraceSampleMessage.

// ------------------------------------------------------------------------
// Stack trace sample
// ------------------------------------------------------------------------
/**
 * Tests sampling of task stack traces.
 *
 * <p>A single {@link BlockingNoOpInvokable} task is submitted to a freshly created task
 * manager, after which four scenarios are exercised in order:
 * <ol>
 *   <li>a sample request for an unknown execution attempt must be answered with a
 *       {@code Status.Failure} caused by an {@link IllegalStateException};</li>
 *   <li>a sample request for the blocking task must return the requested number of
 *       traces, each containing {@code BlockingNoOpInvokable#invoke};</li>
 *   <li>a sample request with a maximum depth must return traces of exactly that depth;</li>
 *   <li>a long-running sample request is issued and the task is cancelled while the
 *       sample is in progress.</li>
 * </ol>
 *
 * <p>Both the job manager and the task manager actors are stopped in the {@code finally}
 * block, whether or not the scenarios succeed.
 */
@Test
@SuppressWarnings("unchecked")
public void testTriggerStackTraceSampleMessage() throws Exception {
    new JavaTestKit(system) {

        {
            ActorGateway taskManagerActorGateway = null;
            // We need this to be a JM that answers to update messages for
            // robustness on Travis (if jobs need to be resubmitted in (4)).
            ActorRef jm = system.actorOf(Props.create(new SimpleLookupJobManagerCreator(null)));
            ActorGateway jobManagerActorGateway = new AkkaActorGateway(jm, null);
            final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                final ActorGateway jobManager = jobManagerActorGateway;
                final ActorGateway taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, false);
                // Remember the gateway so that the finally block really stops the task
                // manager; previously the variable stayed null and the actor was leaked.
                taskManagerActorGateway = taskManager;
                final JobID jobId = new JobID();
                // Single blocking task
                final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jobId, "Job", new JobVertexID(), new ExecutionAttemptID(), new SerializedValue<>(new ExecutionConfig()), "Task", 1, 0, 1, 0, new Configuration(), new Configuration(), BlockingNoOpInvokable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
                // Submit the task
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            // Make sure to register
                            Future<?> connectFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor()), remaining());
                            Await.ready(connectFuture, remaining());
                            // Register the "running" notification before submitting so that
                            // the state transition cannot be missed.
                            Future<Object> taskRunningFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(tdd.getExecutionAttemptId()), timeout);
                            taskManager.tell(new SubmitTask(tdd));
                            Await.ready(taskRunningFuture, d);
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
                //
                // 1) Trigger sample for non-existing task
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            // A fresh ID that no submitted task uses
                            ExecutionAttemptID taskId = new ExecutionAttemptID();
                            taskManager.tell(new TriggerStackTraceSample(112223, taskId, 100, timeD, 0), testActorGateway);
                            // Receive the expected message (heartbeat races possible)
                            Object[] msg = receiveN(1);
                            while (!(msg[0] instanceof Status.Failure)) {
                                msg = receiveN(1);
                            }
                            Status.Failure response = (Status.Failure) msg[0];
                            assertEquals(IllegalStateException.class, response.cause().getClass());
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
                //
                // 2) Trigger sample for the blocking task
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        boolean success = false;
                        Throwable lastError = null;
                        // Retry up to 100 times; an attempt succeeds only if every
                        // returned trace contains BlockingNoOpInvokable#invoke.
                        for (int i = 0; i < 100 && !success; i++) {
                            try {
                                int numSamples = 5;
                                taskManager.tell(new TriggerStackTraceSample(19230, tdd.getExecutionAttemptId(), numSamples, Time.milliseconds(100L), 0), testActorGateway);
                                // Receive the expected message (heartbeat races possible)
                                Object[] msg = receiveN(1);
                                while (!(msg[0] instanceof StackTraceSampleResponse)) {
                                    msg = receiveN(1);
                                }
                                StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
                                // ---- Verify response ----
                                assertEquals(19230, response.getSampleId());
                                assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
                                List<StackTraceElement[]> traces = response.getSamples();
                                assertEquals("Number of samples", numSamples, traces.size());
                                for (StackTraceElement[] trace : traces) {
                                    // Look for BlockingNoOpInvokable#invoke. The flag is tracked
                                    // per trace so that a match in an earlier trace cannot mask
                                    // a later trace that lacks the invokable.
                                    boolean foundInvokable = false;
                                    for (StackTraceElement elem : trace) {
                                        if (elem.getClassName().equals(BlockingNoOpInvokable.class.getName())) {
                                            assertEquals("invoke", elem.getMethodName());
                                            foundInvokable = true;
                                            break;
                                        }
                                    }
                                    assertTrue("Unexpected stack trace: " + Arrays.toString(trace), foundInvokable);
                                }
                                // All traces verified
                                success = true;
                            } catch (Throwable t) {
                                // Assertion errors are caught as well so the attempt can be retried
                                lastError = t;
                                LOG.warn("Failed to find invokable.", t);
                            }
                            if (!success) {
                                // Back off only when another attempt is going to follow
                                try {
                                    Thread.sleep(100);
                                } catch (InterruptedException e) {
                                    LOG.error("Interrupted while sleeping before retry.", e);
                                    // Preserve the interrupt status for code further up the stack
                                    Thread.currentThread().interrupt();
                                    break;
                                }
                            }
                        }
                        if (!success) {
                            if (lastError == null) {
                                fail("Failed to find invokable");
                            } else {
                                fail(lastError.getMessage());
                            }
                        }
                    }
                };
                //
                // 3) Trigger sample for the blocking task with max depth
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            int numSamples = 5;
                            int maxDepth = 2;
                            taskManager.tell(new TriggerStackTraceSample(1337, tdd.getExecutionAttemptId(), numSamples, Time.milliseconds(100L), maxDepth), testActorGateway);
                            // Receive the expected message (heartbeat races possible)
                            Object[] msg = receiveN(1);
                            while (!(msg[0] instanceof StackTraceSampleResponse)) {
                                msg = receiveN(1);
                            }
                            StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
                            // ---- Verify response ----
                            assertEquals(1337, response.getSampleId());
                            assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
                            List<StackTraceElement[]> traces = response.getSamples();
                            assertEquals("Number of samples", numSamples, traces.size());
                            for (StackTraceElement[] trace : traces) {
                                assertEquals("Max depth", maxDepth, trace.length);
                            }
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
                //
                // 4) Trigger sample for the blocking task, but cancel it during sampling
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            int maxAttempts = 10;
                            int sleepTime = 100;
                            // The sleep before cancelling doubles on every attempt.
                            // NOTE(review): if all attempts end in a failure reply, this block
                            // returns without having verified a response - confirm that a
                            // silent pass is intended here.
                            for (int i = 0; i < maxAttempts; i++, sleepTime *= 2) {
                                // Trigger many samples in order to cancel the task
                                // during a sample
                                taskManager.tell(new TriggerStackTraceSample(44, tdd.getExecutionAttemptId(), Integer.MAX_VALUE, Time.milliseconds(10L), 0), testActorGateway);
                                Thread.sleep(sleepTime);
                                Future<?> removeFuture = taskManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobId), remaining());
                                // Cancel the task
                                taskManager.tell(new CancelTask(tdd.getExecutionAttemptId()));
                                // Receive the expected message (heartbeat races possible)
                                while (true) {
                                    Object[] msg = receiveN(1);
                                    if (msg[0] instanceof StackTraceSampleResponse) {
                                        StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
                                        assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
                                        assertEquals(44, response.getSampleId());
                                        // Done
                                        return;
                                    } else if (msg[0] instanceof Status.Failure || msg[0] instanceof Failure) {
                                        // Step 1 receives failures as akka Status.Failure, so that
                                        // type is accepted here as well; checking only the
                                        // unqualified Failure would leave this retry path dead
                                        // if the reply is a Status.Failure.
                                        // Wait for removal before resubmitting
                                        Await.ready(removeFuture, remaining());
                                        Future<?> taskRunningFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(tdd.getExecutionAttemptId()), timeout);
                                        // Resubmit
                                        taskManager.tell(new SubmitTask(tdd));
                                        Await.ready(taskRunningFuture, remaining());
                                        // Retry the sample message
                                        break;
                                    } else {
                                        // Different message
                                        continue;
                                    }
                                }
                            }
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } finally {
                TestingUtils.stopActor(taskManagerActorGateway);
                TestingUtils.stopActor(jobManagerActorGateway);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TriggerStackTraceSample(org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) StackTraceSampleResponse(org.apache.flink.runtime.messages.StackTraceSampleResponse) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) Failure(scala.util.Failure) Status(akka.actor.Status) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) CompletableFuture(org.apache.flink.runtime.concurrent.CompletableFuture) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) Future(scala.concurrent.Future) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ActorRef (akka.actor.ActorRef)1 Status (akka.actor.Status)1 JavaTestKit (akka.testkit.JavaTestKit)1 IOException (java.io.IOException)1 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1 JobID (org.apache.flink.api.common.JobID)1 Configuration (org.apache.flink.configuration.Configuration)1 CompletableFuture (org.apache.flink.runtime.concurrent.CompletableFuture)1 FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)1 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)1 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)1 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)1 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)1 PartitionNotFoundException (org.apache.flink.runtime.io.network.partition.PartitionNotFoundException)1 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)1 TriggerStackTraceSample (org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample)1 StackTraceSampleResponse (org.apache.flink.runtime.messages.StackTraceSampleResponse)1 CancelTask (org.apache.flink.runtime.messages.TaskMessages.CancelTask)1 SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask)1 TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration)1