Search in sources :

Example 6 with WaitForAllVerticesToBeRunning

use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning in project flink by apache.

the class TaskCancelAsyncProducerConsumerITCase method testCancelAsyncProducerAndConsumer.

/**
	 * Tests that a task waiting on an async producer/consumer that is stuck
	 * in a blocking buffer request can be properly cancelled.
	 *
	 * <p>This is currently required for the Flink Kafka sources, which spawn
	 * a separate Thread consuming from Kafka and producing the intermediate
	 * streams in the spawned Thread instead of the main task Thread.
	 */
@Test
public void testCancelAsyncProducerAndConsumer() throws Exception {
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    TestingCluster flink = null;
    try {
        // Cluster
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
        config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, 8);
        flink = new TestingCluster(config, true);
        flink.start();
        // Job with async producer and consumer
        JobVertex producer = new JobVertex("AsyncProducer");
        producer.setParallelism(1);
        producer.setInvokableClass(AsyncProducer.class);
        JobVertex consumer = new JobVertex("AsyncConsumer");
        consumer.setParallelism(1);
        consumer.setInvokableClass(AsyncConsumer.class);
        consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
        SlotSharingGroup slot = new SlotSharingGroup(producer.getID(), consumer.getID());
        producer.setSlotSharingGroup(slot);
        consumer.setSlotSharingGroup(slot);
        JobGraph jobGraph = new JobGraph(producer, consumer);
        // Submit job and wait until running
        ActorGateway jobManager = flink.getLeaderGateway(deadline.timeLeft());
        flink.submitJobDetached(jobGraph);
        Object msg = new WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Future<?> runningFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(runningFuture, deadline.timeLeft());
        // Wait for blocking requests, cancel and wait for cancellation
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<?> cancelledFuture = jobManager.ask(msg, deadline.timeLeft());
        boolean producerBlocked = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_PRODUCER_THREAD;
            if (thread != null && thread.isAlive()) {
                StackTraceElement[] stackTrace = thread.getStackTrace();
                producerBlocked = isInBlockingBufferRequest(stackTrace);
            }
            if (producerBlocked) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async producer is in blocking request
        assertTrue("Producer thread is not blocked: " + Arrays.toString(ASYNC_CONSUMER_THREAD.getStackTrace()), producerBlocked);
        boolean consumerWaiting = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_CONSUMER_THREAD;
            if (thread != null && thread.isAlive()) {
                consumerWaiting = thread.getState() == Thread.State.WAITING;
            }
            if (consumerWaiting) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async consumer is in blocking request
        assertTrue("Consumer thread is not blocked.", consumerWaiting);
        msg = new CancelJob(jobGraph.getJobID());
        Future<?> cancelFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(cancelFuture, deadline.timeLeft());
        Await.ready(cancelledFuture, deadline.timeLeft());
        // Verify the expected Exceptions
        assertNotNull(ASYNC_PRODUCER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
        assertNotNull(ASYNC_CONSUMER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)

Example 7 with WaitForAllVerticesToBeRunning

use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning in project flink by apache.

the class JobManagerTest method testStopSignalFail.

@Test
public void testStopSignalFail() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        final JobGraph jobGraph = new JobGraph("Non-Stoppable batching test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunning(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        jobManagerGateway.tell(new StopJob(jid), testActorGateway);
                        // - The test ----------------------------------------------------------------------
                        expectMsgClass(StoppingFailure.class);
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        expectMsgClass(ExecutionGraphFound.class);
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) StopJob(org.apache.flink.runtime.messages.JobManagerMessages.StopJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ActorGateway (org.apache.flink.runtime.instance.ActorGateway)7 WaitForAllVerticesToBeRunning (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning)7 Test (org.junit.Test)7 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)6 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)6 JavaTestKit (akka.testkit.JavaTestKit)5 FiniteDuration (scala.concurrent.duration.FiniteDuration)5 JobID (org.apache.flink.api.common.JobID)4 Configuration (org.apache.flink.configuration.Configuration)4 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)4 TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster)4 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)3 RequestExecutionGraph (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph)3 Deadline (scala.concurrent.duration.Deadline)3 File (java.io.File)2 ArrayList (java.util.ArrayList)2 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)2 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)2 CancelJob (org.apache.flink.runtime.messages.JobManagerMessages.CancelJob)2 DisposeSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint)2