Search in sources :

Example 21 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class TaskCancelAsyncProducerConsumerITCase method testCancelAsyncProducerAndConsumer.

/**
 * Tests that a task waiting on an async producer/consumer that is stuck
 * in a blocking buffer request can be properly cancelled.
 *
 * <p>This is currently required for the Flink Kafka sources, which spawn
 * a separate Thread consuming from Kafka and producing the intermediate
 * streams in the spawned Thread instead of the main task Thread.
 *
 * <p>Test outline: start a single-slot cluster with a small network buffer
 * configuration, submit a producer/consumer job, poll until the async producer
 * thread is inside a blocking buffer request and the async consumer thread is
 * in state {@code WAITING}, cancel the job, and finally verify that both async
 * threads recorded an {@link IllegalStateException}.
 *
 * @throws Exception if the cluster cannot be started, a future times out, or
 *         the test thread is interrupted while polling
 */
@Test
public void testCancelAsyncProducerAndConsumer() throws Exception {
    // Single deadline shared by all blocking calls of this test
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    TestingCluster flink = null;
    try {
        // Cluster
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
        config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, 8);
        flink = new TestingCluster(config, true);
        flink.start();
        // Job with async producer and consumer
        JobVertex producer = new JobVertex("AsyncProducer");
        producer.setParallelism(1);
        producer.setInvokableClass(AsyncProducer.class);
        JobVertex consumer = new JobVertex("AsyncConsumer");
        consumer.setParallelism(1);
        consumer.setInvokableClass(AsyncConsumer.class);
        consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
        // Both vertices share one slot group (the cluster is configured with a single slot)
        SlotSharingGroup slot = new SlotSharingGroup(producer.getID(), consumer.getID());
        producer.setSlotSharingGroup(slot);
        consumer.setSlotSharingGroup(slot);
        JobGraph jobGraph = new JobGraph(producer, consumer);
        // Submit job and wait until running
        ActorGateway jobManager = flink.getLeaderGateway(deadline.timeLeft());
        flink.submitJobDetached(jobGraph);
        Object msg = new WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Future<?> runningFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(runningFuture, deadline.timeLeft());
        // Register for the CANCELED notification before cancelling, so it cannot be missed
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<?> cancelledFuture = jobManager.ask(msg, deadline.timeLeft());
        // Poll (up to 50 x 500 ms) until the async producer is stuck in a blocking buffer request
        boolean producerBlocked = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_PRODUCER_THREAD;
            if (thread != null && thread.isAlive()) {
                StackTraceElement[] stackTrace = thread.getStackTrace();
                producerBlocked = isInBlockingBufferRequest(stackTrace);
            }
            if (producerBlocked) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async producer is in blocking request.
        // The failure message reports the PRODUCER thread's stack trace and is built
        // null-safely, because the thread reference may never have been set.
        Thread producerThread = ASYNC_PRODUCER_THREAD;
        String producerTrace = producerThread == null
                ? "<producer thread not started>"
                : Arrays.toString(producerThread.getStackTrace());
        assertTrue("Producer thread is not blocked: " + producerTrace, producerBlocked);
        // Poll (up to 50 x 500 ms) until the async consumer thread is in state WAITING
        boolean consumerWaiting = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_CONSUMER_THREAD;
            if (thread != null && thread.isAlive()) {
                consumerWaiting = thread.getState() == Thread.State.WAITING;
            }
            if (consumerWaiting) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async consumer is waiting
        assertTrue("Consumer thread is not blocked.", consumerWaiting);
        // Cancel the job, then wait for both the cancel response and the CANCELED status
        msg = new CancelJob(jobGraph.getJobID());
        Future<?> cancelFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(cancelFuture, deadline.timeLeft());
        Await.ready(cancelledFuture, deadline.timeLeft());
        // Verify the expected Exceptions
        assertNotNull(ASYNC_PRODUCER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
        assertNotNull(ASYNC_CONSUMER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)

Example 22 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class RescalingITCase method setup.

/**
 * Starts the {@link TestingCluster} shared by all tests of this class.
 *
 * <p>The cluster is configured with {@code numTaskManagers} task managers of
 * {@code slotsPerTaskManager} slots each, the {@code "filesystem"} state backend,
 * and freshly created temporary folders for checkpoints and savepoints.
 *
 * @throws Exception if a temporary folder cannot be created or the cluster fails to start
 */
@BeforeClass
public static void setup() throws Exception {
    // Storage locations (checkpoint folder is created first, then the savepoint folder)
    final File checkpointFolder = temporaryFolder.newFolder();
    final File savepointFolder = temporaryFolder.newFolder();
    final String checkpointUri = checkpointFolder.toURI().toString();
    final String savepointUri = savepointFolder.toURI().toString();

    final Configuration clusterConfig = new Configuration();

    // Cluster size
    clusterConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
    clusterConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, slotsPerTaskManager);

    // State backend and its directories
    clusterConfig.setString(CoreOptions.STATE_BACKEND, "filesystem");
    clusterConfig.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointUri);
    clusterConfig.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointUri);

    cluster = new TestingCluster(clusterConfig);
    cluster.start();
}
Also used : TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) File(java.io.File) BeforeClass(org.junit.BeforeClass)

Example 23 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class JobManagerTest method testStopSignalFail.

/**
 * Verifies that sending {@code StopJob} for a job whose only vertex runs
 * {@code BlockingNoOpInvokable} is answered with a {@code StoppingFailure}.
 *
 * <p>After the failure, the execution graph of the job is requested and an
 * {@code ExecutionGraphFound} reply is expected.
 *
 * <p>The whole exchange runs inside a {@code Within} block of 15 seconds, and the
 * testing cluster is shut down in a {@code finally} block regardless of the outcome.
 */
@Test
public void testStopSignalFail() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        // NOTE(review): the meaning of the arguments (2, 1) is defined by
                        // TestingUtils.startTestingCluster, which is not visible here
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        final JobGraph jobGraph = new JobGraph("Non-Stoppable batching test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        // (replies are delivered to the test actor and consumed by expectMsgClass)
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunning(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        // Ask the JobManager to stop the running job
                        jobManagerGateway.tell(new StopJob(jid), testActorGateway);
                        // - The test ----------------------------------------------------------------------
                        // The stop request must be rejected
                        expectMsgClass(StoppingFailure.class);
                        // The execution graph must still be retrievable after the failed stop
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        expectMsgClass(ExecutionGraphFound.class);
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) StopJob(org.apache.flink.runtime.messages.JobManagerMessages.StopJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 24 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class PartialConsumePipelinedResultTest method setUp.

/**
 * Creates and starts the {@link TestingCluster} shared by all tests of this class.
 *
 * <p>Task manager count, slots per task manager and the number of network buffers
 * come from the class constants; the Akka ask timeout is taken from
 * {@code TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT()}.
 *
 * @throws Exception if the cluster fails to start
 */
@BeforeClass
public static void setUp() throws Exception {
    final Configuration clusterConfig = new Configuration();

    // Cluster size
    clusterConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, NUMBER_OF_TMS);
    clusterConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, NUMBER_OF_SLOTS_PER_TM);

    // Timeouts and network resources
    final String askTimeout = TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT();
    clusterConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, askTimeout);
    clusterConfig.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, NUMBER_OF_NETWORK_BUFFERS);

    // Publish the cluster in the static field before starting it
    flink = new TestingCluster(clusterConfig, true);
    flink.start();
}
Also used : TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) BeforeClass(org.junit.BeforeClass)

Example 25 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class ZooKeeperLeaderElectionITCase method testJobExecutionOnClusterWithLeaderReelection.

/**
 * Tests that a job can be executed after a new leader has been elected. For all except for the
 * last leader, the job is blocking. The JobManager will be terminated while executing the
 * blocking job. Once only one JobManager is left, it is checked that a non-blocking job can be
 * successfully executed.
 *
 * <p>The job is submitted from a separate thread ({@code JobSubmitterRunnable}); the test
 * thread repeatedly looks up the current leader, triggers job recovery manually, and kills
 * every leader except the last one.
 *
 * @throws Exception if the cluster cannot be started, a future times out, or the job
 *         submission fails
 */
@Test
public void testJobExecutionOnClusterWithLeaderReelection() throws Exception {
    int numJMs = 10;
    int numTMs = 2;
    int numSlotsPerTM = 3;
    int parallelism = numTMs * numSlotsPerTM;
    File rootFolder = tempFolder.getRoot();
    Configuration configuration = ZooKeeperTestUtils.createZooKeeperHAConfig(zkServer.getConnectString(), rootFolder.getPath());
    configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs);
    configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
    configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTM);
    // we "effectively" disable the automatic RecoverAllJobs message and send it manually to make
    // sure that all TMs have registered to the JM prior to issuing the RecoverAllJobs message
    // NOTE(review): the key set here is AKKA_ASK_TIMEOUT - confirm this is the setting that
    // controls the automatic recovery delay
    configuration.setString(ConfigConstants.AKKA_ASK_TIMEOUT, AkkaUtils.INF_TIMEOUT().toString());
    // Make the receiver block until the flag is cleared further below
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);
    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");
    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);
    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);
    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);
    final JobGraph graph = new JobGraph("Blocking test job", sender, receiver);
    final TestingCluster cluster = new TestingCluster(configuration);
    ActorSystem clientActorSystem = null;
    Thread thread = null;
    JobSubmitterRunnable jobSubmission = null;
    try {
        cluster.start();
        clientActorSystem = cluster.startJobClientActorSystem(graph.getJobID());
        final ActorSystem clientAS = clientActorSystem;
        // Submit the job from a separate thread so this thread can kill leaders meanwhile
        jobSubmission = new JobSubmitterRunnable(clientAS, cluster, graph);
        thread = new Thread(jobSubmission);
        thread.start();
        Deadline deadline = timeout.$times(3).fromNow();
        // Kill all JobManagers except for the last one
        for (int i = 0; i < numJMs; i++) {
            ActorGateway jm = cluster.getLeaderGateway(deadline.timeLeft());
            cluster.waitForTaskManagersToBeRegisteredAtJobManager(jm.actor());
            // recover all jobs, sent manually
            log.info("Sent recover all jobs manually to job manager {}.", jm.path());
            jm.tell(JobManagerMessages.getRecoverAllJobs());
            if (i < numJMs - 1) {
                Future<Object> future = jm.ask(new WaitForAllVerticesToBeRunningOrFinished(graph.getJobID()), deadline.timeLeft());
                Await.ready(future, deadline.timeLeft());
                cluster.clearLeader();
                if (i == numJMs - 2) {
                    // Last kill: from now on the job must no longer block, so that the
                    // remaining JobManager can run it to completion
                    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
                }
                log.info("Kill job manager {}.", jm.path());
                jm.tell(TestingJobManagerMessages.getDisablePostStop());
                jm.tell(Kill.getInstance());
            }
        }
        log.info("Waiting for submitter thread to terminate.");
        // Thread.join(0) waits forever and a negative timeout throws IllegalArgumentException,
        // so clamp the remaining time to at least 1 ms; an expired deadline then surfaces
        // through the isAlive() check below instead
        long joinTimeoutMillis = Math.max(1L, deadline.timeLeft().toMillis());
        thread.join(joinTimeoutMillis);
        log.info("Submitter thread has terminated.");
        if (thread.isAlive()) {
            fail("The job submission thread did not stop (meaning it did not succeed in executing the test job).");
        }
        Await.result(jobSubmission.resultPromise.future(), deadline.timeLeft());
    } finally {
        if (clientActorSystem != null) {
            cluster.shutdownJobClientActorSystem(clientActorSystem);
        }
        if (thread != null && thread.isAlive()) {
            // Signal the still-running submitter to finish
            jobSubmission.finished = true;
        }
        cluster.stop();
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) Tasks(org.apache.flink.runtime.jobmanager.Tasks) Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) File(java.io.File) Test(org.junit.Test)

Aggregations

TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster)25 Configuration (org.apache.flink.configuration.Configuration)20 Test (org.junit.Test)19 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)15 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)14 Deadline (scala.concurrent.duration.Deadline)12 File (java.io.File)11 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)10 JobID (org.apache.flink.api.common.JobID)7 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)7 JavaTestKit (akka.testkit.JavaTestKit)6 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)6 FiniteDuration (scala.concurrent.duration.FiniteDuration)6 BeforeClass (org.junit.BeforeClass)5 TestingUtils.startTestingCluster (org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster)4 Scanner (java.util.Scanner)3 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)3 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)3 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)3 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)3