Search in sources :

Example 6 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class JobManagerTest method testRequestPartitionStateMoreRecentExecutionAttempt.

/**
 * Tests the JobManager response when the execution is not registered with
 * the ExecutionGraph anymore and a new execution attempt is available.
 *
 * <p>The test submits a job with one finishing and one blocking sender, waits
 * until the finishing sender's vertex reports {@code FINISHED}, remembers the
 * partition ID of that (old) execution attempt, resets the vertex so that a new
 * attempt exists, and then asks the JobManager for the producer state of the old
 * attempt. The request is expected to fail with a
 * {@code PartitionProducerDisposedException}.
 *
 * @throws Exception declared for the test runner; failures inside the test kit
 *                   surface as {@link AssertionError}s
 */
@Test
public void testRequestPartitionStateMoreRecentExecutionAttempt() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Vertex whose partition state is requested later on
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just finish
                        sender.setInvokableClass(NoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobVertex sender2 = new JobVertex("Blocking Sender");
                        sender2.setParallelism(1);
                        // just block
                        sender2.setInvokableClass(BlockingNoOpInvokable.class);
                        sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // the leader session ID can be null because this gateway is only used as
                        // the sender (reply target) and never to send messages itself
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobManagerMessages.JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(TestingJobManagerMessages.AllVerticesRunning.class);
                        Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jid), remaining());
                        ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
                        ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
                        ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        // Poll until the sender has finished, but never longer than the time left
                        // in the enclosing Within block; an unbounded loop would hang the test
                        // forever if the vertex never reaches FINISHED.
                        final long waitDeadlineNanos = System.nanoTime() + remaining().toNanos();
                        while (vertex.getExecutionState() != ExecutionState.FINISHED) {
                            if (System.nanoTime() - waitDeadlineNanos > 0) {
                                fail("Sender vertex did not reach state FINISHED within the remaining test time");
                            }
                            Thread.sleep(1);
                        }
                        IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        // Capture the partition ID while it still refers to the finished attempt
                        ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // Reset execution => new execution attempt
                        vertex.resetForNewExecution();
                        // Producer finished, request state
                        Object request = new JobManagerMessages.RequestPartitionProducerState(jid, rid, partitionId);
                        Future<?> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(producerStateFuture, getRemainingTime());
                            fail("Did not fail with expected Exception");
                        } catch (PartitionProducerDisposedException ignored) {
                            // expected: the requested attempt is no longer the current one
                        }
                    } catch (Exception e) {
                        // run() declares no checked exceptions, so convert to a test failure
                        // while keeping the original exception as the cause.
                        throw new AssertionError("Test failed with an unexpected exception: " + e, e);
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ExecutionGraphFound(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 7 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class JobManagerHAJobGraphRecoveryITCase method testJobPersistencyWhenJobManagerShutdown.

// ---------------------------------------------------------------------------------------------
/**
 * Checks that stopping the JobManager does not clean up the HA job.
 *
 * <p>A blocking job is submitted in detached mode to a cluster with one
 * JobManager and one TaskManager. Once the job is reported as running, the
 * cluster is shut down and the recovery state is verified afterwards.
 */
@Test
public void testJobPersistencyWhenJobManagerShutdown() throws Exception {
    Configuration haConfig = ZooKeeperTestUtils.createZooKeeperHAConfig(ZooKeeper.getConnectString(), FileStateBackendBasePath.getPath());
    // Cluster layout: a single JobManager and a single TaskManager
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 1);
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
    TestingCluster testingCluster = new TestingCluster(haConfig, false, false);
    try {
        final Deadline testDeadline = TestTimeOut.fromNow();
        // Bring up the JobManager and the TaskManager
        testingCluster.start(true);
        JobGraph blockingJob = createBlockingJobGraph();
        // Guard against shut down races with a restart strategy: should the
        // TM fail before the JM, the job could otherwise be failed, which
        // would lead to state removal.
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 100));
        blockingJob.setExecutionConfig(executionConfig);
        ActorGateway leader = testingCluster.getLeaderGateway(testDeadline.timeLeft());
        // Detached submission of the job
        leader.tell(new SubmitJob(blockingJob, ListeningBehaviour.DETACHED));
        // Block until the job is running
        JobManagerActorTestUtils.waitForJobStatus(blockingJob.getJobID(), JobStatus.RUNNING, leader, testDeadline.timeLeft());
    } finally {
        testingCluster.shutdown();
    }
    // After the JM has been shut down, the persisted job data must still be
    // present in ZooKeeper
    verifyRecoveryState(haConfig);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SubmittedJobGraph(org.apache.flink.runtime.jobmanager.SubmittedJobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) Test(org.junit.Test)

Example 8 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class JobManagerHAJobGraphRecoveryITCase method testSubmitJobToNonLeader.

/**
 * Checks that a job submitted to a non-leading JobManager is handled.
 *
 * <p>With two JobManagers running, the job is sent to the one that is not the
 * leader. The test then waits for the leader to report the job as running,
 * polls the non-leader until it answers with {@code JobNotFound}, cancels the
 * job via the leader and finally verifies that the recovery state is clean.
 */
@Test
public void testSubmitJobToNonLeader() throws Exception {
    Configuration haConfig = ZooKeeperTestUtils.createZooKeeperHAConfig(ZooKeeper.getConnectString(), FileStateBackendBasePath.getPath());
    // Cluster layout: two JobManagers, one TaskManager
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
    TestingCluster testingCluster = new TestingCluster(haConfig, false, false);
    try {
        final Deadline testDeadline = TestTimeOut.fromNow();
        // Bring up the JobManagers and the TaskManager
        testingCluster.start(true);
        JobGraph blockingJob = createBlockingJobGraph();
        List<ActorRef> jobManagers = testingCluster.getJobManagersAsJava();
        ActorGateway leader = testingCluster.getLeaderGateway(testDeadline.timeLeft());
        // Of the two JobManagers, pick the one that is not the leader
        int nonLeaderIndex = jobManagers.get(0).equals(leader.actor()) ? 1 : 0;
        ActorGateway nonLeader = new AkkaActorGateway(jobManagers.get(nonLeaderIndex), null);
        log.info("Leading job manager: " + leader);
        log.info("Non-leading job manager: " + nonLeader);
        // Send the job to the non-leader
        nonLeader.tell(new SubmitJob(blockingJob, ListeningBehaviour.DETACHED));
        log.info("Submitted job graph to " + nonLeader);
        // The behaviour under test: the job went to the non-leading JM, yet it
        // is the *leading* JM that is asked for the RUNNING status.
        JobManagerActorTestUtils.waitForJobStatus(blockingJob.getJobID(), JobStatus.RUNNING, leader, testDeadline.timeLeft());
        log.info("Wait that the non-leader removes the submitted job.");
        // Poll the **non-leading** JM until it no longer knows the job graph
        // or the deadline has passed.
        boolean removedFromNonLeader = false;
        while (testDeadline.hasTimeLeft()) {
            JobStatusResponse response = JobManagerActorTestUtils.requestJobStatus(blockingJob.getJobID(), nonLeader, testDeadline.timeLeft());
            if (response instanceof JobManagerMessages.JobNotFound) {
                removedFromNonLeader = true;
                break;
            }
            // Still known to the non-leader: log its status and retry shortly
            log.info(((JobManagerMessages.CurrentJobStatus) response).status().toString());
            Thread.sleep(100);
        }
        if (!removedFromNonLeader) {
            fail("Non-leading JM was still holding reference to the job graph.");
        }
        // Register for the removal notification before cancelling the job
        Future<Object> removalNotification = leader.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(blockingJob.getJobID()), testDeadline.timeLeft());
        leader.tell(new JobManagerMessages.CancelJob(blockingJob.getJobID()));
        Await.ready(removalNotification, testDeadline.timeLeft());
    } finally {
        testingCluster.shutdown();
    }
    // Nothing must be left behind in the recovery state
    verifyCleanRecoveryState(haConfig);
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) JobStatusResponse(org.apache.flink.runtime.messages.JobManagerMessages.JobStatusResponse) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) TestActorRef(akka.testkit.TestActorRef) Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SubmittedJobGraph(org.apache.flink.runtime.jobmanager.SubmittedJobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) Test(org.junit.Test)

Example 9 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class ZooKeeperLeaderElectionITCase method testTaskManagerRegistrationAtReelectedLeader.

/**
 * Checks that, after the current leader has been terminated, the TaskManagers
 * register successfully at the newly elected leader.
 *
 * <p>The check is repeated once per configured JobManager: wait for the
 * TaskManagers to register at the current leader, compare the registered
 * count, then clear the leader and send it a {@code PoisonPill}.
 */
@Test
public void testTaskManagerRegistrationAtReelectedLeader() throws Exception {
    final int jobManagerCount = 10;
    final int taskManagerCount = 3;
    Configuration haConfig = ZooKeeperTestUtils.createZooKeeperHAConfig(zkServer.getConnectString(), tempFolder.getRoot().getPath());
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, jobManagerCount);
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, taskManagerCount);
    TestingCluster testingCluster = new TestingCluster(haConfig);
    try {
        testingCluster.start();
        // One round per JobManager
        for (int roundsLeft = jobManagerCount; roundsLeft > 0; roundsLeft--) {
            ActorGateway currentLeader = testingCluster.getLeaderGateway(timeout);
            testingCluster.waitForTaskManagersToBeRegisteredAtJobManager(currentLeader.actor());
            Future<Object> registeredCountFuture = currentLeader.ask(JobManagerMessages.getRequestNumberRegisteredTaskManager(), timeout);
            Integer registeredCount = (Integer) Await.result(registeredCountFuture, timeout);
            assertEquals(taskManagerCount, registeredCount.intValue());
            // Clear the leader and terminate it before the next round
            testingCluster.clearLeader();
            currentLeader.tell(PoisonPill.getInstance());
        }
    } finally {
        testingCluster.stop();
    }
}
Also used : TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) File(java.io.File) Test(org.junit.Test)

Example 10 with TestingCluster

use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

the class AbstractQueryableStateITCase method setup.

/**
 * Starts the test cluster and creates the test actor system before the tests
 * of this class run.
 *
 * <p>The cluster is configured with {@code NUM_TMS} TaskManagers,
 * {@code NUM_SLOTS_PER_TM} slots each, the queryable state server enabled, and
 * a single network thread for both the queryable state client and server.
 *
 * @throws AssertionError if the cluster or the actor system cannot be started;
 *                        the original exception is attached as the cause
 */
@BeforeClass
public static void setup() {
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 4);
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, NUM_TMS);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, NUM_SLOTS_PER_TM);
        config.setInteger(QueryableStateOptions.CLIENT_NETWORK_THREADS, 1);
        config.setBoolean(QueryableStateOptions.SERVER_ENABLE, true);
        config.setInteger(QueryableStateOptions.SERVER_NETWORK_THREADS, 1);
        cluster = new TestingCluster(config, false);
        cluster.start(true);
        TEST_ACTOR_SYSTEM = AkkaUtils.createDefaultActorSystem();
    } catch (Exception e) {
        // Fail the class setup but keep the original exception as the cause, so the
        // report still carries a message and stack trace even when getMessage() is null.
        throw new AssertionError("Failed to set up the test cluster: " + e, e);
    }
}
Also used : TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) BeforeClass(org.junit.BeforeClass)

Aggregations

TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster)25 Configuration (org.apache.flink.configuration.Configuration)20 Test (org.junit.Test)19 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)15 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)14 Deadline (scala.concurrent.duration.Deadline)12 File (java.io.File)11 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)10 JobID (org.apache.flink.api.common.JobID)7 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)7 JavaTestKit (akka.testkit.JavaTestKit)6 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)6 FiniteDuration (scala.concurrent.duration.FiniteDuration)6 BeforeClass (org.junit.BeforeClass)5 TestingUtils.startTestingCluster (org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster)4 Scanner (java.util.Scanner)3 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)3 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)3 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)3 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)3