Example 11 with TestingCluster

Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

The class SavepointITCase, method testCanRestoreWithModifiedStatelessOperators.

/**
 * FLINK-5985
 *
 * This test ensures that we can restore from a savepoint when the job graph has been
 * modified in ways that only concern stateless operators.
 */
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;
    // Test deadline
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    final File tmpDir = CommonTestUtils.createTempDirectory();
    final File savepointDir = new File(tmpDir, "savepoints");
    TestingCluster flink = null;
    String savepointPath;
    try {
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        LOG.info("Flink configuration: " + config + ".");
        // Start Flink
        flink = new TestingCluster(config);
        LOG.info("Starting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 4 * value;
            }
        }).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 2 * value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
        JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
        JobID jobID = submissionResult.getJobID();
        // wait for the Tasks to be ready
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
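        // Fetch the savepoint back from the job manager to verify that it can
        // actually be retrieved; the savepoint object itself is not needed here.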
        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
    } finally {
        // Guard against a NullPointerException if the cluster never started
        if (flink != null) {
            flink.shutdown();
            flink.awaitTermination();
        }
    }
    try {
        LOG.info("Restarting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        // Gather all task deployment descriptors
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // generate a modified job graph that adds a stateless op
        env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
        // Submit the job
        flink.submitJobDetached(modifiedJobGraph);
        // Await state is restored
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        flink.shutdown();
        flink.awaitTermination();
    }
}
Also used: RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) FileNotFoundException(java.io.FileNotFoundException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
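
The test above references two helpers, InfiniteTestSource and StatefulCounter, that are defined elsewhere in SavepointITCase and not shown on this page. The following is a minimal, hypothetical sketch of what such helpers could look like; the ListCheckpointed-based state handling and the static latch bookkeeping are assumptions, and the real implementations may differ.

import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.streaming.api.checkpoint.ListCheckpointed;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

// Hypothetical stand-in for InfiniteTestSource: emits records until cancelled,
// holding the checkpoint lock while collecting so snapshots stay consistent.
class InfiniteTestSource implements SourceFunction<Integer> {

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        while (running) {
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(1);
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

// Hypothetical stand-in for StatefulCounter: keeps a small piece of operator
// state and signals progress/restore to the test through static latches.
class StatefulCounter extends RichMapFunction<Integer, Integer>
        implements ListCheckpointed<byte[]> {

    private static volatile CountDownLatch progressLatch;
    private static volatile CountDownLatch restoreLatch;

    private byte[] data = new byte[]{0};

    static void resetForTest(int parallelism) {
        progressLatch = new CountDownLatch(parallelism);
        restoreLatch = new CountDownLatch(parallelism);
    }

    static CountDownLatch getProgressLatch() {
        return progressLatch;
    }

    static CountDownLatch getRestoreLatch() {
        return restoreLatch;
    }

    @Override
    public Integer map(Integer value) throws Exception {
        data[0]++;                 // touch the state on every record
        progressLatch.countDown(); // counting down past zero is a no-op
        return value;
    }

    @Override
    public List<byte[]> snapshotState(long checkpointId, long timestamp) {
        return Collections.singletonList(data);
    }

    @Override
    public void restoreState(List<byte[]> state) {
        data = state.get(0);
        restoreLatch.countDown(); // signal that this subtask restored state
    }
}

The latches let the test block until every parallel subtask has either processed a record (progress) or restored state, which is why resetForTest(parallelism) is called before each submission.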

Example 12 with TestingCluster

Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

The class AccumulatorLiveITCase, method before.

@Before
public void before() throws Exception {
    system = AkkaUtils.createLocalActorSystem(new Configuration());
    Configuration config = new Configuration();
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
    config.setString(ConfigConstants.AKKA_ASK_TIMEOUT, TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT());
    TestingCluster testingCluster = new TestingCluster(config, false, true);
    testingCluster.start();
    jobManagerGateway = testingCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
    taskManager = testingCluster.getTaskManagersAsJava().get(0);
    // generate test data
    for (int i = 0; i < NUM_ITERATIONS; i++) {
        inputData.add(i, String.valueOf(i + 1));
    }
    NotifyingMapper.finished = false;
}
Also used: TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) Before(org.junit.Before)
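
A matching teardown is implied but not shown. Below is a hedged sketch of what an @After counterpart could look like; note that the snippet above keeps the TestingCluster in a local variable, so an actual teardown would require storing it in a field (named testingCluster below, an assumption), and the real test's teardown may differ.

import akka.testkit.JavaTestKit;
import org.junit.After;

// Hypothetical teardown counterpart to the @Before above; the field
// `testingCluster` is assumed to exist (the snippet uses a local variable).
@After
public void after() throws Exception {
    if (testingCluster != null) {
        testingCluster.shutdown();  // stop JobManager and TaskManager actors
        testingCluster = null;
    }
    JavaTestKit.shutdownActorSystem(system);  // tear down the local actor system
    system = null;
}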

Example 13 with TestingCluster

Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

The class ClassLoaderITCase, method setUp.

@BeforeClass
public static void setUp() throws Exception {
    FOLDER.create();
    Configuration config = new Configuration();
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 2);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 2);
    parallelism = 4;
    // we need to use the "filesystem" state backend to ensure FLINK-2543 is not happening again.
    config.setString(CoreOptions.STATE_BACKEND, "filesystem");
    config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, FOLDER.newFolder().getAbsoluteFile().toURI().toString());
    // Savepoint path
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, FOLDER.newFolder().getAbsoluteFile().toURI().toString());
    testCluster = new TestingCluster(config, false);
    testCluster.start();
}
Also used: TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) BeforeClass(org.junit.BeforeClass)
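
The @BeforeClass above has a natural @AfterClass counterpart. A hedged sketch, assuming FOLDER is a JUnit TemporaryFolder as the create()/newFolder() calls suggest; the actual ClassLoaderITCase teardown may differ:

import org.junit.AfterClass;

// Hypothetical teardown: stop the cluster and delete the temporary folders
// created for the checkpoint and savepoint directories.
@AfterClass
public static void tearDown() throws Exception {
    if (testCluster != null) {
        testCluster.shutdown();
    }
    FOLDER.delete();
}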

Example 14 with TestingCluster

Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

The class JobRetrievalITCase, method before.

@BeforeClass
public static void before() {
    cluster = new TestingCluster(new Configuration(), false);
    cluster.start();
}
Also used: TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) Configuration(org.apache.flink.configuration.Configuration) BeforeClass(org.junit.BeforeClass)

Example 15 with TestingCluster

Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.

The class JobClientActorRecoveryITCase, method testJobClientRecovery.

/**
 * Tests whether the JobClientActor can connect to a newly elected leading job manager to
 * obtain the JobExecutionResult. The submitted job blocks for the first execution attempt.
 * The leading job manager is then killed so that the second job manager is elected as the
 * leader. The newly elected leader has to retrieve the checkpointed job from ZooKeeper and
 * continue its execution. This time the job does not block and can therefore finish. The
 * execution result should be sent to the JobClientActor which originally submitted the job.
 */
@Test
public void testJobClientRecovery() throws Exception {
    File rootFolder = tempFolder.getRoot();
    Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(zkServer.getConnectString(), rootFolder.getPath());
    config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
    final TestingCluster cluster = new TestingCluster(config);
    cluster.start();
    JobVertex blockingVertex = new JobVertex("Blocking Vertex");
    blockingVertex.setInvokableClass(BlockingTask.class);
    blockingVertex.setParallelism(1);
    final JobGraph jobGraph = new JobGraph("Blocking Test Job", blockingVertex);
    final Promise<JobExecutionResult> promise = new scala.concurrent.impl.Promise.DefaultPromise<>();
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    try {
        Thread submitter = new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    JobExecutionResult result = cluster.submitJobAndWait(jobGraph, false);
                    promise.success(result);
                } catch (Exception e) {
                    promise.failure(e);
                }
            }
        });
        submitter.start();
        synchronized (BlockingTask.waitLock) {
            while (BlockingTask.HasBlockedExecution < 1 && deadline.hasTimeLeft()) {
                BlockingTask.waitLock.wait(deadline.timeLeft().toMillis());
            }
        }
        if (deadline.isOverdue()) {
            Assert.fail("The job has not blocked within the given deadline.");
        }
        ActorGateway gateway = cluster.getLeaderGateway(deadline.timeLeft());
        gateway.tell(TestingJobManagerMessages.getDisablePostStop());
        gateway.tell(PoisonPill.getInstance());
        // if the job fails then an exception is thrown here
        Await.result(promise.future(), deadline.timeLeft());
    } finally {
        cluster.shutdown();
    }
}
Also used: Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) File(java.io.File) Test(org.junit.Test)
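
BlockingTask is referenced above but not shown on this page. A minimal, hypothetical sketch of such an invokable, assuming the era's no-argument AbstractInvokable base class; the real class in JobClientActorRecoveryITCase may differ:

import java.util.concurrent.CountDownLatch;

import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;

// Hypothetical sketch of BlockingTask: the first execution attempt announces
// itself and then blocks; after the leading JobManager is killed and the job
// is recovered, the next attempt falls through and finishes.
public class BlockingTask extends AbstractInvokable {

    public static final Object waitLock = new Object();
    public static volatile int HasBlockedExecution = 0;

    @Override
    public void invoke() throws Exception {
        if (HasBlockedExecution < 1) {
            synchronized (waitLock) {
                HasBlockedExecution++;
                // wake up the test thread polling HasBlockedExecution
                waitLock.notifyAll();
            }
            // block this attempt; it is cancelled when the JobManager dies
            new CountDownLatch(1).await();
        }
    }
}

Because the TestingCluster runs everything in one JVM, the static HasBlockedExecution counter survives the failover, so the recovered attempt skips the blocking branch and completes.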

Aggregations

TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster): 25
Configuration (org.apache.flink.configuration.Configuration): 20
Test (org.junit.Test): 19
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 15
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 14
Deadline (scala.concurrent.duration.Deadline): 12
File (java.io.File): 11
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 10
JobID (org.apache.flink.api.common.JobID): 7
AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 7
JavaTestKit (akka.testkit.JavaTestKit): 6
SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob): 6
FiniteDuration (scala.concurrent.duration.FiniteDuration): 6
BeforeClass (org.junit.BeforeClass): 5
TestingUtils.startTestingCluster (org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster): 4
Scanner (java.util.Scanner): 3
ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph): 3
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 3
IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition): 3
ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID): 3