Search in sources:

Example 1 with JobSubmissionResult

use of org.apache.flink.api.common.JobSubmissionResult in project flink by apache.

The class SavepointITCase, method testCanRestoreWithModifiedStatelessOperators.

/**
 * FLINK-5985
 *
 * <p>This test ensures we can restore from a savepoint under modifications to the job graph that only concern
 * stateless operators.
 *
 * <p>Phase one starts a cluster, runs a job containing the stateful counter (uid {@code "statefulCounter"})
 * surrounded by two stateless maps, triggers a savepoint and shuts the cluster down. Phase two restarts the
 * cluster and submits a job whose stateless operators differ but which keeps the same stateful operator uid,
 * restoring from the savepoint taken in phase one.
 *
 * @throws Exception if cluster start-up, job submission or the savepoint round trip fails
 */
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;
    // Test deadline
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    final File tmpDir = CommonTestUtils.createTempDirectory();
    final File savepointDir = new File(tmpDir, "savepoints");
    TestingCluster flink = null;
    String savepointPath;
    try {
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        LOG.info("Flink configuration: " + config + ".");
        // Start Flink
        flink = new TestingCluster(config);
        LOG.info("Starting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // Original graph: source -> stateless map (x4) -> stateful counter -> stateless map (x2) -> sink
        env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 4 * value;
            }
        }).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 2 * value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
        JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
        JobID jobID = submissionResult.getJobID();
        // wait for the Tasks to be ready
        // NOTE(review): the outcome of this timed await is not checked; if the latch is a
        // java.util.concurrent.CountDownLatch a timeout would go unnoticed here - confirm.
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
        // Ask the JobManager for the savepoint again; the casts fail the test if either response
        // is not the expected success message.
        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
    } finally {
        // flink is still null when the failure happened before the cluster was constructed;
        // without this guard a NullPointerException would replace the original exception.
        if (flink != null) {
            flink.shutdown();
            flink.awaitTermination();
        }
    }
    try {
        LOG.info("Restarting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        // Build the job for the second run
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // Modified graph: the leading stateless map is dropped and the trailing one is replaced by an
        // identity map; the stateful counter keeps its uid so its state can be matched on restore.
        env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
        // Submit the job
        flink.submitJobDetached(modifiedJobGraph);
        // Await state is restored
        // NOTE(review): as above, the outcome of these timed awaits is not checked - confirm
        // whether a timeout should fail the test explicitly.
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        flink.shutdown();
        flink.awaitTermination();
    }
}
Also used : RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) FileNotFoundException(java.io.FileNotFoundException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with JobSubmissionResult

use of org.apache.flink.api.common.JobSubmissionResult in project flink by apache.

The class ClientTest, method shouldSubmitToJobClient.

/**
 * This test verifies correct job submission messaging logic and plan translation calls.
 *
 * <p>A {@code SuccessReturningActor} is registered under the JobManager name, the program's plan is
 * submitted through a detached {@code StandaloneClusterClient}, and the returned submission result
 * must be non-null. Any exception is reported as a test failure.
 */
@Test
public void shouldSubmitToJobClient() {
    try {
        try {
            jobManagerSystem.actorOf(Props.create(SuccessReturningActor.class), JobManager.JOB_MANAGER_NAME());
            ClusterClient out = new StandaloneClusterClient(config);
            out.setDetached(true);
            JobSubmissionResult result = out.run(program.getPlanWithJars(), 1);
            assertNotNull(result);
        } finally {
            // Clean up extracted libraries even when submission or the assertion fails,
            // so a failing run does not leave them behind.
            program.deleteExtractedLibraries();
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Test(org.junit.Test)

Example 3 with JobSubmissionResult

use of org.apache.flink.api.common.JobSubmissionResult in project flink by apache.

The class CliFrontend, method executeProgram.

// --------------------------------------------------------------------------------------------
//  Interaction with programs and JobManager
// --------------------------------------------------------------------------------------------
/**
 * Runs the given program through the given client and reports the outcome to the log and stdout.
 *
 * <p>The program's extracted libraries are deleted once the run attempt has finished, whether it
 * succeeded or threw.
 *
 * @param program the packaged program to execute
 * @param client the client used to run the program
 * @param parallelism the parallelism passed to {@code client.run}
 * @return the value of the matching handler if the run threw one of the handled exceptions,
 *         {@code 1} if no submission result was returned, {@code 0} otherwise
 */
protected int executeProgram(PackagedProgram program, ClusterClient client, int parallelism) {
    logAndSysout("Starting execution of program");
    JobSubmissionResult submission;
    try {
        submission = client.run(program, parallelism);
    } catch (ProgramParametrizationException parametrizationError) {
        return handleParametrizationException(parametrizationError);
    } catch (ProgramMissingJobException missingJob) {
        return handleMissingJobException();
    } catch (ProgramInvocationException invocationError) {
        return handleError(invocationError);
    } finally {
        program.deleteExtractedLibraries();
    }

    // No result at all: the program never triggered an execution.
    if (submission == null) {
        logAndSysout("No JobSubmissionResult returned, please make sure you called " + "ExecutionEnvironment.execute()");
        return 1;
    }

    // Only a submission acknowledgement is available: report the job id and stop.
    if (!submission.isJobExecutionResult()) {
        logAndSysout("Job has been submitted with JobID " + submission.getJobID());
        return 0;
    }

    // A full execution result is available: print runtime and accumulators.
    logAndSysout("Program execution finished");
    JobExecutionResult finished = submission.getJobExecutionResult();
    System.out.println("Job with JobID " + finished.getJobID() + " has finished.");
    System.out.println("Job Runtime: " + finished.getNetRuntime() + " ms");
    Map<String, Object> accumulators = finished.getAllAccumulatorResults();
    if (!accumulators.isEmpty()) {
        System.out.println("Accumulator Results: ");
        System.out.println(AccumulatorHelper.getResultsFormated(accumulators));
    }
    return 0;
}
Also used : JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) ProgramParametrizationException(org.apache.flink.client.program.ProgramParametrizationException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) ProgramMissingJobException(org.apache.flink.client.program.ProgramMissingJobException)

Example 4 with JobSubmissionResult

use of org.apache.flink.api.common.JobSubmissionResult in project flink by apache.

The class ClusterClient, method runDetached.

/**
 * Submits a JobGraph in detached mode and returns as soon as the submission call completes.
 *
 * @param jobGraph The JobGraph to submit
 * @param classLoader User code class loader to deserialize the results and errors (may contain custom classes).
 * @return a JobSubmissionResult carrying the JobID of the submitted graph
 * @throws ProgramInvocationException if the JobManager gateway cannot be retrieved or the
 *         detached submission fails with a JobExecutionException
 */
public JobSubmissionResult runDetached(JobGraph jobGraph, ClassLoader classLoader) throws ProgramInvocationException {
    waitForClusterToBeReady();

    final ActorGateway gateway;
    try {
        gateway = getJobManagerGateway();
    } catch (Exception gatewayFailure) {
        throw new ProgramInvocationException("Failed to retrieve the JobManager gateway.", gatewayFailure);
    }

    try {
        final String announcement = "Submitting Job with JobID: " + jobGraph.getJobID() + ". Returning after job submission.";
        logAndSysout(announcement);
        JobClient.submitJobDetached(gateway, flinkConfig, jobGraph, timeout, classLoader);
        return new JobSubmissionResult(jobGraph.getJobID());
    } catch (JobExecutionException submissionFailure) {
        final String reason = "The program execution failed: " + submissionFailure.getMessage();
        throw new ProgramInvocationException(reason, submissionFailure);
    }
}
Also used : JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobRetrievalException(org.apache.flink.runtime.client.JobRetrievalException) URISyntaxException(java.net.URISyntaxException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) CompilerException(org.apache.flink.optimizer.CompilerException)

Example 5 with JobSubmissionResult

use of org.apache.flink.api.common.JobSubmissionResult in project flink by apache.

The class SavepointMigrationTestBase, method restoreAndExecute.

/**
 * Submits the job built from {@code env}, restoring from the given savepoint, and polls the job's
 * accumulators until every expected accumulator holds its expected value.
 *
 * <p>Fails the test if the expected values are not all observed while {@code DEADLINE} has time left.
 *
 * @param env the environment whose stream graph is submitted
 * @param savepointPath path of the savepoint to restore from
 * @param expectedAccumulators pairs of accumulator name and the value it must reach
 * @throws Exception if waiting for the leader, submitting the job or querying accumulators fails
 */
@SafeVarargs
protected final void restoreAndExecute(StreamExecutionEnvironment env, String savepointPath, Tuple2<String, Integer>... expectedAccumulators) throws Exception {
    // Wait for the leader gateway future to complete; its value is not used here
    Await.result(cluster.leaderGateway().future(), DEADLINE.timeLeft());

    // Build the job graph, point it at the savepoint and submit it detached
    JobGraph restoredGraph = env.getStreamGraph().getJobGraph();
    restoredGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
    JobSubmissionResult submission = cluster.submitJobDetached(restoredGraph);

    StandaloneClusterClient accumulatorClient = new StandaloneClusterClient(cluster.configuration());
    while (DEADLINE.hasTimeLeft()) {
        Thread.sleep(100);
        Map<String, Object> current = accumulatorClient.getAccumulators(submission.getJobID());

        boolean everythingMatches = true;
        for (Tuple2<String, Integer> expected : expectedAccumulators) {
            Integer observed = (Integer) current.get(expected.f0);
            // A missing accumulator or a differing value both mean "not there yet"
            if (observed == null || !observed.equals(expected.f1)) {
                everythingMatches = false;
                break;
            }
        }

        if (everythingMatches) {
            return;
        }
    }

    fail("Did not see the expected accumulator results within time limit.");
}
Also used : JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) StandaloneClusterClient(org.apache.flink.client.program.StandaloneClusterClient)

Aggregations

JobSubmissionResult (org.apache.flink.api.common.JobSubmissionResult): 7, ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 3, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 3, File (java.io.File): 2, IOException (java.io.IOException): 2, ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException): 2, StandaloneClusterClient (org.apache.flink.client.program.StandaloneClusterClient): 2, JobExecutionException (org.apache.flink.runtime.client.JobExecutionException): 2, Test (org.junit.Test): 2, FileNotFoundException (java.io.FileNotFoundException): 1, URI (java.net.URI): 1, URISyntaxException (java.net.URISyntaxException): 1, InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 1, JobExecutionResult (org.apache.flink.api.common.JobExecutionResult): 1, JobID (org.apache.flink.api.common.JobID): 1, MapFunction (org.apache.flink.api.common.functions.MapFunction): 1, RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 1, RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction): 1, ProgramMissingJobException (org.apache.flink.client.program.ProgramMissingJobException): 1, ProgramParametrizationException (org.apache.flink.client.program.ProgramParametrizationException): 1