Search in sources :

Example 21 with ProgramInvocationException

use of org.apache.flink.client.program.ProgramInvocationException in project flink by apache.

the class YarnClusterClientV2 method submitJob.

/**
 * Submits the given job by starting a YARN application master for it.
 *
 * <p>A new YARN application is created through {@code yarnClient} and handed, together with the
 * job graph, to the cluster descriptor's {@code startAppMaster}. If the returned application
 * report is in state {@code RUNNING}, the application id and tracking URL are recorded and a
 * result carrying the job id is returned.
 *
 * @param jobGraph the job to submit
 * @param classLoader not used by this implementation
 * @return a {@link JobSubmissionResult} holding the id of the submitted job
 * @throws ProgramInvocationException if the application is not {@code RUNNING} after start-up,
 *         or if any step of the submission throws; in the latter case the original exception
 *         is attached as the cause
 */
@Override
protected JobSubmissionResult submitJob(JobGraph jobGraph, ClassLoader classLoader) throws ProgramInvocationException {
    try {
        // Create application via yarnClient
        final YarnClientApplication yarnApplication = yarnClient.createApplication();
        final ApplicationReport report = this.clusterDescriptor.startAppMaster(jobGraph, yarnClient, yarnApplication);
        if (report.getYarnApplicationState() != YarnApplicationState.RUNNING) {
            throw new ProgramInvocationException("Fail to submit the job.");
        }
        appId = report.getApplicationId();
        trackingURL = report.getTrackingUrl();
        logAndSysout("Please refer to " + getWebInterfaceURL() + " for the running status of job " + jobGraph.getJobID().toString());
        //TODO: not support attach mode now
        return new JobSubmissionResult(jobGraph.getJobID());
    } catch (ProgramInvocationException e) {
        // Already the exception type callers expect; re-wrapping would only hide its message.
        throw e;
    } catch (Exception e) {
        // Chain the exception itself. Using e.getCause() here would drop the actual failure
        // and is frequently null, leaving the caller with no diagnostic information.
        throw new ProgramInvocationException("Fail to submit the job", e);
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) IOException(java.io.IOException)

Example 22 with ProgramInvocationException

use of org.apache.flink.client.program.ProgramInvocationException in project flink by apache.

the class CustomSerializationITCase method testIncorrectSerializer2.

/**
 * Runs a small job against the remote test cluster whose map function emits
 * {@code ConsumesTooMuchSpanning} records and verifies that execution fails.
 *
 * <p>The expected failure is a {@link ProgramInvocationException} whose cause's cause is an
 * {@link IOException} with a message containing "broken serialization". Any other exception,
 * or a job that completes without failing, makes the test fail.
 */
@Test
public void testIncorrectSerializer2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARLLELISM);
        env.getConfig().disableSysoutLogging();
        env.generateSequence(1, 10 * PARLLELISM).map(new MapFunction<Long, ConsumesTooMuchSpanning>() {

            @Override
            public ConsumesTooMuchSpanning map(Long value) throws Exception {
                return new ConsumesTooMuchSpanning();
            }
        }).rebalance().output(new DiscardingOutputFormat<ConsumesTooMuchSpanning>());
        env.execute();
        // Without this, a job that unexpectedly succeeds would let the test pass vacuously.
        // fail() throws an AssertionError, which neither catch clause below intercepts.
        fail("The job should have failed with a broken serialization error, but it finished successfully.");
    } catch (ProgramInvocationException e) {
        // The serialization error is expected two levels down the cause chain.
        Throwable rootCause = e.getCause().getCause();
        assertTrue(rootCause instanceof IOException);
        assertTrue(rootCause.getMessage().contains("broken serialization"));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) IOException(java.io.IOException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) IOException(java.io.IOException) Test(org.junit.Test)

Example 23 with ProgramInvocationException

use of org.apache.flink.client.program.ProgramInvocationException in project flink by apache.

the class ProcessFailureCancelingITCase method testCancelingOnProcessFailure.

/**
 * Checks that a job can be cancelled after the TaskManager process it would run on was killed.
 *
 * <p>The test starts JobManager actors in a local actor system, launches a single TaskManager
 * in a separate JVM process and waits for it to register. It then destroys that process,
 * immediately submits a job whose map function blocks forever, and cancels the job. The
 * submitting thread is expected to terminate within two minutes with a
 * {@link ProgramInvocationException}.
 *
 * <p>The test returns early without asserting anything if no java executable is found.
 */
@Test
public void testCancelingOnProcessFailure() {
    final StringWriter processOutput = new StringWriter();
    ActorSystem jmActorSystem = null;
    Process taskManagerProcess = null;
    try {
        // check that we run this test only if the java command
        // is available on this machine
        String javaCommand = getJavaCommandPath();
        if (javaCommand == null) {
            System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
            return;
        }
        // create a log4j configuration file for the TaskManager process
        File tempLogFile = File.createTempFile(getClass().getSimpleName() + "-", "-log4j.properties");
        tempLogFile.deleteOnExit();
        CommonTestUtils.printLog4jDebugConfig(tempLogFile);
        // find a free port to start the JobManager
        final int jobManagerPort = NetUtils.getAvailablePort();
        // start a JobManager
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
        Configuration jmConfig = new Configuration();
        // the heartbeat pause is set very high (2000 s) so that the TaskManager loss is not
        // detected via heartbeats within the duration of this test
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "5 s");
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "2000 s");
        jmConfig.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 10);
        jmConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100 s");
        jmActorSystem = AkkaUtils.createActorSystem(jmConfig, new Some<>(localAddress));
        ActorRef jmActor = JobManager.startJobManagerActors(jmConfig, jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
        // the TaskManager java command
        String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms80m", "-Xmx80m", "-classpath", getCurrentClasspath(), AbstractTaskManagerProcessFailureRecoveryTest.TaskManagerProcessEntryPoint.class.getName(), String.valueOf(jobManagerPort) };
        // start the (single) TaskManager process and forward its stderr into processOutput
        taskManagerProcess = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess.getErrorStream(), processOutput);
        // we wait for the JobManager to have the one TaskManager available
        // since some of the CI environments are very hostile, we need to give this a lot of time
        // (presumably the last argument is a timeout in milliseconds, i.e. 2 minutes)
        waitUntilNumTaskManagersAreRegistered(jmActor, 1, 120000);
        // single-element array so the anonymous Runnable can hand its failure back to this thread
        final Throwable[] errorRef = new Throwable[1];
        // define the test program, which blocks indefinitely in its map function;
        // it is only started further below, after the TaskManager has been killed
        Runnable programRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", jobManagerPort);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.getConfig().disableSysoutLogging();
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {

                        @Override
                        public Long map(Long value) throws Exception {
                            // block forever: nothing in this test notifies this monitor
                            synchronized (this) {
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<Long>());
                    env.execute();
                } catch (Throwable t) {
                    // record whatever the submission failed with for the assertions below
                    errorRef[0] = t;
                }
            }
        };
        Thread programThread = new Thread(programRunner);
        // kill the TaskManager; the reference is cleared so the finally block does not destroy it again
        taskManagerProcess.destroy();
        taskManagerProcess = null;
        // immediately submit the job. this should hit the case
        // where the JobManager still thinks it has the TaskManager and tries to send it tasks
        programThread.start();
        // try to cancel the job
        cancelRunningJob(jmActor);
        // we should see a failure within reasonable time (the ask timeout is configured to 100 s above).
        // since the CI environment is often slow, we conservatively give it up to 2 minutes
        // to fail, which is much lower than the failure time given by the heartbeats ( > 2000s)
        programThread.join(120000);
        assertFalse("The program did not cancel in time (2 minutes)", programThread.isAlive());
        Throwable error = errorRef[0];
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
        // all seems well :-)
    } catch (Exception e) {
        // dump the TaskManager's stderr to help diagnose the failure, then fail the test
        e.printStackTrace();
        printProcessLog("TaskManager", processOutput.toString());
        fail(e.getMessage());
    } catch (Error e) {
        // Errors (including assertion failures) are rethrown after dumping the process log
        e.printStackTrace();
        printProcessLog("TaskManager 1", processOutput.toString());
        throw e;
    } finally {
        // clean up whatever is still running
        if (taskManagerProcess != null) {
            taskManagerProcess.destroy();
        }
        if (jmActorSystem != null) {
            jmActorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobManager(org.apache.flink.runtime.jobmanager.JobManager) MapFunction(org.apache.flink.api.common.functions.MapFunction) StringWriter(java.io.StringWriter) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) TimeoutException(java.util.concurrent.TimeoutException) Some(scala.Some) Tuple2(scala.Tuple2) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) File(java.io.File) Test(org.junit.Test)

Aggregations

ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException)23 Test (org.junit.Test)13 IOException (java.io.IOException)8 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)8 Configuration (org.apache.flink.configuration.Configuration)7 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)5 File (java.io.File)4 URL (java.net.URL)4 Properties (java.util.Properties)3 PackagedProgram (org.apache.flink.client.program.PackagedProgram)3 Path (org.apache.flink.core.fs.Path)3 CompilerException (org.apache.flink.optimizer.CompilerException)3 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)3 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)3 FileNotFoundException (java.io.FileNotFoundException)2 StringWriter (java.io.StringWriter)2 MalformedURLException (java.net.MalformedURLException)2 ArrayList (java.util.ArrayList)2 JobSubmissionResult (org.apache.flink.api.common.JobSubmissionResult)2 ClusterClient (org.apache.flink.client.program.ClusterClient)2