Search in sources :

Example 16 with JobExecutionException

use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.

the class RescalingITCase method testSavepointRescalingNonPartitionedStateCausesException.

/**
 * Tests that a job cannot be restarted from a savepoint with a different parallelism if the
 * rescaled operator has non-partitioned state.
 *
 * <p>The job is first run with parallelism {@code numSlots / 2}; a savepoint is triggered and the
 * job is cancelled. It is then resubmitted with parallelism {@code numSlots}, restoring from that
 * savepoint. The resubmission must fail with a {@link JobExecutionException} whose cause is an
 * {@link IllegalStateException}; any other outcome, including a successful run, fails the test.
 *
 * @throws Exception if any step fails in a way other than the expected one
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
    final int parallelism = numSlots / 2;
    final int parallelism2 = numSlots;
    final int maxParallelism = 13;
    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();
    // Id of the job that may still be registered; reset to null whenever that job is known to be gone,
    // so that the finally block only waits for removal when there is something left to clean up.
    JobID jobID = null;
    ActorGateway jobManager = null;
    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());
        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        jobID = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();
        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
        Object savepointResponse = Await.result(savepointPathFuture, waitingTime);
        // use the response itself as the assertion message so an unexpected reply is visible in the report
        assertTrue(String.valueOf(savepointResponse), savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();
        // register for the removal notification before cancelling, then cancel and wait for both
        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());
        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);
        Await.ready(jobRemovedFuture, deadline.timeLeft());
        // job successfully removed
        jobID = null;
        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();
        cluster.submitJobAndWait(scaledJobGraph, false);
        jobID = null;
        // reaching this point means the rescaled job ran to completion, which contradicts what this test asserts
        fail("Expected the rescaled job with non-partitioned state to fail with an IllegalStateException.");
    } catch (JobExecutionException exception) {
        if (exception.getCause() instanceof IllegalStateException) {
            // we expect a IllegalStateException wrapped
            // in a JobExecutionException, because the job containing non-partitioned state
            // is being rescaled
        } else {
            throw exception;
        }
    } finally {
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (InterruptedException ie) {
                // restore the interrupt status before reporting the cleanup failure
                Thread.currentThread().interrupt();
                fail("Failed while cleaning up the cluster.");
            } catch (TimeoutException te) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobID(org.apache.flink.api.common.JobID) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 17 with JobExecutionException

use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.

the class StateBackendITCase method testStateBackendWithoutCheckpointing.

/**
 * Verify that the user-specified state backend is used even if checkpointing is disabled.
 *
 * <p>A {@code FailingStateBackend} is configured on the environment and the job's map function
 * requests keyed state in {@code open}. The job is expected to fail with a
 * {@link JobExecutionException} whose cause is a {@code SuccessException}.
 *
 * @throws Exception if the job fails in a way other than the expected one
 */
@Test
public void testStateBackendWithoutCheckpointing() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setStateBackend(new FailingStateBackend());

    // Mapper whose only purpose is to request keyed state when it is opened.
    final RichMapFunction<Tuple2<String, Integer>, String> stateRequestingMapper = new RichMapFunction<Tuple2<String, Integer>, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            getRuntimeContext().getState(new ValueStateDescriptor<Integer>("Test", Integer.class, 0));
        }

        @Override
        public String map(Tuple2<String, Integer> value) throws Exception {
            return value.f0;
        }
    };

    env.fromElements(new Tuple2<>("Hello", 1))
            .keyBy(0)
            .map(stateRequestingMapper)
            .print();

    try {
        env.execute();
        // the job must not complete normally
        fail();
    } catch (JobExecutionException e) {
        assertTrue("wrong exception", e.getCause() instanceof SuccessException);
    }
}
Also used : JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Configuration(org.apache.flink.configuration.Configuration) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 18 with JobExecutionException

use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.

the class StreamTaskTimerITCase method testOneInputOperatorWithoutChaining.

/**
 * Runs a job consisting of an {@code InfiniteTestSource} followed by a {@code TimerOperator}
 * created with {@code ChainingStrategy.NEVER}, and expects the job to fail with a
 * {@link JobExecutionException} caused by a {@link TimerException} that in turn wraps a
 * {@link RuntimeException} with the message {@code "TEST SUCCESS"}. Any other failure is rethrown.
 *
 * <p>Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 *
 * @throws Exception if the job fails with anything other than the expected exception chain
 */
@Test
public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);
    DataStream<String> source = env.addSource(new InfiniteTestSource());
    source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.NEVER));
    boolean testSuccess = false;
    try {
        env.execute("Timer test");
    } catch (JobExecutionException e) {
        // Expected chain: JobExecutionException -> TimerException -> RuntimeException("TEST SUCCESS")
        final Throwable timerFailure = e.getCause();
        final Throwable rootCause = timerFailure instanceof TimerException ? timerFailure.getCause() : null;
        // constant-first equals: a null message must surface the original exception, not a NullPointerException
        if (rootCause instanceof RuntimeException && "TEST SUCCESS".equals(rootCause.getMessage())) {
            testSuccess = true;
        } else {
            throw e;
        }
    }
    Assert.assertTrue(testSuccess);
}
Also used : TimerException(org.apache.flink.streaming.runtime.tasks.TimerException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 19 with JobExecutionException

use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.

the class StreamTaskTimerITCase method testOperatorChainedToSource.

/**
 * Runs a job consisting of an {@code InfiniteTestSource} followed by a {@code TimerOperator}
 * created with {@code ChainingStrategy.ALWAYS}, and expects the job to fail with a
 * {@link JobExecutionException} caused by a {@link TimerException} that in turn wraps a
 * {@link RuntimeException} with the message {@code "TEST SUCCESS"}. Any other failure is rethrown.
 *
 * <p>Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 *
 * @throws Exception if the job fails with anything other than the expected exception chain
 */
@Test
public void testOperatorChainedToSource() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);
    DataStream<String> source = env.addSource(new InfiniteTestSource());
    source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));
    boolean testSuccess = false;
    try {
        env.execute("Timer test");
    } catch (JobExecutionException e) {
        // Expected chain: JobExecutionException -> TimerException -> RuntimeException("TEST SUCCESS")
        final Throwable timerFailure = e.getCause();
        final Throwable rootCause = timerFailure instanceof TimerException ? timerFailure.getCause() : null;
        // constant-first equals: a null message must surface the original exception, not a NullPointerException
        if (rootCause instanceof RuntimeException && "TEST SUCCESS".equals(rootCause.getMessage())) {
            testSuccess = true;
        } else {
            throw e;
        }
    }
    Assert.assertTrue(testSuccess);
}
Also used : TimerException(org.apache.flink.streaming.runtime.tasks.TimerException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 20 with JobExecutionException

use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.

the class JobMaster method jobStatusChanged.

/**
 * Handles a status change of the job. The change is always logged; if the new status is globally
 * terminal, the outcome is additionally reported through {@code jobCompletionActions}:
 * {@code FINISHED} reports a {@link JobExecutionResult} built from the execution graph's
 * accumulators, while {@code CANCELED} and {@code FAILED} report a {@link JobExecutionException}.
 *
 * @param newJobStatus the status the job has switched to
 * @param timestamp time of the status change (not read by this method)
 * @param error the failure associated with the status change; logged, and unpacked via
 *     {@code SerializedThrowable.get} for the {@code FAILED} case
 * @throws IllegalStateException if a globally terminal status other than FINISHED, CANCELED or
 *     FAILED is encountered
 */
private void jobStatusChanged(final JobStatus newJobStatus, long timestamp, final Throwable error) {
    validateRunsInMainThread();
    final JobID jobID = executionGraph.getJobID();
    final String jobName = executionGraph.getJobName();
    log.info("Status of job {} ({}) changed to {}.", jobID, jobName, newJobStatus, error);

    // only globally terminal states are reported to the completion actions
    if (!newJobStatus.isGloballyTerminalState()) {
        return;
    }

    switch (newJobStatus) {
        case FINISHED: {
            try {
                // TODO get correct job duration
                // job done, let's get the accumulators
                final Map<String, Object> finalAccumulators = executionGraph.getAccumulators();
                jobCompletionActions.jobFinished(new JobExecutionResult(jobID, 0L, finalAccumulators));
            } catch (Exception e) {
                log.error("Cannot fetch final accumulators for job {} ({})", jobName, jobID, e);
                final String failureMessage = "Failed to retrieve accumulator results. "
                        + "The job is registered as 'FINISHED (successful), but this notification describes "
                        + "a failure, since the resulting accumulators could not be fetched.";
                jobCompletionActions.jobFailed(new JobExecutionException(jobID, failureMessage, e));
            }
            break;
        }
        case CANCELED: {
            final Exception cancellationCause = new Exception("The job was cancelled");
            jobCompletionActions.jobFailed(new JobExecutionException(jobID, "Job was cancelled.", cancellationCause));
            break;
        }
        case FAILED: {
            // the reported error is unpacked with the user code class loader before being wrapped
            final Throwable unpackedError = SerializedThrowable.get(error, userCodeLoader);
            jobCompletionActions.jobFailed(new JobExecutionException(jobID, "Job execution failed.", unpackedError));
            break;
        }
        default:
            // this can happen only if the enum is buggy
            throw new IllegalStateException(newJobStatus.toString());
    }
}
Also used : JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) SerializedThrowable(org.apache.flink.runtime.util.SerializedThrowable) JobID(org.apache.flink.api.common.JobID) TimeoutException(java.util.concurrent.TimeoutException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) LeaderIdMismatchException(org.apache.flink.runtime.highavailability.LeaderIdMismatchException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException)

Aggregations

JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)21 Test (org.junit.Test)10 IOException (java.io.IOException)8 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)7 ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException)5 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)5 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)4 URISyntaxException (java.net.URISyntaxException)3 JobID (org.apache.flink.api.common.JobID)3 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)3 CompilerException (org.apache.flink.optimizer.CompilerException)3 JobRetrievalException (org.apache.flink.runtime.client.JobRetrievalException)3 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)3 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)3 TimerException (org.apache.flink.streaming.runtime.tasks.TimerException)3 Properties (java.util.Properties)2 TimeoutException (java.util.concurrent.TimeoutException)2 JobExecutionResult (org.apache.flink.api.common.JobExecutionResult)2 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)2 JobSubmissionException (org.apache.flink.runtime.client.JobSubmissionException)2