use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.
the class RescalingITCase method testSavepointRescalingNonPartitionedStateCausesException.
/**
 * Tests that a job cannot be restarted from a savepoint with a different parallelism if the
 * rescaled operator has non-partitioned state.
 *
 * <p>The test submits a job with {@code numSlots / 2} parallel instances, triggers a savepoint,
 * cancels the job and then resubmits it with {@code numSlots} parallel instances restoring from
 * that savepoint. The resubmission must fail with a {@link JobExecutionException} whose cause is
 * an {@link IllegalStateException}; a successful run fails the test.
 *
 * @throws Exception if any step other than the expected restore failure goes wrong
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
    final int parallelism = numSlots / 2;
    final int parallelism2 = numSlots;
    final int maxParallelism = 13;

    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    // Tracks the job that may still be known to the cluster; null means nothing to clean up.
    JobID jobID = null;
    ActorGateway jobManager = null;

    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());

        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        jobID = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();

        Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
        Object savepointResponse = Await.result(savepointPathFuture, waitingTime);
        assertTrue(String.valueOf(savepointResponse), savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();

        // Register for the removal notification before cancelling so it cannot be missed.
        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());
        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);
        Await.ready(jobRemovedFuture, deadline.timeLeft());

        // job successfully removed
        jobID = null;

        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();

        cluster.submitJobAndWait(scaledJobGraph, false);

        // The rescaled job ran to completion, so there is nothing left to clean up ...
        jobID = null;

        // ... but completing without an exception means the rescaling was not rejected.
        fail("Expected the rescaled job with non-partitioned state to fail with a JobExecutionException.");
    } catch (JobExecutionException exception) {
        if (exception.getCause() instanceof IllegalStateException) {
            // we expect a IllegalStateException wrapped
            // in a JobExecutionException, because the job containing non-partitioned state
            // is being rescaled
        } else {
            throw exception;
        }
    } finally {
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.
the class StateBackendITCase method testStateBackendWithoutCheckpointing.
/**
 * Checks that a state backend configured on the environment is still the one in use when
 * checkpointing has not been enabled.
 *
 * <p>A single-element keyed pipeline requests keyed state in {@code open()} while a
 * {@code FailingStateBackend} is installed. The job is expected to fail with a
 * {@link JobExecutionException} whose cause is a {@code SuccessException}.
 *
 * @throws Exception if job execution fails with anything other than a JobExecutionException
 */
@Test
public void testStateBackendWithoutCheckpointing() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    see.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    see.setStateBackend(new FailingStateBackend());

    // Mapper whose only purpose is to touch keyed state when it is opened.
    RichMapFunction<Tuple2<String, Integer>, String> stateTouchingMapper = new RichMapFunction<Tuple2<String, Integer>, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            getRuntimeContext().getState(new ValueStateDescriptor<Integer>("Test", Integer.class, 0));
        }

        @Override
        public String map(Tuple2<String, Integer> value) throws Exception {
            return value.f0;
        }
    };

    see.fromElements(new Tuple2<>("Hello", 1))
            .keyBy(0)
            .map(stateTouchingMapper)
            .print();

    try {
        see.execute();
        // reaching this point means the job did not fail as expected
        fail();
    } catch (JobExecutionException expected) {
        assertTrue("wrong exception", expected.getCause() instanceof SuccessException);
    }
}
use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.
the class StreamTaskTimerITCase method testOneInputOperatorWithoutChaining.
/**
 * Runs a {@code TimerOperator} that is never chained to its source and expects the job to fail
 * with a {@link JobExecutionException} caused by a {@code TimerException} that wraps a
 * {@link RuntimeException} carrying the message {@code "TEST SUCCESS"}. Any other failure is
 * rethrown unchanged.
 *
 * <p>Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 *
 * @throws Exception if the job fails for any reason other than the expected one
 */
@Test
public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());
    source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.NEVER));

    boolean testSuccess = false;
    try {
        env.execute("Timer test");
    } catch (JobExecutionException e) {
        final Throwable timerFailure = e.getCause();
        final Throwable rootCause = timerFailure instanceof TimerException ? timerFailure.getCause() : null;

        // Constant-first equals keeps a message-less root cause from raising a
        // NullPointerException that would hide the real job failure.
        if (rootCause instanceof RuntimeException && "TEST SUCCESS".equals(rootCause.getMessage())) {
            testSuccess = true;
        } else {
            throw e;
        }
    }
    Assert.assertTrue(testSuccess);
}
use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.
the class StreamTaskTimerITCase method testOperatorChainedToSource.
/**
 * Runs a {@code TimerOperator} that is always chained to its source and expects the job to fail
 * with a {@link JobExecutionException} caused by a {@code TimerException} that wraps a
 * {@link RuntimeException} carrying the message {@code "TEST SUCCESS"}. Any other failure is
 * rethrown unchanged.
 *
 * <p>Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 *
 * @throws Exception if the job fails for any reason other than the expected one
 */
@Test
public void testOperatorChainedToSource() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());
    source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

    boolean testSuccess = false;
    try {
        env.execute("Timer test");
    } catch (JobExecutionException e) {
        final Throwable timerFailure = e.getCause();
        final Throwable rootCause = timerFailure instanceof TimerException ? timerFailure.getCause() : null;

        // Constant-first equals keeps a message-less root cause from raising a
        // NullPointerException that would hide the real job failure.
        if (rootCause instanceof RuntimeException && "TEST SUCCESS".equals(rootCause.getMessage())) {
            testSuccess = true;
        } else {
            throw e;
        }
    }
    Assert.assertTrue(testSuccess);
}
use of org.apache.flink.runtime.client.JobExecutionException in project flink by apache.
the class JobMaster method jobStatusChanged.
/**
 * Reacts to a status change of the execution graph's job.
 *
 * <p>The change is always logged. When the new status is globally terminal, the outcome is
 * forwarded to {@code jobCompletionActions}: a finished job reports its accumulator results,
 * while a cancelled or failed job is reported as a {@link JobExecutionException}.
 *
 * @param newJobStatus the status the job switched to
 * @param timestamp time of the status change (not used by this method)
 * @param error failure associated with the status change; passed to the log statement and, for
 *     FAILED, unpacked with the user code class loader
 * @throws IllegalStateException if a globally terminal status other than FINISHED, CANCELED or
 *     FAILED is encountered
 */
private void jobStatusChanged(final JobStatus newJobStatus, long timestamp, final Throwable error) {
    validateRunsInMainThread();

    final JobID id = executionGraph.getJobID();
    final String name = executionGraph.getJobName();
    log.info("Status of job {} ({}) changed to {}.", id, name, newJobStatus, error);

    // Only globally terminal states are reported to the completion actions.
    if (!newJobStatus.isGloballyTerminalState()) {
        return;
    }

    switch (newJobStatus) {
        case FINISHED:
            try {
                // TODO get correct job duration
                // job done, let's get the accumulators
                final Map<String, Object> accumulators = executionGraph.getAccumulators();
                jobCompletionActions.jobFinished(new JobExecutionResult(id, 0L, accumulators));
            } catch (Exception e) {
                log.error("Cannot fetch final accumulators for job {} ({})", name, id, e);
                jobCompletionActions.jobFailed(
                        new JobExecutionException(
                                id,
                                "Failed to retrieve accumulator results. "
                                        + "The job is registered as 'FINISHED (successful), but this notification describes "
                                        + "a failure, since the resulting accumulators could not be fetched.",
                                e));
            }
            return;

        case CANCELED:
            jobCompletionActions.jobFailed(
                    new JobExecutionException(id, "Job was cancelled.", new Exception("The job was cancelled")));
            return;

        case FAILED:
            jobCompletionActions.jobFailed(
                    new JobExecutionException(id, "Job execution failed.", SerializedThrowable.get(error, userCodeLoader)));
            return;

        default:
            // this can happen only if the enum is buggy
            throw new IllegalStateException(newJobStatus.toString());
    }
}
Aggregations