use of org.apache.flink.api.common.JobStatus in project flink by apache.
the class JobMasterStopWithSavepointITCase method testRestartCheckpointCoordinatorIfStopWithSavepointFails.
/**
 * Checks that the job keeps running, and that checkpoints continue, after a stop-with-savepoint
 * attempt has failed.
 *
 * <p>All POSIX permissions are removed from the savepoint directory before the stop-with-savepoint
 * call is issued. The call is then expected to fail with a {@code CheckpointException} whose
 * message contains the {@code IO_EXCEPTION} failure reason. The test is skipped (via {@code
 * Assume}) when the permissions cannot be changed, including on file systems that do not support
 * POSIX permissions at all.
 *
 * @throws Exception if the stop-with-savepoint call fails for an unexpected reason, or if
 *     querying the job status fails or times out
 */
@Test
public void testRestartCheckpointCoordinatorIfStopWithSavepointFails() throws Exception {
    setUpJobGraph(CheckpointCountingTask.class, RestartStrategies.noRestart());

    try {
        Files.setPosixFilePermissions(savepointDirectory, Collections.emptySet());
    } catch (IOException | UnsupportedOperationException e) {
        // Files.setPosixFilePermissions throws UnsupportedOperationException when the file system
        // has no POSIX attribute view; skip the test in that case instead of erroring out.
        Assume.assumeNoException(e);
    }

    try {
        stopWithSavepoint(true).get();
        fail("Stop with savepoint was expected to fail.");
    } catch (Exception e) {
        Optional<CheckpointException> checkpointExceptionOptional =
                ExceptionUtils.findThrowable(e, CheckpointException.class);
        if (!checkpointExceptionOptional.isPresent()) {
            // Not the failure this test provokes: surface it unchanged.
            throw e;
        }
        String exceptionMessage = checkpointExceptionOptional.get().getMessage();
        assertTrue(
                "Stop with savepoint failed because of another cause " + exceptionMessage,
                exceptionMessage.contains(CheckpointFailureReason.IO_EXCEPTION.message()));
    }

    final JobStatus jobStatus =
            clusterClient.getJobStatus(jobGraph.getJobID()).get(60, TimeUnit.SECONDS);
    assertThat(jobStatus, equalTo(JobStatus.RUNNING));

    // assert that checkpoints are continued to be triggered
    // NOTE(review): the latch is presumably counted down by the running task - confirm.
    checkpointsToWaitFor = new CountDownLatch(1);
    assertTrue(checkpointsToWaitFor.await(60L, TimeUnit.SECONDS));
}
use of org.apache.flink.api.common.JobStatus in project flink by apache.
the class TestJobExecutor method assertFinishedSuccessfully.
/**
 * Asserts that the job identified by {@code jobID} has reached the {@code FINISHED} status.
 *
 * <p>If the status differs, the job result is requested; when it carries a serialized throwable,
 * that throwable becomes the cause of the raised {@link AssertionError}. Otherwise the assertion
 * fails with the status message only.
 *
 * @return this executor, to allow call chaining
 * @throws Exception if retrieving the job status or the job result fails
 */
public TestJobExecutor assertFinishedSuccessfully() throws Exception {
    LOG.debug("assertFinishedSuccessfully");

    final JobStatus observedStatus =
            miniClusterResource.getClusterClient().getJobStatus(jobID).get();
    if (observedStatus.equals(FINISHED)) {
        return this;
    }

    final String failureMessage =
            String.format("Job didn't finish successfully, status: %s", observedStatus);
    final Optional<SerializedThrowable> failureCause =
            miniClusterResource
                    .getClusterClient()
                    .requestJobResult(jobID)
                    .get()
                    .getSerializedThrowable();

    if (failureCause.isPresent()) {
        throw new AssertionError(failureMessage, failureCause.get());
    }
    fail(failureMessage);
    return this;
}
use of org.apache.flink.api.common.JobStatus in project flink by apache.
the class JobMasterTriggerSavepointITCase method testStopJobAfterSavepoint.
/**
 * Cancels the job with a savepoint, then checks that the job is canceled or cancelling and that
 * the savepoint directory contains an entry named like the returned savepoint location.
 *
 * @throws Exception if set-up, cancellation, the status query or the directory listing fails
 */
@Test
public void testStopJobAfterSavepoint() throws Exception {
    setUpWithCheckpointInterval(10L);

    final String reportedLocation = cancelWithSavepoint();

    assertThat(
            clusterClient.getJobStatus(jobGraph.getJobID()).get(),
            isOneOf(JobStatus.CANCELED, JobStatus.CANCELLING));

    // Only the last path segment of each directory entry is compared.
    final List<Path> entryNames;
    try (Stream<Path> entries = Files.list(savepointDirectory)) {
        entryNames = entries.map(entry -> entry.getFileName()).collect(Collectors.toList());
    }

    final Path expectedName = Paths.get(reportedLocation).getFileName();
    assertThat(entryNames, hasItem(expectedName));
}
use of org.apache.flink.api.common.JobStatus in project flink by apache.
the class JobMasterTriggerSavepointITCase method waitForJob.
/**
 * Polls the job status up to 60 times, sleeping one second after each unsuccessful attempt, and
 * returns as soon as the job is {@code RUNNING}.
 *
 * <p>Each status query waits at most 60 seconds. A globally terminal status fails the assertion
 * immediately.
 *
 * @throws AssertionError if the job is not running after the last attempt
 * @throws Exception if a status query times out or the thread is interrupted
 */
private void waitForJob() throws Exception {
    for (int attemptsLeft = 60; attemptsLeft > 0; attemptsLeft--) {
        try {
            final JobStatus current =
                    clusterClient.getJobStatus(jobGraph.getJobID()).get(60, TimeUnit.SECONDS);
            assertThat(current.isGloballyTerminalState(), equalTo(false));
            if (JobStatus.RUNNING == current) {
                return;
            }
        } catch (ExecutionException ignored) {
            // JobManagerRunner is not yet registered in Dispatcher
        }
        Thread.sleep(1000);
    }

    throw new AssertionError("Job did not become running within timeout.");
}
use of org.apache.flink.api.common.JobStatus in project flink by apache.
the class AbstractOperatorRestoreTestBase method migrateJob.
/**
 * Runs the migration job from the bundled savepoint resource and cancels it with a fresh
 * savepoint.
 *
 * <p>Steps: locate the savepoint resource on the classpath, submit the job restored from it, poll
 * until the job is {@code RUNNING}, cancel with savepoint into a new temporary folder, then poll
 * until the job is {@code CANCELED}.
 *
 * @param clusterClient client used to submit, query and cancel the job
 * @param deadline bounds the status polling and the cancel-with-savepoint retry loop
 * @return the path of the savepoint produced by the cancel-with-savepoint call
 * @throws IllegalArgumentException if the savepoint resource cannot be found
 * @throws Throwable if submission, polling or cancellation fails with a non-tolerated error
 */
private String migrateJob(ClusterClient<?> clusterClient, Deadline deadline) throws Throwable {
    final ClassLoader resourceLoader = AbstractOperatorRestoreTestBase.class.getClassLoader();
    final URL restoreFrom =
            resourceLoader.getResource("operatorstate/" + getMigrationSavepointName());
    if (restoreFrom == null) {
        throw new IllegalArgumentException("Savepoint file does not exist.");
    }

    final JobGraph jobToMigrate = createJobGraph(ExecutionMode.MIGRATE);
    final String restorePath = restoreFrom.getFile();
    jobToMigrate.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(restorePath));

    assertNotNull(jobToMigrate.getJobID());

    clusterClient.submitJob(jobToMigrate).get();

    // Poll every 50 ms until the job reports RUNNING or the deadline expires.
    final CompletableFuture<JobStatus> runningStatus =
            FutureUtils.retrySuccessfulWithDelay(
                    () -> clusterClient.getJobStatus(jobToMigrate.getJobID()),
                    Time.milliseconds(50),
                    deadline,
                    (status) -> status == JobStatus.RUNNING,
                    TestingUtils.defaultScheduledExecutor());
    assertEquals(
            JobStatus.RUNNING,
            runningStatus.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));

    // Trigger savepoint
    final File targetDirectory = tmpFolder.newFolder();
    String createdSavepoint = null;

    // Cancel-with-savepoint is retried for as long as the deadline allows; failures matching the
    // tolerated-exception pattern trigger another attempt (see FLINK-4714), anything else is
    // rethrown.
    while (deadline.hasTimeLeft() && createdSavepoint == null) {
        try {
            createdSavepoint =
                    clusterClient
                            .cancelWithSavepoint(
                                    jobToMigrate.getJobID(),
                                    targetDirectory.getAbsolutePath(),
                                    SavepointFormatType.CANONICAL)
                            .get();
        } catch (Exception e) {
            final boolean tolerated =
                    PATTERN_CANCEL_WITH_SAVEPOINT_TOLERATED_EXCEPTIONS
                            .matcher(ExceptionUtils.stringifyException(e))
                            .find();
            if (!tolerated) {
                throw e;
            }
        }
    }

    assertNotNull("Could not take savepoint.", createdSavepoint);

    // Poll every 50 ms until the job reports CANCELED or the deadline expires.
    final CompletableFuture<JobStatus> canceledStatus =
            FutureUtils.retrySuccessfulWithDelay(
                    () -> clusterClient.getJobStatus(jobToMigrate.getJobID()),
                    Time.milliseconds(50),
                    deadline,
                    (status) -> status == JobStatus.CANCELED,
                    TestingUtils.defaultScheduledExecutor());
    assertEquals(
            JobStatus.CANCELED,
            canceledStatus.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));

    return createdSavepoint;
}
Aggregations