Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
Class DefaultSchedulerCheckpointCoordinatorTest, method testClosingSchedulerSuspendsExecutionGraphAndShutsDownCheckpointCoordinator.
/**
 * Verifies that closing the scheduler leaves the execution graph in {@code SUSPENDED}, shuts
 * down the checkpoint coordinator, and reports {@code SUSPENDED} through both the checkpoint ID
 * counter's and the completed checkpoint store's shutdown futures.
 */
@Test
public void testClosingSchedulerSuspendsExecutionGraphAndShutsDownCheckpointCoordinator() throws Exception {
    // Futures handed to the testing counter and store; the assertions at the end read the
    // job status reported through them.
    final CompletableFuture<JobStatus> counterShutdownStatus = new CompletableFuture<>();
    final CompletableFuture<JobStatus> storeShutdownStatus = new CompletableFuture<>();

    final CheckpointIDCounter idCounter =
            TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(
                    counterShutdownStatus);
    final CompletedCheckpointStore checkpointStore =
            TestingCompletedCheckpointStore
                    .createStoreWithShutdownCheckAndNoCompletedCheckpoints(storeShutdownStatus);

    final SchedulerBase schedulerUnderTest =
            createSchedulerAndEnableCheckpointing(idCounter, checkpointStore);
    final ExecutionGraph executionGraph = schedulerUnderTest.getExecutionGraph();
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    // Precondition: a coordinator exists and is still running before the scheduler is closed.
    assertThat(coordinator, Matchers.notNullValue());
    assertThat(coordinator.isShutdown(), is(false));

    // Block until the asynchronous close has finished.
    schedulerUnderTest.closeAsync().get();

    assertThat(executionGraph.getState(), is(JobStatus.SUSPENDED));
    assertThat(coordinator.isShutdown(), is(true));
    assertThat(counterShutdownStatus.get(), is(JobStatus.SUSPENDED));
    assertThat(storeShutdownStatus.get(), is(JobStatus.SUSPENDED));
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
Class DefaultSchedulerCheckpointCoordinatorTest, method testClosingSchedulerShutsDownCheckpointCoordinatorOnFailedExecutionGraph.
/**
 * Verifies that, once the job has been failed on the execution graph, closing the scheduler
 * shuts down the checkpoint coordinator and reports {@code FAILED} through both the checkpoint
 * ID counter's and the completed checkpoint store's shutdown futures.
 */
@Test
public void testClosingSchedulerShutsDownCheckpointCoordinatorOnFailedExecutionGraph() throws Exception {
    // Futures handed to the testing counter and store; the assertions at the end read the
    // job status reported through them.
    final CompletableFuture<JobStatus> counterShutdownStatus = new CompletableFuture<>();
    final CompletableFuture<JobStatus> storeShutdownStatus = new CompletableFuture<>();

    final CheckpointIDCounter idCounter =
            TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(
                    counterShutdownStatus);
    final CompletedCheckpointStore checkpointStore =
            TestingCompletedCheckpointStore
                    .createStoreWithShutdownCheckAndNoCompletedCheckpoints(storeShutdownStatus);

    final SchedulerBase schedulerUnderTest =
            createSchedulerAndEnableCheckpointing(idCounter, checkpointStore);
    final ExecutionGraph executionGraph = schedulerUnderTest.getExecutionGraph();
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    // Precondition: a coordinator exists and is still running before the job is failed.
    assertThat(coordinator, Matchers.notNullValue());
    assertThat(coordinator.isShutdown(), is(false));

    // Fail the job first, then wait for the asynchronous close to finish.
    executionGraph.failJob(new Exception("Test Exception"), System.currentTimeMillis());
    schedulerUnderTest.closeAsync().get();

    assertThat(coordinator.isShutdown(), is(true));
    assertThat(counterShutdownStatus.get(), is(JobStatus.FAILED));
    assertThat(storeShutdownStatus.get(), is(JobStatus.FAILED));
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
Class Dispatcher, method jobReachedTerminalState.
/**
 * Handles a job that has reached a terminal state: logs the outcome, archives the execution
 * graph and, for globally terminal states, registers a dirty entry in the job result store
 * unless a clean or dirty entry already exists.
 *
 * <p>An {@link IOException} raised while talking to the job result store is wrapped in a
 * {@link FlinkException} and passed to the fatal error handler.
 *
 * @param executionGraphInfo information about the job whose archived graph is in a terminal state
 * @return {@link CleanupJobState#GLOBAL} if the terminal state is globally terminal, otherwise
 *     {@link CleanupJobState#LOCAL}
 * @throws IllegalArgumentException if the archived graph's state is not terminal
 */
protected CleanupJobState jobReachedTerminalState(ExecutionGraphInfo executionGraphInfo) {
    final ArchivedExecutionGraph archivedGraph = executionGraphInfo.getArchivedExecutionGraph();
    final JobStatus finalStatus = archivedGraph.getState();
    Preconditions.checkArgument(
            finalStatus.isTerminalState(),
            "Job %s is in state %s which is not terminal.",
            archivedGraph.getJobID(),
            finalStatus);

    // The failure info explains why a job failed or was suspended. For finished/canceled jobs
    // it may only hold the cause of an earlier restart, which would be misleading to print.
    final boolean failureExplainsTermination =
            finalStatus == JobStatus.SUSPENDED || finalStatus == JobStatus.FAILED;
    if (archivedGraph.getFailureInfo() == null || !failureExplainsTermination) {
        log.info("Job {} reached terminal state {}.", archivedGraph.getJobID(), finalStatus);
    } else {
        log.info(
                "Job {} reached terminal state {}.\n{}",
                archivedGraph.getJobID(),
                finalStatus,
                archivedGraph.getFailureInfo().getExceptionAsString().trim());
    }

    archiveExecutionGraph(executionGraphInfo);

    // Only globally terminal jobs are recorded in the job result store.
    if (!finalStatus.isGloballyTerminalState()) {
        return CleanupJobState.LOCAL;
    }

    final JobID jobId = executionGraphInfo.getJobId();
    try {
        if (jobResultStore.hasCleanJobResultEntry(jobId)) {
            log.warn("Job {} is already marked as clean but clean up was triggered again.", jobId);
        } else if (!jobResultStore.hasDirtyJobResultEntry(jobId)) {
            final JobResult jobResult =
                    JobResult.createFrom(executionGraphInfo.getArchivedExecutionGraph());
            jobResultStore.createDirtyResult(new JobResultEntry(jobResult));
            log.info("Job {} has been registered for cleanup in the JobResultStore after reaching a terminal state.", jobId);
        }
    } catch (IOException e) {
        fatalErrorHandler.onFatalError(
                new FlinkException(
                        String.format("The job %s couldn't be marked as pre-cleanup finished in JobResultStore.", jobId),
                        e));
    }
    return CleanupJobState.GLOBAL;
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
Class ClientUtilsTest, method testWaitUntilJobInitializationFinished_throwsInitializationException.
/**
 * Checks that {@code ClientUtils.waitUntilJobInitializationFinished()} throws a
 * {@link JobInitializationException} when the reported status sequence ends in {@code FAILED}
 * and the supplied job result carries such an exception.
 */
@Test
public void testWaitUntilJobInitializationFinished_throwsInitializationException() {
    // Two INITIALIZING polls followed by FAILED.
    final Iterator<JobStatus> reportedStatuses =
            Arrays.asList(JobStatus.INITIALIZING, JobStatus.INITIALIZING, JobStatus.FAILED)
                    .iterator();

    CommonTestUtils.assertThrows(
            "Something is wrong",
            JobInitializationException.class,
            () -> {
                ClientUtils.waitUntilJobInitializationFinished(
                        () -> reportedStatuses.next(),
                        () -> {
                            final Throwable initializationFailure =
                                    new JobInitializationException(
                                            TESTING_JOB_ID,
                                            "Something is wrong",
                                            new RuntimeException("Err"));
                            return buildJobResult(initializationFailure);
                        },
                        ClassLoader.getSystemClassLoader());
                return null;
            });
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
Class RestClusterClientTest, method testNotShowSuspendedJobStatus.
/**
 * The client must never surface the SUSPENDED job status: it is expected to keep retrying
 * until it receives a different status or the cluster is unreachable. Here the handler is fed
 * SUSPENDED followed by RUNNING, and the client is expected to return RUNNING.
 */
@Test
public void testNotShowSuspendedJobStatus() throws Exception {
    // Responses fed to the handler in order: SUSPENDED first, then RUNNING.
    final List<JobDetailsInfo> cannedResponses = new ArrayList<>();
    cannedResponses.add(buildJobDetail(JobStatus.SUSPENDED));
    cannedResponses.add(buildJobDetail(JobStatus.RUNNING));
    final TestJobStatusHandler statusHandler =
            new TestJobStatusHandler(cannedResponses.iterator());

    try (TestRestServerEndpoint endpoint = createRestServerEndpoint(statusHandler)) {
        final int serverPort = endpoint.getServerAddress().getPort();
        final RestClusterClient<?> client = createRestClusterClient(serverPort);
        try {
            assertEquals(JobStatus.RUNNING, client.getJobStatus(jobId).get());
        } finally {
            // Close the client explicitly, before the surrounding endpoint is closed.
            client.close();
        }
    }
}
Aggregations