Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
From the class DispatcherTest, method testJobStatusIsShownDuringTermination.
/**
 * Checks that the status of a job can still be requested from the dispatcher, and is reported
 * as {@link JobStatus#SUSPENDED}, while the dispatcher's shutdown is being held up by a second
 * job whose runner blocks on termination.
 */
@Test
public void testJobStatusIsShownDuringTermination() throws Exception {
    final JobID blockedJobId = new JobID();
    haServices.setJobMasterLeaderElectionService(
            blockedJobId, new TestingLeaderElectionService());

    // The factory is keyed on the id of the job whose termination is to be held back.
    final JobManagerRunnerWithBlockingTerminationFactory blockingRunnerFactory =
            new JobManagerRunnerWithBlockingTerminationFactory(blockedJobId);
    dispatcher = createAndStartDispatcher(heartbeatServices, haServices, blockingRunnerFactory);
    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

    final JobGraph blockedGraph = JobGraphTestUtils.singleNoOpJobGraph();
    blockedGraph.setJobID(blockedJobId);

    // Two submissions: the shared fixture job first, then the one that blocks on termination.
    gateway.submitJob(jobGraph, TIMEOUT).get();
    gateway.submitJob(blockedGraph, TIMEOUT).get();

    // Kick off the dispatcher shutdown without waiting for it to finish.
    final CompletableFuture<Void> shutdownFuture = dispatcher.closeAsync();

    try {
        // Poll the gateway until the queried job is reported as SUSPENDED or the deadline
        // passes. NOTE(review): jobId is presumably the id of the fixture jobGraph - confirm.
        CommonTestUtils.waitUntilCondition(
                () ->
                        gateway.requestExecutionGraphInfo(jobId, TIMEOUT)
                                        .get()
                                        .getArchivedExecutionGraph()
                                        .getState()
                                == JobStatus.SUSPENDED,
                Deadline.fromNow(TimeUtils.toDuration(TIMEOUT)),
                5L);
    } finally {
        // Always release the blocked job so that the shutdown can complete, even when the
        // polling above failed.
        blockingRunnerFactory.unblockTermination();
        shutdownFuture.get();
    }
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
From the class ArchivedExecutionGraph, method createFrom.
/**
 * Builds an {@link ArchivedExecutionGraph} snapshot of the supplied {@link ExecutionGraph}.
 *
 * <p>Every job vertex is archived in topological order. Status timestamps are copied into an
 * array indexed by {@link JobStatus#ordinal()}. When a status override is supplied, the
 * entries of globally-terminal states are left at {@code 0} so that the snapshot does not
 * carry traces of a terminal state it does not claim to be in.
 *
 * @param executionGraph graph to take the snapshot of
 * @param statusOverride if non-null, the status reported by the snapshot instead of the
 *     graph's own state; must not be a globally-terminal state
 * @return the archived representation of {@code executionGraph}
 */
public static ArchivedExecutionGraph createFrom(
        ExecutionGraph executionGraph, @Nullable JobStatus statusOverride) {
    final boolean overrideIsAcceptable =
            statusOverride == null || !statusOverride.isGloballyTerminalState();
    Preconditions.checkArgument(
            overrideIsAcceptable,
            "Status override is only allowed for non-globally-terminal states.");

    // Archive each vertex once and register it both by id and by position.
    final Map<JobVertexID, ArchivedExecutionJobVertex> archivedByVertexId = new HashMap<>();
    final List<ArchivedExecutionJobVertex> archivedInOrder = new ArrayList<>();
    for (ExecutionJobVertex jobVertex : executionGraph.getVerticesTopologically()) {
        final ArchivedExecutionJobVertex archived = jobVertex.archive();
        archivedInOrder.add(archived);
        archivedByVertexId.put(jobVertex.getJobVertexId(), archived);
    }

    final Map<String, SerializedValue<OptionalFailure<Object>>> userAccumulators =
            executionGraph.getAccumulatorsSerialized();

    final JobStatus[] allStatuses = JobStatus.values();
    final long[] statusTimestamps = new long[allStatuses.length];
    final boolean overrideRequested = statusOverride != null;
    for (JobStatus candidate : allStatuses) {
        if (overrideRequested && candidate.isGloballyTerminalState()) {
            // An override is in effect: keep the default 0 for globally-terminal states.
            continue;
        }
        statusTimestamps[candidate.ordinal()] = executionGraph.getStatusTimestamp(candidate);
    }

    return new ArchivedExecutionGraph(
            executionGraph.getJobID(),
            executionGraph.getJobName(),
            archivedByVertexId,
            archivedInOrder,
            statusTimestamps,
            statusOverride != null ? statusOverride : executionGraph.getState(),
            executionGraph.getFailureInfo(),
            executionGraph.getJsonPlan(),
            executionGraph.getAccumulatorResultsStringified(),
            userAccumulators,
            executionGraph.getArchivedExecutionConfig(),
            executionGraph.isStoppable(),
            executionGraph.getCheckpointCoordinatorConfiguration(),
            executionGraph.getCheckpointStatsSnapshot(),
            executionGraph.getStateBackendName().orElse(null),
            executionGraph.getCheckpointStorageName().orElse(null));
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
From the class DefaultExecutionGraph, method cancel.
/**
 * Moves the job into {@link JobStatus#CANCELLING} if its current state permits it.
 *
 * <p>From {@code RUNNING}, {@code CREATED} or {@code RESTARTING} the method additionally
 * cancels any pending scheduling future and asynchronously cancels all vertices. From {@code
 * FAILING} only the state is switched. Any other state is left untouched. A state transition
 * that does not succeed causes the current state to be re-read and the decision repeated.
 */
@Override
public void cancel() {
    assertRunningInJobMasterMainThread();

    for (; ; ) {
        final JobStatus observed = state;
        final boolean needsVertexCancellation =
                observed == JobStatus.RUNNING
                        || observed == JobStatus.CREATED
                        || observed == JobStatus.RESTARTING;

        if (!needsVertexCancellation && observed != JobStatus.FAILING) {
            // no need to treat other states
            return;
        }

        if (!transitionState(observed, JobStatus.CANCELLING)) {
            // The transition was rejected: look at the state again and retry.
            continue;
        }

        if (needsVertexCancellation) {
            incrementRestarts();

            // Abort a scheduling action that may still be in flight.
            final CompletableFuture<Void> pendingScheduling = schedulingFuture;
            if (pendingScheduling != null) {
                pendingScheduling.cancel(false);
            }

            final ConjunctFuture<Void> verticesCancelled = cancelVerticesAsync();
            verticesCancelled.whenComplete(
                    (ignored, failure) -> {
                        if (failure == null) {
                            // Every vertex cancellation completed normally.
                            allVerticesInTerminalState();
                        } else {
                            transitionState(
                                    JobStatus.CANCELLING,
                                    JobStatus.FAILED,
                                    new FlinkException(
                                            "Could not cancel job "
                                                    + getJobName()
                                                    + " because not all execution job vertices could be cancelled.",
                                            failure));
                        }
                    });
        }
        // For FAILING only the switch to CANCELLING is performed here; no vertex
        // cancellation is triggered from this path.
        return;
    }
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
From the class JobDetailsHandler, method createJobDetailsInfo.
/**
 * Assembles the {@link JobDetailsInfo} for the given execution graph.
 *
 * <p>The start time is the timestamp of {@link JobStatus#INITIALIZING}. The end time is the
 * timestamp of the current state when that state is globally terminal, and {@code -1}
 * otherwise. The duration is measured up to the end time if it is positive, and up to the
 * current wall-clock time otherwise.
 *
 * @param executionGraph graph to describe
 * @param metricFetcher passed through to the per-vertex detail creation; may be null
 * @return the details of the job, including per-vertex details and vertex counts per state
 */
private static JobDetailsInfo createJobDetailsInfo(
        AccessExecutionGraph executionGraph, @Nullable MetricFetcher metricFetcher) {
    final long now = System.currentTimeMillis();
    final long startTime = executionGraph.getStatusTimestamp(JobStatus.INITIALIZING);

    final long endTime;
    if (executionGraph.getState().isGloballyTerminalState()) {
        endTime = executionGraph.getStatusTimestamp(executionGraph.getState());
    } else {
        endTime = -1L;
    }
    final long effectiveEnd = endTime > 0L ? endTime : now;
    final long duration = effectiveEnd - startTime;

    // One timestamp entry per job status.
    final JobStatus[] jobStatuses = JobStatus.values();
    final Map<JobStatus, Long> timestampByStatus = new HashMap<>(jobStatuses.length);
    for (JobStatus status : jobStatuses) {
        timestampByStatus.put(status, executionGraph.getStatusTimestamp(status));
    }

    // Collect the per-vertex details and tally the vertices by execution state ordinal.
    final Collection<JobDetailsInfo.JobVertexDetailsInfo> vertexDetails =
            new ArrayList<>(executionGraph.getAllVertices().size());
    final ExecutionState[] executionStates = ExecutionState.values();
    final int[] vertexCountByOrdinal = new int[executionStates.length];
    for (AccessExecutionJobVertex jobVertex : executionGraph.getVerticesTopologically()) {
        final JobDetailsInfo.JobVertexDetailsInfo details =
                createJobVertexDetailsInfo(
                        jobVertex, now, executionGraph.getJobID(), metricFetcher);
        vertexDetails.add(details);
        vertexCountByOrdinal[details.getExecutionState().ordinal()] += 1;
    }

    // Expose the tally as a map containing every execution state, including zero counts.
    final Map<ExecutionState, Integer> vertexCountByState =
            new HashMap<>(executionStates.length);
    for (ExecutionState executionState : executionStates) {
        vertexCountByState.put(executionState, vertexCountByOrdinal[executionState.ordinal()]);
    }

    return new JobDetailsInfo(
            executionGraph.getJobID(),
            executionGraph.getJobName(),
            executionGraph.isStoppable(),
            executionGraph.getState(),
            startTime,
            endTime,
            duration,
            executionGraph.getArchivedExecutionConfig().getMaxParallelism(),
            now,
            timestampByStatus,
            vertexDetails,
            vertexCountByState,
            executionGraph.getJsonPlan());
}
Use of org.apache.flink.api.common.JobStatus in the Apache Flink project.
From the class DefaultCompletedCheckpointStoreTest, method testShutdownFailsAnyFutureCallsToAddCheckpoint.
/**
 * Checks, for every {@link JobStatus}, that a checkpoint store which has been shut down with
 * that status answers a later attempt to add a checkpoint with an {@link
 * IllegalStateException}.
 */
@Test
public void testShutdownFailsAnyFutureCallsToAddCheckpoint() throws Exception {
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();

    for (JobStatus shutdownStatus : JobStatus.values()) {
        // A fresh store per status, so that each shutdown is observed in isolation.
        final CompletedCheckpointStore store = createCompletedCheckpointStore(builder.build());
        store.shutdown(shutdownStatus, cleaner);

        // The checkpoint is created inside the asserted call, exactly where the add happens.
        assertThrows(
                IllegalStateException.class,
                () ->
                        store.addCheckpointAndSubsumeOldestOne(
                                CompletedCheckpointStoreTest.createCheckpoint(
                                        0L, new SharedStateRegistryImpl()),
                                cleaner,
                                () -> {
                                    // Nothing to do in this callback.
                                }));
    }
}
Aggregations