use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.
the class CheckpointCoordinator method startTriggeringCheckpoint.
/**
 * Starts triggering a checkpoint for the given request by chaining asynchronous stages:
 * calculate the checkpoint plan, initialize the checkpoint id and storage location, create the
 * {@link PendingCheckpoint}, trigger the operator coordinator checkpoints, snapshot the master
 * state, and finally call either {@code triggerCheckpointRequest} (success) or one of the
 * {@code onTriggerFailure} overloads (failure).
 *
 * <p>Any throwable raised synchronously while the chain is being set up is passed to
 * {@code onTriggerFailure(request, throwable)}.
 *
 * @param request the trigger request; this method reads its {@code isPeriodic}, {@code props}
 *     and {@code externalSavepointLocation} fields and its on-completion future
 */
private void startTriggeringCheckpoint(CheckpointTriggerRequest request) {
try {
// Only the global pre-check runs while holding the lock.
synchronized (lock) {
preCheckGlobalState(request.isPeriodic);
}
// we will actually trigger this checkpoint!
// isTriggering must be false at this point; it is flipped to true right away.
Preconditions.checkState(!isTriggering);
isTriggering = true;
final long timestamp = System.currentTimeMillis();
CompletableFuture<CheckpointPlan> checkpointPlanFuture = checkpointPlanCalculator.calculateCheckpointPlan();
// Capture whether the base locations still have to be initialized, then mark them as
// initialized immediately (i.e. before the asynchronous initialization stage below runs).
boolean initializeBaseLocations = !baseLocationsForCheckpointInitialized;
baseLocationsForCheckpointInitialized = true;
// Stage 1 (on 'executor'): initialize the checkpoint and pair the result with the plan.
// Stage 2 (on 'timer'): create the pending checkpoint from that pair.
final CompletableFuture<PendingCheckpoint> pendingCheckpointCompletableFuture = checkpointPlanFuture.thenApplyAsync(plan -> {
try {
CheckpointIdAndStorageLocation checkpointIdAndStorageLocation = initializeCheckpoint(request.props, request.externalSavepointLocation, initializeBaseLocations);
return new Tuple2<>(plan, checkpointIdAndStorageLocation);
} catch (Throwable e) {
// Wrap every failure so it travels through the future chain as a CompletionException.
throw new CompletionException(e);
}
}, executor).thenApplyAsync((checkpointInfo) -> createPendingCheckpoint(timestamp, request.props, checkpointInfo.f0, request.isPeriodic, checkpointInfo.f1.checkpointId, checkpointInfo.f1.checkpointStorageLocation, request.getOnCompletionFuture()), timer);
// Stage 3 (on 'timer'): trigger the operator coordinator checkpoints for the pending checkpoint.
final CompletableFuture<?> coordinatorCheckpointsComplete = pendingCheckpointCompletableFuture.thenComposeAsync((pendingCheckpoint) -> OperatorCoordinatorCheckpoints.triggerAndAcknowledgeAllCoordinatorCheckpointsWithCompletion(coordinatorsToCheckpoint, pendingCheckpoint, timer), timer);
// We have to take the snapshot of the master hooks after the coordinator checkpoints
// have completed.
// This is to ensure the tasks are checkpointed after the OperatorCoordinators in case
// ExternallyInducedSource is used.
final CompletableFuture<?> masterStatesComplete = coordinatorCheckpointsComplete.thenComposeAsync(ignored -> {
// If the code reaches here, the pending checkpoint is guaranteed to be not null.
// We use FutureUtils.getWithoutException() to make the compiler happy with the
// checked exceptions in the signature.
PendingCheckpoint checkpoint = FutureUtils.getWithoutException(pendingCheckpointCompletableFuture);
return snapshotMasterState(checkpoint);
}, timer);
// Final stage (on 'timer'): once both futures are done, either continue with
// triggerCheckpointRequest or report the failure.
// NOTE(review): FutureUtils.assertNoException presumably escalates any exception that
// still escapes the chain below - confirm against FutureUtils.
FutureUtils.assertNoException(CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete).handleAsync((ignored, throwable) -> {
final PendingCheckpoint checkpoint = FutureUtils.getWithoutException(pendingCheckpointCompletableFuture);
Preconditions.checkState(checkpoint != null || throwable != null, "Either the pending checkpoint needs to be created or an error must have occurred.");
if (throwable != null) {
// the initialization might not be finished yet
// No pending checkpoint available: fail via the request; otherwise fail via the
// pending checkpoint.
if (checkpoint == null) {
onTriggerFailure(request, throwable);
} else {
onTriggerFailure(checkpoint, throwable);
}
} else {
triggerCheckpointRequest(request, timestamp, checkpoint);
}
return null;
}, timer).exceptionally(error -> {
// Errors are rethrown unless the coordinator is shut down; during shutdown they are
// only logged (rejected executions at debug level, everything else at warn level).
if (!isShutdown()) {
throw new CompletionException(error);
} else if (findThrowable(error, RejectedExecutionException.class).isPresent()) {
LOG.debug("Execution rejected during shutdown");
} else {
LOG.warn("Error encountered during shutdown", error);
}
return null;
}));
} catch (Throwable throwable) {
// Synchronous failures while building the chain are reported against the request.
onTriggerFailure(request, throwable);
}
}
use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.
the class CheckpointCoordinator method triggerTasks.
/**
 * Sends the trigger message for the given pending checkpoint to every task returned by the
 * checkpoint plan's {@code getTasksToTrigger()}.
 *
 * <p>When {@code forceFullSnapshot} is set and the request is not a savepoint, the snapshot type
 * is replaced by {@code CheckpointType.FULL_CHECKPOINT}; otherwise the type configured on the
 * request properties is used. Requests whose properties are synchronous are dispatched through
 * {@code triggerSynchronousSavepoint}, all others through {@code triggerCheckpoint}.
 *
 * @param request the trigger request whose properties select snapshot type and trigger call
 * @param timestamp the timestamp passed on to every task
 * @param checkpoint the pending checkpoint providing id, storage location and plan
 * @return the future produced by {@code FutureUtils.waitForAll} over all task acknowledgements
 */
private CompletableFuture<Void> triggerTasks(CheckpointTriggerRequest request, long timestamp, PendingCheckpoint checkpoint) {
    final long checkpointId = checkpoint.getCheckpointID();

    // A forced full snapshot overrides the requested type for everything but savepoints.
    final boolean upgradeToFullCheckpoint = this.forceFullSnapshot && !request.props.isSavepoint();
    final SnapshotType snapshotType =
            upgradeToFullCheckpoint
                    ? CheckpointType.FULL_CHECKPOINT
                    : request.props.getCheckpointType();

    final CheckpointOptions options =
            CheckpointOptions.forConfig(
                    snapshotType,
                    checkpoint.getCheckpointStorageLocation().getLocationReference(),
                    isExactlyOnceMode,
                    unalignedCheckpointsEnabled,
                    alignedCheckpointTimeout);

    // Collect one acknowledgement future per triggered task.
    final List<CompletableFuture<Acknowledge>> pendingAcks = new ArrayList<>();
    for (Execution task : checkpoint.getCheckpointPlan().getTasksToTrigger()) {
        final CompletableFuture<Acknowledge> ack =
                request.props.isSynchronous()
                        ? task.triggerSynchronousSavepoint(checkpointId, timestamp, options)
                        : task.triggerCheckpoint(checkpointId, timestamp, options);
        pendingAcks.add(ack);
    }
    return FutureUtils.waitForAll(pendingAcks);
}
use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.
the class DefaultCheckpointPlan method fulfillFinishedTaskStatus.
/**
 * Completes the given operator states with the status of finished tasks.
 *
 * <p>Does nothing when {@code mayHaveFinishedTasks} is {@code false}. Otherwise it collects the
 * job vertices of all finished tasks that are not contained in
 * {@code fullyFinishedOrFinishedOnRestoreVertices}, runs the two union-list-state checks on
 * them, and then fills in the states for fully finished and partially finished operators.
 *
 * @param operatorStates the operator states, keyed by operator id, to check and complete
 */
@Override
public void fulfillFinishedTaskStatus(Map<OperatorID, OperatorState> operatorStates) {
    if (mayHaveFinishedTasks) {
        final Map<JobVertexID, ExecutionJobVertex> partiallyFinishedVertices = new HashMap<>();
        for (Execution finished : finishedTasks) {
            final JobVertexID vertexId = finished.getVertex().getJobvertexId();
            if (fullyFinishedOrFinishedOnRestoreVertices.containsKey(vertexId)) {
                // Vertex is already tracked as fully finished / finished on restore.
                continue;
            }
            partiallyFinishedVertices.put(vertexId, finished.getVertex().getJobVertex());
        }

        // Validate first, then fill in the states.
        checkNoPartlyFinishedVertexUsedUnionListState(partiallyFinishedVertices, operatorStates);
        checkNoPartlyOperatorsFinishedVertexUsedUnionListState(partiallyFinishedVertices, operatorStates);
        fulfillFullyFinishedOrFinishedOnRestoreOperatorStates(operatorStates);
        fulfillSubtaskStateForPartiallyFinishedOperators(operatorStates);
    }
}
use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.
the class DefaultCheckpointPlan method fulfillSubtaskStateForPartiallyFinishedOperators.
/**
 * Records {@code FinishedOperatorSubtaskState.INSTANCE} for the subtask of every finished task,
 * for each operator of that task's job vertex.
 *
 * <p>Operators whose existing state reports {@code isFullyFinished()} are skipped. Operators
 * without an entry in the map get a new {@code OperatorState}, built from the job vertex's
 * parallelism and max parallelism, which is registered in the map before the subtask state is
 * stored.
 *
 * @param operatorStates the operator states, keyed by generated operator id, to update in place
 */
private void fulfillSubtaskStateForPartiallyFinishedOperators(Map<OperatorID, OperatorState> operatorStates) {
    for (Execution finishedTask : finishedTasks) {
        final ExecutionJobVertex vertex = finishedTask.getVertex().getJobVertex();
        for (OperatorIDPair idPair : vertex.getOperatorIDs()) {
            final OperatorID operatorId = idPair.getGeneratedOperatorID();
            OperatorState state = operatorStates.get(operatorId);
            if (state == null) {
                // First time this operator is seen: create and register an empty state.
                state = new OperatorState(operatorId, vertex.getParallelism(), vertex.getMaxParallelism());
                operatorStates.put(operatorId, state);
            } else if (state.isFullyFinished()) {
                // Fully finished operators are left untouched.
                continue;
            }
            state.putState(finishedTask.getParallelSubtaskIndex(), FinishedOperatorSubtaskState.INSTANCE);
        }
    }
}
use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.
the class SchedulerBase method archiveFromFailureHandlingResult.
/**
 * Archives the failure described by the given snapshot.
 *
 * <p>If the snapshot has no root-cause execution, the failure is handed to
 * {@code archiveGlobalFailure} together with its root cause, timestamp and concurrently failed
 * executions. Otherwise a {@code RootExceptionHistoryEntry} is created from the snapshot, added
 * to {@code exceptionHistory}, and logged at debug level.
 *
 * @param failureHandlingResult the snapshot of the failure handling result to archive
 */
protected final void archiveFromFailureHandlingResult(FailureHandlingResultSnapshot failureHandlingResult) {
    if (!failureHandlingResult.getRootCauseExecution().isPresent()) {
        // No single execution caused the failure: archive it as a global failure.
        archiveGlobalFailure(
                failureHandlingResult.getRootCause(),
                failureHandlingResult.getTimestamp(),
                failureHandlingResult.getConcurrentlyFailedExecution());
        return;
    }

    final Execution failedExecution = failureHandlingResult.getRootCauseExecution().get();
    final RootExceptionHistoryEntry historyEntry =
            RootExceptionHistoryEntry.fromFailureHandlingResultSnapshot(failureHandlingResult);
    exceptionHistory.add(historyEntry);
    log.debug(
            "Archive local failure causing attempt {} to fail: {}",
            failedExecution.getAttemptId(),
            historyEntry.getExceptionAsString());
}
Aggregations