Use of org.apache.flink.runtime.executiongraph.failover.flip1.FailureHandlingResult in the Apache Flink project.
Class DefaultScheduler, method handleGlobalFailure.
/**
 * Handles a failure that was reported as global.
 *
 * <p>The wall-clock time is read once and reused, so the recorded failure cause and the failure
 * handler's decision are based on the same timestamp.
 *
 * @param error the cause of the global failure
 */
@Override
public void handleGlobalFailure(final Throwable error) {
    final long failureTime = System.currentTimeMillis();
    setGlobalFailureCause(error, failureTime);
    log.info("Trying to recover from a global failure.", error);

    // The handler's result is passed on unchanged; maybeRestartTasks acts on it.
    maybeRestartTasks(
            executionFailureHandler.getGlobalFailureHandlingResult(error, failureTime));
}
Use of org.apache.flink.runtime.executiongraph.failover.flip1.FailureHandlingResult in the Apache Flink project.
Class FailureHandlingResultSnapshot, method create.
/**
 * Builds a {@code FailureHandlingResultSnapshot} from a {@link FailureHandlingResult}, resolving
 * the involved {@link ExecutionVertexID ExecutionVertexIDs} to {@link Execution} instances.
 *
 * <p>The root-cause execution is looked up from the failed task's vertex ID if the result
 * carries one; otherwise it is {@code null}. A precondition check requires that a resolved
 * root-cause execution provides failure info. Every other vertex listed for restart whose
 * looked-up execution provides failure info is collected as a concurrently failed execution.
 *
 * @param failureHandlingResult The {@code FailureHandlingResult} supplying the failed task's
 *     vertex ID, the error, the timestamp and the vertices to restart.
 * @param latestExecutionLookup The function used to resolve an {@link ExecutionVertexID} to
 *     the {@link Execution} that should represent it.
 * @return The {@code FailureHandlingResultSnapshot}.
 */
public static FailureHandlingResultSnapshot create(
        FailureHandlingResult failureHandlingResult,
        Function<ExecutionVertexID, Execution> latestExecutionLookup) {
    final Execution failedRootExecution =
            failureHandlingResult
                    .getExecutionVertexIdOfFailedTask()
                    .map(latestExecutionLookup)
                    .orElse(null);

    final boolean rootCauseHasFailureInfo =
            failedRootExecution == null || failedRootExecution.getFailureInfo().isPresent();
    // The message below is formatted on every call, including when there is no root-cause
    // execution, so the null guard is required here.
    final Object attemptLabel =
            failedRootExecution == null ? "(null)" : failedRootExecution.getAttemptId();
    final String vertexLabel =
            failureHandlingResult
                    .getExecutionVertexIdOfFailedTask()
                    .map(Objects::toString)
                    .orElse("(null)");
    Preconditions.checkArgument(
            rootCauseHasFailureInfo,
            String.format(
                    "The execution %s didn't provide a failure info even though the corresponding ExecutionVertex %s is marked as having handled the root cause of this failure.",
                    attemptLabel,
                    vertexLabel));

    final ExecutionVertexID rootVertexId =
            failureHandlingResult.getExecutionVertexIdOfFailedTask().orElse(null);

    // Skip the root-cause vertex itself; keep only executions that carry failure info.
    final Set<Execution> alsoFailedExecutions =
            failureHandlingResult.getVerticesToRestart().stream()
                    .filter(vertexId -> !vertexId.equals(rootVertexId))
                    .map(latestExecutionLookup)
                    .filter(candidate -> candidate.getFailureInfo().isPresent())
                    .collect(Collectors.toSet());

    return new FailureHandlingResultSnapshot(
            failedRootExecution,
            ErrorInfo.handleMissingThrowable(failureHandlingResult.getError()),
            failureHandlingResult.getTimestamp(),
            alsoFailedExecutions);
}
Use of org.apache.flink.runtime.executiongraph.failover.flip1.FailureHandlingResult in the Apache Flink project.
Class FailureHandlingResultSnapshotTest, method testGlobalFailureHandlingResultSnapshotCreation.
/**
 * Checks snapshot creation for a result that names no failed task: the snapshot must expose the
 * given root cause and timestamp, report no root-cause execution, and list the current attempts
 * of both failed vertices as concurrently failed.
 */
@Test
public void testGlobalFailureHandlingResultSnapshotCreation() {
    final Throwable globalCause = new FlinkException("Expected exception: root cause");
    final long failureTime = System.currentTimeMillis();

    // Two vertices fail, each with its own exception.
    final ExecutionVertex firstVertex = extractExecutionVertex(0);
    final Throwable firstFailure = new RuntimeException("Expected exception: failure #0");
    final ExecutionVertex secondVertex = extractExecutionVertex(1);
    final Throwable secondFailure =
            new IllegalStateException("Expected exception: failure #1");
    triggerFailure(firstVertex, firstFailure);
    triggerFailure(secondVertex, secondFailure);

    // No failed-task vertex ID is passed; every vertex of the graph is listed for restart.
    final FailureHandlingResult globalResult =
            FailureHandlingResult.restartable(
                    null,
                    globalCause,
                    failureTime,
                    StreamSupport.stream(
                                    executionGraph.getAllExecutionVertices().spliterator(),
                                    false)
                            .map(ExecutionVertex::getID)
                            .collect(Collectors.toSet()),
                    0L,
                    true);

    final FailureHandlingResultSnapshot snapshot =
            FailureHandlingResultSnapshot.create(globalResult, this::getLatestExecution);

    assertThat(snapshot.getRootCause(), is(globalCause));
    assertThat(snapshot.getTimestamp(), is(failureTime));
    assertThat(snapshot.getRootCauseExecution().isPresent(), is(false));
    assertThat(
            snapshot.getConcurrentlyFailedExecution(),
            IsIterableContainingInAnyOrder.containsInAnyOrder(
                    firstVertex.getCurrentExecutionAttempt(),
                    secondVertex.getCurrentExecutionAttempt()));
}
Use of org.apache.flink.runtime.executiongraph.failover.flip1.FailureHandlingResult in the Apache Flink project.
Class FailureHandlingResultSnapshotTest, method testMissingThrowableHandling.
// Regression test for a failure reported without a Throwable; see FLINK-22060/FLINK-21376.
// The snapshot's root cause is expected to be the substitute produced by
// ErrorInfo.handleMissingThrowable(null), and it must survive a SerializedThrowable round trip.
@Test
public void testMissingThrowableHandling() {
    final ExecutionVertex failedVertex = extractExecutionVertex(0);
    // The failure is triggered with a null cause on purpose.
    final long failureTime = triggerFailure(failedVertex, null);

    final FailureHandlingResult resultWithoutError =
            FailureHandlingResult.restartable(
                    failedVertex.getID(),
                    null,
                    failureTime,
                    StreamSupport.stream(
                                    executionGraph.getAllExecutionVertices().spliterator(),
                                    false)
                            .map(ExecutionVertex::getID)
                            .collect(Collectors.toSet()),
                    0L,
                    false);

    final FailureHandlingResultSnapshot snapshot =
            FailureHandlingResultSnapshot.create(resultWithoutError, this::getLatestExecution);

    final Throwable roundTrippedCause =
            new SerializedThrowable(snapshot.getRootCause())
                    .deserializeError(ClassLoader.getSystemClassLoader());
    assertThat(roundTrippedCause, IsInstanceOf.instanceOf(FlinkException.class));
    assertThat(
            roundTrippedCause,
            FlinkMatchers.containsMessage(ErrorInfo.handleMissingThrowable(null).getMessage()));

    assertThat(snapshot.getTimestamp(), is(failureTime));
    assertThat(snapshot.getRootCauseExecution().isPresent(), is(true));
    assertThat(
            snapshot.getRootCauseExecution().get(),
            is(failedVertex.getCurrentExecutionAttempt()));
}
Use of org.apache.flink.runtime.executiongraph.failover.flip1.FailureHandlingResult in the Apache Flink project.
Class DefaultScheduler, method handleTaskFailure.
/**
 * Handles the failure of a single task.
 *
 * <p>One timestamp is taken up front and shared by the recorded failure cause and the failure
 * handler. The coordinators are notified before the failure handler is consulted.
 *
 * @param executionVertexId ID of the execution vertex whose task failed
 * @param error the cause of the task failure; may be {@code null}
 */
private void handleTaskFailure(
        final ExecutionVertexID executionVertexId, @Nullable final Throwable error) {
    final long failureTime = System.currentTimeMillis();
    setGlobalFailureCause(error, failureTime);
    notifyCoordinatorsAboutTaskFailure(executionVertexId, error);

    // The handler's result is passed on unchanged; maybeRestartTasks acts on it.
    maybeRestartTasks(
            executionFailureHandler.getFailureHandlingResult(
                    executionVertexId, error, failureTime));
}
Aggregations