Search in sources:

Example 21 with ErrorInfo

use of org.apache.flink.runtime.executiongraph.ErrorInfo in project flink by apache.

the class JobExceptionsHandlerTest method createAccessExecutionGraph.

/**
 * Builds an {@link ExecutionGraphInfo} test fixture.
 *
 * <p>The archived graph gets {@code numTasks} job vertices (each keyed by a fresh
 * {@link JobVertexID}) and a failure cause; the exception history holds a single
 * root entry that shares the same throwable and timestamp.
 *
 * @param numTasks number of archived job vertices to register
 * @return the assembled graph info
 */
private static ExecutionGraphInfo createAccessExecutionGraph(int numTasks) {
    final Map<JobVertexID, ArchivedExecutionJobVertex> verticesById = new HashMap<>();
    for (int remaining = numTasks; remaining > 0; remaining--) {
        final JobVertexID vertexId = new JobVertexID();
        verticesById.put(vertexId, createArchivedExecutionJobVertex(vertexId));
    }

    // The same throwable/timestamp pair feeds both the history entry and the graph's failure cause.
    final Throwable rootFailure = new RuntimeException("root cause");
    final long failedAt = System.currentTimeMillis();

    final RootExceptionHistoryEntry historyEntry =
            new RootExceptionHistoryEntry(
                    rootFailure,
                    failedAt,
                    "test task #1",
                    new LocalTaskManagerLocation(),
                    Collections.emptySet());
    final List<RootExceptionHistoryEntry> history = Collections.singletonList(historyEntry);

    final ErrorInfo graphFailure = new ErrorInfo(rootFailure, failedAt);
    return new ExecutionGraphInfo(
            new ArchivedExecutionGraphBuilder()
                    .setFailureCause(graphFailure)
                    .setTasks(verticesById)
                    .build(),
            history);
}
Also used : ArchivedExecutionJobVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) HashMap(java.util.HashMap) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) SerializedThrowable(org.apache.flink.util.SerializedThrowable) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder)

Example 22 with ErrorInfo

use of org.apache.flink.runtime.executiongraph.ErrorInfo in project flink by apache.

the class JobExceptionsHandler method createJobExceptionsInfo.

/**
 * Assembles the exceptions response for a job.
 *
 * <p>If the archived graph carries no failure info, an empty response is returned.
 * Otherwise every execution vertex with failure info contributes one entry, up to
 * {@code exceptionToReportMaxSize}; hitting another failed vertex once that limit is
 * reached marks the result as truncated and stops the scan.
 *
 * @param executionGraphInfo source of the archived graph and the exception history
 * @param exceptionToReportMaxSize upper bound on the number of reported task exceptions
 * @return the response holding root cause, per-task exceptions and the exception history
 */
private static JobExceptionsInfoWithHistory createJobExceptionsInfo(ExecutionGraphInfo executionGraphInfo, int exceptionToReportMaxSize) {
    final ArchivedExecutionGraph archivedGraph = executionGraphInfo.getArchivedExecutionGraph();
    if (executionGraph_hasNoFailure(archivedGraph)) {
        return new JobExceptionsInfoWithHistory();
    }

    final List<JobExceptionsInfo.ExecutionExceptionInfo> reported = new ArrayList<>();
    boolean limitExceeded = false;
    for (AccessExecutionVertex vertex : archivedGraph.getAllExecutionVertices()) {
        final Optional<ErrorInfo> vertexFailure = vertex.getFailureInfo();
        if (!vertexFailure.isPresent()) {
            continue;
        }
        if (reported.size() >= exceptionToReportMaxSize) {
            limitExceeded = true;
            break;
        }

        final TaskManagerLocation assignedLocation = vertex.getCurrentAssignedResourceLocation();
        final String renderedLocation = toString(assignedLocation);

        // A FAILED timestamp of 0 is reported as -1.
        long failedAt = vertex.getStateTimestamp(ExecutionState.FAILED);
        if (failedAt == 0) {
            failedAt = -1;
        }

        reported.add(
                new JobExceptionsInfo.ExecutionExceptionInfo(
                        vertexFailure.get().getExceptionAsString(),
                        vertex.getTaskNameWithSubtaskIndex(),
                        renderedLocation,
                        failedAt));
    }

    final ErrorInfo jobFailure = archivedGraph.getFailureInfo();
    return new JobExceptionsInfoWithHistory(
            jobFailure.getExceptionAsString(),
            jobFailure.getTimestamp(),
            reported,
            limitExceeded,
            createJobExceptionHistory(executionGraphInfo.getExceptionHistory(), exceptionToReportMaxSize));
}

/** Tells whether the archived graph has no failure info attached. */
private static boolean executionGraph_hasNoFailure(ArchivedExecutionGraph archivedGraph) {
    return archivedGraph.getFailureInfo() == null;
}
Also used : TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArrayList(java.util.ArrayList) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) JobExceptionsInfo(org.apache.flink.runtime.rest.messages.JobExceptionsInfo) JobExceptionsInfoWithHistory(org.apache.flink.runtime.rest.messages.JobExceptionsInfoWithHistory) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 23 with ErrorInfo

use of org.apache.flink.runtime.executiongraph.ErrorInfo in project flink by apache.

the class DefaultSchedulerTest method testExceptionHistoryWithPreDeployFailure.

/**
 * Checks that a task failure raised while the slot request is still pending shows up in
 * the scheduler's exception history, with a {@code null} TaskManagerLocation.
 */
@Test
public void testExceptionHistoryWithPreDeployFailure() {
    // disable auto-completing slot requests to simulate timeout
    executionSlotAllocatorFactory.getTestExecutionSlotAllocator().disableAutoCompletePendingRequests();
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(singleNonParallelJobVertexJobGraph());
    executionSlotAllocatorFactory.getTestExecutionSlotAllocator().timeoutPendingRequests();
    // grab the single execution vertex of the archived graph
    // NOTE(review): this snapshot is taken before the restart task below is triggered — presumably
    // so the failure info of the failed attempt is still visible; confirm before reordering
    final ArchivedExecutionVertex taskFailureExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    // pending slot request timeout triggers a task failure that needs to be processed
    taskRestartExecutor.triggerNonPeriodicScheduledTask();
    // sanity check that the TaskManagerLocation of the failed task is indeed null, as expected
    assertThat(taskFailureExecutionVertex.getCurrentAssignedResourceLocation(), is(nullValue()));
    // fail the test outright if the captured vertex carries no failure info
    final ErrorInfo failureInfo = taskFailureExecutionVertex.getFailureInfo().orElseThrow(() -> new AssertionError("A failureInfo should be set."));
    final Iterable<RootExceptionHistoryEntry> actualExceptionHistory = scheduler.getExceptionHistory();
    // the history is matched against one expected entry built from the vertex's failure:
    // same exception, timestamp, task name and (null) location
    assertThat(actualExceptionHistory, IsIterableContainingInOrder.contains(ExceptionHistoryEntryMatcher.matchesFailure(failureInfo.getException(), failureInfo.getTimestamp(), taskFailureExecutionVertex.getTaskNameWithSubtaskIndex(), taskFailureExecutionVertex.getCurrentAssignedResourceLocation())));
}
Also used : RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 24 with ErrorInfo

use of org.apache.flink.runtime.executiongraph.ErrorInfo in project flink by apache.

the class ExceptionHistoryEntryTest method testWithMissingTaskManagerLocation.

/**
 * Creates an {@link ExceptionHistoryEntry} from an execution whose TaskManagerLocation is
 * {@code null} and checks that failure, timestamp and task name are carried over, that the
 * location stays {@code null}, and that the entry is not flagged as global.
 */
@Test
public void testWithMissingTaskManagerLocation() {
    final Exception expectedFailure = new Exception("Expected failure");
    final long expectedTimestamp = System.currentTimeMillis();
    final String expectedTaskName = "task name";

    final ErrorInfo errorInfo = new ErrorInfo(expectedFailure, expectedTimestamp);
    final TestingAccessExecution executionWithoutLocation =
            TestingAccessExecution.newBuilder()
                    .withTaskManagerLocation(null)
                    .withErrorInfo(errorInfo)
                    .build();

    final ExceptionHistoryEntry historyEntry =
            ExceptionHistoryEntry.create(executionWithoutLocation, expectedTaskName);

    assertThat(
            historyEntry.getException().deserializeError(ClassLoader.getSystemClassLoader()),
            is(expectedFailure));
    assertThat(historyEntry.getTimestamp(), is(expectedTimestamp));
    assertThat(historyEntry.getFailingTaskName(), is(expectedTaskName));
    assertThat(historyEntry.getTaskManagerLocation(), is(nullValue()));
    assertThat(historyEntry.isGlobal(), is(false));
}
Also used : ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) Test(org.junit.Test)

Example 25 with ErrorInfo

use of org.apache.flink.runtime.executiongraph.ErrorInfo in project flink by apache.

the class FailingTest method testTaskFailuresAreIgnored.

/**
 * Registers a FAILED execution at the mock execution graph, feeds the matching failing
 * state transition into the {@code Failing} state, and asserts that the context saw no
 * state transition.
 */
@Test
public void testTaskFailuresAreIgnored() throws Exception {
    try (MockFailingContext context = new MockFailingContext()) {
        StateTrackingMockExecutionGraph mockGraph = new StateTrackingMockExecutionGraph();
        Failing failingState = createFailingState(context, mockGraph);

        // register execution at EG
        Exception taskFailure = new RuntimeException();
        ErrorInfo taskErrorInfo = new ErrorInfo(taskFailure, System.currentTimeMillis());
        TestingAccessExecution failedExecution =
                TestingAccessExecution.newBuilder()
                        .withExecutionState(ExecutionState.FAILED)
                        .withErrorInfo(taskErrorInfo)
                        .build();
        mockGraph.registerExecution(failedExecution);

        TaskExecutionStateTransition failingTransition =
                ExecutingTest.createFailingStateTransition(failedExecution.getAttemptId(), taskFailure);
        failingState.updateTaskExecutionState(failingTransition);

        context.assertNoStateTransition();
    }
}
Also used : TestingAccessExecution(org.apache.flink.runtime.scheduler.exceptionhistory.TestingAccessExecution) TaskExecutionStateTransition(org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) Test(org.junit.Test)

Aggregations

ErrorInfo (org.apache.flink.runtime.executiongraph.ErrorInfo)27 Test (org.junit.Test)17 JobID (org.apache.flink.api.common.JobID)9 TaskExecutionStateTransition (org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition)8 TestingAccessExecution (org.apache.flink.runtime.scheduler.exceptionhistory.TestingAccessExecution)8 ArchivedExecutionGraphBuilder (org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder)7 FlinkException (org.apache.flink.util.FlinkException)7 JobStatus (org.apache.flink.api.common.JobStatus)5 CompletableFuture (java.util.concurrent.CompletableFuture)4 ArchivedExecutionGraph (org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph)4 ExecutionGraphInfo (org.apache.flink.runtime.scheduler.ExecutionGraphInfo)4 RootExceptionHistoryEntry (org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry)4 Duration (java.time.Duration)3 Deadline (org.apache.flink.api.common.time.Deadline)3 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)3 ArchivedExecutionVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex)3 LocalTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation)3 File (java.io.File)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2