Search in sources :

Example 6 with RootExceptionHistoryEntry

use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in project flink by apache.

the class ExecutionGraphInfoTest method testExecutionGraphHistoryBeingDerivedFromFailedExecutionGraph.

/**
 * Builds an {@link ExecutionGraphInfo} from a sparse archived graph in state {@code FAILED} and
 * checks that its exception history holds exactly one entry whose exception and timestamp equal
 * those of the graph's failure info, and which is global with no task name or location.
 */
@Test
public void testExecutionGraphHistoryBeingDerivedFromFailedExecutionGraph() {
    final ArchivedExecutionGraph failedGraph =
            ArchivedExecutionGraph.createSparseArchivedExecutionGraph(
                    new JobID(),
                    "test job name",
                    JobStatus.FAILED,
                    new RuntimeException("Expected RuntimeException"),
                    null,
                    System.currentTimeMillis());
    final ExecutionGraphInfo graphInfo = new ExecutionGraphInfo(failedGraph);

    final ErrorInfo graphFailure = graphInfo.getArchivedExecutionGraph().getFailureInfo();
    // getOnlyElement fails unless the history holds exactly one entry
    final RootExceptionHistoryEntry onlyEntry =
            Iterables.getOnlyElement(graphInfo.getExceptionHistory());

    // the history entry has to mirror the graph's failure info
    assertThat(graphFailure).isNotNull();
    assertThat(graphFailure.getException()).isEqualTo(onlyEntry.getException());
    assertThat(graphFailure.getTimestamp()).isEqualTo(onlyEntry.getTimestamp());

    // the entry is global and carries no task name or TaskManager location
    assertThat(onlyEntry.isGlobal()).isTrue();
    assertThat(onlyEntry.getFailingTaskName()).isNull();
    assertThat(onlyEntry.getTaskManagerLocation()).isNull();
}
Also used : RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) JobID(org.apache.flink.api.common.JobID) Test(org.junit.jupiter.api.Test)

Example 7 with RootExceptionHistoryEntry

use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in project flink by apache.

the class DefaultSchedulerTest method testExceptionHistoryWithRestartableFailure.

/**
 * Fails the single task twice (once while restarts are still allowed, once after
 * {@code setCanRestart(false)}) and checks that the scheduler's exception history contains, in
 * order, an entry matching the first failure (exception, timestamp, task name, location) and an
 * entry matching the second failure as a global failure.
 */
@Test
public void testExceptionHistoryWithRestartableFailure() {
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
    final TaskManagerLocation slotLocation = new LocalTaskManagerLocation();
    final TestingLogicalSlotBuilder slotBuilder = new TestingLogicalSlotBuilder();
    slotBuilder.setTaskManagerLocation(slotLocation);
    executionSlotAllocatorFactory = new TestExecutionSlotAllocatorFactory(slotBuilder);

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

    // initiate restartable failure
    final ArchivedExecutionVertex firstFailedVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final RuntimeException firstException = new RuntimeException("restartable exception");
    final ExecutionAttemptID firstAttemptId =
            firstFailedVertex.getCurrentExecutionAttempt().getAttemptId();
    final long firstFailureTimestamp = initiateFailure(scheduler, firstAttemptId, firstException);
    taskRestartExecutor.triggerNonPeriodicScheduledTask();

    // initiate job failure
    testRestartBackoffTimeStrategy.setCanRestart(false);
    final ArchivedExecutionVertex vertexAfterRestart =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final ExecutionAttemptID secondAttemptId =
            vertexAfterRestart.getCurrentExecutionAttempt().getAttemptId();
    final RuntimeException secondException = new RuntimeException("failing exception");
    final long secondFailureTimestamp =
            initiateFailure(scheduler, secondAttemptId, secondException);

    final Iterable<RootExceptionHistoryEntry> recordedHistory = scheduler.getExceptionHistory();

    // assert restarted attempt first, then the failure recorded as global
    final String expectedTaskName = firstFailedVertex.getTaskNameWithSubtaskIndex();
    final TaskManagerLocation expectedLocation =
            firstFailedVertex.getCurrentAssignedResourceLocation();
    assertThat(
            recordedHistory,
            IsIterableContainingInOrder.contains(
                    ExceptionHistoryEntryMatcher.matchesFailure(
                            firstException,
                            firstFailureTimestamp,
                            expectedTaskName,
                            expectedLocation),
                    ExceptionHistoryEntryMatcher.matchesGlobalFailure(
                            secondException, secondFailureTimestamp)));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 8 with RootExceptionHistoryEntry

use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in project flink by apache.

the class JobExceptionsHandlerTest method createExecutionGraphInfo.

// -------- exception history related utility methods for creating the input data --------
/**
 * Creates an {@link ExecutionGraphInfo} from the given history entries.
 *
 * <p>If at least one entry is passed, the first one is used as the failure cause of the built
 * graph. The entries are stored in reverse order of how they were passed.
 *
 * @param historyEntries entries to put into the exception history; the first is the root cause
 * @return graph info holding the built graph and the reversed history
 */
private static ExecutionGraphInfo createExecutionGraphInfo(RootExceptionHistoryEntry... historyEntries) {
    final ArchivedExecutionGraphBuilder graphBuilder = new ArchivedExecutionGraphBuilder();

    if (historyEntries.length > 0) {
        // first entry is root cause
        final RootExceptionHistoryEntry rootCause = historyEntries[0];
        graphBuilder.setFailureCause(new ErrorInfo(rootCause.getException(), rootCause.getTimestamp()));
    }

    final List<RootExceptionHistoryEntry> reversedHistory = new ArrayList<>();
    Collections.addAll(reversedHistory, historyEntries);
    // we have to reverse it to simulate how the Scheduler collects it
    Collections.reverse(reversedHistory);

    return new ExecutionGraphInfo(graphBuilder.build(), reversedHistory);
}
Also used : RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArrayList(java.util.ArrayList) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder)

Example 9 with RootExceptionHistoryEntry

use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in project flink by apache.

the class JobExceptionsHandlerTest method testWithExceptionHistory.

/**
 * Sends a request for a graph info built from one global failure and one task failure, then
 * checks that the response's exception history contains matching entries in that order and is
 * not reported as truncated.
 */
@Test
public void testWithExceptionHistory() throws HandlerRequestException {
    final RootExceptionHistoryEntry globalRootCause =
            fromGlobalFailure(new RuntimeException("exception #0"), System.currentTimeMillis());
    final RootExceptionHistoryEntry taskFailure =
            new RootExceptionHistoryEntry(
                    new RuntimeException("exception #1"),
                    System.currentTimeMillis(),
                    "task name",
                    new LocalTaskManagerLocation(),
                    Collections.emptySet());

    final ExecutionGraphInfo graphInfo = createExecutionGraphInfo(globalRootCause, taskFailure);
    final HandlerRequest<EmptyRequestBody> handlerRequest = createRequest(graphInfo.getJobId(), 10);
    final JobExceptionsInfoWithHistory handlerResponse =
            testInstance.handleRequest(handlerRequest, graphInfo);

    // the global failure is expected first, followed by the task failure
    assertThat(
            handlerResponse.getExceptionHistory().getEntries(),
            contains(
                    historyContainsGlobalFailure(
                            globalRootCause.getException(), globalRootCause.getTimestamp()),
                    historyContainsJobExceptionInfo(
                            taskFailure.getException(),
                            taskFailure.getTimestamp(),
                            taskFailure.getFailingTaskName(),
                            JobExceptionsHandler.toString(taskFailure.getTaskManagerLocation()))));
    assertFalse(handlerResponse.getExceptionHistory().isTruncated());
}
Also used : RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) EmptyRequestBody(org.apache.flink.runtime.rest.messages.EmptyRequestBody) JobExceptionsInfoWithHistory(org.apache.flink.runtime.rest.messages.JobExceptionsInfoWithHistory) Test(org.junit.Test)

Example 10 with RootExceptionHistoryEntry

use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in project flink by apache.

the class JobExceptionsHandlerTest method createAccessExecutionGraph.

/**
 * Creates an {@link ExecutionGraphInfo} whose graph has {@code numTasks} job vertices and a
 * failure cause, together with a single-entry exception history for that same cause.
 *
 * @param numTasks number of job vertices to register with the built graph
 * @return graph info holding the built graph and the one-element exception history
 */
private static ExecutionGraphInfo createAccessExecutionGraph(int numTasks) {
    final Map<JobVertexID, ArchivedExecutionJobVertex> verticesById = new HashMap<>();
    for (int remaining = numTasks; remaining > 0; remaining--) {
        final JobVertexID vertexId = new JobVertexID();
        verticesById.put(vertexId, createArchivedExecutionJobVertex(vertexId));
    }

    final Throwable rootCause = new RuntimeException("root cause");
    final long rootCauseTimestamp = System.currentTimeMillis();

    // the history entry and the graph's failure cause share exception and timestamp
    final RootExceptionHistoryEntry rootEntry =
            new RootExceptionHistoryEntry(
                    rootCause,
                    rootCauseTimestamp,
                    "test task #1",
                    new LocalTaskManagerLocation(),
                    Collections.emptySet());
    final List<RootExceptionHistoryEntry> history = Collections.singletonList(rootEntry);

    final ArchivedExecutionGraphBuilder graphBuilder = new ArchivedExecutionGraphBuilder();
    graphBuilder.setFailureCause(new ErrorInfo(rootCause, rootCauseTimestamp)).setTasks(verticesById);
    return new ExecutionGraphInfo(graphBuilder.build(), history);
}
Also used : ArchivedExecutionJobVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) HashMap(java.util.HashMap) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) SerializedThrowable(org.apache.flink.util.SerializedThrowable) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder)

Aggregations

RootExceptionHistoryEntry (org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry): 19; Test (org.junit.Test): 14; LocalTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation): 12; ArchivedExecutionVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex): 10; JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 10; ArrayList (java.util.ArrayList): 8; CompletableFuture (java.util.concurrent.CompletableFuture): 8; JobID (org.apache.flink.api.common.JobID): 8; IOException (java.io.IOException): 7; Duration (java.time.Duration): 7; Arrays (java.util.Arrays): 7; List (java.util.List): 7; Optional (java.util.Optional): 7; ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 7; BlockingQueue (java.util.concurrent.BlockingQueue): 7; ExecutionException (java.util.concurrent.ExecutionException): 7; Executors (java.util.concurrent.Executors): 7; ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService): 7; TimeUnit (java.util.concurrent.TimeUnit): 7; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 7