Example use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in the Apache Flink project.
From the class DefaultSchedulerTest, method testExceptionHistoryConcurrentRestart.
/**
 * Checks how the exception history is populated when two failures are reported before the delay
 * executor's scheduled tasks are triggered.
 *
 * <p>Failure #0 is reported for the first vertex with only that vertex configured for restart;
 * failure #1 is reported for the second vertex with both vertices configured for restart. The
 * tasks collected by the delay executor are reversed before being triggered. The history is then
 * expected to hold two root entries: failure #0 carrying failure #1 as its only concurrent
 * exception, followed by failure #1 without any concurrent exceptions.
 */
@Test
public void testExceptionHistoryConcurrentRestart() throws Exception {
    final JobGraph graph = singleJobVertexJobGraph(2);

    // Replace the slot allocator factory with one built from a slot builder that carries a
    // local TaskManager location.
    final TaskManagerLocation location = new LocalTaskManagerLocation();
    final TestingLogicalSlotBuilder slotBuilder = new TestingLogicalSlotBuilder();
    slotBuilder.setTaskManagerLocation(location);
    executionSlotAllocatorFactory = new TestExecutionSlotAllocatorFactory(slotBuilder);

    final ReorganizableManuallyTriggeredScheduledExecutor manualDelayExecutor =
            new ReorganizableManuallyTriggeredScheduledExecutor();
    final TestFailoverStrategyFactory restartSelection = new TestFailoverStrategyFactory();
    final DefaultScheduler scheduler =
            createScheduler(
                    graph,
                    ComponentMainThreadExecutorServiceAdapter.forMainThread(),
                    new PipelinedRegionSchedulingStrategy.Factory(),
                    restartSelection,
                    manualDelayExecutor);
    scheduler.startScheduling();

    final Iterable<? extends ExecutionVertex> allVertices =
            scheduler.getExecutionGraph().getAllExecutionVertices();
    final ExecutionVertex firstVertex = Iterables.get(allVertices, 0);
    final ExecutionVertex secondVertex = Iterables.get(allVertices, 1);

    // single-ExecutionVertex failure
    final RuntimeException firstFailure = new RuntimeException("failure #0");
    restartSelection.setTasksToRestart(firstVertex.getID());
    final long firstFailureTimestamp =
            initiateFailure(
                    scheduler,
                    firstVertex.getCurrentExecutionAttempt().getAttemptId(),
                    firstFailure);

    // multi-ExecutionVertex failure
    final RuntimeException secondFailure = new RuntimeException("failure #1");
    restartSelection.setTasksToRestart(secondVertex.getID(), firstVertex.getID());
    final long secondFailureTimestamp =
            initiateFailure(
                    scheduler,
                    secondVertex.getCurrentExecutionAttempt().getAttemptId(),
                    secondFailure);

    // Tasks scheduled very close to each other may race inside the delay executor; flipping
    // the order of the collected tasks simulates that situation.
    Collections.reverse(manualDelayExecutor.getCollectedScheduledTasks());
    manualDelayExecutor.triggerNonPeriodicScheduledTasks();

    assertThat(scheduler.getExceptionHistory(), IsIterableWithSize.iterableWithSize(2));
    final Iterator<RootExceptionHistoryEntry> historyEntries =
            scheduler.getExceptionHistory().iterator();

    // first root entry: failure #0, with failure #1 recorded as concurrent
    final RootExceptionHistoryEntry firstEntry = historyEntries.next();
    assertThat(
            firstEntry,
            is(
                    ExceptionHistoryEntryMatcher.matchesFailure(
                            firstFailure,
                            firstFailureTimestamp,
                            firstVertex.getTaskNameWithSubtaskIndex(),
                            firstVertex.getCurrentAssignedResourceLocation())));
    assertThat(
            firstEntry.getConcurrentExceptions(),
            IsIterableContainingInOrder.contains(
                    ExceptionHistoryEntryMatcher.matchesFailure(
                            secondFailure,
                            secondFailureTimestamp,
                            secondVertex.getTaskNameWithSubtaskIndex(),
                            secondVertex.getCurrentAssignedResourceLocation())));

    // second root entry: failure #1 on its own
    final RootExceptionHistoryEntry secondEntry = historyEntries.next();
    assertThat(
            secondEntry,
            is(
                    ExceptionHistoryEntryMatcher.matchesFailure(
                            secondFailure,
                            secondFailureTimestamp,
                            secondVertex.getTaskNameWithSubtaskIndex(),
                            secondVertex.getCurrentAssignedResourceLocation())));
    assertThat(secondEntry.getConcurrentExceptions(), IsEmptyIterable.emptyIterable());
}
Example use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in the Apache Flink project.
From the class AdaptiveSchedulerTest, method testExceptionHistoryWithTaskFailure.
/**
 * Reports a FAILED state carrying a known exception for the execution attempt at index 1 and
 * asserts that the resulting exception history has exactly one root entry whose deserialized
 * exception equals the reported one.
 */
@Test
public void testExceptionHistoryWithTaskFailure() throws Exception {
    final Exception expectedException = new Exception("Expected Local Exception");

    BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> failSecondAttempt =
            (scheduler, attemptIds) -> {
                final TaskExecutionState failedState =
                        new TaskExecutionState(
                                attemptIds.get(1), ExecutionState.FAILED, expectedException);
                scheduler.updateTaskExecutionState(
                        new TaskExecutionStateTransition(failedState));
            };

    final Iterable<RootExceptionHistoryEntry> history =
            runExceptionHistoryTests(failSecondAttempt);

    assertThat(history).hasSize(1);
    final RootExceptionHistoryEntry onlyEntry = history.iterator().next();
    assertThat(onlyEntry.getException().deserializeError(classLoader))
            .isEqualTo(expectedException);
}
Example use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in the Apache Flink project.
From the class AdaptiveSchedulerTest, method testExceptionHistoryWithTaskConcurrentGlobalFailure.
/**
 * Reports two global failures back to back and asserts that the exception history has a single
 * root entry for the first exception, with the second exception as its only concurrent
 * exception.
 */
@Test
public void testExceptionHistoryWithTaskConcurrentGlobalFailure() throws Exception {
    final Exception firstGlobalFailure = new Exception("Expected Global Exception 1");
    final Exception secondGlobalFailure = new Exception("Expected Global Exception 2");

    // The attempt ids are not needed here: both failures are reported globally.
    BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> failGloballyTwice =
            (scheduler, ignoredAttemptIds) -> {
                scheduler.handleGlobalFailure(firstGlobalFailure);
                scheduler.handleGlobalFailure(secondGlobalFailure);
            };

    final Iterable<RootExceptionHistoryEntry> history =
            runExceptionHistoryTests(failGloballyTwice);
    assertThat(history).hasSize(1);

    final RootExceptionHistoryEntry rootEntry = history.iterator().next();
    assertThat(rootEntry.getException().deserializeError(classLoader))
            .isEqualTo(firstGlobalFailure);

    // Deserialize every concurrent exception so it can be compared with the original.
    final List<Throwable> concurrentFailures =
            IterableUtils.toStream(rootEntry.getConcurrentExceptions())
                    .map(entry -> entry.getException().deserializeError(classLoader))
                    .collect(Collectors.toList());
    assertThat(concurrentFailures).containsExactly(secondGlobalFailure);
}
Example use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in the Apache Flink project.
From the class AdaptiveSchedulerTest, method testExceptionHistoryWithTaskFailureFromStopWithSavepoint.
/**
 * Requests stop-with-savepoint and then reports a FAILED state carrying a known exception for the
 * execution attempt at index 1. Asserts that the exception history has exactly one root entry
 * whose deserialized exception equals the reported one.
 *
 * <p>The job graph is given snapshot settings, and the scheduler builder is given a checkpoint
 * recovery factory and a checkpoints cleaner, before the test logic runs.
 */
@Test
public void testExceptionHistoryWithTaskFailureFromStopWithSavepoint() throws Exception {
    final Exception expectedException = new Exception("Expected Local Exception");

    Consumer<JobGraph> enableCheckpointing =
            jobGraph -> {
                jobGraph.setSnapshotSettings(
                        new JobCheckpointingSettings(
                                CheckpointCoordinatorConfiguration.builder().build(), null));
            };

    final CompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
    final CheckpointIDCounter idCounter = new StandaloneCheckpointIDCounter();
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();
    TestingCheckpointRecoveryFactory recoveryFactory =
            new TestingCheckpointRecoveryFactory(checkpointStore, idCounter);

    Consumer<AdaptiveSchedulerBuilder> configureCheckpointing =
            builder -> {
                builder.setCheckpointRecoveryFactory(recoveryFactory)
                        .setCheckpointCleaner(cleaner);
            };

    BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> stopThenFail =
            (scheduler, attemptIds) -> {
                final ExecutionAttemptID failingAttempt = attemptIds.get(1);
                scheduler.stopWithSavepoint(
                        "file:///tmp/target", true, SavepointFormatType.CANONICAL);
                final TaskExecutionState failedState =
                        new TaskExecutionState(
                                failingAttempt, ExecutionState.FAILED, expectedException);
                scheduler.updateTaskExecutionState(
                        new TaskExecutionStateTransition(failedState));
            };

    final Iterable<RootExceptionHistoryEntry> history =
            runExceptionHistoryTests(stopThenFail, configureCheckpointing, enableCheckpointing);

    assertThat(history).hasSize(1);
    final RootExceptionHistoryEntry onlyEntry = history.iterator().next();
    assertThat(onlyEntry.getException().deserializeError(classLoader))
            .isEqualTo(expectedException);
}
Example use of org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry in the Apache Flink project.
From the class DefaultJobMasterServiceProcessTest, method testInitializationFailureSetsExceptionHistoryProperly.
/**
 * Completes the JobMasterService future exceptionally and inspects the single exception-history
 * entry exposed through the process's result future.
 *
 * <p>The entry is checked via {@code assertInitializationException} against the original cause
 * and the wall-clock timestamps taken immediately before and after the failure, and it must be
 * flagged as global.
 */
@Test
public void testInitializationFailureSetsExceptionHistoryProperly()
        throws ExecutionException, InterruptedException {
    final CompletableFuture<JobMasterService> serviceFuture = new CompletableFuture<>();
    DefaultJobMasterServiceProcess process = createTestInstance(serviceFuture);
    final RuntimeException cause = new RuntimeException("Expected RuntimeException");

    // Bracket the failure with timestamps so the recorded entry time can be range-checked.
    long lowerBound = System.currentTimeMillis();
    serviceFuture.completeExceptionally(cause);
    long upperBound = System.currentTimeMillis();

    final RootExceptionHistoryEntry onlyEntry =
            Iterables.getOnlyElement(
                    process.getResultFuture()
                            .get()
                            .getExecutionGraphInfo()
                            .getExceptionHistory());

    assertInitializationException(
            onlyEntry.getException(), cause, onlyEntry.getTimestamp(), lowerBound, upperBound);
    assertThat(onlyEntry.isGlobal()).isTrue();
}
Aggregations