Search in sources :

Example 16 with ErrorInfo

Use of org.apache.flink.runtime.executiongraph.ErrorInfo in the Apache Flink project.

In class CommonTestUtils, method waitForAllTaskRunning:

/**
 * Waits, via {@code waitUntilCondition}, until the supplied execution graph reports
 * {@link JobStatus#RUNNING} and every subtask of every job vertex satisfies the running check.
 *
 * <p>A subtask satisfies the check when its execution state is {@code RUNNING}, or when it is
 * {@code FINISHED} and {@code allowFinished} is {@code true}. Any other state makes the condition
 * evaluate to {@code false} for that poll.
 *
 * <p>If the graph is found in a globally terminal state, {@code fail} is invoked with the
 * graph's failure exception (when one is recorded) as the cause.
 *
 * @param executionGraphSupplier supplies the execution graph that is inspected on each evaluation
 *     of the condition
 * @param timeout deadline handed to {@code waitUntilCondition}
 * @param allowFinished whether a {@code FINISHED} subtask counts as running; when {@code false},
 *     a finished subtask raises a {@link RuntimeException}
 * @throws Exception whatever {@code waitUntilCondition} or the supplier propagates
 */
public static void waitForAllTaskRunning(SupplierWithException<AccessExecutionGraph, Exception> executionGraphSupplier, Deadline timeout, boolean allowFinished) throws Exception {
    Predicate<AccessExecutionVertex> subtaskPredicate = task -> {
        switch (task.getExecutionState()) {
            case RUNNING:
                return true;
            case FINISHED:
                if (allowFinished) {
                    return true;
                }
                // Separator added so the task description does not run into the message text.
                throw new RuntimeException("Sub-Task finished unexpectedly: " + task);
            default:
                return false;
        }
    };
    waitUntilCondition(() -> {
        final AccessExecutionGraph graph = executionGraphSupplier.get();
        // Read the state once so the terminal check, the failure message and the RUNNING check
        // all refer to the same observation.
        final JobStatus jobStatus = graph.getState();
        if (jobStatus.isGloballyTerminalState()) {
            final ErrorInfo failureInfo = graph.getFailureInfo();
            fail(format("Graph is in globally terminal state (%s)", jobStatus), failureInfo != null ? failureInfo.getException() : null);
        }
        return jobStatus == JobStatus.RUNNING
                && graph.getAllVertices().values().stream()
                        .allMatch(jobVertex -> Arrays.stream(jobVertex.getTaskVertices()).allMatch(subtaskPredicate));
    }, timeout);
}
Also used : Assertions.fail(org.junit.jupiter.api.Assertions.fail) Deadline(org.apache.flink.api.common.time.Deadline) Arrays(java.util.Arrays) BufferedInputStream(java.io.BufferedInputStream) FileUtils(org.apache.flink.util.FileUtils) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) TimeoutException(java.util.concurrent.TimeoutException) JobStatus(org.apache.flink.api.common.JobStatus) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Duration(java.time.Duration) Map(java.util.Map) MiniCluster(org.apache.flink.runtime.minicluster.MiniCluster) ManagementFactory(java.lang.management.ManagementFactory) PrintWriter(java.io.PrintWriter) RuntimeMXBean(java.lang.management.RuntimeMXBean) Predicate(java.util.function.Predicate) JobDetailsInfo(org.apache.flink.runtime.rest.messages.job.JobDetailsInfo) StringWriter(java.io.StringWriter) Collection(java.util.Collection) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) FileWriter(java.io.FileWriter) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) IOException(java.io.IOException) JobClient(org.apache.flink.core.execution.JobClient) File(java.io.File) String.format(java.lang.String.format) AccessExecutionGraph(org.apache.flink.runtime.executiongraph.AccessExecutionGraph) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) JobID(org.apache.flink.api.common.JobID) ChronoUnit(java.time.temporal.ChronoUnit) Stream(java.util.stream.Stream) SupplierWithException(org.apache.flink.util.function.SupplierWithException) InputStream(java.io.InputStream) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) AccessExecutionGraph(org.apache.flink.runtime.executiongraph.AccessExecutionGraph) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 17 with ErrorInfo

Use of org.apache.flink.runtime.executiongraph.ErrorInfo in the Apache Flink project.

In class DispatcherTest, method testRetrieveJobResultAfterSubmissionOfFailedJob:

/**
 * Submits a job that is already failed and checks that the archived execution graph requested
 * afterwards carries the job id, the job name, the FAILED status and the original exception.
 */
@Test
public void testRetrieveJobResultAfterSubmissionOfFailedJob() throws Exception {
    dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(jobId, createdJobManagerRunnerLatch));
    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

    final JobID idOfFailedJob = new JobID();
    final String nameOfFailedJob = "test";

    // Wait for the submission to be acknowledged before querying the job.
    gateway.submitFailedJob(idOfFailedJob, nameOfFailedJob, new RuntimeException("Test exception.")).get();

    final ArchivedExecutionGraph archivedGraph = gateway.requestJob(idOfFailedJob, TIMEOUT).get();
    Assertions.assertThat(archivedGraph.getJobID()).isEqualTo(idOfFailedJob);
    Assertions.assertThat(archivedGraph.getJobName()).isEqualTo(nameOfFailedJob);
    Assertions.assertThat(archivedGraph.getState()).isEqualTo(JobStatus.FAILED);

    // The failure info must be present and must deserialize to the submitted exception.
    final ErrorInfo failureInfo = archivedGraph.getFailureInfo();
    Assertions.assertThat(failureInfo).isNotNull();
    final Throwable deserializedFailure = failureInfo.getException().deserializeError(Thread.currentThread().getContextClassLoader());
    Assertions.assertThat(deserializedFailure).isInstanceOf(RuntimeException.class).hasMessage("Test exception.");
}
Also used : Arrays(java.util.Arrays) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobManagerJobMetricGroupFactory(org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) DefaultJobMasterServiceProcessFactory(org.apache.flink.runtime.jobmaster.factories.DefaultJobMasterServiceProcessFactory) Duration(java.time.Duration) Is.is(org.hamcrest.core.Is.is) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) Path(java.nio.file.Path) BlockingQueue(java.util.concurrent.BlockingQueue) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) CheckpointMetadata(org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Assert.assertFalse(org.junit.Assert.assertFalse) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) JobMasterServiceLeadershipRunner(org.apache.flink.runtime.jobmaster.JobMasterServiceLeadershipRunner) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) FlinkException(org.apache.flink.util.FlinkException) BlobServer(org.apache.flink.runtime.blob.BlobServer) JobStatus(org.apache.flink.api.common.JobStatus) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) 
PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) Before(org.junit.Before) JobManagerRunnerResult(org.apache.flink.runtime.jobmaster.JobManagerRunnerResult) CheckpointStorageLocation(org.apache.flink.runtime.state.CheckpointStorageLocation) Files(java.nio.file.Files) JobMasterServiceProcessFactory(org.apache.flink.runtime.jobmaster.factories.JobMasterServiceProcessFactory) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) TestingJobMasterServiceFactory(org.apache.flink.runtime.jobmaster.factories.TestingJobMasterServiceFactory) JobID(org.apache.flink.api.common.JobID) Paths(java.nio.file.Paths) Assert(org.junit.Assert) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) Deadline(org.apache.flink.api.common.time.Deadline) NoSuchFileException(java.nio.file.NoSuchFileException) URISyntaxException(java.net.URISyntaxException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) Assert.assertThat(org.junit.Assert.assertThat) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) JobMasterService(org.apache.flink.runtime.jobmaster.JobMasterService) Assertions(org.assertj.core.api.Assertions) Checkpoints(org.apache.flink.runtime.checkpoint.Checkpoints) Assert.fail(org.junit.Assert.fail) URI(java.net.URI) 
HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) FlinkJobTerminatedWithoutCancellationException(org.apache.flink.runtime.messages.FlinkJobTerminatedWithoutCancellationException) UUID(java.util.UUID) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) CheckpointMetadataOutputStream(org.apache.flink.runtime.state.CheckpointMetadataOutputStream) TestingCleanupRunnerFactory(org.apache.flink.runtime.dispatcher.cleanup.TestingCleanupRunnerFactory) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JobManagerSharedServices(org.apache.flink.runtime.jobmaster.JobManagerSharedServices) Queue(java.util.Queue) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CheckpointStorageCoordinatorView(org.apache.flink.runtime.state.CheckpointStorageCoordinatorView) LeaderElectionService(org.apache.flink.runtime.leaderelection.LeaderElectionService) Assert.assertThrows(org.junit.Assert.assertThrows) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicReference(java.util.concurrent.atomic.AtomicReference) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) JobResult(org.apache.flink.runtime.jobmaster.JobResult) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) Nonnull(javax.annotation.Nonnull) 
ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) Configuration(org.apache.flink.configuration.Configuration) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) TestingJobMasterService(org.apache.flink.runtime.jobmaster.TestingJobMasterService) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) TimeUnit(java.util.concurrent.TimeUnit) MultipleJobsDetails(org.apache.flink.runtime.messages.webmonitor.MultipleJobsDetails) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) DuplicateJobSubmissionException(org.apache.flink.runtime.client.DuplicateJobSubmissionException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 18 with ErrorInfo

Use of org.apache.flink.runtime.executiongraph.ErrorInfo in the Apache Flink project.

In class DispatcherTest, method testCacheJobExecutionResult:

/**
 * Completes a FAILED job execution on the dispatcher and verifies that both the job status and
 * the execution graph info can still be requested for that job afterwards.
 */
@Test
public void testCacheJobExecutionResult() throws Exception {
    dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(jobId, createdJobManagerRunnerLatch));
    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

    final JobID idOfFailedJob = new JobID();
    final JobStatus terminalState = JobStatus.FAILED;
    final ErrorInfo failureCause = new ErrorInfo(new RuntimeException("expected"), 1L);

    final ExecutionGraphInfo completedGraphInfo =
            new ExecutionGraphInfo(
                    new ArchivedExecutionGraphBuilder()
                            .setJobID(idOfFailedJob)
                            .setState(terminalState)
                            .setFailureCause(failureCause)
                            .build());

    dispatcher.completeJobExecution(completedGraphInfo);

    // Both lookups must be answered for the completed job.
    assertThat(dispatcherGateway(gateway).requestJobStatus(idOfFailedJob, TIMEOUT).get(), equalTo(terminalState));
    assertThat(dispatcherGateway(gateway).requestExecutionGraphInfo(idOfFailedJob, TIMEOUT).get(), equalTo(completedGraphInfo));
}

/** Identity helper that keeps the assertion lines reading as gateway lookups. */
private static DispatcherGateway dispatcherGateway(DispatcherGateway gateway) {
    return gateway;
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 19 with ErrorInfo

Use of org.apache.flink.runtime.executiongraph.ErrorInfo in the Apache Flink project.

In class JobExceptionsHandlerTest, method createArchivedExecutionJobVertex:

/**
 * Builds an {@link ArchivedExecutionJobVertex} holding a single archived subtask in state
 * RUNNING whose execution carries a {@link RuntimeException} ("error") as failure info.
 *
 * @param jobVertexID id used for the job vertex; its string form is passed as the vertex name
 * @return the archived job vertex fixture
 */
private static ArchivedExecutionJobVertex createArchivedExecutionJobVertex(JobVertexID jobVertexID) {
    final StringifiedAccumulatorResult[] noAccumulators = new StringifiedAccumulatorResult[0];
    final long[] stateTimestamps = new long[ExecutionState.values().length];
    final ExecutionState state = ExecutionState.RUNNING;
    final LocalTaskManagerLocation location = new LocalTaskManagerLocation();
    final AllocationID allocation = new AllocationID();
    final int subtask = 1;
    final int attemptNumber = 2;

    // The single execution attempt, stamped with the current time as failure timestamp.
    final ArchivedExecution execution =
            new ArchivedExecution(
                    new StringifiedAccumulatorResult[0],
                    null,
                    new ExecutionAttemptID(),
                    attemptNumber,
                    state,
                    new ErrorInfo(new RuntimeException("error"), System.currentTimeMillis()),
                    location,
                    allocation,
                    subtask,
                    stateTimestamps);

    final ArchivedExecutionVertex vertex =
            new ArchivedExecutionVertex(subtask, "test task", execution, new EvictingBoundedList<>(0));

    return new ArchivedExecutionJobVertex(
            new ArchivedExecutionVertex[] { vertex },
            jobVertexID,
            jobVertexID.toString(),
            1,
            1,
            ResourceProfile.UNKNOWN,
            noAccumulators);
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ArchivedExecutionJobVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) StringifiedAccumulatorResult(org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult) ArchivedExecution(org.apache.flink.runtime.executiongraph.ArchivedExecution) EvictingBoundedList(org.apache.flink.runtime.util.EvictingBoundedList) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation)

Example 20 with ErrorInfo

Use of org.apache.flink.runtime.executiongraph.ErrorInfo in the Apache Flink project.

In class JobExceptionsHandlerTest, method createExecutionGraphInfo:

// -------- exception history related utility methods for creating the input data --------
/**
 * Creates an {@link ExecutionGraphInfo} from the given exception history entries.
 *
 * <p>The first entry, if any, is used as the failure cause of the archived execution graph. The
 * history itself is handed over in reversed order.
 *
 * @param historyEntries exception history entries; may be empty
 * @return execution graph info containing the built graph and the reversed history
 */
private static ExecutionGraphInfo createExecutionGraphInfo(RootExceptionHistoryEntry... historyEntries) {
    final ArchivedExecutionGraphBuilder graphBuilder = new ArchivedExecutionGraphBuilder();

    if (historyEntries.length > 0) {
        // first entry is root cause
        final RootExceptionHistoryEntry rootCause = historyEntries[0];
        graphBuilder.setFailureCause(new ErrorInfo(rootCause.getException(), rootCause.getTimestamp()));
    }

    final List<RootExceptionHistoryEntry> reversedHistory = new ArrayList<>();
    Collections.addAll(reversedHistory, historyEntries);
    // we have to reverse it to simulate how the Scheduler collects it
    Collections.reverse(reversedHistory);

    return new ExecutionGraphInfo(graphBuilder.build(), reversedHistory);
}
Also used : RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArrayList(java.util.ArrayList) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder)

Aggregations

ErrorInfo (org.apache.flink.runtime.executiongraph.ErrorInfo)27 Test (org.junit.Test)17 JobID (org.apache.flink.api.common.JobID)9 TaskExecutionStateTransition (org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition)8 TestingAccessExecution (org.apache.flink.runtime.scheduler.exceptionhistory.TestingAccessExecution)8 ArchivedExecutionGraphBuilder (org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder)7 FlinkException (org.apache.flink.util.FlinkException)7 JobStatus (org.apache.flink.api.common.JobStatus)5 CompletableFuture (java.util.concurrent.CompletableFuture)4 ArchivedExecutionGraph (org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph)4 ExecutionGraphInfo (org.apache.flink.runtime.scheduler.ExecutionGraphInfo)4 RootExceptionHistoryEntry (org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry)4 Duration (java.time.Duration)3 Deadline (org.apache.flink.api.common.time.Deadline)3 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)3 ArchivedExecutionVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex)3 LocalTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation)3 File (java.io.File)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2