Search in sources:

Example 16 with ExecutionGraphInfo

use of org.apache.flink.runtime.scheduler.ExecutionGraphInfo in project flink by apache.

the class DefaultExecutionGraphCacheTest method testImmediateCacheInvalidationAfterFailure.

/**
 * Tests that a failure in requesting an AccessExecutionGraph from the gateway will not create
 * a cache entry, i.e. another cache request will trigger a new gateway request.
 *
 * <p>The gateway is configured to answer the first request with a
 * {@link FlinkJobNotFoundException} and the following one with the expected result. The first
 * lookup through the cache must therefore fail, while the second one must succeed.
 *
 * @throws Exception if closing the cache or waiting for the second future fails unexpectedly
 */
@Test
public void testImmediateCacheInvalidationAfterFailure() throws Exception {
    final Time timeout = Time.milliseconds(100L);
    final Time timeToLive = Time.hours(1L);
    // let's first answer with a JobNotFoundException and then only with the correct result
    final CountingRestfulGateway restfulGateway = createCountingRestfulGateway(expectedJobId, FutureUtils.completedExceptionally(new FlinkJobNotFoundException(expectedJobId)), CompletableFuture.completedFuture(expectedExecutionGraphInfo));
    try (ExecutionGraphCache executionGraphCache = new DefaultExecutionGraphCache(timeout, timeToLive)) {
        CompletableFuture<ExecutionGraphInfo> executionGraphFuture = executionGraphCache.getExecutionGraphInfo(expectedJobId, restfulGateway);
        try {
            executionGraphFuture.get();
            fail("The execution graph future should have been completed exceptionally.");
        } catch (ExecutionException ee) {
            // The failure is expected here, so do not dump its stack trace on every run.
            // Surface the actual cause only if it is not of the expected type.
            assertTrue("Unexpected failure cause: " + ee.getCause(), ee.getCause() instanceof FlinkException);
        }
        // The failed lookup must not have been cached: the second request has to reach the
        // gateway again and obtain the correct result.
        CompletableFuture<ExecutionGraphInfo> executionGraphFuture2 = executionGraphCache.getExecutionGraphInfo(expectedJobId, restfulGateway);
        assertEquals(expectedExecutionGraphInfo, executionGraphFuture2.get());
    }
}
Also used : ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Time(org.apache.flink.api.common.time.Time) ExecutionException(java.util.concurrent.ExecutionException) FlinkException(org.apache.flink.util.FlinkException) Test(org.junit.Test)

Example 17 with ExecutionGraphInfo

use of org.apache.flink.runtime.scheduler.ExecutionGraphInfo in project flink by apache.

the class DefaultJobMasterServiceProcessTest method testSuccessOnTerminalState.

/**
 * Verifies that reporting a globally terminal state to the service process completes its
 * result future with a successful result that carries the reported execution graph state.
 */
@Test
public void testSuccessOnTerminalState() throws Exception {
    final CompletableFuture<JobMasterService> serviceFuture = new CompletableFuture<>();
    final DefaultJobMasterServiceProcess process = createTestInstance(serviceFuture);
    serviceFuture.complete(new TestingJobMasterService());

    final JobStatus terminalStatus = JobStatus.FINISHED;
    final ArchivedExecutionGraph finishedGraph =
            new ArchivedExecutionGraphBuilder()
                    .setState(terminalStatus)
                    .build();
    final ExecutionGraphInfo finishedGraphInfo = new ExecutionGraphInfo(finishedGraph);
    process.jobReachedGloballyTerminalState(finishedGraphInfo);

    // The result must be a success and must expose the state of the reported graph.
    assertThat(process.getResultFuture())
            .isCompletedWithValueMatching(JobManagerRunnerResult::isSuccess)
            .isCompletedWithValueMatching(
                    result ->
                            result.getExecutionGraphInfo()
                                            .getArchivedExecutionGraph()
                                            .getState()
                                    == terminalStatus);
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) Test(org.junit.jupiter.api.Test)

Example 18 with ExecutionGraphInfo

use of org.apache.flink.runtime.scheduler.ExecutionGraphInfo in project flink by apache.

the class JobMaster method jobStatusChanged.

/**
 * Handles a job status change. Nothing happens unless the new status is globally terminal.
 *
 * <p>For a globally terminal status, a partition clean-up task is handed to {@code runAsync},
 * the current job information is requested from the scheduler, and the completion actions
 * are notified through {@code futureExecutor}.
 *
 * @param newJobStatus the status the job has just switched to
 */
private void jobStatusChanged(final JobStatus newJobStatus) {
    validateRunsInMainThread();
    if (!newJobStatus.isGloballyTerminalState()) {
        return;
    }

    runAsync(() -> {
        // The set of tracked partitions is determined when the task runs, not when it is submitted.
        final Collection<ResultPartitionID> trackedPartitionIds =
                partitionTracker.getAllTrackedPartitions()
                        .stream()
                        .map(descriptor -> descriptor.getShuffleDescriptor().getResultPartitionID())
                        .collect(Collectors.toList());
        if (newJobStatus != JobStatus.FINISHED) {
            partitionTracker.stopTrackingAndReleasePartitions(trackedPartitionIds);
        } else {
            // Only a FINISHED job takes the release-or-promote path.
            partitionTracker.stopTrackingAndReleaseOrPromotePartitions(trackedPartitionIds);
        }
    });

    final ExecutionGraphInfo terminalGraphInfo = schedulerNG.requestJob();
    futureExecutor.execute(
            () -> jobCompletionActions.jobReachedGloballyTerminalState(terminalGraphInfo));
}
Also used : ShuffleMaster(org.apache.flink.runtime.shuffle.ShuffleMaster) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) RpcServiceUtils(org.apache.flink.runtime.rpc.RpcServiceUtils) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) HeartbeatListener(org.apache.flink.runtime.heartbeat.HeartbeatListener) JobManagerJobMetricGroupFactory(org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) HeartbeatManager(org.apache.flink.runtime.heartbeat.HeartbeatManager) InstantiationUtil(org.apache.flink.util.InstantiationUtil) JobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.JobMasterPartitionTracker) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) Map(java.util.Map) PermanentlyFencedRpcEndpoint(org.apache.flink.runtime.rpc.PermanentlyFencedRpcEndpoint) JobShuffleContext(org.apache.flink.runtime.shuffle.JobShuffleContext) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) BlobWriter(org.apache.flink.runtime.blob.BlobWriter) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) UnknownKvStateLocation(org.apache.flink.runtime.query.UnknownKvStateLocation) 
JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) JobManagerJobMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup) HeartbeatReceiver(org.apache.flink.runtime.heartbeat.HeartbeatReceiver) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) RegisteredRpcConnection(org.apache.flink.runtime.registration.RegisteredRpcConnection) UUID(java.util.UUID) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Objects(java.util.Objects) RetryingRegistration(org.apache.flink.runtime.registration.RetryingRegistration) SerializedValue(org.apache.flink.util.SerializedValue) CoordinationRequest(org.apache.flink.runtime.operators.coordination.CoordinationRequest) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) ResolutionMode(org.apache.flink.runtime.taskmanager.TaskManagerLocation.ResolutionMode) FlinkException(org.apache.flink.util.FlinkException) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) 
OnCompletionActions(org.apache.flink.runtime.jobmanager.OnCompletionActions) JobShuffleContextImpl(org.apache.flink.runtime.shuffle.JobShuffleContextImpl) HeartbeatSender(org.apache.flink.runtime.heartbeat.HeartbeatSender) CoordinationResponse(org.apache.flink.runtime.operators.coordination.CoordinationResponse) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) RpcService(org.apache.flink.runtime.rpc.RpcService) SchedulerNG(org.apache.flink.runtime.scheduler.SchedulerNG) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) KvStateLocation(org.apache.flink.runtime.query.KvStateLocation) Executor(java.util.concurrent.Executor) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) IOException(java.io.IOException) AccumulatorSnapshot(org.apache.flink.runtime.accumulators.AccumulatorSnapshot) JobStatusListener(org.apache.flink.runtime.executiongraph.JobStatusListener) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) NoOpHeartbeatManager(org.apache.flink.runtime.heartbeat.NoOpHeartbeatManager) JobID(org.apache.flink.api.common.JobID) 
TaskStateSnapshot.deserializeTaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot.deserializeTaskStateSnapshot) PartitionTrackerFactory(org.apache.flink.runtime.io.network.partition.PartitionTrackerFactory) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) KvStateID(org.apache.flink.queryablestate.KvStateID) OperatorEvent(org.apache.flink.runtime.operators.coordination.OperatorEvent) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID)

Example 19 with ExecutionGraphInfo

use of org.apache.flink.runtime.scheduler.ExecutionGraphInfo in project flink by apache.

the class DispatcherTest method testCacheJobExecutionResult.

/**
 * Checks that the {@link JobResult} of a job is cached once the job has finished: after a
 * failed execution graph is handed to the dispatcher, both the job status and the execution
 * graph info can be requested through the dispatcher gateway.
 */
@Test
public void testCacheJobExecutionResult() throws Exception {
    dispatcher =
            createAndStartDispatcher(
                    heartbeatServices,
                    haServices,
                    new ExpectedJobIdJobManagerRunnerFactory(jobId, createdJobManagerRunnerLatch));
    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

    final JobID failedJobId = new JobID();
    final JobStatus terminalState = JobStatus.FAILED;
    final ErrorInfo failure = new ErrorInfo(new RuntimeException("expected"), 1L);
    final ExecutionGraphInfo failedGraphInfo =
            new ExecutionGraphInfo(
                    new ArchivedExecutionGraphBuilder()
                            .setJobID(failedJobId)
                            .setState(terminalState)
                            .setFailureCause(failure)
                            .build());

    dispatcher.completeJobExecution(failedGraphInfo);

    // Both lookups must be answered for the completed job.
    assertThat(
            gateway.requestJobStatus(failedJobId, TIMEOUT).get(),
            equalTo(terminalState));
    assertThat(
            gateway.requestExecutionGraphInfo(failedJobId, TIMEOUT).get(),
            equalTo(failedGraphInfo));
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 20 with ExecutionGraphInfo

use of org.apache.flink.runtime.scheduler.ExecutionGraphInfo in project flink by apache.

the class DispatcherTest method testFatalErrorIfRecoveredJobsCannotBeStarted.

/**
 * Verifies that the {@link Dispatcher} reports a fatal error when a recovered job cannot be
 * started. See FLINK-9097.
 */
@Test
public void testFatalErrorIfRecoveredJobsCannotBeStarted() throws Exception {
    final FlinkException testException = new FlinkException("Test exception");
    jobMasterLeaderElectionService.isLeader(UUID.randomUUID());

    final TestingJobMasterServiceLeadershipRunnerFactory runnerFactory =
            new TestingJobMasterServiceLeadershipRunnerFactory();
    dispatcher =
            createTestingDispatcherBuilder()
                    .setJobManagerRunnerFactory(runnerFactory)
                    .setRecoveredJobs(Collections.singleton(JobGraphTestUtils.emptyJobGraph()))
                    .build();
    dispatcher.start();

    final TestingFatalErrorHandler errorHandler =
            testingFatalErrorHandlerResource.getFatalErrorHandler();
    final TestingJobManagerRunner createdRunner = runnerFactory.takeCreatedJobManagerRunner();

    // Let the initialization of the JobManagerRunner fail
    final ExecutionGraphInfo failedGraphInfo =
            new ExecutionGraphInfo(
                    ArchivedExecutionGraph.createSparseArchivedExecutionGraph(
                            jobId,
                            jobGraph.getName(),
                            JobStatus.FAILED,
                            testException,
                            jobGraph.getCheckpointingSettings(),
                            1L));
    createdRunner.completeResultFuture(
            JobManagerRunnerResult.forInitializationFailure(failedGraphInfo, testException));

    // The reported fatal error has to contain the message of the injected exception.
    final Throwable reportedError =
            errorHandler.getErrorFuture().get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS);
    assertThat(
            ExceptionUtils.findThrowableWithMessage(reportedError, testException.getMessage())
                    .isPresent(),
            is(true));

    errorHandler.clearError();
}
Also used : TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) FlinkException(org.apache.flink.util.FlinkException) Test(org.junit.Test)

Aggregations

ExecutionGraphInfo (org.apache.flink.runtime.scheduler.ExecutionGraphInfo): 45
Test (org.junit.Test): 33
ArchivedExecutionGraphBuilder (org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder): 23
Time (org.apache.flink.api.common.time.Time): 12
CompletableFuture (java.util.concurrent.CompletableFuture): 11
JobID (org.apache.flink.api.common.JobID): 11
JobStatus (org.apache.flink.api.common.JobStatus): 9
FlinkException (org.apache.flink.util.FlinkException): 8
File (java.io.File): 7
TestingJobManagerRunner (org.apache.flink.runtime.jobmaster.TestingJobManagerRunner): 7
ArrayList (java.util.ArrayList): 6
ArchivedExecutionGraph (org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph): 6
FlinkJobNotFoundException (org.apache.flink.runtime.messages.FlinkJobNotFoundException): 6
EmptyRequestBody (org.apache.flink.runtime.rest.messages.EmptyRequestBody): 6
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5
JobManagerRunner (org.apache.flink.runtime.jobmaster.JobManagerRunner): 5
Acknowledge (org.apache.flink.runtime.messages.Acknowledge): 5
JobExceptionsInfoWithHistory (org.apache.flink.runtime.rest.messages.JobExceptionsInfoWithHistory): 5
RootExceptionHistoryEntry (org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry): 5
ExceptionUtils (org.apache.flink.util.ExceptionUtils): 5