Search in sources :

Example 11 with UnresolvedTaskManagerLocation

use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.

the class TaskExecutorTest method createTaskExecutor.

private TaskExecutor createTaskExecutor(int numberOFSlots) throws IOException {
    final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(numberOFSlots);
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).build();
    configuration.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, numberOFSlots);
    return createTaskExecutor(taskManagerServices);
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)

Example 12 with UnresolvedTaskManagerLocation

use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.

the class TaskExecutorTest method runJobManagerHeartbeatTest.

private void runJobManagerHeartbeatTest(ResourceID jmResourceId, HeartbeatServices heartbeatServices, Consumer<TestingJobMasterGatewayBuilder> jobMasterGatewayBuilderConsumer, TriConsumer<ResourceID, TaskExecutorGateway, AllocationID> heartbeatAction) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());
    final String jobMasterAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    final OneShotLatch slotOfferedLatch = new OneShotLatch();
    final CompletableFuture<ResourceID> disconnectTaskManagerFuture = new CompletableFuture<>();
    final TestingJobMasterGatewayBuilder testingJobMasterGatewayBuilder = new TestingJobMasterGatewayBuilder().setRegisterTaskManagerFunction((ignoredJobId, ignoredTaskManagerRegistrationInformation) -> {
        registrationAttempts.countDown();
        return CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jmResourceId));
    }).setDisconnectTaskManagerFunction(resourceID -> {
        disconnectTaskManagerFuture.complete(resourceID);
        return CompletableFuture.completedFuture(Acknowledge.get());
    }).setOfferSlotsFunction((resourceID, slotOffers) -> {
        slotOfferedLatch.trigger();
        return CompletableFuture.completedFuture(slotOffers);
    });
    jobMasterGatewayBuilderConsumer.accept(testingJobMasterGatewayBuilder);
    final TestingJobMasterGateway jobMasterGateway = testingJobMasterGatewayBuilder.build();
    final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1)).setJobLeaderService(jobLeaderService).setTaskStateManager(localStateStoresManager).build();
    final TestingTaskExecutor taskManager = createTestingTaskExecutor(taskManagerServices, heartbeatServices);
    final OneShotLatch slotReportReceived = new OneShotLatch();
    final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
    testingResourceManagerGateway.setSendSlotReportFunction(ignored -> {
        slotReportReceived.trigger();
        return CompletableFuture.completedFuture(Acknowledge.get());
    });
    final Queue<CompletableFuture<RegistrationResponse>> registrationResponses = new ArrayDeque<>();
    registrationResponses.add(CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("foobar", 1234))));
    registrationResponses.add(new CompletableFuture<>());
    testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> registrationResponses.poll());
    rpc.registerGateway(jobMasterAddress, jobMasterGateway);
    rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
    try {
        taskManager.start();
        taskManager.waitUntilStarted();
        final TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
        resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
        slotReportReceived.await();
        final AllocationID allocationId = new AllocationID();
        requestSlot(taskExecutorGateway, jobId, allocationId, buildSlotID(0), ResourceProfile.UNKNOWN, jobMasterAddress, testingResourceManagerGateway.getFencingToken());
        // now inform the task manager about the new job leader
        jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jmLeaderId);
        // register task manager success will trigger monitoring heartbeat target between tm and
        // jm
        slotOfferedLatch.await();
        heartbeatAction.accept(unresolvedTaskManagerLocation.getResourceID(), taskExecutorGateway, allocationId);
        // the timeout should trigger disconnecting from the JobManager
        final ResourceID resourceID = disconnectTaskManagerFuture.get();
        assertThat(resourceID, equalTo(unresolvedTaskManagerLocation.getResourceID()));
        assertTrue("The TaskExecutor should try to reconnect to the JM", registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.SECONDS));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TaskSlotUtils.createDefaultTimerService(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createDefaultTimerService) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) InetAddress(java.net.InetAddress) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Matchers.nullValue(org.hamcrest.Matchers.nullValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Builder(org.apache.flink.runtime.taskexecutor.TaskSubmissionTestEnvironment.Builder) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) NoOpMetricRegistry(org.apache.flink.runtime.metrics.NoOpMetricRegistry) BlockingQueue(java.util.concurrent.BlockingQueue) Matchers.startsWith(org.hamcrest.Matchers.startsWith) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) Time(org.apache.flink.api.common.time.Time) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) TaskSlotTableImpl(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTableImpl) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) Callable(java.util.concurrent.Callable) DEFAULT_RESOURCE_PROFILE(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.DEFAULT_RESOURCE_PROFILE) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TestingTaskSlotTable) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Matchers.hasSize(org.hamcrest.Matchers.hasSize) TriConsumer(org.apache.flink.util.function.TriConsumer) TestFileUtils(org.apache.flink.testutils.TestFileUtils) Before(org.junit.Before) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TriConsumerWithException(org.apache.flink.util.function.TriConsumerWithException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InstanceID(org.apache.flink.runtime.instance.InstanceID) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Executors(org.apache.flink.util.concurrent.Executors) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) CPUResource(org.apache.flink.api.common.resources.CPUResource) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) IntStream.range(java.util.stream.IntStream.range) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Collection(java.util.Collection) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskManagerException(org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ExecutorUtils(org.apache.flink.util.ExecutorUtils) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Queue(java.util.Queue) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AllocatedSlotInfo(org.apache.flink.runtime.jobmaster.AllocatedSlotInfo) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TaskDeploymentDescriptorBuilder(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) TaskSlotUtils(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) Matchers.empty(org.hamcrest.Matchers.empty) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Rule(org.junit.Rule) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) InstanceID(org.apache.flink.runtime.instance.InstanceID) Matchers.containsString(org.hamcrest.Matchers.containsString) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) UUID(java.util.UUID) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ArrayDeque(java.util.ArrayDeque) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway)

Example 13 with UnresolvedTaskManagerLocation

use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.

the class TaskManagerServices method fromConfiguration.

// --------------------------------------------------------------------------------------------
// Static factory methods for task manager services
// --------------------------------------------------------------------------------------------
/**
 * Creates and returns the task manager services.
 *
 * @param taskManagerServicesConfiguration task manager configuration
 * @param permanentBlobService permanentBlobService used by the services
 * @param taskManagerMetricGroup metric group of the task manager
 * @param ioExecutor executor for async IO operations
 * @param fatalErrorHandler to handle class loading OOMs
 * @param workingDirectory the working directory of the process
 * @return task manager components
 * @throws Exception
 */
public static TaskManagerServices fromConfiguration(TaskManagerServicesConfiguration taskManagerServicesConfiguration, PermanentBlobService permanentBlobService, MetricGroup taskManagerMetricGroup, ExecutorService ioExecutor, FatalErrorHandler fatalErrorHandler, WorkingDirectory workingDirectory) throws Exception {
    // pre-start checks
    checkTempDirs(taskManagerServicesConfiguration.getTmpDirPaths());
    final TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher();
    // start the I/O manager, it will create some temp directories.
    final IOManager ioManager = new IOManagerAsync(taskManagerServicesConfiguration.getTmpDirPaths());
    final ShuffleEnvironment<?, ?> shuffleEnvironment = createShuffleEnvironment(taskManagerServicesConfiguration, taskEventDispatcher, taskManagerMetricGroup, ioExecutor);
    final int listeningDataPort = shuffleEnvironment.start();
    final KvStateService kvStateService = KvStateService.fromConfiguration(taskManagerServicesConfiguration);
    kvStateService.start();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new UnresolvedTaskManagerLocation(taskManagerServicesConfiguration.getResourceID(), taskManagerServicesConfiguration.getExternalAddress(), // iff the external data port is not explicitly defined
    taskManagerServicesConfiguration.getExternalDataPort() > 0 ? taskManagerServicesConfiguration.getExternalDataPort() : listeningDataPort);
    final BroadcastVariableManager broadcastVariableManager = new BroadcastVariableManager();
    final TaskSlotTable<Task> taskSlotTable = createTaskSlotTable(taskManagerServicesConfiguration.getNumberOfSlots(), taskManagerServicesConfiguration.getTaskExecutorResourceSpec(), taskManagerServicesConfiguration.getTimerServiceShutdownTimeout(), taskManagerServicesConfiguration.getPageSize(), ioExecutor);
    final JobTable jobTable = DefaultJobTable.create();
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, taskManagerServicesConfiguration.getRetryingRegistrationConfiguration());
    final TaskExecutorLocalStateStoresManager taskStateManager = new TaskExecutorLocalStateStoresManager(taskManagerServicesConfiguration.isLocalRecoveryEnabled(), taskManagerServicesConfiguration.getLocalRecoveryStateDirectories(), ioExecutor);
    final TaskExecutorStateChangelogStoragesManager changelogStoragesManager = new TaskExecutorStateChangelogStoragesManager();
    final boolean failOnJvmMetaspaceOomError = taskManagerServicesConfiguration.getConfiguration().getBoolean(CoreOptions.FAIL_ON_USER_CLASS_LOADING_METASPACE_OOM);
    final boolean checkClassLoaderLeak = taskManagerServicesConfiguration.getConfiguration().getBoolean(CoreOptions.CHECK_LEAKED_CLASSLOADER);
    final LibraryCacheManager libraryCacheManager = new BlobLibraryCacheManager(permanentBlobService, BlobLibraryCacheManager.defaultClassLoaderFactory(taskManagerServicesConfiguration.getClassLoaderResolveOrder(), taskManagerServicesConfiguration.getAlwaysParentFirstLoaderPatterns(), failOnJvmMetaspaceOomError ? fatalErrorHandler : null, checkClassLoaderLeak));
    final SlotAllocationSnapshotPersistenceService slotAllocationSnapshotPersistenceService;
    if (taskManagerServicesConfiguration.isLocalRecoveryEnabled()) {
        slotAllocationSnapshotPersistenceService = new FileSlotAllocationSnapshotPersistenceService(workingDirectory.getSlotAllocationSnapshotDirectory());
    } else {
        slotAllocationSnapshotPersistenceService = NoOpSlotAllocationSnapshotPersistenceService.INSTANCE;
    }
    return new TaskManagerServices(unresolvedTaskManagerLocation, taskManagerServicesConfiguration.getManagedMemorySize().getBytes(), ioManager, shuffleEnvironment, kvStateService, broadcastVariableManager, taskSlotTable, jobTable, jobLeaderService, taskStateManager, changelogStoragesManager, taskEventDispatcher, ioExecutor, libraryCacheManager, slotAllocationSnapshotPersistenceService);
}
Also used : BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) Task(org.apache.flink.runtime.taskmanager.Task) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) NoOpSlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.NoOpSlotAllocationSnapshotPersistenceService) SlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.SlotAllocationSnapshotPersistenceService) FileSlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.FileSlotAllocationSnapshotPersistenceService) FileSlotAllocationSnapshotPersistenceService(org.apache.flink.runtime.taskexecutor.slot.FileSlotAllocationSnapshotPersistenceService) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager)

Example 14 with UnresolvedTaskManagerLocation

use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.

the class JobMasterTest method testAllocatedSlotReportDoesNotContainStaleInformation.

/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not stale
 * information about the allocated slots on the {@link JobMaster}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without the fix
 * for FLINK-12863.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
    final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();
    final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setHeartbeatJobManagerFunction((taskManagerId, allocatedSlotReport) -> {
        try {
            if (hasReceivedSlotOffers.isTriggered()) {
                assertThat(allocatedSlotReport.getAllocatedSlotInfos(), hasSize(1));
            } else {
                assertThat(allocatedSlotReport.getAllocatedSlotInfos(), empty());
            }
        } catch (AssertionError e) {
            assertionFuture.completeExceptionally(e);
        }
        if (terminateHeartbeatVerification.get()) {
            assertionFuture.complete(null);
        }
        return FutureUtils.completedVoidFuture();
    }).createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
    final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withHeartbeatServices(new HeartbeatServices(5L, 1000L)).withSlotPoolServiceSchedulerFactory(DefaultSlotPoolServiceSchedulerFactory.create(new TestingSlotPoolFactory(hasReceivedSlotOffers), new DefaultSchedulerFactory())).createJobMaster();
    jobMaster.start();
    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
        // wait for the completion of the registration
        registrationResponse.get();
        final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
        final CompletableFuture<Collection<SlotOffer>> slotOfferFuture = jobMasterGateway.offerSlots(unresolvedTaskManagerLocation.getResourceID(), Collections.singleton(slotOffer), testingTimeout);
        assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));
        terminateHeartbeatVerification.set(true);
        // make sure that no assertion has been violated
        assertionFuture.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        jobManagerSharedServices.shutdown();
    }
}
Also used : TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) RestartStrategyOptions(org.apache.flink.configuration.RestartStrategyOptions) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) Duration(java.time.Duration) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ClassRule(org.junit.ClassRule) SimpleSlotContext(org.apache.flink.runtime.instance.SimpleSlotContext) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) AfterClass(org.junit.AfterClass) BlockingQueue(java.util.concurrent.BlockingQueue) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Category(org.junit.experimental.categories.Category) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.is(org.hamcrest.Matchers.is) Time(org.apache.flink.api.common.time.Time) InputSplitSource(org.apache.flink.core.io.InputSplitSource) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) CheckpointProperties(org.apache.flink.runtime.checkpoint.CheckpointProperties) Before(org.junit.Before) InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) InputSplit(org.apache.flink.core.io.InputSplit) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) IOException(java.io.IOException) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Deadline(org.apache.flink.api.common.time.Deadline) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingSchedulerNGFactory(org.apache.flink.runtime.scheduler.TestingSchedulerNGFactory) Assert.fail(org.junit.Assert.fail) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) SlotInfoWithUtilization(org.apache.flink.runtime.jobmaster.slotpool.SlotInfoWithUtilization) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) Queue(java.util.Queue) Matchers.anyOf(org.hamcrest.Matchers.anyOf) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) IntStream(java.util.stream.IntStream) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BeforeClass(org.junit.BeforeClass) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) Function(java.util.function.Function) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) FailoverStrategyFactoryLoader(org.apache.flink.runtime.executiongraph.failover.flip1.FailoverStrategyFactoryLoader) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) FailsWithAdaptiveScheduler(org.apache.flink.testutils.junit.FailsWithAdaptiveScheduler) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingSlotPoolServiceBuilder(org.apache.flink.runtime.jobmaster.slotpool.TestingSlotPoolServiceBuilder) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Matchers.empty(org.hamcrest.Matchers.empty) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingSchedulerNG(org.apache.flink.runtime.scheduler.TestingSchedulerNG) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) Collection(java.util.Collection) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Test(org.junit.Test)

Aggregations

UnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation)14 LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)13 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)11 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)11 TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway)11 FlinkException (org.apache.flink.util.FlinkException)11 Test (org.junit.Test)11 Collection (java.util.Collection)10 CompletableFuture (java.util.concurrent.CompletableFuture)10 CountDownLatch (java.util.concurrent.CountDownLatch)10 ExecutionException (java.util.concurrent.ExecutionException)10 Collectors (java.util.stream.Collectors)10 JobID (org.apache.flink.api.common.JobID)10 Time (org.apache.flink.api.common.time.Time)10 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)10 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)10 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)10 File (java.io.File)9 IOException (java.io.IOException)9 ArrayDeque (java.util.ArrayDeque)9