Search in sources :

Example 31 with TaskExecutorGateway

use of org.apache.flink.runtime.taskexecutor.TaskExecutorGateway in project flink by apache.

the class ResourceManagerTest method testTaskExecutorBecomesUnreachableTriggersDisconnect.

/**
 * Registers a task executor whose resource manager heartbeat always completes exceptionally
 * with a {@link RecipientUnreachableException} and checks that the gateway is told to
 * disconnect with a {@link ResourceManagerException} and that the stop-worker function is
 * called with the task executor's resource id.
 */
@Test
public void testTaskExecutorBecomesUnreachableTriggersDisconnect() throws Exception {
    final ResourceID unreachableExecutorId = ResourceID.generate();
    final CompletableFuture<Exception> disconnectCause = new CompletableFuture<>();
    final CompletableFuture<ResourceID> stoppedWorker = new CompletableFuture<>();

    final String gatewayAddress = UUID.randomUUID().toString();
    final TaskExecutorGateway unreachableGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setAddress(gatewayAddress)
                    .setDisconnectResourceManagerConsumer(disconnectCause::complete)
                    // every heartbeat request fails as if the recipient could not be reached
                    .setHeartbeatResourceManagerFunction(
                            resourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "task executor is unreachable")))
                    .createTestingTaskExecutorGateway();
    rpcService.registerGateway(unreachableGateway.getAddress(), unreachableGateway);

    runHeartbeatTargetBecomesUnreachableTest(
            builder ->
                    builder.withStopWorkerFunction(
                            (worker) -> {
                                stoppedWorker.complete(worker);
                                return true;
                            }),
            resourceManagerGateway ->
                    registerTaskExecutor(
                            resourceManagerGateway,
                            unreachableExecutorId,
                            unreachableGateway.getAddress()),
            ignoredResourceManagerId -> {
                assertThat(disconnectCause.get(), instanceOf(ResourceManagerException.class));
                assertThat(stoppedWorker.get(), is(unreachableExecutorId));
            });
}
Also used : RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Matchers.nullValue(org.hamcrest.Matchers.nullValue) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) AfterClass(org.junit.AfterClass) UUID(java.util.UUID) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) Matchers.anyOf(org.hamcrest.Matchers.anyOf) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BeforeClass(org.junit.BeforeClass) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) Function(java.util.function.Function) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) DeclarativeSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.DeclarativeSlotManagerBuilder) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) 
LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Before(org.junit.Before) Matchers.empty(org.hamcrest.Matchers.empty) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) Test(org.junit.Test) TaskExecutorThreadInfoGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) CompletableFuture(java.util.concurrent.CompletableFuture) 
ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) TimeoutException(java.util.concurrent.TimeoutException) FlinkException(org.apache.flink.util.FlinkException) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) Test(org.junit.Test)

Example 32 with TaskExecutorGateway

use of org.apache.flink.runtime.taskexecutor.TaskExecutorGateway in project flink by apache.

the class DeclarativeSlotManager method allocateSlot.

/**
 * Starts the allocation of the given slot: records the allocation as pending, sends the slot
 * request to the owning task executor and handles the response on the main thread executor.
 *
 * @param taskManagerSlot slot to allocate
 * @param jobId job for which the slot should be allocated
 * @param targetAddress address of the job master
 * @param resourceProfile resource profile for the requirement for which the slot is used
 * @throws IllegalStateException if no task manager is registered for the slot's instance id
 */
private void allocateSlot(
        TaskManagerSlotInformation taskManagerSlot,
        JobID jobId,
        String targetAddress,
        ResourceProfile resourceProfile) {
    final SlotID slotId = taskManagerSlot.getSlotId();
    LOG.debug("Starting allocation of slot {} for job {} with resource profile {}.", slotId, jobId, resourceProfile);

    final InstanceID instanceId = taskManagerSlot.getInstanceId();
    final boolean ownerRegistered = taskExecutorManager.isTaskManagerRegistered(instanceId);
    if (!ownerRegistered) {
        throw new IllegalStateException("Could not find a registered task manager for instance id " + instanceId + '.');
    }

    final TaskExecutorGateway gateway =
            taskManagerSlot.getTaskManagerConnection().getTaskExecutorGateway();
    final AllocationID allocationId = new AllocationID();

    // bookkeeping is updated before the request is sent
    slotTracker.notifyAllocationStart(slotId, jobId);
    taskExecutorManager.markUsed(instanceId);
    pendingSlotAllocations.put(slotId, allocationId);

    // RPC call to the task manager
    final CompletableFuture<Acknowledge> slotRequest =
            gateway.requestSlot(
                    slotId,
                    jobId,
                    allocationId,
                    resourceProfile,
                    targetAddress,
                    resourceManagerId,
                    taskManagerRequestTimeout);

    final CompletableFuture<Void> responseHandling =
            slotRequest.handleAsync(
                    (Acknowledge ack, Throwable failure) -> {
                        // only react if this very allocation is still the pending one
                        final AllocationID pendingAllocation = pendingSlotAllocations.get(slotId);
                        final boolean stillPending =
                                pendingAllocation != null
                                        && pendingAllocation.equals(allocationId);
                        if (!stillPending) {
                            LOG.debug("Ignoring slot allocation update from task executor {} for slot {} and job {}, because the allocation was already completed or cancelled.", instanceId, slotId, jobId);
                            return null;
                        }

                        if (ack != null) {
                            LOG.trace("Completed allocation of slot {} for job {}.", slotId, jobId);
                            slotTracker.notifyAllocationComplete(slotId, jobId);
                            return null;
                        }

                        if (failure instanceof SlotOccupiedException) {
                            final SlotOccupiedException occupied = (SlotOccupiedException) failure;
                            LOG.debug("Tried allocating slot {} for job {}, but it was already allocated for job {}.", slotId, jobId, occupied.getJobId());
                            // report as a slot status to force the state transition
                            // this could be a problem if we ever assume that the task
                            // executor always reports about all slots
                            final SlotStatus reportedStatus =
                                    new SlotStatus(
                                            slotId,
                                            taskManagerSlot.getResourceProfile(),
                                            occupied.getJobId(),
                                            occupied.getAllocationId());
                            slotTracker.notifySlotStatus(Collections.singleton(reportedStatus));
                        } else {
                            LOG.warn("Slot allocation for slot {} for job {} failed.", slotId, jobId, failure);
                            slotTracker.notifyFree(slotId);
                        }
                        // reached only when the request was not acknowledged
                        checkResourceRequirements();
                        return null;
                    },
                    mainThreadExecutor);
    FutureUtils.assertNoException(responseHandling);
}
Also used : InstanceID(org.apache.flink.runtime.instance.InstanceID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)

Example 33 with TaskExecutorGateway

use of org.apache.flink.runtime.taskexecutor.TaskExecutorGateway in project flink by apache.

the class ResourceManager method requestTaskManagerMetricQueryServiceAddresses.

/**
 * Requests the metric query service address from the gateway of every entry in {@code
 * taskExecutors} and combines the answers into one future.
 *
 * <p>Each answer is converted to an {@link Optional}; empty ones are dropped, the others are
 * paired with the resource id of the task executor they came from.
 *
 * @param timeout timeout passed on to each per-task-executor request
 * @return future holding the (resource id, address) pairs of all present answers
 */
@Override
public CompletableFuture<Collection<Tuple2<ResourceID, String>>> requestTaskManagerMetricQueryServiceAddresses(Time timeout) {
    final ArrayList<CompletableFuture<Optional<Tuple2<ResourceID, String>>>> pendingAddresses =
            new ArrayList<>(taskExecutors.size());

    taskExecutors.forEach(
            (tmResourceId, registration) -> {
                final TaskExecutorGateway gateway = registration.getTaskExecutorGateway();
                final CompletableFuture<Optional<Tuple2<ResourceID, String>>> pendingAddress =
                        gateway.requestMetricQueryServiceAddress(timeout)
                                .thenApply(
                                        response ->
                                                response.toOptional()
                                                        .map(
                                                                address ->
                                                                        Tuple2.of(
                                                                                tmResourceId,
                                                                                address)));
                pendingAddresses.add(pendingAddress);
            });

    return FutureUtils.combineAll(pendingAddresses)
            .thenApply(
                    responses ->
                            responses.stream()
                                    .filter(Optional::isPresent)
                                    .map(Optional::get)
                                    .collect(Collectors.toList()));
}
Also used : TaskExecutorRegistrationRejection(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationRejection) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) RpcServiceUtils(org.apache.flink.runtime.rpc.RpcServiceUtils) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) LogInfo(org.apache.flink.runtime.rest.messages.LogInfo) HeartbeatListener(org.apache.flink.runtime.heartbeat.HeartbeatListener) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) HeartbeatManager(org.apache.flink.runtime.heartbeat.HeartbeatManager) Map(java.util.Map) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) FileType(org.apache.flink.runtime.taskexecutor.FileType) JobMasterRegistrationSuccess(org.apache.flink.runtime.jobmaster.JobMasterRegistrationSuccess) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) Collection(java.util.Collection) CompletionException(java.util.concurrent.CompletionException) ResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTracker) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Objects(java.util.Objects) MetricNames(org.apache.flink.runtime.metrics.MetricNames) Optional(java.util.Optional) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) 
JobManagerRegistration(org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration) FlinkException(org.apache.flink.util.FlinkException) ResourceIDRetrievable(org.apache.flink.runtime.clusterframework.types.ResourceIDRetrievable) HeartbeatSender(org.apache.flink.runtime.heartbeat.HeartbeatSender) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) ArrayList(java.util.ArrayList) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerMetricGroup(org.apache.flink.runtime.metrics.groups.ResourceManagerMetricGroup) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobMaster(org.apache.flink.runtime.jobmaster.JobMaster) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) Nullable(javax.annotation.Nullable) FencedRpcEndpoint(org.apache.flink.runtime.rpc.FencedRpcEndpoint) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) Executor(java.util.concurrent.Executor) UnknownTaskExecutorException(org.apache.flink.runtime.resourcemanager.exceptions.UnknownTaskExecutorException) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) TaskExecutorHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorHeartbeatPayload) TaskExecutorRegistrationSuccess(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) 
ThreadDumpInfo(org.apache.flink.runtime.rest.messages.ThreadDumpInfo) InstanceID(org.apache.flink.runtime.instance.InstanceID) ResourceActions(org.apache.flink.runtime.resourcemanager.slotmanager.ResourceActions) TaskExecutorThreadInfoGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) ResourceManagerPartitionTrackerFactory(org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTrackerFactory) NoOpHeartbeatManager(org.apache.flink.runtime.heartbeat.NoOpHeartbeatManager) JobID(org.apache.flink.api.common.JobID) WorkerRegistration(org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration) DataSetMetaInfo(org.apache.flink.runtime.io.network.partition.DataSetMetaInfo) Optional(java.util.Optional) ArrayList(java.util.ArrayList) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Tuple2(org.apache.flink.api.java.tuple.Tuple2) WorkerRegistration(org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration) Map(java.util.Map) HashMap(java.util.HashMap)

Example 34 with TaskExecutorGateway

use of org.apache.flink.runtime.taskexecutor.TaskExecutorGateway in project flink by apache.

the class JobMasterTest method testAllocatedSlotReportDoesNotContainStaleInformation.

/**
 * Checks that the {@link AllocatedSlotReport} sent with heartbeats reflects the current slot
 * allocations of the {@link JobMaster} rather than stale ones: it must be empty until slot
 * offers have been received and contain exactly one entry afterwards.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without the fix
 * for FLINK-12863.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    final CompletableFuture<Void> verificationOutcome = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation taskManagerLocation =
            new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean stopVerifying = new AtomicBoolean(false);
    final OneShotLatch slotOffersReceived = new OneShotLatch();

    final TestingTaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setHeartbeatJobManagerFunction(
                            (taskManagerId, slotReport) -> {
                                try {
                                    if (!slotOffersReceived.isTriggered()) {
                                        assertThat(slotReport.getAllocatedSlotInfos(), empty());
                                    } else {
                                        assertThat(
                                                slotReport.getAllocatedSlotInfos(), hasSize(1));
                                    }
                                } catch (AssertionError violation) {
                                    // surface the failure to the test thread
                                    verificationOutcome.completeExceptionally(violation);
                                }
                                if (stopVerifying.get()) {
                                    verificationOutcome.complete(null);
                                }
                                return FutureUtils.completedVoidFuture();
                            })
                    .createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

    final JobManagerSharedServices jobManagerSharedServices =
            new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final HeartbeatServices heartbeatServices = new HeartbeatServices(5L, 1000L);
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHeartbeatServices(heartbeatServices)
                    .withSlotPoolServiceSchedulerFactory(
                            DefaultSlotPoolServiceSchedulerFactory.create(
                                    new TestingSlotPoolFactory(slotOffersReceived),
                                    new DefaultSchedulerFactory()))
                    .createJobMaster();
    jobMaster.start();

    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time; block until the registration has completed
        jobMasterGateway
                .registerTaskManager(
                        jobGraph.getJobID(),
                        TaskManagerRegistrationInformation.create(
                                taskExecutorGateway.getAddress(),
                                taskManagerLocation,
                                TestingUtils.zeroUUID()),
                        testingTimeout)
                .get();

        final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
        final CompletableFuture<Collection<SlotOffer>> slotOfferFuture =
                jobMasterGateway.offerSlots(
                        taskManagerLocation.getResourceID(),
                        Collections.singleton(slotOffer),
                        testingTimeout);
        final Collection<SlotOffer> acceptedOffers = slotOfferFuture.get();
        assertThat(acceptedOffers, containsInAnyOrder(slotOffer));

        stopVerifying.set(true);

        // make sure that no assertion has been violated
        verificationOutcome.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        jobManagerSharedServices.shutdown();
    }
}
Also used : TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) RestartStrategyOptions(org.apache.flink.configuration.RestartStrategyOptions) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) Duration(java.time.Duration) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ClassRule(org.junit.ClassRule) SimpleSlotContext(org.apache.flink.runtime.instance.SimpleSlotContext) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) AfterClass(org.junit.AfterClass) BlockingQueue(java.util.concurrent.BlockingQueue) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Category(org.junit.experimental.categories.Category) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.is(org.hamcrest.Matchers.is) 
Time(org.apache.flink.api.common.time.Time) InputSplitSource(org.apache.flink.core.io.InputSplitSource) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) CheckpointProperties(org.apache.flink.runtime.checkpoint.CheckpointProperties) Before(org.junit.Before) InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) InputSplit(org.apache.flink.core.io.InputSplit) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) IOException(java.io.IOException) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) 
TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Deadline(org.apache.flink.api.common.time.Deadline) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingSchedulerNGFactory(org.apache.flink.runtime.scheduler.TestingSchedulerNGFactory) Assert.fail(org.junit.Assert.fail) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) 
Collectors(java.util.stream.Collectors) SlotInfoWithUtilization(org.apache.flink.runtime.jobmaster.slotpool.SlotInfoWithUtilization) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) Queue(java.util.Queue) Matchers.anyOf(org.hamcrest.Matchers.anyOf) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) IntStream(java.util.stream.IntStream) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BeforeClass(org.junit.BeforeClass) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) Function(java.util.function.Function) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) FailoverStrategyFactoryLoader(org.apache.flink.runtime.executiongraph.failover.flip1.FailoverStrategyFactoryLoader) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) FailsWithAdaptiveScheduler(org.apache.flink.testutils.junit.FailsWithAdaptiveScheduler) 
JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingSlotPoolServiceBuilder(org.apache.flink.runtime.jobmaster.slotpool.TestingSlotPoolServiceBuilder) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Matchers.empty(org.hamcrest.Matchers.empty) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingSchedulerNG(org.apache.flink.runtime.scheduler.TestingSchedulerNG) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) 
JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) Collection(java.util.Collection) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Test(org.junit.Test)

Example 35 with TaskExecutorGateway

use of org.apache.flink.runtime.taskexecutor.TaskExecutorGateway in project flink by apache.

the class AbstractFineGrainedSlotManagerITCase method testAllocationUpdatesIgnoredIfTaskExecutorUnregistered.

// ---------------------------------------------------------------------------------------------
// Allocation update
// ---------------------------------------------------------------------------------------------
/**
 * Verifies that the ack of a slot request from an unregistered task manager does not cause a
 * system breakdown.
 *
 * <p>The test registers a task manager whose slot request stays pending, unregisters that task
 * manager, and only then completes the pending request. A tracking security manager is installed
 * for the duration of the test so that an attempted system exit can be detected.
 *
 * @throws Exception if the test body fails
 */
@Test
public void testAllocationUpdatesIgnoredIfTaskExecutorUnregistered() throws Exception {
    final CompletableFuture<Acknowledge> slotRequestFuture = new CompletableFuture<>();
    final CompletableFuture<Void> slotRequestCallFuture = new CompletableFuture<>();
    final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setRequestSlotFunction(ignored -> {
        // Signal that the slot request arrived, but keep its ack pending until the test
        // completes slotRequestFuture explicitly.
        slotRequestCallFuture.complete(null);
        return slotRequestFuture;
    }).createTestingTaskExecutorGateway();
    // The fatal error handler will exit the system if there is any exception in handling the
    // ack of the slot request. We need the security manager to verify that this does not happen.
    final SystemExitTrackingSecurityManager trackingSecurityManager = new SystemExitTrackingSecurityManager();
    // Remember whatever was installed before, so it can be put back even if the test fails.
    final SecurityManager previousSecurityManager = System.getSecurityManager();
    System.setSecurityManager(trackingSecurityManager);
    try {
        final JobID jobId = new JobID();
        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutorConnection = new TaskExecutorConnection(taskExecutorResourceId, taskExecutorGateway);
        final SlotReport slotReport = new SlotReport();
        new Context() {

            {
                runTest(() -> {
                    runInMainThread(() -> {
                        getSlotManager().processResourceRequirements(createResourceRequirements(jobId, 1));
                        getSlotManager().registerTaskManager(taskExecutorConnection, slotReport, DEFAULT_TOTAL_RESOURCE_PROFILE, DEFAULT_SLOT_RESOURCE_PROFILE);
                    });
                    // Wait until the slot request has actually been sent to the task executor.
                    assertFutureCompleteAndReturn(slotRequestCallFuture);
                    runInMainThread(() -> {
                        // Unregister first, then deliver the ack for the now-unknown task manager.
                        getSlotManager().unregisterTaskManager(taskExecutorConnection.getInstanceID(), TEST_EXCEPTION);
                        slotRequestFuture.complete(Acknowledge.get());
                    });
                    assertThat(trackingSecurityManager.getSystemExitFuture().isDone(), is(false));
                });
            }
        };
    } finally {
        // Always undo the global JVM change; otherwise a failing test would leave the tracking
        // security manager installed for every test that runs afterwards.
        System.setSecurityManager(previousSecurityManager);
    }
}
Also used : TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) CompletableFuture(java.util.concurrent.CompletableFuture) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) ArrayList(java.util.ArrayList) Assert.assertThat(org.junit.Assert.assertThat) FunctionUtils(org.apache.flink.util.function.FunctionUtils) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) Matchers.empty(org.hamcrest.Matchers.empty) Iterator(java.util.Iterator) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) SystemExitTrackingSecurityManager(org.apache.flink.runtime.testutils.SystemExitTrackingSecurityManager) Test(org.junit.Test) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Assert.assertFalse(org.junit.Assert.assertFalse) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) SlotAllocationException(org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException) Collections(java.util.Collections) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) 
Assert.assertEquals(org.junit.Assert.assertEquals) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) SystemExitTrackingSecurityManager(org.apache.flink.runtime.testutils.SystemExitTrackingSecurityManager) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Test(org.junit.Test)

Aggregations

TaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TaskExecutorGateway)45 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)36 Test (org.junit.Test)34 CompletableFuture (java.util.concurrent.CompletableFuture)29 JobID (org.apache.flink.api.common.JobID)29 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)29 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)27 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)27 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)25 ArrayList (java.util.ArrayList)22 Matchers.is (org.hamcrest.Matchers.is)22 TestingTaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway)21 Matchers.empty (org.hamcrest.Matchers.empty)21 Matchers.equalTo (org.hamcrest.Matchers.equalTo)21 Collections (java.util.Collections)20 List (java.util.List)20 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)20 TimeoutException (java.util.concurrent.TimeoutException)19 ResourceManagerId (org.apache.flink.runtime.resourcemanager.ResourceManagerId)19 Collection (java.util.Collection)18