Search in sources :

Example 36 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class JobMasterTest method testAllocatedSlotReportDoesNotContainStaleInformation.

/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not stale
 * information about the allocated slots on the {@link JobMaster}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without the fix
 * for FLINK-12863.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
    final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();
    final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setHeartbeatJobManagerFunction((taskManagerId, allocatedSlotReport) -> {
        try {
            if (hasReceivedSlotOffers.isTriggered()) {
                assertThat(allocatedSlotReport.getAllocatedSlotInfos(), hasSize(1));
            } else {
                assertThat(allocatedSlotReport.getAllocatedSlotInfos(), empty());
            }
        } catch (AssertionError e) {
            assertionFuture.completeExceptionally(e);
        }
        if (terminateHeartbeatVerification.get()) {
            assertionFuture.complete(null);
        }
        return FutureUtils.completedVoidFuture();
    }).createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
    final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withHeartbeatServices(new HeartbeatServices(5L, 1000L)).withSlotPoolServiceSchedulerFactory(DefaultSlotPoolServiceSchedulerFactory.create(new TestingSlotPoolFactory(hasReceivedSlotOffers), new DefaultSchedulerFactory())).createJobMaster();
    jobMaster.start();
    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
        // wait for the completion of the registration
        registrationResponse.get();
        final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
        final CompletableFuture<Collection<SlotOffer>> slotOfferFuture = jobMasterGateway.offerSlots(unresolvedTaskManagerLocation.getResourceID(), Collections.singleton(slotOffer), testingTimeout);
        assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));
        terminateHeartbeatVerification.set(true);
        // make sure that no assertion has been violated
        assertionFuture.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        jobManagerSharedServices.shutdown();
    }
}
Also used : TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) RestartStrategyOptions(org.apache.flink.configuration.RestartStrategyOptions) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) Duration(java.time.Duration) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ClassRule(org.junit.ClassRule) SimpleSlotContext(org.apache.flink.runtime.instance.SimpleSlotContext) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) AfterClass(org.junit.AfterClass) BlockingQueue(java.util.concurrent.BlockingQueue) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Category(org.junit.experimental.categories.Category) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.is(org.hamcrest.Matchers.is) Time(org.apache.flink.api.common.time.Time) InputSplitSource(org.apache.flink.core.io.InputSplitSource) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) CheckpointProperties(org.apache.flink.runtime.checkpoint.CheckpointProperties) Before(org.junit.Before) InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) InputSplit(org.apache.flink.core.io.InputSplit) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) IOException(java.io.IOException) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Deadline(org.apache.flink.api.common.time.Deadline) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingSchedulerNGFactory(org.apache.flink.runtime.scheduler.TestingSchedulerNGFactory) Assert.fail(org.junit.Assert.fail) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) SlotInfoWithUtilization(org.apache.flink.runtime.jobmaster.slotpool.SlotInfoWithUtilization) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) Queue(java.util.Queue) Matchers.anyOf(org.hamcrest.Matchers.anyOf) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) IntStream(java.util.stream.IntStream) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BeforeClass(org.junit.BeforeClass) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) Function(java.util.function.Function) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) FailoverStrategyFactoryLoader(org.apache.flink.runtime.executiongraph.failover.flip1.FailoverStrategyFactoryLoader) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) FailsWithAdaptiveScheduler(org.apache.flink.testutils.junit.FailsWithAdaptiveScheduler) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingSlotPoolServiceBuilder(org.apache.flink.runtime.jobmaster.slotpool.TestingSlotPoolServiceBuilder) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Matchers.empty(org.hamcrest.Matchers.empty) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingSchedulerNG(org.apache.flink.runtime.scheduler.TestingSchedulerNG) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) Collection(java.util.Collection) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Test(org.junit.Test)

Example 37 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ActiveResourceManagerTest method testStartNewWorker.

/**
 * Tests worker successfully requested, started and registered.
 */
@Test
public void testStartNewWorker() throws Exception {
    new Context() {

        {
            final ResourceID tmResourceId = ResourceID.generate();
            final CompletableFuture<TaskExecutorProcessSpec> requestWorkerFromDriverFuture = new CompletableFuture<>();
            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> {
                requestWorkerFromDriverFuture.complete(taskExecutorProcessSpec);
                return CompletableFuture.completedFuture(tmResourceId);
            });
            runTest(() -> {
                // received worker request, verify requesting from driver
                CompletableFuture<Boolean> startNewWorkerFuture = runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                TaskExecutorProcessSpec taskExecutorProcessSpec = requestWorkerFromDriverFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(startNewWorkerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                assertThat(taskExecutorProcessSpec, is(TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(flinkConfig, WORKER_RESOURCE_SPEC)));
                // worker registered, verify registration succeeded
                CompletableFuture<RegistrationResponse> registerTaskExecutorFuture = registerTaskExecutor(tmResourceId);
                assertThat(registerTaskExecutorFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), instanceOf(RegistrationResponse.Success.class));
            });
        }
    };
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 38 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ActiveResourceManagerTest method testStartNewWorkerFailedRequesting.

/**
 * Tests worker failed while requesting.
 */
@Test
public void testStartNewWorkerFailedRequesting() throws Exception {
    new Context() {

        {
            final ResourceID tmResourceId = ResourceID.generate();
            final AtomicInteger requestCount = new AtomicInteger(0);
            final List<CompletableFuture<ResourceID>> resourceIdFutures = new ArrayList<>();
            resourceIdFutures.add(new CompletableFuture<>());
            resourceIdFutures.add(new CompletableFuture<>());
            final List<CompletableFuture<TaskExecutorProcessSpec>> requestWorkerFromDriverFutures = new ArrayList<>();
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> {
                int idx = requestCount.getAndIncrement();
                assertThat(idx, lessThan(2));
                requestWorkerFromDriverFutures.get(idx).complete(taskExecutorProcessSpec);
                return resourceIdFutures.get(idx);
            });
            slotManagerBuilder.setGetRequiredResourcesSupplier(() -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));
            runTest(() -> {
                // received worker request, verify requesting from driver
                CompletableFuture<Boolean> startNewWorkerFuture = runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                TaskExecutorProcessSpec taskExecutorProcessSpec1 = requestWorkerFromDriverFutures.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(startNewWorkerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                assertThat(taskExecutorProcessSpec1, is(TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(flinkConfig, WORKER_RESOURCE_SPEC)));
                // first request failed, verify requesting another worker from driver
                runInMainThread(() -> resourceIdFutures.get(0).completeExceptionally(new Throwable("testing error")));
                TaskExecutorProcessSpec taskExecutorProcessSpec2 = requestWorkerFromDriverFutures.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(taskExecutorProcessSpec2, is(taskExecutorProcessSpec1));
                // second request allocated, verify registration succeed
                runInMainThread(() -> resourceIdFutures.get(1).complete(tmResourceId));
                CompletableFuture<RegistrationResponse> registerTaskExecutorFuture = registerTaskExecutor(tmResourceId);
                assertThat(registerTaskExecutorFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), instanceOf(RegistrationResponse.Success.class));
            });
        }
    };
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 39 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ActiveResourceManagerTest method testWorkerTerminatedAfterRegister.

/**
 * Tests worker terminated after registered.
 */
@Test
public void testWorkerTerminatedAfterRegister() throws Exception {
    new Context() {

        {
            final AtomicInteger requestCount = new AtomicInteger(0);
            final List<ResourceID> tmResourceIds = new ArrayList<>();
            tmResourceIds.add(ResourceID.generate());
            tmResourceIds.add(ResourceID.generate());
            final List<CompletableFuture<TaskExecutorProcessSpec>> requestWorkerFromDriverFutures = new ArrayList<>();
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> {
                int idx = requestCount.getAndIncrement();
                assertThat(idx, lessThan(2));
                requestWorkerFromDriverFutures.get(idx).complete(taskExecutorProcessSpec);
                return CompletableFuture.completedFuture(tmResourceIds.get(idx));
            });
            slotManagerBuilder.setGetRequiredResourcesSupplier(() -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));
            runTest(() -> {
                // received worker request, verify requesting from driver
                CompletableFuture<Boolean> startNewWorkerFuture = runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                TaskExecutorProcessSpec taskExecutorProcessSpec1 = requestWorkerFromDriverFutures.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(startNewWorkerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                assertThat(taskExecutorProcessSpec1, is(TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(flinkConfig, WORKER_RESOURCE_SPEC)));
                // first worker registered, verify registration succeed
                CompletableFuture<RegistrationResponse> registerTaskExecutorFuture1 = registerTaskExecutor(tmResourceIds.get(0));
                assertThat(registerTaskExecutorFuture1.get(TIMEOUT_SEC, TimeUnit.SECONDS), instanceOf(RegistrationResponse.Success.class));
                // first worker terminated, verify requesting another worker from driver
                runInMainThread(() -> getResourceManager().onWorkerTerminated(tmResourceIds.get(0), "terminate for testing"));
                TaskExecutorProcessSpec taskExecutorProcessSpec2 = requestWorkerFromDriverFutures.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(taskExecutorProcessSpec2, is(taskExecutorProcessSpec1));
                // second worker registered, verify registration succeed
                CompletableFuture<RegistrationResponse> registerTaskExecutorFuture2 = registerTaskExecutor(tmResourceIds.get(1));
                assertThat(registerTaskExecutorFuture2.get(TIMEOUT_SEC, TimeUnit.SECONDS), instanceOf(RegistrationResponse.Success.class));
            });
        }
    };
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 40 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ActiveResourceManagerTest method testStartWorkerIntervalOnWorkerTerminationExceedFailureRate.

@Test
public void testStartWorkerIntervalOnWorkerTerminationExceedFailureRate() throws Exception {
    new Context() {

        {
            flinkConfig.setDouble(ResourceManagerOptions.START_WORKER_MAX_FAILURE_RATE, 1);
            flinkConfig.set(ResourceManagerOptions.START_WORKER_RETRY_INTERVAL, Duration.ofMillis(TESTING_START_WORKER_INTERVAL.toMilliseconds()));
            final AtomicInteger requestCount = new AtomicInteger(0);
            final List<ResourceID> tmResourceIds = new ArrayList<>();
            tmResourceIds.add(ResourceID.generate());
            tmResourceIds.add(ResourceID.generate());
            final List<CompletableFuture<Long>> requestWorkerFromDriverFutures = new ArrayList<>();
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            requestWorkerFromDriverFutures.add(new CompletableFuture<>());
            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> {
                int idx = requestCount.getAndIncrement();
                assertThat(idx, lessThan(2));
                requestWorkerFromDriverFutures.get(idx).complete(System.currentTimeMillis());
                return CompletableFuture.completedFuture(tmResourceIds.get(idx));
            });
            slotManagerBuilder.setGetRequiredResourcesSupplier(() -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));
            runTest(() -> {
                // received worker request, verify requesting from driver
                CompletableFuture<Boolean> startNewWorkerFuture = runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                long t1 = requestWorkerFromDriverFutures.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(startNewWorkerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                // first worker failed before register, verify requesting another worker
                // from driver
                runInMainThread(() -> getResourceManager().onWorkerTerminated(tmResourceIds.get(0), "terminate for testing"));
                long t2 = requestWorkerFromDriverFutures.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                // validate trying creating worker twice, with proper interval
                assertThat((t2 - t1), greaterThanOrEqualTo(TESTING_START_WORKER_INTERVAL.toMilliseconds()));
                // second worker registered, verify registration succeed
                CompletableFuture<RegistrationResponse> registerTaskExecutorFuture = registerTaskExecutor(tmResourceIds.get(1));
                assertThat(registerTaskExecutorFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS), instanceOf(RegistrationResponse.Success.class));
            });
        }
    };
}
Also used : ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Aggregations

RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)40 Test (org.junit.Test)35 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)23 CompletableFuture (java.util.concurrent.CompletableFuture)18 UUID (java.util.UUID)14 JobID (org.apache.flink.api.common.JobID)14 ArrayList (java.util.ArrayList)12 FlinkException (org.apache.flink.util.FlinkException)11 Time (org.apache.flink.api.common.time.Time)10 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)10 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)10 TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway)9 LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)9 ExecutionException (java.util.concurrent.ExecutionException)8 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)8 UnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation)8 TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler)8 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)7 TimeoutException (java.util.concurrent.TimeoutException)7 Configuration (org.apache.flink.configuration.Configuration)7