Search in sources :

Example 1 with DefaultSchedulerFactory

use of org.apache.flink.runtime.scheduler.DefaultSchedulerFactory in project flink by apache.

the class DefaultSlotPoolServiceSchedulerFactory method fromConfiguration.

public static DefaultSlotPoolServiceSchedulerFactory fromConfiguration(Configuration configuration, JobType jobType) {
    final Time rpcTimeout = Time.fromDuration(configuration.get(AkkaOptions.ASK_TIMEOUT_DURATION));
    final Time slotIdleTimeout = Time.milliseconds(configuration.getLong(JobManagerOptions.SLOT_IDLE_TIMEOUT));
    final Time batchSlotTimeout = Time.milliseconds(configuration.getLong(JobManagerOptions.SLOT_REQUEST_TIMEOUT));
    final SlotPoolServiceFactory slotPoolServiceFactory;
    final SchedulerNGFactory schedulerNGFactory;
    JobManagerOptions.SchedulerType schedulerType = ClusterOptions.getSchedulerType(configuration);
    if (schedulerType == JobManagerOptions.SchedulerType.Adaptive && jobType == JobType.BATCH) {
        LOG.info("Adaptive Scheduler configured, but Batch job detected. Changing scheduler type to NG / DefaultScheduler.");
        // overwrite
        schedulerType = JobManagerOptions.SchedulerType.Ng;
    }
    switch(schedulerType) {
        case Ng:
            schedulerNGFactory = new DefaultSchedulerFactory();
            slotPoolServiceFactory = new DeclarativeSlotPoolBridgeServiceFactory(SystemClock.getInstance(), rpcTimeout, slotIdleTimeout, batchSlotTimeout, getRequestSlotMatchingStrategy(configuration, jobType));
            break;
        case Adaptive:
            schedulerNGFactory = getAdaptiveSchedulerFactoryFromConfiguration(configuration);
            slotPoolServiceFactory = new DeclarativeSlotPoolServiceFactory(SystemClock.getInstance(), slotIdleTimeout, rpcTimeout);
            break;
        case AdaptiveBatch:
            schedulerNGFactory = new AdaptiveBatchSchedulerFactory();
            slotPoolServiceFactory = new DeclarativeSlotPoolBridgeServiceFactory(SystemClock.getInstance(), rpcTimeout, slotIdleTimeout, batchSlotTimeout, getRequestSlotMatchingStrategy(configuration, jobType));
            break;
        default:
            throw new IllegalArgumentException(String.format("Illegal value [%s] for config option [%s]", schedulerType, JobManagerOptions.SCHEDULER.key()));
    }
    return new DefaultSlotPoolServiceSchedulerFactory(slotPoolServiceFactory, schedulerNGFactory);
}
Also used : SchedulerNGFactory(org.apache.flink.runtime.scheduler.SchedulerNGFactory) AdaptiveBatchSchedulerFactory(org.apache.flink.runtime.scheduler.adaptivebatch.AdaptiveBatchSchedulerFactory) DeclarativeSlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolServiceFactory) DeclarativeSlotPoolBridgeServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolBridgeServiceFactory) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) Time(org.apache.flink.api.common.time.Time) DeclarativeSlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPoolServiceFactory) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions)

Example 2 with DefaultSchedulerFactory

use of org.apache.flink.runtime.scheduler.DefaultSchedulerFactory in project flink by apache.

the class JobMasterTest method testAllocatedSlotReportDoesNotContainStaleInformation.

/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not stale
 * information about the allocated slots on the {@link JobMaster}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without the fix
 * for FLINK-12863.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
    final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();
    final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setHeartbeatJobManagerFunction((taskManagerId, allocatedSlotReport) -> {
        try {
            if (hasReceivedSlotOffers.isTriggered()) {
                assertThat(allocatedSlotReport.getAllocatedSlotInfos(), hasSize(1));
            } else {
                assertThat(allocatedSlotReport.getAllocatedSlotInfos(), empty());
            }
        } catch (AssertionError e) {
            assertionFuture.completeExceptionally(e);
        }
        if (terminateHeartbeatVerification.get()) {
            assertionFuture.complete(null);
        }
        return FutureUtils.completedVoidFuture();
    }).createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
    final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withHeartbeatServices(new HeartbeatServices(5L, 1000L)).withSlotPoolServiceSchedulerFactory(DefaultSlotPoolServiceSchedulerFactory.create(new TestingSlotPoolFactory(hasReceivedSlotOffers), new DefaultSchedulerFactory())).createJobMaster();
    jobMaster.start();
    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
        // wait for the completion of the registration
        registrationResponse.get();
        final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
        final CompletableFuture<Collection<SlotOffer>> slotOfferFuture = jobMasterGateway.offerSlots(unresolvedTaskManagerLocation.getResourceID(), Collections.singleton(slotOffer), testingTimeout);
        assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));
        terminateHeartbeatVerification.set(true);
        // make sure that no assertion has been violated
        assertionFuture.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        jobManagerSharedServices.shutdown();
    }
}
Also used : TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) RestartStrategyOptions(org.apache.flink.configuration.RestartStrategyOptions) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) Duration(java.time.Duration) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ClassRule(org.junit.ClassRule) SimpleSlotContext(org.apache.flink.runtime.instance.SimpleSlotContext) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) AfterClass(org.junit.AfterClass) BlockingQueue(java.util.concurrent.BlockingQueue) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Category(org.junit.experimental.categories.Category) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.is(org.hamcrest.Matchers.is) Time(org.apache.flink.api.common.time.Time) InputSplitSource(org.apache.flink.core.io.InputSplitSource) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) CheckpointProperties(org.apache.flink.runtime.checkpoint.CheckpointProperties) Before(org.junit.Before) InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) InputSplit(org.apache.flink.core.io.InputSplit) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) IOException(java.io.IOException) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Deadline(org.apache.flink.api.common.time.Deadline) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingSchedulerNGFactory(org.apache.flink.runtime.scheduler.TestingSchedulerNGFactory) Assert.fail(org.junit.Assert.fail) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) SlotInfoWithUtilization(org.apache.flink.runtime.jobmaster.slotpool.SlotInfoWithUtilization) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) Queue(java.util.Queue) Matchers.anyOf(org.hamcrest.Matchers.anyOf) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) IntStream(java.util.stream.IntStream) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BeforeClass(org.junit.BeforeClass) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) Function(java.util.function.Function) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) FailoverStrategyFactoryLoader(org.apache.flink.runtime.executiongraph.failover.flip1.FailoverStrategyFactoryLoader) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) FailsWithAdaptiveScheduler(org.apache.flink.testutils.junit.FailsWithAdaptiveScheduler) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingSlotPoolServiceBuilder(org.apache.flink.runtime.jobmaster.slotpool.TestingSlotPoolServiceBuilder) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Matchers.empty(org.hamcrest.Matchers.empty) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingSchedulerNG(org.apache.flink.runtime.scheduler.TestingSchedulerNG) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) Collection(java.util.Collection) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Test(org.junit.Test)

Aggregations

Time (org.apache.flink.api.common.time.Time)2 File (java.io.File)1 IOException (java.io.IOException)1 Duration (java.time.Duration)1 ArrayDeque (java.util.ArrayDeque)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 Queue (java.util.Queue)1 UUID (java.util.UUID)1 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)1 BlockingQueue (java.util.concurrent.BlockingQueue)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 CountDownLatch (java.util.concurrent.CountDownLatch)1