use of org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider in project flink by apache.
the class AdaptiveBatchSchedulerFactory method createInstance.
@Override
public SchedulerNG createInstance(Logger log, JobGraph jobGraph, Executor ioExecutor, Configuration jobMasterConfiguration, SlotPoolService slotPoolService, ScheduledExecutorService futureExecutor, ClassLoader userCodeLoader, CheckpointRecoveryFactory checkpointRecoveryFactory, Time rpcTimeout, BlobWriter blobWriter, JobManagerJobMetricGroup jobManagerJobMetricGroup, Time slotRequestTimeout, ShuffleMaster<?> shuffleMaster, JobMasterPartitionTracker partitionTracker, ExecutionDeploymentTracker executionDeploymentTracker, long initializationTimestamp, ComponentMainThreadExecutor mainThreadExecutor, FatalErrorHandler fatalErrorHandler, JobStatusListener jobStatusListener) throws Exception {
checkState(jobGraph.getJobType() == JobType.BATCH, "Adaptive batch scheduler only supports batch jobs");
checkAllExchangesBlocking(jobGraph);
final SlotPool slotPool = slotPoolService.castInto(SlotPool.class).orElseThrow(() -> new IllegalStateException("The DefaultScheduler requires a SlotPool."));
final SlotSelectionStrategy slotSelectionStrategy = SlotSelectionStrategyUtils.selectSlotSelectionStrategy(JobType.BATCH, jobMasterConfiguration);
final PhysicalSlotRequestBulkChecker bulkChecker = PhysicalSlotRequestBulkCheckerImpl.createFromSlotPool(slotPool, SystemClock.getInstance());
final PhysicalSlotProvider physicalSlotProvider = new PhysicalSlotProviderImpl(slotSelectionStrategy, slotPool);
final ExecutionSlotAllocatorFactory allocatorFactory = new SlotSharingExecutionSlotAllocatorFactory(physicalSlotProvider, false, bulkChecker, slotRequestTimeout);
final RestartBackoffTimeStrategy restartBackoffTimeStrategy = RestartBackoffTimeStrategyFactoryLoader.createRestartBackoffTimeStrategyFactory(jobGraph.getSerializedExecutionConfig().deserializeValue(userCodeLoader).getRestartStrategy(), jobMasterConfiguration, jobGraph.isCheckpointingEnabled()).create();
log.info("Using restart back off time strategy {} for {} ({}).", restartBackoffTimeStrategy, jobGraph.getName(), jobGraph.getJobID());
final ExecutionGraphFactory executionGraphFactory = new DefaultExecutionGraphFactory(jobMasterConfiguration, userCodeLoader, executionDeploymentTracker, futureExecutor, ioExecutor, rpcTimeout, jobManagerJobMetricGroup, blobWriter, shuffleMaster, partitionTracker, true);
return new AdaptiveBatchScheduler(log, jobGraph, ioExecutor, jobMasterConfiguration, bulkChecker::start, new ScheduledExecutorServiceAdapter(futureExecutor), userCodeLoader, new CheckpointsCleaner(), checkpointRecoveryFactory, jobManagerJobMetricGroup, new VertexwiseSchedulingStrategy.Factory(), FailoverStrategyFactoryLoader.loadFailoverStrategyFactory(jobMasterConfiguration), restartBackoffTimeStrategy, new DefaultExecutionVertexOperations(), new ExecutionVertexVersioner(), allocatorFactory, initializationTimestamp, mainThreadExecutor, jobStatusListener, executionGraphFactory, shuffleMaster, rpcTimeout, DefaultVertexParallelismDecider.from(jobMasterConfiguration), jobMasterConfiguration.getInteger(JobManagerOptions.ADAPTIVE_BATCH_SCHEDULER_MAX_PARALLELISM));
}
use of org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider in project flink by apache.
the class DefaultSchedulerTest method testStatusMetrics.
@Test
public void testStatusMetrics() throws Exception {
// running time acts as a stand-in for generic status time metrics
final CompletableFuture<Gauge<Long>> runningTimeMetricFuture = new CompletableFuture<>();
final MetricRegistry metricRegistry = TestingMetricRegistry.builder().setRegisterConsumer((metric, name, group) -> {
switch(name) {
case "runningTimeTotal":
runningTimeMetricFuture.complete((Gauge<Long>) metric);
break;
}
}).build();
final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);
final Configuration configuration = new Configuration();
configuration.set(MetricOptions.JOB_STATUS_METRICS, Arrays.asList(MetricOptions.JobStatusMetrics.TOTAL_TIME));
final ComponentMainThreadExecutor singleThreadMainThreadExecutor = ComponentMainThreadExecutorServiceAdapter.forSingleThreadExecutor(scheduledExecutorService);
final Time slotTimeout = Time.milliseconds(5L);
final SlotPool slotPool = new DeclarativeSlotPoolBridgeBuilder().setBatchSlotTimeout(slotTimeout).buildAndStart(singleThreadMainThreadExecutor);
final PhysicalSlotProvider slotProvider = new PhysicalSlotProviderImpl(LocationPreferenceSlotSelectionStrategy.createDefault(), slotPool);
final DefaultScheduler scheduler = createSchedulerBuilder(jobGraph, singleThreadMainThreadExecutor).setJobMasterConfiguration(configuration).setJobManagerJobMetricGroup(JobManagerMetricGroup.createJobManagerMetricGroup(metricRegistry, "localhost").addJob(new JobID(), "jobName")).setExecutionSlotAllocatorFactory(SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(slotProvider, slotTimeout)).build();
final AdaptiveSchedulerTest.SubmissionBufferingTaskManagerGateway taskManagerGateway = new AdaptiveSchedulerTest.SubmissionBufferingTaskManagerGateway(1);
taskManagerGateway.setCancelConsumer(executionAttemptId -> {
singleThreadMainThreadExecutor.execute(() -> scheduler.updateTaskExecutionState(new TaskExecutionState(executionAttemptId, ExecutionState.CANCELED)));
});
singleThreadMainThreadExecutor.execute(() -> {
scheduler.startScheduling();
offerSlots(slotPool, createSlotOffersForResourceRequirements(ResourceCounter.withResource(ResourceProfile.UNKNOWN, 1)), taskManagerGateway);
});
// wait for the first task submission
taskManagerGateway.waitForSubmissions(1, Duration.ofSeconds(5));
// sleep a bit to ensure uptime is > 0
Thread.sleep(10L);
final Gauge<Long> runningTimeGauge = runningTimeMetricFuture.get();
Assert.assertThat(runningTimeGauge.getValue(), greaterThan(0L));
}
use of org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider in project flink by apache.
the class ExecutionGraphRestartTest method createExecutionSlotAllocatorFactory.
private static ExecutionSlotAllocatorFactory createExecutionSlotAllocatorFactory(SlotPool slotPool) throws Exception {
setupSlotPool(slotPool);
PhysicalSlotProvider physicalSlotProvider = new PhysicalSlotProviderImpl(LocationPreferenceSlotSelectionStrategy.createDefault(), slotPool);
return SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(physicalSlotProvider);
}
use of org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider in project flink by apache.
the class DefaultSchedulerComponents method createPipelinedRegionSchedulerComponents.
private static DefaultSchedulerComponents createPipelinedRegionSchedulerComponents(final JobType jobType, final Configuration jobMasterConfiguration, final SlotPool slotPool, final Time slotRequestTimeout) {
final SlotSelectionStrategy slotSelectionStrategy = SlotSelectionStrategyUtils.selectSlotSelectionStrategy(jobType, jobMasterConfiguration);
final PhysicalSlotRequestBulkChecker bulkChecker = PhysicalSlotRequestBulkCheckerImpl.createFromSlotPool(slotPool, SystemClock.getInstance());
final PhysicalSlotProvider physicalSlotProvider = new PhysicalSlotProviderImpl(slotSelectionStrategy, slotPool);
final ExecutionSlotAllocatorFactory allocatorFactory = new SlotSharingExecutionSlotAllocatorFactory(physicalSlotProvider, jobType == JobType.STREAMING, bulkChecker, slotRequestTimeout);
return new DefaultSchedulerComponents(new PipelinedRegionSchedulingStrategy.Factory(), bulkChecker::start, allocatorFactory);
}
use of org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider in project flink by apache.
the class DefaultSchedulerBatchSchedulingTest method testSchedulingOfJobWithFewerSlotsThanParallelism.
/**
* Tests that a batch job can be executed with fewer slots than its parallelism. See FLINK-13187
* for more information.
*/
@Test
public void testSchedulingOfJobWithFewerSlotsThanParallelism() throws Exception {
final int parallelism = 5;
final Time batchSlotTimeout = Time.milliseconds(5L);
final JobGraph jobGraph = createBatchJobGraph(parallelism);
try (final SlotPool slotPool = createSlotPool(mainThreadExecutor, batchSlotTimeout)) {
final ArrayBlockingQueue<ExecutionAttemptID> submittedTasksQueue = new ArrayBlockingQueue<>(parallelism);
TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder().setSubmitTaskConsumer((tdd, ignored) -> {
submittedTasksQueue.offer(tdd.getExecutionAttemptId());
return CompletableFuture.completedFuture(Acknowledge.get());
}).createTestingTaskExecutorGateway();
final PhysicalSlotProvider slotProvider = new PhysicalSlotProviderImpl(LocationPreferenceSlotSelectionStrategy.createDefault(), slotPool);
final GloballyTerminalJobStatusListener jobStatusListener = new GloballyTerminalJobStatusListener();
final SchedulerNG scheduler = createScheduler(jobGraph, mainThreadExecutor, slotProvider, batchSlotTimeout, jobStatusListener);
CompletableFuture.runAsync(scheduler::startScheduling, mainThreadExecutor).join();
// register a single slot at the slot pool
SlotPoolUtils.offerSlots(slotPool, mainThreadExecutor, Collections.singletonList(ResourceProfile.ANY), new RpcTaskManagerGateway(testingTaskExecutorGateway, JobMasterId.generate()));
// wait until the batch slot timeout has been reached
Thread.sleep(batchSlotTimeout.toMilliseconds());
final CompletableFuture<JobStatus> terminationFuture = jobStatusListener.getTerminationFuture();
for (int i = 0; i < parallelism; i++) {
final CompletableFuture<ExecutionAttemptID> submittedTaskFuture = CompletableFuture.supplyAsync(CheckedSupplier.unchecked(submittedTasksQueue::take));
// wait until one of them is completed
CompletableFuture.anyOf(submittedTaskFuture, terminationFuture).join();
if (submittedTaskFuture.isDone()) {
finishExecution(submittedTaskFuture.get(), scheduler, mainThreadExecutor);
} else {
fail(String.format("Job reached a globally terminal state %s before all executions were finished.", terminationFuture.get()));
}
}
assertThat(terminationFuture.get(), is(JobStatus.FINISHED));
}
}
Aggregations