Use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
The class DeclarativeSlotManagerTest, method testNotificationAboutNotEnoughResources.
private static void testNotificationAboutNotEnoughResources(boolean withNotificationGracePeriod) throws Exception {
final JobID jobId = new JobID();
final int numRequiredSlots = 3;
final int numExistingSlots = 1;
List<Tuple2<JobID, Collection<ResourceRequirement>>> notEnoughResourceNotifications = new ArrayList<>();
ResourceActions resourceManagerActions = new TestingResourceActionsBuilder()
        .setAllocateResourceFunction(ignored -> false)
        .setNotEnoughResourcesConsumer((jobId1, acquiredResources) ->
                notEnoughResourceNotifications.add(Tuple2.of(jobId1, acquiredResources)))
        .build();
try (DeclarativeSlotManager slotManager = createDeclarativeSlotManagerBuilder()
        .buildAndStart(ResourceManagerId.generate(), new ManuallyTriggeredScheduledExecutor(), resourceManagerActions)) {
if (withNotificationGracePeriod) {
// this should disable notifications
slotManager.setFailUnfulfillableRequest(false);
}
final ResourceID taskExecutorResourceId = ResourceID.generate();
final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(
        taskExecutorResourceId,
        new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway());
final SlotReport slotReport = createSlotReport(taskExecutorResourceId, numExistingSlots);
slotManager.registerTaskManager(taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);
ResourceRequirements resourceRequirements = createResourceRequirements(jobId, numRequiredSlots);
slotManager.processResourceRequirements(resourceRequirements);
if (withNotificationGracePeriod) {
assertThat(notEnoughResourceNotifications, empty());
// re-enable notifications which should also trigger another resource check
slotManager.setFailUnfulfillableRequest(true);
}
assertThat(notEnoughResourceNotifications, hasSize(1));
Tuple2<JobID, Collection<ResourceRequirement>> notification = notEnoughResourceNotifications.get(0);
assertThat(notification.f0, is(jobId));
assertThat(notification.f1, hasItem(ResourceRequirement.create(ResourceProfile.ANY, numExistingSlots)));
// another slot report that does not indicate any changes should not trigger another
// notification
slotManager.reportSlotStatus(taskExecutionConnection.getInstanceID(), slotReport);
assertThat(notEnoughResourceNotifications, hasSize(1));
}
}
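All of the usages on this page rely on the same property of ManuallyTriggeredScheduledExecutor: work handed to it is only queued and runs when the test says so. The standalone sketch below (the class name ManualTriggerSketch and the println checks are invented for illustration; only execute() and triggerAll(), both visible elsewhere on this page, are assumed) shows that contract in isolation.

import org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor;

import java.util.concurrent.atomic.AtomicBoolean;

// Standalone sketch, not part of the Flink test suite: execute() only queues
// the runnable, triggerAll() runs everything that has been queued so far.
public class ManualTriggerSketch {
    public static void main(String[] args) {
        final ManuallyTriggeredScheduledExecutor executor = new ManuallyTriggeredScheduledExecutor();
        final AtomicBoolean ran = new AtomicBoolean(false);
        executor.execute(() -> ran.set(true));
        System.out.println(ran.get()); // false: the runnable is queued, not executed
        executor.triggerAll();
        System.out.println(ran.get()); // true: the queued runnable has now run
    }
}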
Use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
The class DefaultJobLeaderIdServiceTest, method testLeaderFutureWaitsForValidLeader.
/**
* Tests that the leaderId future is only completed once the service is notified about an actual
* leader being elected. Specifically, it tests that the future is not completed if the
* leadership was revoked without a new leader having been elected.
*/
@Test(timeout = 10000)
public void testLeaderFutureWaitsForValidLeader() throws Exception {
final JobID jobId = new JobID();
TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(null, null);
highAvailabilityServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);
JobLeaderIdService jobLeaderIdService = new DefaultJobLeaderIdService(
        highAvailabilityServices, new ManuallyTriggeredScheduledExecutor(), Time.milliseconds(5000L));
jobLeaderIdService.start(new NoOpJobLeaderIdActions());
jobLeaderIdService.addJob(jobId);
// elect some leader
leaderRetrievalService.notifyListener("foo", UUID.randomUUID());
// notify about leadership loss
leaderRetrievalService.notifyListener(null, null);
final CompletableFuture<JobMasterId> leaderIdFuture = jobLeaderIdService.getLeaderId(jobId);
// there is currently no leader, so this should not be completed
assertThat(leaderIdFuture.isDone(), is(false));
// elect a new leader
final UUID newLeaderId = UUID.randomUUID();
leaderRetrievalService.notifyListener("foo", newLeaderId);
assertThat(leaderIdFuture.get(), is(JobMasterId.fromUuidOrNull(newLeaderId)));
}
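In this usage the ManuallyTriggeredScheduledExecutor backs the service's internal job timeout (Time.milliseconds(5000L)); because the test never triggers any scheduled task, that timeout cannot fire and remove the job while the leader-election assertions run. A minimal, hypothetical sketch of that behavior follows (the class name TimeoutSketch is invented; it assumes only the schedule() method of Flink's ScheduledExecutor interface and the triggerScheduledTasks() call shown in other usages on this page).

import org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

// Standalone sketch: a delayed task stays dormant until the test explicitly
// triggers scheduled tasks; the wall-clock delay never elapses on its own.
public class TimeoutSketch {
    public static void main(String[] args) {
        final ManuallyTriggeredScheduledExecutor executor = new ManuallyTriggeredScheduledExecutor();
        final AtomicBoolean timedOut = new AtomicBoolean(false);
        executor.schedule(() -> timedOut.set(true), 5000L, TimeUnit.MILLISECONDS);
        System.out.println(timedOut.get()); // false: the timeout is armed but never fires by itself
        executor.triggerScheduledTasks();
        System.out.println(timedOut.get()); // true: it only fires when triggered explicitly
    }
}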
Use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
The class ExecutionGraphSuspendTest, method testSuspendWhileRestarting.
/**
* Tests that we can suspend a job when in state RESTARTING.
*/
@Test
public void testSuspendWhileRestarting() throws Exception {
final ManuallyTriggeredScheduledExecutor taskRestartExecutor = new ManuallyTriggeredScheduledExecutor();
final SchedulerBase scheduler = SchedulerTestingUtils
        .newSchedulerBuilder(JobGraphTestUtils.emptyJobGraph(), ComponentMainThreadExecutorServiceAdapter.forMainThread())
        .setRestartBackoffTimeStrategy(new TestRestartBackoffTimeStrategy(true, Long.MAX_VALUE))
        .setDelayExecutor(taskRestartExecutor)
        .build();
scheduler.startScheduling();
final ExecutionGraph eg = scheduler.getExecutionGraph();
assertEquals(JobStatus.RUNNING, eg.getState());
ExecutionGraphTestUtils.switchAllVerticesToRunning(eg);
scheduler.handleGlobalFailure(new Exception("test"));
assertEquals(JobStatus.RESTARTING, eg.getState());
ExecutionGraphTestUtils.completeCancellingForAllVertices(eg);
assertEquals(JobStatus.RESTARTING, eg.getState());
scheduler.closeAsync();
assertEquals(JobStatus.SUSPENDED, eg.getState());
taskRestartExecutor.triggerScheduledTasks();
assertEquals(JobStatus.SUSPENDED, eg.getState());
}
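Here the executor is injected as the scheduler's delay executor, so the restart scheduled by handleGlobalFailure stays queued; only after closeAsync() does the test call triggerScheduledTasks() to confirm that the late restart leaves the SUSPENDED state untouched. A hypothetical analogue of that pattern is sketched below (the StaleRestartSketch class and the string states are invented for illustration).

import org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

// Standalone sketch: a delayed "restart" is queued, but the state moves on
// before the test triggers it, so the stale task has no visible effect.
public class StaleRestartSketch {
    public static void main(String[] args) {
        final ManuallyTriggeredScheduledExecutor executor = new ManuallyTriggeredScheduledExecutor();
        final AtomicReference<String> state = new AtomicReference<>("RESTARTING");
        // the restart only takes effect if the job is still RESTARTING when it runs
        executor.schedule(() -> state.compareAndSet("RESTARTING", "RUNNING"), 10L, TimeUnit.MILLISECONDS);
        state.set("SUSPENDED"); // the job is suspended before the restart gets a chance to run
        executor.triggerScheduledTasks();
        System.out.println(state.get()); // still SUSPENDED
    }
}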
Use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
The class FileExecutionGraphInfoStoreTest, method testExecutionGraphExpiration.
/**
* Tests that an expired execution graph is removed from the execution graph store.
*/
@Test
public void testExecutionGraphExpiration() throws Exception {
final File rootDir = temporaryFolder.newFolder();
final Time expirationTime = Time.milliseconds(1L);
final ManuallyTriggeredScheduledExecutor scheduledExecutor = new ManuallyTriggeredScheduledExecutor();
final ManualTicker manualTicker = new ManualTicker();
try (final FileExecutionGraphInfoStore executionGraphInfoStore = new FileExecutionGraphInfoStore(
        rootDir, expirationTime, Integer.MAX_VALUE, 10000L, scheduledExecutor, manualTicker)) {
final ExecutionGraphInfo executionGraphInfo = new ExecutionGraphInfo(
        new ArchivedExecutionGraphBuilder().setState(JobStatus.FINISHED).build());
executionGraphInfoStore.put(executionGraphInfo);
// there should be one execution graph
assertThat(executionGraphInfoStore.size(), Matchers.equalTo(1));
manualTicker.advanceTime(expirationTime.toMilliseconds(), TimeUnit.MILLISECONDS);
// this should trigger the cleanup after expiration
scheduledExecutor.triggerScheduledTasks();
assertThat(executionGraphInfoStore.size(), Matchers.equalTo(0));
assertThat(executionGraphInfoStore.get(executionGraphInfo.getJobId()), Matchers.nullValue());
final File storageDirectory = executionGraphInfoStore.getStorageDir();
// check that the persisted file has been deleted
assertThat(storageDirectory.listFiles().length, Matchers.equalTo(0));
}
}
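Two knobs control this test: the ManualTicker decides whether an entry counts as expired, while the ManuallyTriggeredScheduledExecutor decides when the cleanup that acts on that expiration actually runs. The sketch below illustrates the same separation with a plain Guava cache and an inline manual Ticker; it is only an illustration of the idea, not the store's actual implementation, and the ExpirationSketch class is invented.

import com.google.common.base.Ticker;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

// Standalone sketch: advancing the manual clock marks the entry as expired,
// but the expired entry is only removed when cleanup is invoked explicitly.
public class ExpirationSketch {
    public static void main(String[] args) {
        final AtomicLong nanos = new AtomicLong();
        final Ticker manualTicker = new Ticker() {
            @Override
            public long read() {
                return nanos.get();
            }
        };
        final Cache<String, String> cache = CacheBuilder.newBuilder()
                .expireAfterWrite(1, TimeUnit.MILLISECONDS)
                .ticker(manualTicker)
                .build();
        cache.put("job", "graph");
        nanos.addAndGet(TimeUnit.MILLISECONDS.toNanos(2)); // advance the manual clock past the expiration
        System.out.println(cache.size()); // may still be 1: expired but not yet cleaned up
        cache.cleanUp(); // the explicit cleanup removes expired entries
        System.out.println(cache.size()); // 0
    }
}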
Use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
The class ZooKeeperCompletedCheckpointStoreITCase, method testChekpointingPausesAndResumeWhenTooManyCheckpoints.
/**
* FLINK-17073: tests that no checkpoint request is triggered when there are too many checkpoints
* waiting to be cleaned, and that checkpointing resumes once the number of pending cleanups has
* gone below the threshold.
*/
@Test
public void testChekpointingPausesAndResumeWhenTooManyCheckpoints() throws Exception {
ManualClock clock = new ManualClock();
clock.advanceTime(1, TimeUnit.DAYS);
int maxCleaningCheckpoints = 1;
CheckpointsCleaner checkpointsCleaner = new CheckpointsCleaner();
CheckpointRequestDecider checkpointRequestDecider = new CheckpointRequestDecider(
        maxCleaningCheckpoints,
        unused -> {
        },
        clock,
        1,
        new AtomicInteger(0)::get,
        checkpointsCleaner::getNumberOfCheckpointsToClean);
final int maxCheckpointsToRetain = 1;
ManuallyTriggeredScheduledExecutor executor = new ManuallyTriggeredScheduledExecutor();
CompletedCheckpointStore checkpointStore = createRecoveredCompletedCheckpointStore(maxCheckpointsToRetain, executor);
int nbCheckpointsToInject = 3;
for (int i = 1; i <= nbCheckpointsToInject; i++) {
// add checkpoints to clean, the ManuallyTriggeredScheduledExecutor.execute() just
// queues the runnables but does not execute them.
TestCompletedCheckpoint completedCheckpoint = new TestCompletedCheckpoint(
        new JobID(), i, i, Collections.emptyMap(),
        CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.RETAIN_ON_FAILURE));
checkpointStore.addCheckpointAndSubsumeOldestOne(completedCheckpoint, checkpointsCleaner, () -> {
});
}
int nbCheckpointsSubmittedForCleaning = nbCheckpointsToInject - maxCheckpointsToRetain;
// wait for the cleaning requests to be submitted by the checkpoint store
CommonTestUtils.waitUntilCondition(
        () -> checkpointsCleaner.getNumberOfCheckpointsToClean() == nbCheckpointsSubmittedForCleaning,
        Deadline.fromNow(Duration.ofSeconds(3)));
assertEquals(nbCheckpointsSubmittedForCleaning, checkpointsCleaner.getNumberOfCheckpointsToClean());
// checkpointing is on hold because checkpointsCleaner.getNumberOfCheckpointsToClean() >
// maxCleaningCheckpoints
assertFalse(checkpointRequestDecider.chooseRequestToExecute(regularCheckpoint(), false, 0).isPresent());
// make the executor execute checkpoint requests.
executor.triggerAll();
// wait for a checkpoint to be cleaned
CommonTestUtils.waitUntilCondition(
        () -> checkpointsCleaner.getNumberOfCheckpointsToClean() < nbCheckpointsSubmittedForCleaning,
        Deadline.fromNow(Duration.ofSeconds(3)));
// some checkpoints were cleaned
assertTrue(checkpointsCleaner.getNumberOfCheckpointsToClean() < nbCheckpointsSubmittedForCleaning);
// checkpointing is resumed because checkpointsCleaner.getNumberOfCheckpointsToClean() <=
// maxCleaningCheckpoints
assertTrue(checkpointRequestDecider.chooseRequestToExecute(regularCheckpoint(), false, 0).isPresent());
checkpointStore.shutdown(JobStatus.FINISHED, checkpointsCleaner);
}
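The back-pressure in this test comes from the fact that every cleanup submitted to the ManuallyTriggeredScheduledExecutor merely piles up until triggerAll() is called, so checkpointsCleaner.getNumberOfCheckpointsToClean() stays above maxCleaningCheckpoints and the decider declines new requests. A hypothetical counting sketch of that queuing behavior follows (the CleanupBacklogSketch class and the plain counter stand in for the real cleaner).

import org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor;

import java.util.concurrent.atomic.AtomicInteger;

// Standalone sketch: submitted cleanups accumulate as a backlog and are only
// worked off in one go when the test calls triggerAll().
public class CleanupBacklogSketch {
    public static void main(String[] args) {
        final ManuallyTriggeredScheduledExecutor executor = new ManuallyTriggeredScheduledExecutor();
        final AtomicInteger cleaned = new AtomicInteger();
        for (int i = 0; i < 3; i++) {
            executor.execute(cleaned::incrementAndGet); // queued, not yet executed
        }
        System.out.println(cleaned.get()); // 0: the backlog is on hold
        executor.triggerAll();
        System.out.println(cleaned.get()); // 3: all pending cleanups ran
    }
}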