Search in sources :

Example 6 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class DeclarativeSlotManagerTest method testNotificationAboutNotEnoughResources.

/**
 * Checks that exactly one "not enough resources" notification is emitted for a job whose
 * requirements exceed the registered slots while resource allocation is refused.
 *
 * <p>The job requires three slots, a single task executor offers one slot, and the allocate
 * function always returns {@code false}.
 *
 * @param withNotificationGracePeriod if {@code true}, failing of unfulfillable requests is
 *     switched off before the requirements are processed and switched back on afterwards; no
 *     notification is expected until it is switched back on
 */
private static void testNotificationAboutNotEnoughResources(boolean withNotificationGracePeriod) throws Exception {
    final int requiredSlotCount = 3;
    final int existingSlotCount = 1;
    final JobID jobId = new JobID();

    // every callback invocation is recorded as a (job, acquired resources) pair
    final List<Tuple2<JobID, Collection<ResourceRequirement>>> receivedNotifications = new ArrayList<>();
    final ResourceActions resourceActions =
            new TestingResourceActionsBuilder()
                    .setAllocateResourceFunction(unused -> false)
                    .setNotEnoughResourcesConsumer(
                            (notifiedJobId, acquired) -> {
                                receivedNotifications.add(Tuple2.of(notifiedJobId, acquired));
                            })
                    .build();

    final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
    final ManuallyTriggeredScheduledExecutor mainThreadExecutor = new ManuallyTriggeredScheduledExecutor();

    try (DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .buildAndStart(resourceManagerId, mainThreadExecutor, resourceActions)) {
        if (withNotificationGracePeriod) {
            // notifications are expected to stay silent while this is switched off
            slotManager.setFailUnfulfillableRequest(false);
        }

        final ResourceID taskExecutorId = ResourceID.generate();
        final TaskExecutorConnection taskExecutorConnection =
                new TaskExecutorConnection(
                        taskExecutorId,
                        new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway());
        final SlotReport initialSlotReport = createSlotReport(taskExecutorId, existingSlotCount);
        slotManager.registerTaskManager(
                taskExecutorConnection, initialSlotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final ResourceRequirements requirements = createResourceRequirements(jobId, requiredSlotCount);
        slotManager.processResourceRequirements(requirements);

        if (withNotificationGracePeriod) {
            assertThat(receivedNotifications, empty());
            // switching the flag back on should also trigger another resource check
            slotManager.setFailUnfulfillableRequest(true);
        }

        assertThat(receivedNotifications, hasSize(1));
        final Tuple2<JobID, Collection<ResourceRequirement>> firstNotification = receivedNotifications.get(0);
        assertThat(firstNotification.f0, is(jobId));
        assertThat(
                firstNotification.f1,
                hasItem(ResourceRequirement.create(ResourceProfile.ANY, existingSlotCount)));

        // re-sending an unchanged slot report must not produce a second notification
        slotManager.reportSlotStatus(taskExecutorConnection.getInstanceID(), initialSlotReport);
        assertThat(receivedNotifications, hasSize(1));
    }
}
Also used : ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) CoreMatchers.hasItem(org.hamcrest.CoreMatchers.hasItem) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) Assert.assertThat(org.junit.Assert.assertThat) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FunctionUtils(org.apache.flink.util.function.FunctionUtils) TestLogger(org.apache.flink.util.TestLogger) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Set(java.util.Set) BlockingQueue(java.util.concurrent.BlockingQueue) SlotManagerMetricGroup(org.apache.flink.runtime.metrics.groups.SlotManagerMetricGroup) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TestingUtils(org.apache.flink.testutils.TestingUtils) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Assert.assertFalse(org.junit.Assert.assertFalse) 
Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) SlotAllocationException(org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FlinkException(org.apache.flink.util.FlinkException) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) CoreMatchers.not(org.hamcrest.CoreMatchers.not) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Assert.assertSame(org.junit.Assert.assertSame) ManuallyTriggeredScheduledExecutorService(org.apache.flink.core.testutils.ManuallyTriggeredScheduledExecutorService) TestingMetricRegistry(org.apache.flink.runtime.metrics.util.TestingMetricRegistry) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) Matchers.hasSize(org.hamcrest.Matchers.hasSize) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Matchers.empty(org.hamcrest.Matchers.empty) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) SystemExitTrackingSecurityManager(org.apache.flink.runtime.testutils.SystemExitTrackingSecurityManager) Test(org.junit.Test) InstanceID(org.apache.flink.runtime.instance.InstanceID) Iterators(org.apache.flink.shaded.guava30.com.google.common.collect.Iterators) TimeUnit(java.util.concurrent.TimeUnit) JobID(org.apache.flink.api.common.JobID) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) 
Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) ArrayList(java.util.ArrayList) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)

Example 7 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class DefaultJobLeaderIdServiceTest method testLeaderFutureWaitsForValidLeader.

/**
 * Verifies that the future returned by {@code getLeaderId} stays incomplete after a leader has
 * been elected and then revoked, and completes only once a new leader is announced. The
 * completed value must correspond to the newly elected leader's id.
 */
@Test(timeout = 10000)
public void testLeaderFutureWaitsForValidLeader() throws Exception {
    final JobID jobId = new JobID();

    final SettableLeaderRetrievalService retrievalService = new SettableLeaderRetrievalService(null, null);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    final ManuallyTriggeredScheduledExecutor scheduledExecutor = new ManuallyTriggeredScheduledExecutor();
    final Time timeout = Time.milliseconds(5000L);
    final JobLeaderIdService leaderIdService =
            new DefaultJobLeaderIdService(haServices, scheduledExecutor, timeout);
    leaderIdService.start(new NoOpJobLeaderIdActions());
    leaderIdService.addJob(jobId);

    // first a leader is announced ...
    retrievalService.notifyListener("foo", UUID.randomUUID());
    // ... and then its leadership is revoked again
    retrievalService.notifyListener(null, null);

    final CompletableFuture<JobMasterId> pendingLeaderId = leaderIdService.getLeaderId(jobId);
    // without a current leader the future has to remain open
    assertThat(pendingLeaderId.isDone(), is(false));

    // announcing a fresh leader should complete the future with that leader's id
    final UUID newLeaderSessionId = UUID.randomUUID();
    retrievalService.notifyListener("foo", newLeaderSessionId);
    final JobMasterId expectedLeaderId = JobMasterId.fromUuidOrNull(newLeaderSessionId);
    assertThat(pendingLeaderId.get(), is(expectedLeaderId));
}
Also used : TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Test(org.junit.Test)

Example 8 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class ExecutionGraphSuspendTest method testSuspendWhileRestarting.

/**
 * Verifies that closing the scheduler while the job is in state RESTARTING moves the execution
 * graph to SUSPENDED, and that triggering the scheduled tasks on the delay executor afterwards
 * leaves it in SUSPENDED.
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
    final ManuallyTriggeredScheduledExecutor delayExecutor = new ManuallyTriggeredScheduledExecutor();
    final TestRestartBackoffTimeStrategy restartStrategy =
            new TestRestartBackoffTimeStrategy(true, Long.MAX_VALUE);

    final SchedulerBase scheduler =
            SchedulerTestingUtils.newSchedulerBuilder(
                            JobGraphTestUtils.emptyJobGraph(),
                            ComponentMainThreadExecutorServiceAdapter.forMainThread())
                    .setRestartBackoffTimeStrategy(restartStrategy)
                    .setDelayExecutor(delayExecutor)
                    .build();
    scheduler.startScheduling();

    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();
    assertEquals(JobStatus.RUNNING, executionGraph.getState());
    ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

    // a global failure is expected to put the job into RESTARTING
    final Exception globalFailure = new Exception("test");
    scheduler.handleGlobalFailure(globalFailure);
    assertEquals(JobStatus.RESTARTING, executionGraph.getState());

    // completing the cancellation must not leave RESTARTING yet
    ExecutionGraphTestUtils.completeCancellingForAllVertices(executionGraph);
    assertEquals(JobStatus.RESTARTING, executionGraph.getState());

    // closing the scheduler suspends the job
    scheduler.closeAsync();
    assertEquals(JobStatus.SUSPENDED, executionGraph.getState());

    // running whatever was scheduled on the delay executor must not change the state
    delayExecutor.triggerScheduledTasks();
    assertEquals(JobStatus.SUSPENDED, executionGraph.getState());
}
Also used : TestRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.TestRestartBackoffTimeStrategy) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Test(org.junit.Test)

Example 9 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class FileExecutionGraphInfoStoreTest method testExecutionGraphExpiration.

/**
 * Verifies that an execution graph whose expiration time has elapsed is dropped from the store
 * once the scheduled cleanup runs, and that its file in the storage directory is removed.
 */
@Test
public void testExecutionGraphExpiration() throws Exception {
    final File storeRoot = temporaryFolder.newFolder();
    final Time expiry = Time.milliseconds(1L);
    final ManuallyTriggeredScheduledExecutor cleanupExecutor = new ManuallyTriggeredScheduledExecutor();
    final ManualTicker ticker = new ManualTicker();

    try (final FileExecutionGraphInfoStore store =
            new FileExecutionGraphInfoStore(
                    storeRoot, expiry, Integer.MAX_VALUE, 10000L, cleanupExecutor, ticker)) {
        final ExecutionGraphInfo finishedJobInfo =
                new ExecutionGraphInfo(
                        new ArchivedExecutionGraphBuilder().setState(JobStatus.FINISHED).build());
        store.put(finishedJobInfo);

        // exactly one execution graph should be stored now
        assertThat(store.size(), Matchers.equalTo(1));

        // move the ticker forward by the expiration time, then run the scheduled cleanup
        ticker.advanceTime(expiry.toMilliseconds(), TimeUnit.MILLISECONDS);
        cleanupExecutor.triggerScheduledTasks();

        assertThat(store.size(), Matchers.equalTo(0));
        assertThat(store.get(finishedJobInfo.getJobId()), Matchers.nullValue());

        // the persisted file must be gone from the storage directory as well
        assertThat(store.getStorageDir().listFiles().length, Matchers.equalTo(0));
    }
}
Also used : ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) Time(org.apache.flink.api.common.time.Time) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) ManualTicker(org.apache.flink.runtime.util.ManualTicker) File(java.io.File) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Test(org.junit.Test)

Example 10 with ManuallyTriggeredScheduledExecutor

use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.

the class ZooKeeperCompletedCheckpointStoreITCase method testChekpointingPausesAndResumeWhenTooManyCheckpoints.

/**
 * FLINK-17073: verifies that no checkpoint request is chosen while too many checkpoints are
 * waiting to be cleaned, and that requests are chosen again once the number of waiting
 * checkpoints has gone below the threshold.
 */
@Test
public void testChekpointingPausesAndResumeWhenTooManyCheckpoints() throws Exception {
    final ManualClock manualClock = new ManualClock();
    manualClock.advanceTime(1, TimeUnit.DAYS);

    final int cleaningThreshold = 1;
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();
    final CheckpointRequestDecider requestDecider =
            new CheckpointRequestDecider(
                    cleaningThreshold,
                    unused -> {},
                    manualClock,
                    1,
                    new AtomicInteger(0)::get,
                    cleaner::getNumberOfCheckpointsToClean);

    final int retainedCheckpoints = 1;
    final ManuallyTriggeredScheduledExecutor manualExecutor = new ManuallyTriggeredScheduledExecutor();
    final CompletedCheckpointStore store =
            createRecoveredCompletedCheckpointStore(retainedCheckpoints, manualExecutor);

    final int injectedCheckpoints = 3;
    for (int checkpointId = 1; checkpointId <= injectedCheckpoints; checkpointId++) {
        // add checkpoints to clean; ManuallyTriggeredScheduledExecutor.execute() only queues
        // the runnables and does not run them
        final TestCompletedCheckpoint checkpoint =
                new TestCompletedCheckpoint(
                        new JobID(),
                        checkpointId,
                        checkpointId,
                        Collections.emptyMap(),
                        CheckpointProperties.forCheckpoint(
                                CheckpointRetentionPolicy.RETAIN_ON_FAILURE));
        store.addCheckpointAndSubsumeOldestOne(checkpoint, cleaner, () -> {});
    }

    final int expectedPendingCleanups = injectedCheckpoints - retainedCheckpoints;

    // wait until the store has handed the subsumed checkpoints to the cleaner
    CommonTestUtils.waitUntilCondition(
            () -> cleaner.getNumberOfCheckpointsToClean() == expectedPendingCleanups,
            Deadline.fromNow(Duration.ofSeconds(3)));
    assertEquals(expectedPendingCleanups, cleaner.getNumberOfCheckpointsToClean());

    // checkpointing is on hold: the number of checkpoints to clean exceeds the threshold
    assertFalse(requestDecider.chooseRequestToExecute(regularCheckpoint(), false, 0).isPresent());

    // let the executor run everything that has been queued so far
    manualExecutor.triggerAll();

    // wait until at least one checkpoint has been cleaned
    CommonTestUtils.waitUntilCondition(
            () -> cleaner.getNumberOfCheckpointsToClean() < expectedPendingCleanups,
            Deadline.fromNow(Duration.ofSeconds(3)));
    assertTrue(cleaner.getNumberOfCheckpointsToClean() < expectedPendingCleanups);

    // checkpointing resumes: the number of checkpoints to clean is within the threshold again
    assertTrue(requestDecider.chooseRequestToExecute(regularCheckpoint(), false, 0).isPresent());

    store.shutdown(JobStatus.FINISHED, cleaner);
}
Also used : ManualClock(org.apache.flink.util.clock.ManualClock) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CheckpointRequestDeciderTest.regularCheckpoint(org.apache.flink.runtime.checkpoint.CheckpointRequestDeciderTest.regularCheckpoint) JobID(org.apache.flink.api.common.JobID) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Test(org.junit.Test)

Aggregations

ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)19 Test (org.junit.Test)18 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)11 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 CompletableFuture (java.util.concurrent.CompletableFuture)10 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)10 Executor (java.util.concurrent.Executor)9 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)9 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)8 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)7 ScheduledExecutor (org.apache.flink.util.concurrent.ScheduledExecutor)7 HashSet (java.util.HashSet)6 List (java.util.List)6 ExecutionException (java.util.concurrent.ExecutionException)5 Nullable (javax.annotation.Nullable)5 JobID (org.apache.flink.api.common.JobID)5 Arrays (java.util.Arrays)4 TimeUnit (java.util.concurrent.TimeUnit)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 JobStatus (org.apache.flink.api.common.JobStatus)4