Search in sources :

Example 21 with LocalUnresolvedTaskManagerLocation

use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.

In class JobMasterTest, method testRestoringFromSavepoint.

/**
 * Verifies that, on initial submission, a JobMaster restores the submitted JobGraph from the
 * savepoint referenced by the graph's savepoint restore settings.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {
    // write a savepoint with a known id and point the job's restore settings at it
    final long expectedCheckpointId = 42L;
    final File savepoint = createSavepoint(expectedCheckpointId);
    final SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true);
    final JobGraph graph = createJobGraphWithCheckpointing(restoreSettings);

    // the recovery factory always hands out this store, so the test can inspect it afterwards
    final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
    final CheckpointRecoveryFactory recoveryFactory = PerJobCheckpointRecoveryFactory.withoutCheckpointStoreRecovery(ignoredMaxCheckpoints -> checkpointStore);
    haServices.setCheckpointRecoveryFactory(recoveryFactory);

    final JobMaster master = new JobMasterBuilder(graph, rpcService)
            .withHighAvailabilityServices(haServices)
            .createJobMaster();
    try {
        // the adaptive scheduler only restores from the savepoint once the JobMaster is
        // running and the required slots have been registered
        master.start();

        final OneShotLatch submissionLatch = new OneShotLatch();
        final JobMasterGateway selfGateway = master.getSelfGateway(JobMasterGateway.class);
        registerSlotsAtJobMaster(
                1,
                selfGateway,
                graph.getJobID(),
                new TestingTaskExecutorGatewayBuilder()
                        .setSubmitTaskConsumer((descriptor, masterId) -> {
                            submissionLatch.trigger();
                            return CompletableFuture.completedFuture(Acknowledge.get());
                        })
                        .createTestingTaskExecutorGateway(),
                new LocalUnresolvedTaskManagerLocation());

        // a submitted task guarantees that the ExecutionGraph has been created
        submissionLatch.await();

        final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint();
        assertThat(restored, Matchers.notNullValue());
        assertThat(restored.getCheckpointID(), is(expectedCheckpointId));
    } finally {
        RpcUtils.terminateRpcEndpoint(master, testingTimeout);
    }
}
Also used : CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) File(java.io.File) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Test(org.junit.Test)

Example 22 with LocalUnresolvedTaskManagerLocation

use of org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation in project flink by apache.

In class DefaultJobLeaderServiceTest, method handlesConcurrentJobAdditionsAndLeaderChanges.

/**
 * Tests that we can concurrently modify the JobLeaderService and complete the leader retrieval
 * operation. See FLINK-16373.
 *
 * <p>A background thread repeatedly adds and removes jobs while the test thread notifies every
 * leader retrieval service that gets instantiated for those jobs. The job leader service is
 * stopped at the end of the test, regardless of the outcome.
 */
@Test
public void handlesConcurrentJobAdditionsAndLeaderChanges() throws Exception {
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(new LocalUnresolvedTaskManagerLocation(), RetryingRegistrationConfiguration.defaultConfiguration());
    final TestingJobLeaderListener jobLeaderListener = new TestingJobLeaderListener();
    final int numberOperations = 20;
    // sized to numberOperations so that one retrieval service per added job always fits
    final BlockingQueue<SettableLeaderRetrievalService> instantiatedLeaderRetrievalServices = new ArrayBlockingQueue<>(numberOperations);
    final HighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setJobMasterLeaderRetrieverFunction(leaderForJobId -> {
        // record every retrieval service that is handed out so the test thread can notify it
        final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService();
        instantiatedLeaderRetrievalServices.offer(leaderRetrievalService);
        return leaderRetrievalService;
    }).build();
    jobLeaderService.start("foobar", rpcServiceResource.getTestingRpcService(), haServices, jobLeaderListener);
    try {
        final CheckedThread addJobAction = new CheckedThread() {

            @Override
            public void go() throws Exception {
                for (int i = 0; i < numberOperations; i++) {
                    final JobID jobId = JobID.generate();
                    jobLeaderService.addJob(jobId, "foobar");
                    // give the notifying thread a chance to interleave between add and remove
                    Thread.yield();
                    jobLeaderService.removeJob(jobId);
                }
            }
        };
        addJobAction.start();
        for (int i = 0; i < numberOperations; i++) {
            final SettableLeaderRetrievalService leaderRetrievalService = instantiatedLeaderRetrievalServices.take();
            leaderRetrievalService.notifyListener("foobar", UUID.randomUUID());
        }
        // sync() is called so that a failure inside the background thread fails the test
        // NOTE(review): assumes CheckedThread#sync joins and rethrows — confirm
        addJobAction.sync();
    } finally {
        // the service was started above; stop it so it is not left running after the test
        jobLeaderService.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) 
TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) CheckedThread(org.apache.flink.core.testutils.CheckedThread) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)22 Test (org.junit.Test)17 CompletableFuture (java.util.concurrent.CompletableFuture)13 JobMasterBuilder (org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder)13 JobID (org.apache.flink.api.common.JobID)12 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)11 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)11 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)11 TestingTaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway)11 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)11 UnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation)11 FlinkException (org.apache.flink.util.FlinkException)11 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)10 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)10 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)10 SettableLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService)10 TaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TaskExecutorGateway)10 BlockingQueue (java.util.concurrent.BlockingQueue)9 CountDownLatch (java.util.concurrent.CountDownLatch)9 CompletedCheckpoint (org.apache.flink.runtime.checkpoint.CompletedCheckpoint)9