Search in sources :

Example 1 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterTest method testJobMasterRejectsTaskExecutorRegistrationIfJobIdsAreNotEqual.

/**
 * Tests that the JobMaster rejects a TaskExecutor registration attempt if the expected and
 * actual JobID are not equal. See FLINK-21606.
 */
@Test
public void testJobMasterRejectsTaskExecutorRegistrationIfJobIdsAreNotEqual() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).createJobMaster();
    try {
        jobMaster.start();
        final CompletableFuture<RegistrationResponse> registrationResponse = jobMaster.registerTaskManager(new JobID(), TaskManagerRegistrationInformation.create("foobar", new LocalUnresolvedTaskManagerLocation(), TestingUtils.zeroUUID()), testingTimeout);
        assertThat(registrationResponse.get(), instanceOf(JMTMRegistrationRejection.class));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterTest method testJobMasterAcceptsSlotsWhenJobIsRestarting.

@Test
public void testJobMasterAcceptsSlotsWhenJobIsRestarting() throws Exception {
    configuration.set(RestartStrategyOptions.RESTART_STRATEGY, "fixed-delay");
    configuration.set(RestartStrategyOptions.RESTART_STRATEGY_FIXED_DELAY_DELAY, Duration.ofDays(1));
    final int numberSlots = 1;
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withConfiguration(configuration).createJobMaster();
    try {
        jobMaster.start();
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        final LocalUnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
        registerSlotsAtJobMaster(numberSlots, jobMasterGateway, jobGraph.getJobID(), new TestingTaskExecutorGatewayBuilder().setAddress("firstTaskManager").createTestingTaskExecutorGateway(), unresolvedTaskManagerLocation);
        CommonTestUtils.waitUntilCondition(() -> jobMasterGateway.requestJobStatus(testingTimeout).get() == JobStatus.RUNNING, Deadline.fromNow(TimeUtils.toDuration(testingTimeout)));
        jobMasterGateway.disconnectTaskManager(unresolvedTaskManagerLocation.getResourceID(), new FlinkException("Test exception."));
        CommonTestUtils.waitUntilCondition(() -> jobMasterGateway.requestJobStatus(testingTimeout).get() == JobStatus.RESTARTING, Deadline.fromNow(TimeUtils.toDuration(testingTimeout)));
        assertThat(registerSlotsAtJobMaster(numberSlots, jobMasterGateway, jobGraph.getJobID(), new TestingTaskExecutorGatewayBuilder().setAddress("secondTaskManager").createTestingTaskExecutorGateway(), new LocalUnresolvedTaskManagerLocation()), hasSize(numberSlots));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) FlinkException(org.apache.flink.util.FlinkException) Test(org.junit.Test)

Example 3 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterTest method runHeartbeatTest.

private void runHeartbeatTest(TestingTaskExecutorGatewayBuilder testingTaskExecutorGatewayBuilder, HeartbeatServices heartbeatServices) throws Exception {
    final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    final TestingTaskExecutorGateway taskExecutorGateway = testingTaskExecutorGatewayBuilder.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId)).createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
    jobMaster.start();
    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
        // wait for the completion of the registration
        registrationResponse.get();
        final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(disconnectedJobManager, equalTo(jobGraph.getJobID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) RestartStrategyOptions(org.apache.flink.configuration.RestartStrategyOptions) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) Duration(java.time.Duration) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ClassRule(org.junit.ClassRule) SimpleSlotContext(org.apache.flink.runtime.instance.SimpleSlotContext) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) AfterClass(org.junit.AfterClass) BlockingQueue(java.util.concurrent.BlockingQueue) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Category(org.junit.experimental.categories.Category) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.is(org.hamcrest.Matchers.is) Time(org.apache.flink.api.common.time.Time) InputSplitSource(org.apache.flink.core.io.InputSplitSource) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) CheckpointProperties(org.apache.flink.runtime.checkpoint.CheckpointProperties) Before(org.junit.Before) InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) InputSplit(org.apache.flink.core.io.InputSplit) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) IOException(java.io.IOException) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Deadline(org.apache.flink.api.common.time.Deadline) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingSchedulerNGFactory(org.apache.flink.runtime.scheduler.TestingSchedulerNGFactory) Assert.fail(org.junit.Assert.fail) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) SlotInfoWithUtilization(org.apache.flink.runtime.jobmaster.slotpool.SlotInfoWithUtilization) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) Queue(java.util.Queue) Matchers.anyOf(org.hamcrest.Matchers.anyOf) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) IntStream(java.util.stream.IntStream) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BeforeClass(org.junit.BeforeClass) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) Function(java.util.function.Function) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) FailoverStrategyFactoryLoader(org.apache.flink.runtime.executiongraph.failover.flip1.FailoverStrategyFactoryLoader) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) FailsWithAdaptiveScheduler(org.apache.flink.testutils.junit.FailsWithAdaptiveScheduler) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingSlotPoolServiceBuilder(org.apache.flink.runtime.jobmaster.slotpool.TestingSlotPoolServiceBuilder) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Matchers.empty(org.hamcrest.Matchers.empty) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingSchedulerNG(org.apache.flink.runtime.scheduler.TestingSchedulerNG) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) CompletableFuture(java.util.concurrent.CompletableFuture) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobID(org.apache.flink.api.common.JobID) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder)

Example 4 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterTest method runRequestNextInputSplitTest.

private void runRequestNextInputSplitTest(Function<List<List<InputSplit>>, Collection<InputSplit>> expectedRemainingInputSplits) throws Exception {
    final int parallelism = 2;
    final int splitsPerTask = 2;
    final int totalSplits = parallelism * splitsPerTask;
    final List<TestingInputSplit> allInputSplits = new ArrayList<>(totalSplits);
    for (int i = 0; i < totalSplits; i++) {
        allInputSplits.add(new TestingInputSplit(i));
    }
    final InputSplitSource<TestingInputSplit> inputSplitSource = new TestingInputSplitSource(allInputSplits);
    JobVertex source = new JobVertex("source");
    source.setParallelism(parallelism);
    source.setInputSplitSource(inputSplitSource);
    source.setInvokableClass(AbstractInvokable.class);
    final ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(100, 0));
    final JobGraph inputSplitJobGraph = JobGraphBuilder.newStreamingJobGraphBuilder().addJobVertex(source).setExecutionConfig(executionConfig).build();
    final JobMaster jobMaster = new JobMasterBuilder(inputSplitJobGraph, rpcService).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
    jobMaster.start();
    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        registerSlotsRequiredForJobExecution(jobMasterGateway, inputSplitJobGraph.getJobID(), parallelism);
        waitUntilAllExecutionsAreScheduledOrDeployed(jobMasterGateway);
        final JobVertexID sourceId = source.getID();
        final List<AccessExecution> executions = getExecutions(jobMasterGateway, sourceId);
        final ExecutionAttemptID initialAttemptId = executions.get(0).getAttemptId();
        final List<List<InputSplit>> inputSplitsPerTask = new ArrayList<>(parallelism);
        // request all input splits
        for (AccessExecution execution : executions) {
            inputSplitsPerTask.add(getInputSplits(splitsPerTask, getInputSplitSupplier(sourceId, jobMasterGateway, execution.getAttemptId())));
        }
        final List<InputSplit> allRequestedInputSplits = flattenCollection(inputSplitsPerTask);
        assertThat(allRequestedInputSplits, containsInAnyOrder(allInputSplits.toArray(EMPTY_TESTING_INPUT_SPLITS)));
        // fail the first execution to trigger a failover
        jobMasterGateway.updateTaskExecutionState(new TaskExecutionState(initialAttemptId, ExecutionState.FAILED)).get();
        // wait until the job has been recovered
        waitUntilAllExecutionsAreScheduledOrDeployed(jobMasterGateway);
        final ExecutionAttemptID restartedAttemptId = getFirstExecution(jobMasterGateway, sourceId).getAttemptId();
        final List<InputSplit> inputSplits = getRemainingInputSplits(getInputSplitSupplier(sourceId, jobMasterGateway, restartedAttemptId));
        assertThat(inputSplits, containsInAnyOrder(expectedRemainingInputSplits.apply(inputSplitsPerTask).toArray(EMPTY_TESTING_INPUT_SPLITS)));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) ArrayList(java.util.ArrayList) List(java.util.List) InputSplit(org.apache.flink.core.io.InputSplit)

Example 5 with JobMasterBuilder

use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.

the class JobMasterTest method testHeartbeatTimeoutWithResourceManager.

@Test
public void testHeartbeatTimeoutWithResourceManager() throws Exception {
    final String resourceManagerAddress = "rm";
    final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
    final ResourceID rmResourceId = new ResourceID(resourceManagerAddress);
    final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(resourceManagerId, rmResourceId, resourceManagerAddress, "localhost");
    final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture = new CompletableFuture<>();
    final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    resourceManagerGateway.setRegisterJobManagerFunction((jobMasterId, resourceID, s, jobID) -> {
        jobManagerRegistrationFuture.complete(Tuple3.of(jobMasterId, resourceID, jobID));
        registrationAttempts.countDown();
        return CompletableFuture.completedFuture(resourceManagerGateway.getJobMasterRegistrationSuccess());
    });
    resourceManagerGateway.setDisconnectJobManagerConsumer(tuple -> disconnectedJobManagerFuture.complete(tuple.f0));
    rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway);
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withJobMasterId(jobMasterId).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(fastHeartbeatServices).createJobMaster();
    jobMaster.start();
    try {
        // define a leader and see that a registration happens
        rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID());
        // register job manager success will trigger monitor heartbeat target between jm and rm
        final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation = jobManagerRegistrationFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId));
        assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId));
        assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID()));
        final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        // heartbeat timeout should trigger disconnect JobManager from ResourceManager
        assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));
        // the JobMaster should try to reconnect to the RM
        registrationAttempts.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : CountDownLatch(java.util.concurrent.CountDownLatch) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

JobMasterBuilder (org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder)28 Test (org.junit.Test)26 ExecutionException (java.util.concurrent.ExecutionException)13 JobID (org.apache.flink.api.common.JobID)13 TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway)13 LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)13 CompletableFuture (java.util.concurrent.CompletableFuture)12 CompletedCheckpoint (org.apache.flink.runtime.checkpoint.CompletedCheckpoint)12 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)12 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)12 ResourceManagerId (org.apache.flink.runtime.resourcemanager.ResourceManagerId)11 File (java.io.File)10 CountDownLatch (java.util.concurrent.CountDownLatch)10 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)10 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)10 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)10 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)10 TaskExecutionState (org.apache.flink.runtime.taskmanager.TaskExecutionState)10 ArrayDeque (java.util.ArrayDeque)9 ArrayList (java.util.ArrayList)9