use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.
the class JobMasterTest method testJobMasterRejectsTaskExecutorRegistrationIfJobIdsAreNotEqual.
/**
 * Verifies that a TaskExecutor registration attempt is answered with a {@link
 * JMTMRegistrationRejection} when the JobID passed along with the registration is not the one
 * the JobMaster expects. See FLINK-21606.
 */
@Test
public void testJobMasterRejectsTaskExecutorRegistrationIfJobIdsAreNotEqual() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).createJobMaster();
    try {
        jobMaster.start();

        // register with a fresh JobID instead of jobGraph.getJobID()
        final JobID unexpectedJobId = new JobID();
        final TaskManagerRegistrationInformation registrationInformation =
                TaskManagerRegistrationInformation.create(
                        "foobar",
                        new LocalUnresolvedTaskManagerLocation(),
                        TestingUtils.zeroUUID());

        final RegistrationResponse response =
                jobMaster
                        .registerTaskManager(
                                unexpectedJobId, registrationInformation, testingTimeout)
                        .get();

        assertThat(response, instanceOf(JMTMRegistrationRejection.class));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.
the class JobMasterTest method testJobMasterAcceptsSlotsWhenJobIsRestarting.
/**
 * Verifies that slots registered at the JobMaster while the job reports {@link
 * JobStatus#RESTARTING} are accepted.
 *
 * <p>The job is brought to RUNNING with one TaskExecutor, that TaskExecutor is disconnected to
 * push the job into RESTARTING, and a second TaskExecutor then registers its slots.
 */
@Test
public void testJobMasterAcceptsSlotsWhenJobIsRestarting() throws Exception {
    // fixed-delay restart strategy with a delay of one day between restart attempts
    configuration.set(RestartStrategyOptions.RESTART_STRATEGY, "fixed-delay");
    configuration.set(
            RestartStrategyOptions.RESTART_STRATEGY_FIXED_DELAY_DELAY, Duration.ofDays(1));

    final int slotCount = 1;

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withConfiguration(configuration)
                    .createJobMaster();

    try {
        jobMaster.start();

        final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        final Duration waitTimeout = TimeUtils.toDuration(testingTimeout);

        // step 1: provide slots through a first TaskExecutor and wait for the job to run
        final LocalUnresolvedTaskManagerLocation firstTaskManagerLocation =
                new LocalUnresolvedTaskManagerLocation();
        final TestingTaskExecutorGateway firstTaskManager =
                new TestingTaskExecutorGatewayBuilder()
                        .setAddress("firstTaskManager")
                        .createTestingTaskExecutorGateway();
        registerSlotsAtJobMaster(
                slotCount,
                gateway,
                jobGraph.getJobID(),
                firstTaskManager,
                firstTaskManagerLocation);
        CommonTestUtils.waitUntilCondition(
                () -> gateway.requestJobStatus(testingTimeout).get() == JobStatus.RUNNING,
                Deadline.fromNow(waitTimeout));

        // step 2: disconnect the first TaskExecutor and wait for the job to be restarting
        gateway.disconnectTaskManager(
                firstTaskManagerLocation.getResourceID(), new FlinkException("Test exception."));
        CommonTestUtils.waitUntilCondition(
                () -> gateway.requestJobStatus(testingTimeout).get() == JobStatus.RESTARTING,
                Deadline.fromNow(waitTimeout));

        // step 3: slots registered by a second TaskExecutor must all be reported back
        final TestingTaskExecutorGateway secondTaskManager =
                new TestingTaskExecutorGatewayBuilder()
                        .setAddress("secondTaskManager")
                        .createTestingTaskExecutorGateway();
        assertThat(
                registerSlotsAtJobMaster(
                        slotCount,
                        gateway,
                        jobGraph.getJobID(),
                        secondTaskManager,
                        new LocalUnresolvedTaskManagerLocation()),
                hasSize(slotCount));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.
the class JobMasterTest method runHeartbeatTest.
/**
 * Registers a TaskExecutor at a newly created JobMaster and verifies that the TaskExecutor is
 * asked to disconnect from the job manager of {@code jobGraph}.
 *
 * <p>What triggers the disconnect is determined by the supplied builder and heartbeat services
 * (presumably a heartbeat timeout or an unreachable target; see the callers).
 *
 * @param testingTaskExecutorGatewayBuilder builder used to create the TaskExecutor gateway; this
 *     method sets its disconnect-job-manager consumer so the disconnect can be observed
 * @param heartbeatServices heartbeat services handed to the JobMaster under test
 * @throws Exception if the registration fails or no disconnect is observed within {@code
 *     testingTimeout}
 */
private void runHeartbeatTest(TestingTaskExecutorGatewayBuilder testingTaskExecutorGatewayBuilder, HeartbeatServices heartbeatServices) throws Exception {
    final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    // the future is completed with the JobID the TaskExecutor is told to disconnect from
    final TestingTaskExecutorGateway taskExecutorGateway = testingTaskExecutorGatewayBuilder.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId)).createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
    try {
        // started inside the try so that the endpoint is terminated even if start() fails
        jobMaster.start();
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
        // wait for the completion of the registration
        registrationResponse.get();
        final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(disconnectedJobManager, equalTo(jobGraph.getJobID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.
the class JobMasterTest method runRequestNextInputSplitTest.
/**
 * Runs a job with a single source vertex, requests all input splits for every execution, fails
 * the first execution and then checks which input splits the first execution can still request
 * after the job has been rescheduled.
 *
 * @param expectedRemainingInputSplits maps the input splits handed out per task before the
 *     failure (one inner list per execution, in the order the executions were queried) to the
 *     input splits that are expected to be returned to the first execution after the failover
 * @throws Exception if the JobMaster interaction fails
 */
private void runRequestNextInputSplitTest(Function<List<List<InputSplit>>, Collection<InputSplit>> expectedRemainingInputSplits) throws Exception {
    final int parallelism = 2;
    final int splitsPerTask = 2;
    final int totalSplits = parallelism * splitsPerTask;
    final List<TestingInputSplit> allInputSplits = new ArrayList<>(totalSplits);
    for (int i = 0; i < totalSplits; i++) {
        allInputSplits.add(new TestingInputSplit(i));
    }
    final InputSplitSource<TestingInputSplit> inputSplitSource = new TestingInputSplitSource(allInputSplits);
    final JobVertex source = new JobVertex("source");
    source.setParallelism(parallelism);
    source.setInputSplitSource(inputSplitSource);
    source.setInvokableClass(AbstractInvokable.class);
    // a restart strategy is configured so that the job is rescheduled after the induced failure
    final ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(100, 0));
    final JobGraph inputSplitJobGraph = JobGraphBuilder.newStreamingJobGraphBuilder().addJobVertex(source).setExecutionConfig(executionConfig).build();
    final JobMaster jobMaster = new JobMasterBuilder(inputSplitJobGraph, rpcService).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
    try {
        // started inside the try so that the endpoint is terminated even if start() fails
        jobMaster.start();
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        registerSlotsRequiredForJobExecution(jobMasterGateway, inputSplitJobGraph.getJobID(), parallelism);
        waitUntilAllExecutionsAreScheduledOrDeployed(jobMasterGateway);
        final JobVertexID sourceId = source.getID();
        final List<AccessExecution> executions = getExecutions(jobMasterGateway, sourceId);
        final ExecutionAttemptID initialAttemptId = executions.get(0).getAttemptId();
        final List<List<InputSplit>> inputSplitsPerTask = new ArrayList<>(parallelism);
        // request all input splits
        for (AccessExecution execution : executions) {
            inputSplitsPerTask.add(getInputSplits(splitsPerTask, getInputSplitSupplier(sourceId, jobMasterGateway, execution.getAttemptId())));
        }
        // together the executions must have received every split exactly once
        final List<InputSplit> allRequestedInputSplits = flattenCollection(inputSplitsPerTask);
        assertThat(allRequestedInputSplits, containsInAnyOrder(allInputSplits.toArray(EMPTY_TESTING_INPUT_SPLITS)));
        // fail the first execution to trigger a failover
        jobMasterGateway.updateTaskExecutionState(new TaskExecutionState(initialAttemptId, ExecutionState.FAILED)).get();
        // wait until the job has been recovered
        waitUntilAllExecutionsAreScheduledOrDeployed(jobMasterGateway);
        final ExecutionAttemptID restartedAttemptId = getFirstExecution(jobMasterGateway, sourceId).getAttemptId();
        final List<InputSplit> inputSplits = getRemainingInputSplits(getInputSplitSupplier(sourceId, jobMasterGateway, restartedAttemptId));
        assertThat(inputSplits, containsInAnyOrder(expectedRemainingInputSplits.apply(inputSplitsPerTask).toArray(EMPTY_TESTING_INPUT_SPLITS)));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in project flink by apache.
the class JobMasterTest method testHeartbeatTimeoutWithResourceManager.
/**
 * Tests that the JobMaster registers at the ResourceManager, that the job manager is
 * disconnected from the ResourceManager afterwards (expected to be caused by a heartbeat timeout
 * under {@code fastHeartbeatServices}), and that the JobMaster then attempts to register again.
 */
@Test
public void testHeartbeatTimeoutWithResourceManager() throws Exception {
    final String resourceManagerAddress = "rm";
    final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
    final ResourceID rmResourceId = new ResourceID(resourceManagerAddress);
    final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(resourceManagerId, rmResourceId, resourceManagerAddress, "localhost");
    final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture = new CompletableFuture<>();
    final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
    // two registrations are expected: the initial one and the reconnect after the disconnect
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    resourceManagerGateway.setRegisterJobManagerFunction((jobMasterId, resourceID, s, jobID) -> {
        jobManagerRegistrationFuture.complete(Tuple3.of(jobMasterId, resourceID, jobID));
        registrationAttempts.countDown();
        return CompletableFuture.completedFuture(resourceManagerGateway.getJobMasterRegistrationSuccess());
    });
    resourceManagerGateway.setDisconnectJobManagerConsumer(tuple -> disconnectedJobManagerFuture.complete(tuple.f0));
    rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway);
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withJobMasterId(jobMasterId).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(fastHeartbeatServices).createJobMaster();
    try {
        // started inside the try so that the endpoint is terminated even if start() fails
        jobMaster.start();
        // define a leader and see that a registration happens
        rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID());
        // register job manager success will trigger monitor heartbeat target between jm and rm
        final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation = jobManagerRegistrationFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId));
        assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId));
        assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID()));
        final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        // heartbeat timeout should trigger disconnect JobManager from ResourceManager
        assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));
        // the JobMaster should try to reconnect to the RM
        registrationAttempts.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Aggregations