Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testReconnectionAfterDisconnect.
/**
 * Tests that we continue reconnecting to the latest known RM after a disconnection message.
 *
 * <p>The test installs a registration function on a testing ResourceManager gateway that records
 * the {@code JobMasterId} of every registration attempt, announces that gateway to the leader
 * listeners, waits for the first registration, then calls {@code disconnectResourceManager} and
 * expects a second registration carrying the same {@code JobMasterId}.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withJobMasterId(jobMasterId)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .createJobMaster();

    try {
        // Start and look up the gateway inside the try block so that the endpoint is always
        // terminated in the finally clause, even if one of these calls fails.
        jobMaster.start();
        final JobMasterGateway jobMasterGateway =
                jobMaster.getSelfGateway(JobMasterGateway.class);

        final TestingResourceManagerGateway testingResourceManagerGateway =
                createAndRegisterTestingResourceManagerGateway();

        // Capacity 1: the test consumes each registration before it expects the next one.
        final BlockingQueue<JobMasterId> registrationsQueue = new ArrayBlockingQueue<>(1);
        testingResourceManagerGateway.setRegisterJobManagerFunction(
                (registeringJobMasterId, resourceID, s, jobID) -> {
                    registrationsQueue.offer(registeringJobMasterId);
                    return CompletableFuture.completedFuture(
                            testingResourceManagerGateway.getJobMasterRegistrationSuccess());
                });

        final ResourceManagerId resourceManagerId =
                testingResourceManagerGateway.getFencingToken();
        notifyResourceManagerLeaderListeners(testingResourceManagerGateway);

        // wait for first registration attempt
        final JobMasterId firstRegistrationAttempt = registrationsQueue.take();
        assertThat(firstRegistrationAttempt, equalTo(jobMasterId));
        assertThat(registrationsQueue.isEmpty(), is(true));

        jobMasterGateway.disconnectResourceManager(
                resourceManagerId, new FlinkException("Test exception"));

        // wait for the second registration attempt after the disconnect call
        assertThat(registrationsQueue.take(), equalTo(jobMasterId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterSchedulerTest, method testIfStartSchedulingFailsJobMasterFails.
/**
 * Tests that the JobMaster fails if we cannot start the scheduling. See FLINK-20382.
 *
 * <p>The JobMaster is built with a {@code FailingSchedulerFactory}; after {@code start()} the
 * test expects the "job master failed" future of the completion actions to complete with a
 * {@code JobMasterException}.
 *
 * @throws Exception if building or starting the JobMaster fails unexpectedly
 */
@Test
public void testIfStartSchedulingFailsJobMasterFails() throws Exception {
    final SchedulerNGFactory schedulerFactory = new FailingSchedulerFactory();
    final JobMasterBuilder.TestingOnCompletionActions onCompletionActions =
            new JobMasterBuilder.TestingOnCompletionActions();
    final JobMaster jobMaster =
            new JobMasterBuilder(
                            JobGraphTestUtils.emptyJobGraph(),
                            TESTING_RPC_SERVICE_RESOURCE.getTestingRpcService())
                    .withSlotPoolServiceSchedulerFactory(
                            DefaultSlotPoolServiceSchedulerFactory.create(
                                    TestingSlotPoolServiceBuilder.newBuilder(), schedulerFactory))
                    .withOnCompletionActions(onCompletionActions)
                    .createJobMaster();

    try {
        jobMaster.start();

        assertThat(
                onCompletionActions.getJobMasterFailedFuture().join(),
                is(instanceOf(JobMasterException.class)));
    } finally {
        // Close the JobMaster in every case, including a failed assertion above, so that it is
        // not left behind on the RPC service obtained from TESTING_RPC_SERVICE_RESOURCE and
        // that service can shut down cleanly.
        try {
            jobMaster.close();
        } catch (Exception expected) {
            // expected -- NOTE(review): presumably close() reports the earlier start-up
            // failure of the JobMaster; confirm against JobMaster#close.
        }
    }
}
Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testCloseUnestablishedResourceManagerConnection.
/**
 * Tests that we can close an unestablished ResourceManager connection.
 *
 * <p>Two testing ResourceManager gateways are registered. Each one triggers its own latch when
 * the JobMaster tries to register. The first gateway is announced as leader, and once its
 * registration attempt has been observed the second gateway is announced; the test then waits
 * for a registration attempt at the second gateway.
 */
@Test
public void testCloseUnestablishedResourceManagerConnection() throws Exception {
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .createJobMaster();

    try {
        jobMaster.start();

        final TestingResourceManagerGateway initialRmGateway =
                createAndRegisterTestingResourceManagerGateway();
        final TestingResourceManagerGateway successorRmGateway =
                createAndRegisterTestingResourceManagerGateway();

        final OneShotLatch registeredAtInitialRm = new OneShotLatch();
        final OneShotLatch registeredAtSuccessorRm = new OneShotLatch();

        initialRmGateway.setRegisterJobManagerFunction(
                (masterId, resourceId, address, jobId) -> {
                    registeredAtInitialRm.trigger();
                    return CompletableFuture.completedFuture(
                            initialRmGateway.getJobMasterRegistrationSuccess());
                });
        successorRmGateway.setRegisterJobManagerFunction(
                (masterId, resourceId, address, jobId) -> {
                    registeredAtSuccessorRm.trigger();
                    return CompletableFuture.completedFuture(
                            successorRmGateway.getJobMasterRegistrationSuccess());
                });

        // announce the first RM and block until the JobMaster has tried to register there
        notifyResourceManagerLeaderListeners(initialRmGateway);
        registeredAtInitialRm.await();

        // announcing the second RM should stop the connection attempts towards the first one
        notifyResourceManagerLeaderListeners(successorRmGateway);

        // the JobMaster must now start registering at the second RM
        registeredAtSuccessorRm.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testRequestPartitionState.
/**
 * Tests the {@link JobMaster#requestPartitionState(IntermediateDataSetID, ResultPartitionID)}
 * call for a finished result partition.
 *
 * <p>After the producer task has been reported as {@code FINISHED}, the test checks four
 * requests:
 *
 * <ul>
 *   <li>the known partition yields {@code ExecutionState.FINISHED};
 *   <li>an unknown result partition id fails with an {@code IllegalArgumentException};
 *   <li>a wrong intermediate data set id fails with an {@code IllegalArgumentException};
 *   <li>a partition id with a different execution attempt fails with a
 *       {@code PartitionProducerDisposedException}.
 * </ul>
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test
public void testRequestPartitionState() throws Exception {
    final JobGraph producerConsumerJobGraph = producerConsumerJobGraph();
    final JobMaster jobMaster =
            new JobMasterBuilder(producerConsumerJobGraph, rpcService)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .createJobMaster();

    try {
        // Start inside the try block so that the endpoint is terminated in the finally clause
        // even if start-up fails.
        jobMaster.start();

        // Captures the deployment descriptor of the first submitted task.
        final CompletableFuture<TaskDeploymentDescriptor> tddFuture = new CompletableFuture<>();
        final TestingTaskExecutorGateway testingTaskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setSubmitTaskConsumer(
                                (taskDeploymentDescriptor, jobMasterId) -> {
                                    tddFuture.complete(taskDeploymentDescriptor);
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                })
                        .createTestingTaskExecutorGateway();
        final LocalUnresolvedTaskManagerLocation taskManagerLocation =
                new LocalUnresolvedTaskManagerLocation();
        final JobMasterGateway jobMasterGateway =
                jobMaster.getSelfGateway(JobMasterGateway.class);

        final Collection<SlotOffer> slotOffers =
                registerSlotsAtJobMaster(
                        1,
                        jobMasterGateway,
                        producerConsumerJobGraph.getJobID(),
                        testingTaskExecutorGateway,
                        taskManagerLocation);
        assertThat(slotOffers, hasSize(1));

        // obtain tdd for the result partition ids
        final TaskDeploymentDescriptor tdd = tddFuture.get();
        assertThat(tdd.getProducedPartitions(), hasSize(1));
        final ResultPartitionDeploymentDescriptor partition =
                tdd.getProducedPartitions().iterator().next();

        final ExecutionAttemptID executionAttemptId = tdd.getExecutionAttemptId();
        final ExecutionAttemptID copiedExecutionAttemptId =
                new ExecutionAttemptID(executionAttemptId);

        // finish the producer task
        jobMasterGateway
                .updateTaskExecutionState(
                        new TaskExecutionState(executionAttemptId, ExecutionState.FINISHED))
                .get();

        // request the state of the result partition of the producer
        final ResultPartitionID partitionId =
                new ResultPartitionID(partition.getPartitionId(), copiedExecutionAttemptId);
        CompletableFuture<ExecutionState> partitionStateFuture =
                jobMasterGateway.requestPartitionState(partition.getResultId(), partitionId);
        assertThat(partitionStateFuture.get(), equalTo(ExecutionState.FINISHED));

        // ask for unknown result partition
        partitionStateFuture =
                jobMasterGateway.requestPartitionState(
                        partition.getResultId(), new ResultPartitionID());
        try {
            partitionStateFuture.get();
            fail("Expected failure.");
        } catch (ExecutionException e) {
            assertThat(
                    ExceptionUtils.findThrowable(e, IllegalArgumentException.class).isPresent(),
                    is(true));
        }

        // ask for wrong intermediate data set id
        partitionStateFuture =
                jobMasterGateway.requestPartitionState(new IntermediateDataSetID(), partitionId);
        try {
            partitionStateFuture.get();
            fail("Expected failure.");
        } catch (ExecutionException e) {
            assertThat(
                    ExceptionUtils.findThrowable(e, IllegalArgumentException.class).isPresent(),
                    is(true));
        }

        // ask for "old" execution
        partitionStateFuture =
                jobMasterGateway.requestPartitionState(
                        partition.getResultId(),
                        new ResultPartitionID(
                                partition.getPartitionId(), new ExecutionAttemptID()));
        try {
            partitionStateFuture.get();
            fail("Expected failure.");
        } catch (ExecutionException e) {
            assertThat(
                    ExceptionUtils.findThrowable(e, PartitionProducerDisposedException.class)
                            .isPresent(),
                    is(true));
        }
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated.
/**
 * Tests that failing a slot frees that slot on the TaskExecutor but does not disconnect the
 * TaskExecutor while the partition tracker still reports tracked partitions for it.
 */
@Test
public void testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated() throws Exception {
    final JobManagerSharedServices sharedServices =
            new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph noOpJobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final LocalUnresolvedTaskManagerLocation unresolvedLocation =
            new LocalUnresolvedTaskManagerLocation();

    // the tracker answers every "is tracking partitions for" query with this flag
    final AtomicBoolean trackingPartitions = new AtomicBoolean(true);
    final TestingJobMasterPartitionTracker partitionTracker =
            new TestingJobMasterPartitionTracker();
    partitionTracker.setIsTrackingPartitionsForFunction(ignored -> trackingPartitions.get());

    final JobMaster jobMaster =
            new JobMasterBuilder(noOpJobGraph, rpcService)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withJobManagerSharedServices(sharedServices)
                    .withHeartbeatServices(heartbeatServices)
                    .withPartitionTrackerFactory(ignored -> partitionTracker)
                    .createJobMaster();

    // futures completed by the testing TaskExecutor gateway callbacks below
    final CompletableFuture<JobID> disconnectedFuture = new CompletableFuture<>();
    final CompletableFuture<AllocationID> freedSlotFuture = new CompletableFuture<>();
    final TestingTaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setFreeSlotFunction(
                            (freedAllocationId, cause) -> {
                                freedSlotFuture.complete(freedAllocationId);
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            })
                    .setDisconnectJobManagerConsumer(
                            (disconnectedJobId, cause) ->
                                    disconnectedFuture.complete(disconnectedJobId))
                    .createTestingTaskExecutorGateway();

    try {
        jobMaster.start();
        final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        final Collection<SlotOffer> acceptedOffers =
                registerSlotsAtJobMaster(
                        1,
                        gateway,
                        noOpJobGraph.getJobID(),
                        taskExecutorGateway,
                        unresolvedLocation);

        // check that we accepted the offered slot
        assertThat(acceptedOffers, hasSize(1));
        final AllocationID allocationId = acceptedOffers.iterator().next().getAllocationId();

        gateway.failSlot(
                unresolvedLocation.getResourceID(),
                allocationId,
                new FlinkException("Fail allocation test exception"));

        // we should free the slot, but not disconnect from the TaskExecutor as we still have
        // an allocated partition
        assertThat(freedSlotFuture.get(), equalTo(allocationId));

        // trigger another request and wait for it, to ensure that the processing of the slot
        // allocation failure is complete
        gateway.requestJobStatus(Time.seconds(5)).get();
        assertThat(disconnectedFuture.isDone(), is(false));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Aggregations