use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder in project flink by apache.
the class TaskExecutorSubmissionTest method testFailingNotifyPartitionDataAvailable.
/**
* Test that a failing notifyPartitionDataAvailable call leads to the failing of the respective
* task.
*
* <p>IMPORTANT: We have to make sure that the invokable's cancel method is called, because only
* then the future is completed. We do this by not eagerly deploying consumer tasks and
* requiring the invokable to fill one memory segment. The completed memory segment will trigger
* the scheduling of the downstream operator since it is in pipeline mode. After we've filled
* the memory segment, we'll block the invokable and wait for the task failure due to the failed
* notifyPartitionDataAvailable call.
*/
@Test
public void testFailingNotifyPartitionDataAvailable() throws Exception {
final Configuration configuration = new Configuration();
// set the memory segment to the smallest size possible, because we have to fill one
// memory buffer to trigger notifyPartitionDataAvailable to the downstream
// operators
configuration.set(TaskManagerOptions.MEMORY_SEGMENT_SIZE, MemorySize.parse("4096"));
NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
TaskDeploymentDescriptor tdd = createSender(sdd, TestingAbstractInvokables.TestInvokableRecordCancel.class);
ExecutionAttemptID eid = tdd.getExecutionAttemptId();
final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
final Exception exception = new Exception("Failed notifyPartitionDataAvailable");
final JobMasterId jobMasterId = JobMasterId.generate();
TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder().setFencingTokenSupplier(() -> jobMasterId).setNotifyPartitionDataAvailableFunction(resultPartitionID -> FutureUtils.completedExceptionally(exception)).build();
try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId).setSlotSize(1).setConfiguration(configuration).addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture).setJobMasterId(jobMasterId).setJobMasterGateway(testingJobMasterGateway).useRealNonMockShuffleEnvironment().build()) {
TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture();
taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd, jobMasterId, timeout).get();
taskRunningFuture.get();
CompletableFuture<Boolean> cancelFuture = TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled();
assertTrue(cancelFuture.get());
assertTrue(ExceptionUtils.findThrowableWithMessage(taskSlotTable.getTask(eid).getFailureCause(), exception.getMessage()).isPresent());
}
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder in project flink by apache.
the class TaskExecutorSubmissionTest method testRunJobWithForwardChannel.
@Test
public void testRunJobWithForwardChannel() throws Exception {
ResourceID producerLocation = ResourceID.generate();
NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
TaskDeploymentDescriptor tdd1 = createSender(sdd);
TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();
final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();
final JobMasterId jobMasterId = JobMasterId.generate();
TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder().setFencingTokenSupplier(() -> jobMasterId).setNotifyPartitionDataAvailableFunction(resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get())).build();
try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId).setResourceID(producerLocation).setSlotSize(2).addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture).addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture).addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture).addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture).setJobMasterId(jobMasterId).setJobMasterGateway(testingJobMasterGateway).useRealNonMockShuffleEnvironment().build()) {
TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
task1RunningFuture.get();
taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
task2RunningFuture.get();
task1FinishedFuture.get();
task2FinishedFuture.get();
assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FINISHED);
assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.FINISHED);
}
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder in project flink by apache.
the class TaskExecutorPartitionLifecycleTest method internalTestPartitionRelease.
private void internalTestPartitionRelease(TaskExecutorPartitionTracker partitionTracker, ShuffleEnvironment<?, ?> shuffleEnvironment, CompletableFuture<ResultPartitionID> startTrackingFuture, TestAction testAction) throws Exception {
final ResultPartitionDeploymentDescriptor taskResultPartitionDescriptor = PartitionTestUtils.createPartitionDeploymentDescriptor(ResultPartitionType.BLOCKING);
final ExecutionAttemptID eid1 = taskResultPartitionDescriptor.getShuffleDescriptor().getResultPartitionID().getProducerId();
final TaskDeploymentDescriptor taskDeploymentDescriptor = TaskExecutorSubmissionTest.createTaskDeploymentDescriptor(jobId, "job", eid1, new SerializedValue<>(new ExecutionConfig()), "Sender", 1, 0, 1, 0, new Configuration(), new Configuration(), TestingInvokable.class.getName(), Collections.singletonList(taskResultPartitionDescriptor), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
final TaskSlotTable<Task> taskSlotTable = createTaskSlotTable();
final TaskExecutorLocalStateStoresManager localStateStoresManager = new TaskExecutorLocalStateStoresManager(false, Reference.owned(new File[] { tmp.newFolder() }), Executors.directExecutor());
final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).setTaskStateManager(localStateStoresManager).setShuffleEnvironment(shuffleEnvironment).build();
final CompletableFuture<Void> taskFinishedFuture = new CompletableFuture<>();
final OneShotLatch slotOfferedLatch = new OneShotLatch();
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setRegisterTaskManagerFunction((ignoredJobId, ignoredTaskManagerRegistrationInformation) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(ResourceID.generate()))).setOfferSlotsFunction((resourceID, slotOffers) -> {
slotOfferedLatch.trigger();
return CompletableFuture.completedFuture(slotOffers);
}).setUpdateTaskExecutionStateFunction(taskExecutionState -> {
if (taskExecutionState.getExecutionState() == ExecutionState.FINISHED) {
taskFinishedFuture.complete(null);
}
return CompletableFuture.completedFuture(Acknowledge.get());
}).build();
final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices, partitionTracker);
final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>();
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2);
return CompletableFuture.completedFuture(Acknowledge.get());
});
testingResourceManagerGateway.setRegisterTaskExecutorFunction(input -> CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("blobServerHost", 55555))));
try {
taskExecutor.start();
taskExecutor.waitUntilStarted();
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
final String jobMasterAddress = "jm";
rpc.registerGateway(jobMasterAddress, jobMasterGateway);
rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
// inform the task manager about the job leader
taskManagerServices.getJobLeaderService().addJob(jobId, jobMasterAddress);
jobManagerLeaderRetriever.notifyListener(jobMasterAddress, UUID.randomUUID());
resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
final Optional<SlotStatus> slotStatusOptional = StreamSupport.stream(initialSlotReportFuture.get().spliterator(), false).findAny();
assertTrue(slotStatusOptional.isPresent());
final SlotStatus slotStatus = slotStatusOptional.get();
while (true) {
try {
taskExecutorGateway.requestSlot(slotStatus.getSlotID(), jobId, taskDeploymentDescriptor.getAllocationId(), ResourceProfile.ZERO, jobMasterAddress, testingResourceManagerGateway.getFencingToken(), timeout).get();
break;
} catch (Exception e) {
// the proper establishment of the RM connection is tracked
// asynchronously, so we have to poll here until it went through
// until then, slot requests will fail with an exception
Thread.sleep(50);
}
}
TestingInvokable.sync = new BlockerSync();
// Wait till the slot has been successfully offered before submitting the task.
// This ensures TM has been successfully registered to JM.
slotOfferedLatch.await();
taskExecutorGateway.submitTask(taskDeploymentDescriptor, jobMasterGateway.getFencingToken(), timeout).get();
TestingInvokable.sync.awaitBlocker();
// the task is still running => the partition is in in-progress and should be tracked
assertThat(startTrackingFuture.get(), equalTo(taskResultPartitionDescriptor.getShuffleDescriptor().getResultPartitionID()));
TestingInvokable.sync.releaseBlocker();
taskFinishedFuture.get(timeout.getSize(), timeout.getUnit());
testAction.accept(jobId, taskResultPartitionDescriptor, taskExecutor, taskExecutorGateway);
} finally {
RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
}
// the shutdown of the backing shuffle environment releases all partitions
// the book-keeping is not aware of this
assertTrue(shuffleEnvironment.getPartitionsOccupyingLocalResources().isEmpty());
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder in project flink by apache.
the class TaskExecutorExecutionDeploymentReconciliationTest method setupJobManagerGateway.
private static TestingJobMasterGateway setupJobManagerGateway(OneShotLatch slotOfferLatch, BlockingQueue<Set<ExecutionAttemptID>> deployedExecutionsFuture, CompletableFuture<Void> taskFinishedFuture, ResourceID jobManagerResourceId) {
return new TestingJobMasterGatewayBuilder().setRegisterTaskManagerFunction((ignoredJobId, ignoredTaskManagerRegistrationInformation) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId))).setOfferSlotsFunction((resourceID, slotOffers) -> {
slotOfferLatch.trigger();
return CompletableFuture.completedFuture(slotOffers);
}).setTaskManagerHeartbeatFunction((resourceID, taskExecutorToJobManagerHeartbeatPayload) -> {
ExecutionDeploymentReport executionDeploymentReport = taskExecutorToJobManagerHeartbeatPayload.getExecutionDeploymentReport();
deployedExecutionsFuture.add(executionDeploymentReport.getExecutions());
return FutureUtils.completedVoidFuture();
}).setUpdateTaskExecutionStateFunction(taskExecutionState -> {
if (taskExecutionState.getExecutionState() == ExecutionState.FINISHED) {
taskFinishedFuture.complete(null);
}
return CompletableFuture.completedFuture(Acknowledge.get());
}).build();
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder in project flink by apache.
the class DefaultJobLeaderServiceTest method removeJobWithFailingLeaderRetrievalServiceStopWillStopListeningToLeaderNotifications.
@Test
public void removeJobWithFailingLeaderRetrievalServiceStopWillStopListeningToLeaderNotifications() throws Exception {
final FailingSettableLeaderRetrievalService leaderRetrievalService = new FailingSettableLeaderRetrievalService();
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService).build();
final JobID jobId = new JobID();
final CompletableFuture<JobID> newLeaderFuture = new CompletableFuture<>();
final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(newLeaderFuture::complete);
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
rpcServiceResource.getTestingRpcService().registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener);
try {
jobLeaderService.addJob(jobId, "foobar");
jobLeaderService.removeJob(jobId);
leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
try {
newLeaderFuture.get(10, TimeUnit.MILLISECONDS);
fail("The leader future should not be completed.");
} catch (TimeoutException expected) {
}
} finally {
jobLeaderService.stop();
}
}
Aggregations