Example use of org.apache.flink.runtime.clusterframework.types.AllocationID in the Apache Flink project.
From the class JobMasterTest, method testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated.
/**
 * Fails the single accepted slot of a registered TaskExecutor while the partition tracker
 * reports tracked partitions, then checks that the slot was freed on the TaskExecutor and that
 * the TaskExecutor was not asked to disconnect from the JobManager.
 */
@Test
public void testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated() throws Exception {
    final JobManagerSharedServices sharedServices =
            new TestingJobManagerSharedServicesBuilder().build();
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final LocalUnresolvedTaskManagerLocation unresolvedLocation =
            new LocalUnresolvedTaskManagerLocation();

    // The tracker answers every "is tracking partitions" query with the flag's value,
    // which stays true for the whole test.
    final AtomicBoolean trackingPartitions = new AtomicBoolean(true);
    final TestingJobMasterPartitionTracker tracker = new TestingJobMasterPartitionTracker();
    tracker.setIsTrackingPartitionsForFunction(ignored -> trackingPartitions.get());

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withJobManagerSharedServices(sharedServices)
                    .withHeartbeatServices(heartbeatServices)
                    .withPartitionTrackerFactory(ignored -> tracker)
                    .createJobMaster();

    // Futures recording what the JobMaster asks of the TaskExecutor.
    final CompletableFuture<JobID> disconnectFuture = new CompletableFuture<>();
    final CompletableFuture<AllocationID> freedSlotFuture = new CompletableFuture<>();
    final TestingTaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setFreeSlotFunction(
                            (allocationID, throwable) -> {
                                freedSlotFuture.complete(allocationID);
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            })
                    .setDisconnectJobManagerConsumer(
                            (jobID, throwable) -> disconnectFuture.complete(jobID))
                    .createTestingTaskExecutorGateway();

    try {
        jobMaster.start();
        final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        final Collection<SlotOffer> acceptedOffers =
                registerSlotsAtJobMaster(
                        1, gateway, jobGraph.getJobID(), taskExecutorGateway, unresolvedLocation);

        // the single offered slot must have been accepted
        assertThat(acceptedOffers, hasSize(1));

        final SlotOffer acceptedOffer = acceptedOffers.iterator().next();
        final AllocationID failedAllocationId = acceptedOffer.getAllocationId();
        gateway.failSlot(
                unresolvedLocation.getResourceID(),
                failedAllocationId,
                new FlinkException("Fail allocation test exception"));

        // the slot should be freed, but the TaskExecutor must stay connected because a
        // partition is still allocated on it
        final AllocationID freedAllocationId = freedSlotFuture.get();
        assertThat(freedAllocationId, equalTo(failedAllocationId));

        // issue another request and wait for it, to make sure the processing of the slot
        // allocation failure is complete
        gateway.requestJobStatus(Time.seconds(5)).get();

        final boolean disconnectRequested = disconnectFuture.isDone();
        assertThat(disconnectRequested, is(false));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Example use of org.apache.flink.runtime.clusterframework.types.AllocationID in the Apache Flink project.
From the class JobMasterTest, method registerSlotsAtJobMaster.
/**
 * Registers the given TaskExecutor gateway with the RPC service and the JobMaster, then offers
 * {@code numberSlots} slots with fresh allocation IDs, slot indices {@code 0..numberSlots-1}
 * and {@link ResourceProfile#ANY}.
 *
 * @param numberSlots number of slots to offer
 * @param jobMasterGateway gateway of the JobMaster to register at and offer slots to
 * @param jobId ID of the job the TaskExecutor registers for
 * @param taskExecutorGateway gateway of the TaskExecutor being registered
 * @param unresolvedTaskManagerLocation location of the TaskExecutor being registered
 * @return the result of the {@code offerSlots} call
 * @throws ExecutionException if the registration or the slot offer future fails
 * @throws InterruptedException if interrupted while waiting on either future
 */
private Collection<SlotOffer> registerSlotsAtJobMaster(
        int numberSlots,
        JobMasterGateway jobMasterGateway,
        JobID jobId,
        TaskExecutorGateway taskExecutorGateway,
        UnresolvedTaskManagerLocation unresolvedTaskManagerLocation)
        throws ExecutionException, InterruptedException {
    // Make the gateway resolvable under its own address before registering it.
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

    final TaskManagerRegistrationInformation registrationInformation =
            TaskManagerRegistrationInformation.create(
                    taskExecutorGateway.getAddress(),
                    unresolvedTaskManagerLocation,
                    TestingUtils.zeroUUID());
    // Block until the registration has completed.
    jobMasterGateway.registerTaskManager(jobId, registrationInformation, testingTimeout).get();

    final Collection<SlotOffer> offeredSlots =
            IntStream.range(0, numberSlots)
                    .mapToObj(
                            slotIndex ->
                                    new SlotOffer(
                                            new AllocationID(), slotIndex, ResourceProfile.ANY))
                    .collect(Collectors.toList());

    return jobMasterGateway
            .offerSlots(
                    unresolvedTaskManagerLocation.getResourceID(), offeredSlots, testingTimeout)
            .get();
}
Example use of org.apache.flink.runtime.clusterframework.types.AllocationID in the Apache Flink project.
From the class JobMasterExecutionDeploymentReconciliationTest, method registerTaskExecutorAndOfferSlots.
/**
 * Registers the given TaskExecutor at the JobMaster and offers it one slot (index 0, fresh
 * allocation ID, {@link ResourceProfile#ANY}), waiting for both calls to complete.
 *
 * @param jobMasterGateway gateway of the JobMaster to register at and offer the slot to
 * @param jobId ID of the job the TaskExecutor registers for
 * @param taskExecutorGateway gateway of the TaskExecutor being registered
 * @param taskManagerLocation location of the TaskExecutor being registered
 * @throws ExecutionException if the registration or the slot offer future fails
 * @throws InterruptedException if interrupted while waiting on either future
 */
private void registerTaskExecutorAndOfferSlots(
        JobMasterGateway jobMasterGateway,
        JobID jobId,
        TaskExecutorGateway taskExecutorGateway,
        UnresolvedTaskManagerLocation taskManagerLocation)
        throws ExecutionException, InterruptedException {
    final TaskManagerRegistrationInformation registrationInformation =
            TaskManagerRegistrationInformation.create(
                    taskExecutorGateway.getAddress(),
                    taskManagerLocation,
                    TestingUtils.zeroUUID());
    // Block until the registration has completed.
    jobMasterGateway.registerTaskManager(jobId, registrationInformation, testingTimeout).get();

    final SlotOffer singleOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
    final Collection<SlotOffer> offeredSlots = Collections.singleton(singleOffer);

    // Block until the offer has been answered; the answer itself is not inspected.
    jobMasterGateway
            .offerSlots(taskManagerLocation.getResourceID(), offeredSlots, testingTimeout)
            .get();
}
Example use of org.apache.flink.runtime.clusterframework.types.AllocationID in the Apache Flink project.
From the class SharedSlotTest, method testReleaseEmptyDoesNotCallAllocatorReleaseBack.
/**
 * Checks that the external release callback runs exactly once: returning the last logical
 * slot triggers it, and neither the re-entrant {@code release} call made from inside the
 * callback nor a later explicit {@code release} triggers it again.
 */
@Test
public void testReleaseEmptyDoesNotCallAllocatorReleaseBack() {
    final CompletableFuture<PhysicalSlot> physicalSlotFuture =
            CompletableFuture.completedFuture(new TestingPhysicalSlot(RP, new AllocationID()));
    // Hands the shared slot to the callback, which is defined before the slot exists.
    final CompletableFuture<SharedSlot> sharedSlotHolder = new CompletableFuture<>();
    final AtomicInteger callbackInvocations = new AtomicInteger(0);

    final SharedSlot sharedSlot =
            SharedSlotBuilder.newBuilder()
                    .withSlotContextFuture(physicalSlotFuture)
                    .withExternalReleaseCallback(
                            g -> {
                                // checks that release -> externalReleaseCallback -> release
                                // does not recurse infinitely, thanks to the
                                // SharedSlot.state.RELEASED check
                                final SharedSlot slotUnderRelease = sharedSlotHolder.join();
                                slotUnderRelease.release(new Throwable());
                                callbackInvocations.incrementAndGet();
                            })
                    .build();
    sharedSlotHolder.complete(sharedSlot);

    final LogicalSlot onlyLogicalSlot = sharedSlot.allocateLogicalSlot(EV1).join();
    assertThat(callbackInvocations.get(), is(0));

    // returning the only (and therefore last) logical slot invokes the external release
    // callback
    sharedSlot.returnLogicalSlot(onlyLogicalSlot);
    assertThat(callbackInvocations.get(), is(1));

    // the slot is already released, so releasing again must not invoke the callback
    sharedSlot.release(new Throwable());
    assertThat(callbackInvocations.get(), is(1));
}
Example use of org.apache.flink.runtime.clusterframework.types.AllocationID in the Apache Flink project.
From the class SharedSlotTest, method testLogicalSlotAllocation.
/**
 * Checks that a logical slot requested before the physical slot is available completes once
 * the physical slot future completes, and that it exposes the physical slot's allocation ID,
 * TaskManager location and gateway, with locality {@link Locality#UNKNOWN}.
 */
@Test
public void testLogicalSlotAllocation() {
    final CompletableFuture<PhysicalSlot> physicalSlotFuture = new CompletableFuture<>();
    final CompletableFuture<ExecutionSlotSharingGroup> releasedGroup = new CompletableFuture<>();
    final SharedSlot sharedSlot =
            SharedSlotBuilder.newBuilder()
                    .withSlotContextFuture(physicalSlotFuture)
                    .slotWillBeOccupiedIndefinitely()
                    .withExternalReleaseCallback(group -> releasedGroup.complete(group))
                    .build();

    // The logical slot cannot be ready while the physical slot is still pending.
    final CompletableFuture<LogicalSlot> pendingLogicalSlot = sharedSlot.allocateLogicalSlot(EV1);
    assertThat(pendingLogicalSlot.isDone(), is(false));

    final AllocationID expectedAllocationId = new AllocationID();
    final LocalTaskManagerLocation expectedLocation = new LocalTaskManagerLocation();
    final SimpleAckingTaskManagerGateway expectedGateway = new SimpleAckingTaskManagerGateway();
    final TestingPhysicalSlot physicalSlot =
            new TestingPhysicalSlot(
                    expectedAllocationId, expectedLocation, 3, expectedGateway, RP);
    physicalSlotFuture.complete(physicalSlot);

    // The shared slot now holds a logical slot and has not been released.
    assertThat(sharedSlot.isEmpty(), is(false));
    assertThat(releasedGroup.isDone(), is(false));
    assertThat(pendingLogicalSlot.isDone(), is(true));

    final LogicalSlot allocatedSlot = pendingLogicalSlot.join();
    assertThat(allocatedSlot.getAllocationId(), is(expectedAllocationId));
    assertThat(allocatedSlot.getTaskManagerLocation(), is(expectedLocation));
    assertThat(allocatedSlot.getTaskManagerGateway(), is(expectedGateway));
    assertThat(allocatedSlot.getLocality(), is(Locality.UNKNOWN));
}
Aggregations