use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.
the class RpcGatewayRetrieverTest method testRpcGatewayRetrieval.
/**
* Tests that the RpcGatewayRetriever can retrieve the specified gateway type from a leader
* retrieval service.
*/
@Test
public void testRpcGatewayRetrieval() throws Exception {
final String expectedValue = "foobar";
final String expectedValue2 = "barfoo";
final UUID leaderSessionId = UUID.randomUUID();
RpcGatewayRetriever<UUID, DummyGateway> gatewayRetriever = new RpcGatewayRetriever<>(rpcService, DummyGateway.class, Function.identity(), TestingRetryStrategies.NO_RETRY_STRATEGY);
SettableLeaderRetrievalService settableLeaderRetrievalService = new SettableLeaderRetrievalService();
DummyRpcEndpoint dummyRpcEndpoint = new DummyRpcEndpoint(rpcService, "dummyRpcEndpoint1", expectedValue);
DummyRpcEndpoint dummyRpcEndpoint2 = new DummyRpcEndpoint(rpcService, "dummyRpcEndpoint2", expectedValue2);
rpcService.registerGateway(dummyRpcEndpoint.getAddress(), dummyRpcEndpoint.getSelfGateway(DummyGateway.class));
rpcService.registerGateway(dummyRpcEndpoint2.getAddress(), dummyRpcEndpoint2.getSelfGateway(DummyGateway.class));
try {
dummyRpcEndpoint.start();
dummyRpcEndpoint2.start();
settableLeaderRetrievalService.start(gatewayRetriever);
final CompletableFuture<DummyGateway> gatewayFuture = gatewayRetriever.getFuture();
assertFalse(gatewayFuture.isDone());
settableLeaderRetrievalService.notifyListener(dummyRpcEndpoint.getAddress(), leaderSessionId);
final DummyGateway dummyGateway = gatewayFuture.get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS);
assertEquals(dummyRpcEndpoint.getAddress(), dummyGateway.getAddress());
assertEquals(expectedValue, dummyGateway.foobar(TIMEOUT).get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS));
// elect a new leader
settableLeaderRetrievalService.notifyListener(dummyRpcEndpoint2.getAddress(), leaderSessionId);
final CompletableFuture<DummyGateway> gatewayFuture2 = gatewayRetriever.getFuture();
final DummyGateway dummyGateway2 = gatewayFuture2.get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS);
assertEquals(dummyRpcEndpoint2.getAddress(), dummyGateway2.getAddress());
assertEquals(expectedValue2, dummyGateway2.foobar(TIMEOUT).get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS));
} finally {
RpcUtils.terminateRpcEndpoints(TIMEOUT, dummyRpcEndpoint, dummyRpcEndpoint2);
}
}
use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.
the class TaskExecutorRecoveryTest method testRecoveredTaskExecutorWillRestoreAllocationState.
@Test
public void testRecoveredTaskExecutorWillRestoreAllocationState(@TempDir File tempDir) throws Exception {
final ResourceID resourceId = ResourceID.generate();
final Configuration configuration = new Configuration();
configuration.set(TaskManagerOptions.NUM_TASK_SLOTS, 2);
configuration.set(CheckpointingOptions.LOCAL_RECOVERY, true);
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
final ArrayBlockingQueue<TaskExecutorSlotReport> queue = new ArrayBlockingQueue<>(2);
testingResourceManagerGateway.setSendSlotReportFunction(slotReportInformation -> {
queue.offer(TaskExecutorSlotReport.create(slotReportInformation.f0, slotReportInformation.f2));
return CompletableFuture.completedFuture(Acknowledge.get());
});
final TestingRpcService rpcService = rpcServiceExtension.getTestingRpcService();
rpcService.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
final JobID jobId = new JobID();
final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
highAvailabilityServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()));
final SettableLeaderRetrievalService jobMasterLeaderRetriever = new SettableLeaderRetrievalService();
highAvailabilityServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);
final WorkingDirectory workingDirectory = WorkingDirectory.create(tempDir);
final TaskExecutor taskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
taskExecutor.start();
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
final TaskExecutorSlotReport taskExecutorSlotReport = queue.take();
final SlotReport slotReport = taskExecutorSlotReport.getSlotReport();
assertThat(slotReport.getNumSlotStatus(), is(2));
final SlotStatus slotStatus = slotReport.iterator().next();
final SlotID allocatedSlotID = slotStatus.getSlotID();
final AllocationID allocationId = new AllocationID();
taskExecutorGateway.requestSlot(allocatedSlotID, jobId, allocationId, slotStatus.getResourceProfile(), "localhost", testingResourceManagerGateway.getFencingToken(), Time.seconds(10L)).join();
taskExecutor.close();
final BlockingQueue<Collection<SlotOffer>> offeredSlots = new ArrayBlockingQueue<>(1);
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
offeredSlots.offer(new HashSet<>(slotOffers));
return CompletableFuture.completedFuture(slotOffers);
}).build();
rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
jobMasterLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
// recover the TaskExecutor
final TaskExecutor recoveredTaskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
recoveredTaskExecutor.start();
final TaskExecutorSlotReport recoveredSlotReport = queue.take();
for (SlotStatus status : recoveredSlotReport.getSlotReport()) {
if (status.getSlotID().equals(allocatedSlotID)) {
assertThat(status.getJobID(), is(jobId));
assertThat(status.getAllocationID(), is(allocationId));
} else {
assertThat(status.getJobID(), is(nullValue()));
}
}
final Collection<SlotOffer> take = offeredSlots.take();
assertThat(take, hasSize(1));
final SlotOffer offeredSlot = take.iterator().next();
assertThat(offeredSlot.getAllocationId(), is(allocationId));
}
use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.
the class TaskExecutorSlotLifetimeTest method testUserCodeClassLoaderIsBoundToSlot.
/**
* Tests that the user code class loader is bound to the lifetime of the slot. This means that
* it is being reused across a failover, for example. See FLINK-16408.
*/
@Test
public void testUserCodeClassLoaderIsBoundToSlot() throws Exception {
final Configuration configuration = new Configuration();
final TestingRpcService rpcService = TESTING_RPC_SERVICE_RESOURCE.getTestingRpcService();
final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
final CompletableFuture<SlotReport> firstSlotReportFuture = new CompletableFuture<>();
resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
firstSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2);
return CompletableFuture.completedFuture(Acknowledge.get());
});
final BlockingQueue<TaskExecutionState> taskExecutionStates = new ArrayBlockingQueue<>(3);
final OneShotLatch slotsOfferedLatch = new OneShotLatch();
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
slotsOfferedLatch.trigger();
return CompletableFuture.completedFuture(slotOffers);
}).setUpdateTaskExecutionStateFunction(FunctionUtils.uncheckedFunction(taskExecutionState -> {
taskExecutionStates.put(taskExecutionState);
return CompletableFuture.completedFuture(Acknowledge.get());
})).build();
final LeaderRetrievalService resourceManagerLeaderRetriever = new SettableLeaderRetrievalService(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());
final LeaderRetrievalService jobMasterLeaderRetriever = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setResourceManagerLeaderRetriever(resourceManagerLeaderRetriever).setJobMasterLeaderRetrieverFunction(ignored -> jobMasterLeaderRetriever).build();
rpcService.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final LocalUnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
try (final TaskExecutor taskExecutor = createTaskExecutor(configuration, rpcService, haServices, unresolvedTaskManagerLocation)) {
taskExecutor.start();
final SlotReport slotReport = firstSlotReportFuture.join();
final SlotID firstSlotId = slotReport.iterator().next().getSlotID();
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
final JobID jobId = new JobID();
final AllocationID allocationId = new AllocationID();
taskExecutorGateway.requestSlot(firstSlotId, jobId, allocationId, ResourceProfile.ZERO, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), RpcUtils.INF_TIMEOUT).join();
final TaskDeploymentDescriptor tdd = TaskDeploymentDescriptorBuilder.newBuilder(jobId, UserClassLoaderExtractingInvokable.class).setAllocationId(allocationId).build();
slotsOfferedLatch.await();
taskExecutorGateway.submitTask(tdd, jobMasterGateway.getFencingToken(), RpcUtils.INF_TIMEOUT).join();
final ClassLoader firstClassLoader = UserClassLoaderExtractingInvokable.take();
// wait for the first task to finish
TaskExecutionState taskExecutionState;
do {
taskExecutionState = taskExecutionStates.take();
} while (!taskExecutionState.getExecutionState().isTerminal());
// check that a second task will re-use the same class loader
taskExecutorGateway.submitTask(tdd, jobMasterGateway.getFencingToken(), RpcUtils.INF_TIMEOUT).join();
final ClassLoader secondClassLoader = UserClassLoaderExtractingInvokable.take();
assertThat(firstClassLoader, sameInstance(secondClassLoader));
}
}
use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.
the class TaskExecutorTest method setup.
@Before
public void setup() throws IOException {
rpc = new TestingRpcService();
configuration = new Configuration();
TaskExecutorResourceUtils.adjustForLocalExecution(configuration);
unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
jobId = new JobID();
jobId2 = new JobID();
testingFatalErrorHandler = new TestingFatalErrorHandler();
haServices = new TestingHighAvailabilityServices();
resourceManagerLeaderRetriever = new SettableLeaderRetrievalService();
jobManagerLeaderRetriever = new SettableLeaderRetrievalService();
jobManagerLeaderRetriever2 = new SettableLeaderRetrievalService();
haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetriever);
haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetriever);
haServices.setJobMasterLeaderRetriever(jobId2, jobManagerLeaderRetriever2);
nettyShuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();
}
use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.
the class DefaultJobLeaderIdServiceTest method testInitialJobTimeout.
/**
* Tests that the initial job registration registers a timeout which will call {@link
* JobLeaderIdActions#notifyJobTimeout(JobID, UUID)} when executed.
*/
@Test
public void testInitialJobTimeout() throws Exception {
final JobID jobId = new JobID();
TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(null, null);
highAvailabilityServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);
ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
Time timeout = Time.milliseconds(5000L);
JobLeaderIdActions jobLeaderIdActions = mock(JobLeaderIdActions.class);
JobLeaderIdService jobLeaderIdService = new DefaultJobLeaderIdService(highAvailabilityServices, scheduledExecutor, timeout);
jobLeaderIdService.start(jobLeaderIdActions);
jobLeaderIdService.addJob(jobId);
assertTrue(jobLeaderIdService.containsJob(jobId));
ArgumentCaptor<Runnable> runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class);
verify(scheduledExecutor).schedule(runnableArgumentCaptor.capture(), anyLong(), any(TimeUnit.class));
Runnable timeoutRunnable = runnableArgumentCaptor.getValue();
timeoutRunnable.run();
ArgumentCaptor<UUID> timeoutIdArgumentCaptor = ArgumentCaptor.forClass(UUID.class);
verify(jobLeaderIdActions, times(1)).notifyJobTimeout(eq(jobId), timeoutIdArgumentCaptor.capture());
assertTrue(jobLeaderIdService.isValidTimeout(jobId, timeoutIdArgumentCaptor.getValue()));
}
Aggregations