use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in project flink by apache.
the class TaskManagerRunnerStartupTest method setupTest.
@Before
public void setupTest() {
highAvailabilityServices = new TestingHighAvailabilityServices();
highAvailabilityServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService());
}
use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in project flink by apache.
the class TaskExecutorRecoveryTest method testRecoveredTaskExecutorWillRestoreAllocationState.
@Test
public void testRecoveredTaskExecutorWillRestoreAllocationState(@TempDir File tempDir) throws Exception {
final ResourceID resourceId = ResourceID.generate();
final Configuration configuration = new Configuration();
configuration.set(TaskManagerOptions.NUM_TASK_SLOTS, 2);
configuration.set(CheckpointingOptions.LOCAL_RECOVERY, true);
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
final ArrayBlockingQueue<TaskExecutorSlotReport> queue = new ArrayBlockingQueue<>(2);
testingResourceManagerGateway.setSendSlotReportFunction(slotReportInformation -> {
queue.offer(TaskExecutorSlotReport.create(slotReportInformation.f0, slotReportInformation.f2));
return CompletableFuture.completedFuture(Acknowledge.get());
});
final TestingRpcService rpcService = rpcServiceExtension.getTestingRpcService();
rpcService.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
final JobID jobId = new JobID();
final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
highAvailabilityServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()));
final SettableLeaderRetrievalService jobMasterLeaderRetriever = new SettableLeaderRetrievalService();
highAvailabilityServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);
final WorkingDirectory workingDirectory = WorkingDirectory.create(tempDir);
final TaskExecutor taskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
taskExecutor.start();
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
final TaskExecutorSlotReport taskExecutorSlotReport = queue.take();
final SlotReport slotReport = taskExecutorSlotReport.getSlotReport();
assertThat(slotReport.getNumSlotStatus(), is(2));
final SlotStatus slotStatus = slotReport.iterator().next();
final SlotID allocatedSlotID = slotStatus.getSlotID();
final AllocationID allocationId = new AllocationID();
taskExecutorGateway.requestSlot(allocatedSlotID, jobId, allocationId, slotStatus.getResourceProfile(), "localhost", testingResourceManagerGateway.getFencingToken(), Time.seconds(10L)).join();
taskExecutor.close();
final BlockingQueue<Collection<SlotOffer>> offeredSlots = new ArrayBlockingQueue<>(1);
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
offeredSlots.offer(new HashSet<>(slotOffers));
return CompletableFuture.completedFuture(slotOffers);
}).build();
rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
jobMasterLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
// recover the TaskExecutor
final TaskExecutor recoveredTaskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
recoveredTaskExecutor.start();
final TaskExecutorSlotReport recoveredSlotReport = queue.take();
for (SlotStatus status : recoveredSlotReport.getSlotReport()) {
if (status.getSlotID().equals(allocatedSlotID)) {
assertThat(status.getJobID(), is(jobId));
assertThat(status.getAllocationID(), is(allocationId));
} else {
assertThat(status.getJobID(), is(nullValue()));
}
}
final Collection<SlotOffer> take = offeredSlots.take();
assertThat(take, hasSize(1));
final SlotOffer offeredSlot = take.iterator().next();
assertThat(offeredSlot.getAllocationId(), is(allocationId));
}
use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in project flink by apache.
the class TaskExecutorSlotLifetimeTest method testUserCodeClassLoaderIsBoundToSlot.
/**
* Tests that the user code class loader is bound to the lifetime of the slot. This means that
* it is being reused across a failover, for example. See FLINK-16408.
*/
@Test
public void testUserCodeClassLoaderIsBoundToSlot() throws Exception {
final Configuration configuration = new Configuration();
final TestingRpcService rpcService = TESTING_RPC_SERVICE_RESOURCE.getTestingRpcService();
final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
final CompletableFuture<SlotReport> firstSlotReportFuture = new CompletableFuture<>();
resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
firstSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2);
return CompletableFuture.completedFuture(Acknowledge.get());
});
final BlockingQueue<TaskExecutionState> taskExecutionStates = new ArrayBlockingQueue<>(3);
final OneShotLatch slotsOfferedLatch = new OneShotLatch();
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
slotsOfferedLatch.trigger();
return CompletableFuture.completedFuture(slotOffers);
}).setUpdateTaskExecutionStateFunction(FunctionUtils.uncheckedFunction(taskExecutionState -> {
taskExecutionStates.put(taskExecutionState);
return CompletableFuture.completedFuture(Acknowledge.get());
})).build();
final LeaderRetrievalService resourceManagerLeaderRetriever = new SettableLeaderRetrievalService(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());
final LeaderRetrievalService jobMasterLeaderRetriever = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setResourceManagerLeaderRetriever(resourceManagerLeaderRetriever).setJobMasterLeaderRetrieverFunction(ignored -> jobMasterLeaderRetriever).build();
rpcService.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final LocalUnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
try (final TaskExecutor taskExecutor = createTaskExecutor(configuration, rpcService, haServices, unresolvedTaskManagerLocation)) {
taskExecutor.start();
final SlotReport slotReport = firstSlotReportFuture.join();
final SlotID firstSlotId = slotReport.iterator().next().getSlotID();
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
final JobID jobId = new JobID();
final AllocationID allocationId = new AllocationID();
taskExecutorGateway.requestSlot(firstSlotId, jobId, allocationId, ResourceProfile.ZERO, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), RpcUtils.INF_TIMEOUT).join();
final TaskDeploymentDescriptor tdd = TaskDeploymentDescriptorBuilder.newBuilder(jobId, UserClassLoaderExtractingInvokable.class).setAllocationId(allocationId).build();
slotsOfferedLatch.await();
taskExecutorGateway.submitTask(tdd, jobMasterGateway.getFencingToken(), RpcUtils.INF_TIMEOUT).join();
final ClassLoader firstClassLoader = UserClassLoaderExtractingInvokable.take();
// wait for the first task to finish
TaskExecutionState taskExecutionState;
do {
taskExecutionState = taskExecutionStates.take();
} while (!taskExecutionState.getExecutionState().isTerminal());
// check that a second task will re-use the same class loader
taskExecutorGateway.submitTask(tdd, jobMasterGateway.getFencingToken(), RpcUtils.INF_TIMEOUT).join();
final ClassLoader secondClassLoader = UserClassLoaderExtractingInvokable.take();
assertThat(firstClassLoader, sameInstance(secondClassLoader));
}
}
use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in project flink by apache.
the class TaskExecutorTest method setup.
@Before
public void setup() throws IOException {
rpc = new TestingRpcService();
configuration = new Configuration();
TaskExecutorResourceUtils.adjustForLocalExecution(configuration);
unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
jobId = new JobID();
jobId2 = new JobID();
testingFatalErrorHandler = new TestingFatalErrorHandler();
haServices = new TestingHighAvailabilityServices();
resourceManagerLeaderRetriever = new SettableLeaderRetrievalService();
jobManagerLeaderRetriever = new SettableLeaderRetrievalService();
jobManagerLeaderRetriever2 = new SettableLeaderRetrievalService();
haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetriever);
haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetriever);
haServices.setJobMasterLeaderRetriever(jobId2, jobManagerLeaderRetriever2);
nettyShuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();
}
use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in project flink by apache.
the class TaskExecutorITCase method testSlotAllocation.
@Test
public void testSlotAllocation() throws Exception {
TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
TestingHighAvailabilityServices testingHAServices = new TestingHighAvailabilityServices();
final Configuration configuration = new Configuration();
final ScheduledExecutorService scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
final ResourceID taskManagerResourceId = new ResourceID("foobar");
final UUID rmLeaderId = UUID.randomUUID();
final TestingLeaderElectionService rmLeaderElectionService = new TestingLeaderElectionService();
final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
final String rmAddress = "rm";
final String jmAddress = "jm";
final UUID jmLeaderId = UUID.randomUUID();
final JobID jobId = new JobID();
final ResourceProfile resourceProfile = new ResourceProfile(1.0, 1);
testingHAServices.setResourceManagerLeaderElectionService(rmLeaderElectionService);
testingHAServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
testingHAServices.setJobMasterLeaderRetriever(jobId, new TestingLeaderRetrievalService(jmAddress, jmLeaderId));
TestingSerialRpcService rpcService = new TestingSerialRpcService();
ResourceManagerConfiguration resourceManagerConfiguration = new ResourceManagerConfiguration(Time.milliseconds(500L), Time.milliseconds(500L), Time.minutes(5L));
SlotManagerFactory slotManagerFactory = new DefaultSlotManager.Factory();
JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(testingHAServices, rpcService.getScheduledExecutor(), resourceManagerConfiguration.getJobTimeout());
MetricRegistry metricRegistry = mock(MetricRegistry.class);
HeartbeatServices heartbeatServices = mock(HeartbeatServices.class, RETURNS_MOCKS);
final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(taskManagerResourceId, InetAddress.getLocalHost(), 1234);
final MemoryManager memoryManager = mock(MemoryManager.class);
final IOManager ioManager = mock(IOManager.class);
final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
final TaskManagerMetricGroup taskManagerMetricGroup = mock(TaskManagerMetricGroup.class);
final BroadcastVariableManager broadcastVariableManager = mock(BroadcastVariableManager.class);
final FileCache fileCache = mock(FileCache.class);
final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(resourceProfile), new TimerService<AllocationID>(scheduledExecutorService, 100L));
final JobManagerTable jobManagerTable = new JobManagerTable();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
ResourceManager<ResourceID> resourceManager = new StandaloneResourceManager(rpcService, resourceManagerConfiguration, testingHAServices, slotManagerFactory, metricRegistry, jobLeaderIdService, testingFatalErrorHandler);
TaskExecutor taskExecutor = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpcService, memoryManager, ioManager, networkEnvironment, testingHAServices, heartbeatServices, metricRegistry, taskManagerMetricGroup, broadcastVariableManager, fileCache, taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
JobMasterGateway jmGateway = mock(JobMasterGateway.class);
when(jmGateway.registerTaskManager(any(String.class), any(TaskManagerLocation.class), eq(jmLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(taskManagerResourceId, 1234)));
when(jmGateway.getHostname()).thenReturn(jmAddress);
rpcService.registerGateway(rmAddress, resourceManager.getSelf());
rpcService.registerGateway(jmAddress, jmGateway);
final AllocationID allocationId = new AllocationID();
final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile);
final SlotOffer slotOffer = new SlotOffer(allocationId, 0, resourceProfile);
try {
resourceManager.start();
taskExecutor.start();
// notify the RM that it is the leader
rmLeaderElectionService.isLeader(rmLeaderId);
// notify the TM about the new RM leader
rmLeaderRetrievalService.notifyListener(rmAddress, rmLeaderId);
Future<RegistrationResponse> registrationResponseFuture = resourceManager.registerJobManager(rmLeaderId, jmLeaderId, jmAddress, jobId);
RegistrationResponse registrationResponse = registrationResponseFuture.get();
assertTrue(registrationResponse instanceof JobMasterRegistrationSuccess);
resourceManager.requestSlot(jmLeaderId, rmLeaderId, slotRequest);
verify(jmGateway).offerSlots(eq(taskManagerResourceId), (Iterable<SlotOffer>) argThat(Matchers.contains(slotOffer)), eq(jmLeaderId), any(Time.class));
} finally {
if (testingFatalErrorHandler.hasExceptionOccurred()) {
testingFatalErrorHandler.rethrowError();
}
}
}
Aggregations