use of org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable in project flink by apache.
the class TaskExecutorTest method testTaskSubmission.
/**
* Tests that we can submit a task to the TaskManager given that we've allocated a slot there.
*/
@Test(timeout = 1000L)
public void testTaskSubmission() throws Exception {
final Configuration configuration = new Configuration();
final TestingSerialRpcService rpc = new TestingSerialRpcService();
final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
final JobID jobId = new JobID();
final AllocationID allocationId = new AllocationID();
final UUID jobManagerLeaderId = UUID.randomUUID();
final JobVertexID jobVertexId = new JobVertexID();
JobInformation jobInformation = new JobInformation(jobId, name.getMethodName(), new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
TaskInformation taskInformation = new TaskInformation(jobVertexId, "test task", 1, 1, TestInvokable.class.getName(), new Configuration());
SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(jobInformation);
SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(taskInformation);
final TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, new ExecutionAttemptID(), allocationId, 0, 0, 0, null, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList());
final LibraryCacheManager libraryCacheManager = mock(LibraryCacheManager.class);
when(libraryCacheManager.getClassLoader(eq(jobId))).thenReturn(getClass().getClassLoader());
final JobManagerConnection jobManagerConnection = new JobManagerConnection(jobId, ResourceID.generate(), mock(JobMasterGateway.class), jobManagerLeaderId, mock(TaskManagerActions.class), mock(CheckpointResponder.class), libraryCacheManager, mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class));
final JobManagerTable jobManagerTable = new JobManagerTable();
jobManagerTable.put(jobId, jobManagerConnection);
final TaskSlotTable taskSlotTable = mock(TaskSlotTable.class);
when(taskSlotTable.existsActiveSlot(eq(jobId), eq(allocationId))).thenReturn(true);
when(taskSlotTable.addTask(any(Task.class))).thenReturn(true);
final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
when(networkEnvironment.createKvStateTaskRegistry(eq(jobId), eq(jobVertexId))).thenReturn(mock(TaskKvStateRegistry.class));
final TaskManagerMetricGroup taskManagerMetricGroup = mock(TaskManagerMetricGroup.class);
when(taskManagerMetricGroup.addTaskForJob(any(JobID.class), anyString(), any(JobVertexID.class), any(ExecutionAttemptID.class), anyString(), anyInt(), anyInt())).thenReturn(mock(TaskMetricGroup.class));
final HighAvailabilityServices haServices = mock(HighAvailabilityServices.class);
when(haServices.getResourceManagerLeaderRetriever()).thenReturn(mock(LeaderRetrievalService.class));
try {
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, mock(TaskManagerLocation.class), rpc, mock(MemoryManager.class), mock(IOManager.class), networkEnvironment, haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), taskManagerMetricGroup, mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, mock(JobLeaderService.class), testingFatalErrorHandler);
taskManager.start();
taskManager.submitTask(tdd, jobManagerLeaderId);
Future<Boolean> completionFuture = TestInvokable.completableFuture;
completionFuture.get();
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
use of org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable in project flink by apache.
the class TaskExecutorTest method testHeartbeatTimeoutWithJobManager.
@Test
public void testHeartbeatTimeoutWithJobManager() throws Exception {
final JobID jobId = new JobID();
final Configuration configuration = new Configuration();
final TaskManagerConfiguration tmConfig = TaskManagerConfiguration.fromConfiguration(configuration);
final ResourceID tmResourceId = new ResourceID("tm");
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class)), mock(TimerService.class));
final TestingSerialRpcService rpc = new TestingSerialRpcService();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
final TestingLeaderRetrievalService jmLeaderRetrievalService = new TestingLeaderRetrievalService();
haServices.setJobMasterLeaderRetriever(jobId, jmLeaderRetrievalService);
haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
final long heartbeatTimeout = 10L;
HeartbeatServices heartbeatServices = mock(HeartbeatServices.class);
when(heartbeatServices.createHeartbeatManager(eq(taskManagerLocation.getResourceID()), any(HeartbeatListener.class), any(ScheduledExecutor.class), any(Logger.class))).thenAnswer(new Answer<HeartbeatManagerImpl<Void, Void>>() {
@Override
public HeartbeatManagerImpl<Void, Void> answer(InvocationOnMock invocation) throws Throwable {
return new HeartbeatManagerImpl<>(heartbeatTimeout, taskManagerLocation.getResourceID(), (HeartbeatListener<Void, Void>) invocation.getArguments()[1], (Executor) invocation.getArguments()[2], (ScheduledExecutor) invocation.getArguments()[2], (Logger) invocation.getArguments()[3]);
}
});
final String jobMasterAddress = "jm";
final UUID jmLeaderId = UUID.randomUUID();
final ResourceID jmResourceId = new ResourceID(jobMasterAddress);
final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
final int blobPort = 42;
when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jmLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
when(jobMasterGateway.getAddress()).thenReturn(jobMasterAddress);
when(jobMasterGateway.getHostname()).thenReturn("localhost");
try {
final TaskExecutor taskManager = new TaskExecutor(tmConfig, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, heartbeatServices, mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, new JobManagerTable(), jobLeaderService, testingFatalErrorHandler);
taskManager.start();
rpc.registerGateway(jobMasterAddress, jobMasterGateway);
// we have to add the job after the TaskExecutor, because otherwise the service has not
// been properly started.
jobLeaderService.addJob(jobId, jobMasterAddress);
// now inform the task manager about the new job leader
jmLeaderRetrievalService.notifyListener(jobMasterAddress, jmLeaderId);
// register task manager success will trigger monitoring heartbeat target between tm and jm
verify(jobMasterGateway).registerTaskManager(eq(taskManager.getAddress()), eq(taskManagerLocation), eq(jmLeaderId), any(Time.class));
// the timeout should trigger disconnecting from the JobManager
verify(jobMasterGateway, timeout(heartbeatTimeout * 5)).disconnectTaskManager(eq(taskManagerLocation.getResourceID()), any(TimeoutException.class));
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
use of org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable in project flink by apache.
the class TaskExecutorTest method testImmediatelyRegistersIfLeaderIsKnown.
@Test
public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception {
final ResourceID resourceID = ResourceID.generate();
final String resourceManagerAddress = "/resource/manager/address/one";
final TestingSerialRpcService rpc = new TestingSerialRpcService();
try {
final FatalErrorHandler errorHandler = mock(FatalErrorHandler.class);
// register a mock resource manager gateway
ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
when(rmGateway.registerTaskExecutor(any(UUID.class), anyString(), any(ResourceID.class), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new RegistrationResponse.Success()));
TaskManagerConfiguration taskManagerServicesConfiguration = mock(TaskManagerConfiguration.class);
when(taskManagerServicesConfiguration.getNumberSlots()).thenReturn(1);
rpc.registerGateway(resourceManagerAddress, rmGateway);
TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
when(taskManagerLocation.getResourceID()).thenReturn(resourceID);
NonHaServices haServices = new NonHaServices(resourceManagerAddress);
final TaskSlotTable taskSlotTable = mock(TaskSlotTable.class);
final SlotReport slotReport = new SlotReport();
when(taskSlotTable.createSlotReport(any(ResourceID.class))).thenReturn(slotReport);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
TaskExecutor taskManager = new TaskExecutor(taskManagerServicesConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, mock(JobManagerTable.class), mock(JobLeaderService.class), testingFatalErrorHandler);
taskManager.start();
String taskManagerAddress = taskManager.getAddress();
verify(rmGateway).registerTaskExecutor(any(UUID.class), eq(taskManagerAddress), eq(resourceID), eq(slotReport), any(Time.class));
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
use of org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable in project flink by apache.
the class TaskExecutorTest method testSlotAcceptance.
/**
* Tests that accepted slots go into state assigned and the others are returned to the resource
* manager.
*/
@Test
public void testSlotAcceptance() throws Exception {
final JobID jobId = new JobID();
final TestingSerialRpcService rpc = new TestingSerialRpcService();
final Configuration configuration = new Configuration();
final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
final ResourceID resourceId = new ResourceID("foobar");
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceId, InetAddress.getLoopbackAddress(), 1234);
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
final TimerService<AllocationID> timerService = mock(TimerService.class);
final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)), timerService);
final JobManagerTable jobManagerTable = new JobManagerTable();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
final String resourceManagerAddress = "rm";
final UUID resourceManagerLeaderId = UUID.randomUUID();
final String jobManagerAddress = "jm";
final UUID jobManagerLeaderId = UUID.randomUUID();
final LeaderRetrievalService resourceManagerLeaderRetrievalService = new TestingLeaderRetrievalService(resourceManagerAddress, resourceManagerLeaderId);
final LeaderRetrievalService jobManagerLeaderRetrievalService = new TestingLeaderRetrievalService(jobManagerAddress, jobManagerLeaderId);
haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetrievalService);
haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetrievalService);
final ResourceManagerGateway resourceManagerGateway = mock(ResourceManagerGateway.class);
final InstanceID registrationId = new InstanceID();
when(resourceManagerGateway.registerTaskExecutor(eq(resourceManagerLeaderId), any(String.class), eq(resourceId), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new TaskExecutorRegistrationSuccess(registrationId, 1000L)));
final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
final int blobPort = 42;
final AllocationID allocationId1 = new AllocationID();
final AllocationID allocationId2 = new AllocationID();
final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.UNKNOWN);
final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
when(jobMasterGateway.getHostname()).thenReturn(jobManagerAddress);
when(jobMasterGateway.offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.completed((Iterable<SlotOffer>) Collections.singleton(offer1)));
rpc.registerGateway(resourceManagerAddress, resourceManagerGateway);
rpc.registerGateway(jobManagerAddress, jobMasterGateway);
try {
TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
taskManager.start();
taskSlotTable.allocateSlot(0, jobId, allocationId1, Time.milliseconds(10000L));
taskSlotTable.allocateSlot(1, jobId, allocationId2, Time.milliseconds(10000L));
// we have to add the job after the TaskExecutor, because otherwise the service has not
// been properly started.
jobLeaderService.addJob(jobId, jobManagerAddress);
verify(resourceManagerGateway).notifySlotAvailable(eq(resourceManagerLeaderId), eq(registrationId), eq(new SlotID(resourceId, 1)));
assertTrue(taskSlotTable.existsActiveSlot(jobId, allocationId1));
assertFalse(taskSlotTable.existsActiveSlot(jobId, allocationId2));
assertTrue(taskSlotTable.isSlotFree(1));
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
Aggregations