use of org.apache.flink.runtime.util.TestingFatalErrorHandler in project flink by apache.
the class JobMasterPartitionReleaseTest method testPartitionReleaseOrPromotionOnJobTermination.
/**
 * Moves the only execution of the job into {@code finalExecutionState} and then asserts that
 * the future selected by {@code callSelector} completes with exactly the two partition IDs
 * that the partition tracker reports as tracked (in any order).
 *
 * @param callSelector picks the future to inspect from the {@link TestSetup}; presumably the
 *     future of either the release call or the promote call - confirm against TestSetup
 * @param finalExecutionState execution state reported for the submitted execution attempt
 * @throws Exception if waiting on any of the involved futures fails or the setup cannot be closed
 */
private void testPartitionReleaseOrPromotionOnJobTermination(Function<TestSetup, CompletableFuture<Collection<ResultPartitionID>>> callSelector, ExecutionState finalExecutionState) throws Exception {
    // Completed by the task executor gateway as soon as the job master submits a task.
    final CompletableFuture<TaskDeploymentDescriptor> submittedDescriptor = new CompletableFuture<>();

    final TestingTaskExecutorGateway taskExecutorGateway =
        new TestingTaskExecutorGatewayBuilder()
            .setSubmitTaskConsumer((descriptor, ignored) -> {
                submittedDescriptor.complete(descriptor);
                return CompletableFuture.completedFuture(Acknowledge.get());
            })
            .createTestingTaskExecutorGateway();

    try (final TestSetup testSetup = new TestSetup(rpcService, testingFatalErrorHandler, taskExecutorGateway)) {
        final ResultPartitionID firstPartition = new ResultPartitionID();
        final ResultPartitionID secondPartition = new ResultPartitionID();

        // The tracker reports two partitions whose descriptors differ only in the boolean flag.
        testSetup.getPartitionTracker().setGetAllTrackedPartitionsSupplier(() -> Arrays.asList(
            AbstractPartitionTrackerTest.createResultPartitionDeploymentDescriptor(firstPartition, true),
            AbstractPartitionTrackerTest.createResultPartitionDeploymentDescriptor(secondPartition, false)));

        final JobMasterGateway gateway = testSetup.getJobMasterGateway();

        // Wait for the deployment, then report the target state for that execution attempt;
        // this should trigger the job to finish.
        final TaskDeploymentDescriptor deployed = submittedDescriptor.get();
        final TaskExecutionState terminalState =
            new TaskExecutionState(deployed.getExecutionAttemptId(), finalExecutionState);
        gateway.updateTaskExecutionState(terminalState);

        final Collection<ResultPartitionID> reportedPartitions = callSelector.apply(testSetup).get();
        assertThat(reportedPartitions, containsInAnyOrder(firstPartition, secondPartition));
    }
}
use of org.apache.flink.runtime.util.TestingFatalErrorHandler in project flink by apache.
the class TaskExecutorITCase method testSlotAllocation.
/**
 * Starts a ResourceManager and a TaskExecutor on a serial RPC service, registers a mocked
 * JobMaster gateway, requests a slot from the ResourceManager and verifies that the matching
 * slot offer reaches the JobMaster gateway.
 *
 * <p>The RPC service and the timer thread pool created here are released in the {@code finally}
 * block so that the test does not leak threads or registered gateways into later tests.
 *
 * @throws Exception if the registration future fails, a verification fails, or the fatal error
 *     handler recorded an error
 */
@Test
public void testSlotAllocation() throws Exception {
    TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    TestingHighAvailabilityServices testingHAServices = new TestingHighAvailabilityServices();
    final Configuration configuration = new Configuration();
    final ScheduledExecutorService scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
    final ResourceID taskManagerResourceId = new ResourceID("foobar");
    final UUID rmLeaderId = UUID.randomUUID();
    final TestingLeaderElectionService rmLeaderElectionService = new TestingLeaderElectionService();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    final String rmAddress = "rm";
    final String jmAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final JobID jobId = new JobID();
    final ResourceProfile resourceProfile = new ResourceProfile(1.0, 1);

    testingHAServices.setResourceManagerLeaderElectionService(rmLeaderElectionService);
    testingHAServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    testingHAServices.setJobMasterLeaderRetriever(jobId, new TestingLeaderRetrievalService(jmAddress, jmLeaderId));

    TestingSerialRpcService rpcService = new TestingSerialRpcService();
    ResourceManagerConfiguration resourceManagerConfiguration = new ResourceManagerConfiguration(Time.milliseconds(500L), Time.milliseconds(500L), Time.minutes(5L));
    SlotManagerFactory slotManagerFactory = new DefaultSlotManager.Factory();
    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(testingHAServices, rpcService.getScheduledExecutor(), resourceManagerConfiguration.getJobTimeout());
    MetricRegistry metricRegistry = mock(MetricRegistry.class);
    HeartbeatServices heartbeatServices = mock(HeartbeatServices.class, RETURNS_MOCKS);

    final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(taskManagerResourceId, InetAddress.getLocalHost(), 1234);
    final MemoryManager memoryManager = mock(MemoryManager.class);
    final IOManager ioManager = mock(IOManager.class);
    final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
    final TaskManagerMetricGroup taskManagerMetricGroup = mock(TaskManagerMetricGroup.class);
    final BroadcastVariableManager broadcastVariableManager = mock(BroadcastVariableManager.class);
    final FileCache fileCache = mock(FileCache.class);
    final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(resourceProfile), new TimerService<AllocationID>(scheduledExecutorService, 100L));
    final JobManagerTable jobManagerTable = new JobManagerTable();
    final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);

    ResourceManager<ResourceID> resourceManager = new StandaloneResourceManager(rpcService, resourceManagerConfiguration, testingHAServices, slotManagerFactory, metricRegistry, jobLeaderIdService, testingFatalErrorHandler);
    TaskExecutor taskExecutor = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpcService, memoryManager, ioManager, networkEnvironment, testingHAServices, heartbeatServices, metricRegistry, taskManagerMetricGroup, broadcastVariableManager, fileCache, taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);

    // The mocked JobMaster accepts any TaskManager registration made with the expected leader id.
    JobMasterGateway jmGateway = mock(JobMasterGateway.class);
    when(jmGateway.registerTaskManager(any(String.class), any(TaskManagerLocation.class), eq(jmLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(taskManagerResourceId, 1234)));
    when(jmGateway.getHostname()).thenReturn(jmAddress);

    rpcService.registerGateway(rmAddress, resourceManager.getSelf());
    rpcService.registerGateway(jmAddress, jmGateway);

    final AllocationID allocationId = new AllocationID();
    final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile);
    final SlotOffer slotOffer = new SlotOffer(allocationId, 0, resourceProfile);

    try {
        resourceManager.start();
        taskExecutor.start();

        // notify the RM that it is the leader
        rmLeaderElectionService.isLeader(rmLeaderId);

        // notify the TM about the new RM leader
        rmLeaderRetrievalService.notifyListener(rmAddress, rmLeaderId);

        Future<RegistrationResponse> registrationResponseFuture = resourceManager.registerJobManager(rmLeaderId, jmLeaderId, jmAddress, jobId);
        RegistrationResponse registrationResponse = registrationResponseFuture.get();
        assertTrue(registrationResponse instanceof JobMasterRegistrationSuccess);

        resourceManager.requestSlot(jmLeaderId, rmLeaderId, slotRequest);

        verify(jmGateway).offerSlots(eq(taskManagerResourceId), (Iterable<SlotOffer>) argThat(Matchers.contains(slotOffer)), eq(jmLeaderId), any(Time.class));
    } finally {
        try {
            if (testingFatalErrorHandler.hasExceptionOccurred()) {
                testingFatalErrorHandler.rethrowError();
            }
        } finally {
            // Nested so that cleanup still runs when rethrowError() throws.
            try {
                rpcService.stopService();
            } finally {
                // Stops the timer thread backing the TaskSlotTable's TimerService.
                scheduledExecutorService.shutdownNow();
            }
        }
    }
}
use of org.apache.flink.runtime.util.TestingFatalErrorHandler in project flink by apache.
the class TaskExecutorTest method testTaskSubmission.
/**
 * Verifies that a task can be submitted to the TaskManager when a slot has already been
 * allocated for it there.
 *
 * <p>The slot table and job manager table are prepared so that the submitted allocation looks
 * active, then the test waits on {@code TestInvokable.completableFuture} after submission.
 */
@Test(timeout = 1000L)
public void testTaskSubmission() throws Exception {
    final Configuration emptyConfig = new Configuration();
    final TestingSerialRpcService serialRpc = new TestingSerialRpcService();
    final TaskManagerConfiguration tmConfig = TaskManagerConfiguration.fromConfiguration(emptyConfig);

    final JobID jid = new JobID();
    final AllocationID slotAllocation = new AllocationID();
    final UUID jmLeaderId = UUID.randomUUID();
    final JobVertexID vertexId = new JobVertexID();

    // Job- and task-level metadata, serialized the way the deployment descriptor expects it.
    final JobInformation jobInfo = new JobInformation(
        jid,
        name.getMethodName(),
        new SerializedValue<>(new ExecutionConfig()),
        new Configuration(),
        Collections.<BlobKey>emptyList(),
        Collections.<URL>emptyList());
    final TaskInformation taskInfo = new TaskInformation(
        vertexId,
        "test task",
        1,
        1,
        TestInvokable.class.getName(),
        new Configuration());
    final SerializedValue<JobInformation> jobInfoBlob = new SerializedValue<>(jobInfo);
    final SerializedValue<TaskInformation> taskInfoBlob = new SerializedValue<>(taskInfo);

    final TaskDeploymentDescriptor deployment = new TaskDeploymentDescriptor(
        jobInfoBlob,
        taskInfoBlob,
        new ExecutionAttemptID(),
        slotAllocation,
        0,
        0,
        0,
        null,
        Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
        Collections.<InputGateDeploymentDescriptor>emptyList());

    // User code is resolved through the test's own class loader.
    final LibraryCacheManager libraries = mock(LibraryCacheManager.class);
    when(libraries.getClassLoader(eq(jid))).thenReturn(getClass().getClassLoader());

    final JobManagerConnection jmConnection = new JobManagerConnection(
        jid,
        ResourceID.generate(),
        mock(JobMasterGateway.class),
        jmLeaderId,
        mock(TaskManagerActions.class),
        mock(CheckpointResponder.class),
        libraries,
        mock(ResultPartitionConsumableNotifier.class),
        mock(PartitionProducerStateChecker.class));

    final JobManagerTable jmTable = new JobManagerTable();
    jmTable.put(jid, jmConnection);

    // The slot table reports the allocation as active and accepts any task.
    final TaskSlotTable slots = mock(TaskSlotTable.class);
    when(slots.existsActiveSlot(eq(jid), eq(slotAllocation))).thenReturn(true);
    when(slots.addTask(any(Task.class))).thenReturn(true);

    final NetworkEnvironment network = mock(NetworkEnvironment.class);
    when(network.createKvStateTaskRegistry(eq(jid), eq(vertexId))).thenReturn(mock(TaskKvStateRegistry.class));

    final TaskManagerMetricGroup tmMetrics = mock(TaskManagerMetricGroup.class);
    when(tmMetrics.addTaskForJob(any(JobID.class), anyString(), any(JobVertexID.class), any(ExecutionAttemptID.class), anyString(), anyInt(), anyInt())).thenReturn(mock(TaskMetricGroup.class));

    final HighAvailabilityServices ha = mock(HighAvailabilityServices.class);
    when(ha.getResourceManagerLeaderRetriever()).thenReturn(mock(LeaderRetrievalService.class));

    try {
        final TestingFatalErrorHandler fatalErrors = new TestingFatalErrorHandler();

        final TaskExecutor executor = new TaskExecutor(
            tmConfig,
            mock(TaskManagerLocation.class),
            serialRpc,
            mock(MemoryManager.class),
            mock(IOManager.class),
            network,
            ha,
            mock(HeartbeatServices.class, RETURNS_MOCKS),
            mock(MetricRegistry.class),
            tmMetrics,
            mock(BroadcastVariableManager.class),
            mock(FileCache.class),
            slots,
            jmTable,
            mock(JobLeaderService.class),
            fatalErrors);

        executor.start();
        executor.submitTask(deployment, jmLeaderId);

        // Block until the invokable's future completes.
        TestInvokable.completableFuture.get();

        // Surface any error that was reported to the fatal error handler in the meantime.
        fatalErrors.rethrowError();
    } finally {
        serialRpc.stopService();
    }
}
use of org.apache.flink.runtime.util.TestingFatalErrorHandler in project flink by apache.
the class TaskExecutorTest method testRejectAllocationRequestsForOutOfSyncSlots.
/**
* Tests that all allocation requests for slots are ignored if the slot has been reported as
* free by the TaskExecutor but this report hasn't been confirmed by the ResourceManager.
*
* <p>This is essential for the correctness of the state of the ResourceManager.
*
* <p>The test is currently disabled via {@code @Ignore}; see the TODO in the body about the
* allocation protocol between ResourceManager and TaskManager.
*
* @throws Exception if a verification fails or the fatal error handler recorded an error
*/
@Ignore
@Test
public void testRejectAllocationRequestsForOutOfSyncSlots() throws Exception {
final ResourceID resourceID = ResourceID.generate();
final String address1 = "/resource/manager/address/one";
final UUID leaderId = UUID.randomUUID();
final JobID jobId = new JobID();
final String jobManagerAddress = "foobar";
final TestingSerialRpcService rpc = new TestingSerialRpcService();
try {
// register the mock resource manager gateways
ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
rpc.registerGateway(address1, rmGateway1);
TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
haServices.setResourceManagerLeaderRetriever(testLeaderService);
// the TaskManager is configured with exactly one slot
TaskManagerConfiguration taskManagerServicesConfiguration = mock(TaskManagerConfiguration.class);
when(taskManagerServicesConfiguration.getNumberSlots()).thenReturn(1);
TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
when(taskManagerLocation.getResourceID()).thenReturn(resourceID);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
TaskExecutor taskManager = new TaskExecutor(taskManagerServicesConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), mock(TaskSlotTable.class), mock(JobManagerTable.class), mock(JobLeaderService.class), testingFatalErrorHandler);
taskManager.start();
String taskManagerAddress = taskManager.getAddress();
// no connection initially, since there is no leader
assertNull(taskManager.getResourceManagerConnection());
// define a leader and see that a registration happens
testLeaderService.notifyListener(address1, leaderId);
verify(rmGateway1).registerTaskExecutor(eq(leaderId), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
assertNotNull(taskManager.getResourceManagerConnection());
// test that allocating a slot works
final SlotID slotID = new SlotID(resourceID, 0);
TMSlotRequestReply tmSlotRequestReply = taskManager.requestSlot(slotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
assertTrue(tmSlotRequestReply instanceof TMSlotRequestRegistered);
// TODO: Figure out the concrete allocation behaviour between RM and TM. Maybe we don't need the SlotID...
// test that we can't allocate slots which are blacklisted due to pending confirmation of the RM
// NOTE(review): slot index 1 lies outside the single configured slot (index 0) - confirm this is
// really how an "unconfirmed free" slot is meant to be modelled
final SlotID unconfirmedFreeSlotID = new SlotID(resourceID, 1);
TMSlotRequestReply tmSlotRequestReply2 = taskManager.requestSlot(unconfirmedFreeSlotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
assertTrue(tmSlotRequestReply2 instanceof TMSlotRequestRejected);
// re-register
// NOTE(review): this verify runs before the second notifyListener call, so it only re-checks
// the registration already verified above - confirm whether the two statements should be swapped
verify(rmGateway1).registerTaskExecutor(eq(leaderId), eq(taskManagerAddress), eq(resourceID), any(SlotReport.class), any(Time.class));
testLeaderService.notifyListener(address1, leaderId);
// now we should be successful because the slots status has been synced
// test that the previously rejected slot can be allocated after the re-registration
TMSlotRequestReply tmSlotRequestReply3 = taskManager.requestSlot(unconfirmedFreeSlotID, jobId, new AllocationID(), jobManagerAddress, leaderId);
assertTrue(tmSlotRequestReply3 instanceof TMSlotRequestRegistered);
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
// always stop the RPC service, even when an assertion or verification fails
rpc.stopService();
}
}
use of org.apache.flink.runtime.util.TestingFatalErrorHandler in project flink by apache.
the class JobMasterTest method testHeartbeatTimeoutWithTaskManager.
/**
 * Tests that the JobMaster disconnects from a registered TaskManager when the heartbeat
 * timeout for that TaskManager fires.
 *
 * <p>The heartbeat and timeout runnables are captured from a mocked {@code ScheduledExecutor}
 * and run by hand, so no real time has to pass. The thread pool handed to the JobMaster is
 * kept in a local variable and shut down in the {@code finally} block so that its worker
 * thread does not outlive the test.
 *
 * @throws Exception if a verification fails or the fatal error handler recorded an error
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));

    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();

    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);

    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);

    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);

    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;

    // Mocked so that the scheduled heartbeat/timeout runnables can be captured and run manually.
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices = new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);

    final JobGraph jobGraph = new JobGraph();

    // Fully qualified to keep it distinct from the ScheduledExecutor mock above.
    final java.util.concurrent.ScheduledExecutorService jobMasterExecutor = Executors.newScheduledThreadPool(1);

    try {
        final JobMaster jobMaster = new JobMaster(jmResourceId, jobGraph, new Configuration(), rpc, haServices, heartbeatServices, jobMasterExecutor, mock(BlobLibraryCacheManager.class), mock(RestartStrategyFactory.class), Time.of(10, TimeUnit.SECONDS), null, mock(OnCompletionActions.class), testingFatalErrorHandler, new FlinkUserCodeClassLoader(new URL[0]));

        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);

        // register task manager will trigger monitoring heartbeat target, schedule heartbeat request in interval time
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);

        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(heartbeatRunnableCaptor.capture(), eq(0L), eq(heartbeatInterval), eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();

        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(timeoutRunnableCaptor.capture(), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();

        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));

        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));

        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        try {
            rpc.stopService();
        } finally {
            // Runs even if stopping the RPC service throws.
            jobMasterExecutor.shutdownNow();
        }
    }
}
Aggregations