use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class TaskExecutorSubmissionTest method testRunJobWithForwardChannel.
/**
 * Submits a sender and a receiver task that share one shuffle descriptor to a single task
 * executor and checks that both tasks end up in {@link ExecutionState#FINISHED}.
 *
 * <p>The task executor is created with the same {@link ResourceID} that is used as the
 * producer location of the shuffle descriptor, and with a real (non-mock) shuffle environment.
 */
@Test
public void testRunJobWithForwardChannel() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd =
            createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

    // Completed by the task manager action listeners registered on the environment below.
    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setFencingTokenSupplier(() -> jobMasterId)
                    .setNotifyPartitionDataAvailableFunction(
                            resultPartitionID ->
                                    CompletableFuture.completedFuture(Acknowledge.get()))
                    .build();

    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setResourceID(producerLocation)
                    .setSlotSize(2)
                    .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
                    .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
                    .addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
                    .addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
                    .setJobMasterId(jobMasterId)
                    .setJobMasterGateway(testingJobMasterGateway)
                    .useRealNonMockShuffleEnvironment()
                    .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

        // The sender is deployed first and must be running before the receiver is submitted.
        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();

        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();

        task1FinishedFuture.get();
        task2FinishedFuture.get();

        // JUnit expects (expected, actual); this order keeps failure messages accurate.
        assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid1).getExecutionState());
        assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class DefaultJobLeaderIdServiceTest method jobTimeoutAfterLostLeadership.
/**
 * Checks the timeout handling of the job leader id service around leadership changes: the
 * timeout registered when the job is added is cancelled once a leader is announced, and a
 * fresh timeout is scheduled as soon as that leader is revoked again.
 */
@Test(timeout = 10000)
public void jobTimeoutAfterLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final String address = "foobar";
    final JobMasterId leaderId = JobMasterId.generate();

    TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    SettableLeaderRetrievalService leaderRetriever = new SettableLeaderRetrievalService(null, null);
    haServices.setJobMasterLeaderRetriever(jobId, leaderRetriever);

    // Two mocked timeout handles, handed out in order by the mocked executor below.
    ScheduledFuture<?> firstTimeout = mock(ScheduledFuture.class);
    ScheduledFuture<?> secondTimeout = mock(ScheduledFuture.class);
    final Queue<ScheduledFuture<?>> pendingTimeouts =
            new ArrayDeque<>(Arrays.asList(firstTimeout, secondTimeout));

    // The executor never runs anything itself; it only records the most recently scheduled
    // runnable so the test can trigger it by hand.
    ScheduledExecutor executor = mock(ScheduledExecutor.class);
    final AtomicReference<Runnable> capturedRunnable = new AtomicReference<>();
    doAnswer(
                    invocation -> {
                        capturedRunnable.set((Runnable) invocation.getArguments()[0]);
                        return pendingTimeouts.poll();
                    })
            .when(executor)
            .schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    Time jobTimeout = Time.milliseconds(5000L);

    // Remember the timeout id passed to the most recent notifyJobTimeout call.
    JobLeaderIdActions actions = mock(JobLeaderIdActions.class);
    final AtomicReference<UUID> capturedTimeoutId = new AtomicReference<>();
    doAnswer(
                    invocation -> {
                        capturedTimeoutId.set((UUID) invocation.getArguments()[1]);
                        return null;
                    })
            .when(actions)
            .notifyJobTimeout(eq(jobId), any(UUID.class));

    JobLeaderIdService service = new DefaultJobLeaderIdService(haServices, executor, jobTimeout);
    service.start(actions);
    service.addJob(jobId);

    CompletableFuture<JobMasterId> leaderIdFuture = service.getLeaderId(jobId);

    // announce a leader for the job
    leaderRetriever.notifyListener(address, leaderId.toUUID());

    assertEquals(leaderId, leaderIdFuture.get());
    assertTrue(service.containsJob(jobId));

    // the timeout registered on addJob must have been cancelled, and nothing new scheduled
    verify(firstTimeout, times(1)).cancel(anyBoolean());
    verify(executor, times(1)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // firing the stale timeout runnable still notifies the actions ...
    Runnable staleTimeoutTask = capturedRunnable.get();
    assertNotNull(staleTimeoutTask);
    staleTimeoutTask.run();
    verify(actions, times(1)).notifyJobTimeout(eq(jobId), any(UUID.class));

    // ... but the reported timeout id is not considered valid any more
    assertFalse(service.isValidTimeout(jobId, capturedTimeoutId.get()));

    // revoke the leadership
    leaderRetriever.notifyListener("", null);

    verify(executor, times(2)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // the runnable captured now belongs to the newly scheduled timeout
    Runnable freshTimeoutTask = capturedRunnable.get();
    assertNotNull(freshTimeoutTask);
    freshTimeoutTask.run();
    verify(actions, times(2)).notifyJobTimeout(eq(jobId), any(UUID.class));

    // this time the reported timeout id must be valid
    assertTrue(service.isValidTimeout(jobId, capturedTimeoutId.get()));
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class ResourceManagerJobMasterTest method testRegisterJobMasterWithUnmatchedLeaderSessionId2.
/**
 * Registers a job master at the resource manager using a freshly generated {@link JobMasterId}
 * (i.e. an unmatched leader session id) and expects the registration to be answered with a
 * {@link RegistrationResponse.Failure}.
 */
@Test
public void testRegisterJobMasterWithUnmatchedLeaderSessionId2() throws Exception {
    // A newly generated id stands in for the unmatched leader session id.
    final JobMasterId unexpectedJobMasterId = JobMasterId.generate();

    final CompletableFuture<RegistrationResponse> registration =
            resourceManagerGateway.registerJobMaster(
                    unexpectedJobMasterId,
                    jobMasterResourceId,
                    jobMasterGateway.getAddress(),
                    jobId,
                    TIMEOUT);

    final RegistrationResponse response = registration.get();
    assertTrue(response instanceof RegistrationResponse.Failure);
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class TaskExecutorOperatorEventHandlingTest method createExecutorWithRunningTask.
// ------------------------------------------------------------------------
// test setup helpers
// ------------------------------------------------------------------------
/**
 * Builds a task submission test environment, submits a single task to it and waits until
 * that task has been reported as {@link ExecutionState#RUNNING}.
 *
 * <p>The job master gateway used by the environment throws a {@link RuntimeException} both
 * when an operator event is sent and when a coordination request is delivered.
 *
 * <p>On success the caller owns the returned environment and is responsible for closing it.
 * If anything fails after the environment has been built, it is closed here before the
 * failure is propagated, so that it does not leak.
 *
 * @param jobId id of the job the task belongs to
 * @param executionAttemptId execution attempt id of the task to deploy
 * @param invokableClass invokable class the deployed task runs
 * @return the environment hosting the running task
 * @throws Exception if slot allocation, task submission or waiting for the running state fails
 */
private TaskSubmissionTestEnvironment createExecutorWithRunningTask(JobID jobId, ExecutionAttemptID executionAttemptId, Class<? extends AbstractInvokable> invokableClass) throws Exception {
    final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jobId, executionAttemptId, invokableClass);
    final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
    final JobMasterId token = JobMasterId.generate();

    final TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setJobMasterId(token)
                    .setSlotSize(1)
                    .addTaskManagerActionListener(executionAttemptId, ExecutionState.RUNNING, taskRunningFuture)
                    .setMetricQueryServiceAddress(metricRegistry.getMetricQueryServiceGatewayRpcAddress())
                    .setJobMasterGateway(
                            new TestingJobMasterGatewayBuilder()
                                    .setFencingTokenSupplier(() -> token)
                                    .setOperatorEventSender(
                                            (eio, oid, value) -> {
                                                throw new RuntimeException();
                                            })
                                    .setDeliverCoordinationRequestFunction(
                                            (oid, value) -> {
                                                throw new RuntimeException();
                                            })
                                    .build())
                    .build();

    try {
        env.getTaskSlotTable().allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));

        final TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        tmGateway.submitTask(tdd, env.getJobMasterId(), Time.seconds(10)).get();
        taskRunningFuture.get();

        return env;
    } catch (Throwable failure) {
        // The caller never receives the environment in this case, so release it here and
        // keep any secondary close failure attached to the original cause.
        try {
            env.close();
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class TaskExecutorSubmissionTest method testCancellingDependentAndStateUpdateFails.
/**
 * This test creates two tasks. The job master gateway rejects the sender's {@code RUNNING}
 * state update with an {@link ExecutionGraphException}, after which the sender is expected to
 * reach {@link ExecutionState#FAILED}. The receiver is then cancelled explicitly and is
 * expected to reach {@link ExecutionState#CANCELED}.
 */
@Test
public void testCancellingDependentAndStateUpdateFails() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd =
            createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

    // Completed by the task manager action listeners registered on the environment below.
    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setFencingTokenSupplier(() -> jobMasterId)
                    .setUpdateTaskExecutionStateFunction(
                            taskExecutionState -> {
                                if (taskExecutionState != null
                                        && taskExecutionState.getID().equals(eid1)
                                        && taskExecutionState.getExecutionState() == ExecutionState.RUNNING) {
                                    // Only the sender's RUNNING update is rejected; the message
                                    // names the attempt whose update is being failed.
                                    return FutureUtils.completedExceptionally(
                                            new ExecutionGraphException(
                                                    "The execution attempt " + eid1 + " was not found."));
                                } else {
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                }
                            })
                    .build();

    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setResourceID(producerLocation)
                    .setSlotSize(2)
                    .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
                    .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
                    .addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
                    .addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
                    .setJobMasterId(jobMasterId)
                    .setJobMasterGateway(testingJobMasterGateway)
                    .useRealNonMockShuffleEnvironment()
                    .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();

        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();

        task1FailedFuture.get();
        // JUnit expects (expected, actual); this order keeps failure messages accurate.
        assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid1).getExecutionState());

        tmGateway.cancelTask(eid2, timeout);
        task2CanceledFuture.get();
        assertSame(ExecutionState.CANCELED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
Aggregations