use of org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier in project flink by apache.
the class TaskTest method testTriggerPartitionStateUpdate.
/**
* Tests the trigger partition state update future completions.
*/
@Test
public void testTriggerPartitionStateUpdate() throws Exception {
final IntermediateDataSetID resultId = new IntermediateDataSetID();
final ResultPartitionID partitionId = new ResultPartitionID();
final PartitionProducerStateChecker partitionChecker = mock(PartitionProducerStateChecker.class);
final ResultPartitionConsumableNotifier consumableNotifier = new NoOpResultPartitionConsumableNotifier();
AtomicInteger callCount = new AtomicInteger(0);
RemoteChannelStateChecker remoteChannelStateChecker = new RemoteChannelStateChecker(partitionId, "test task");
// Test all branches of trigger partition state check
{
// Reset latches
setup();
// PartitionProducerDisposedException
final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
TestTaskBuilder.setTaskState(task, ExecutionState.RUNNING);
final CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.requestPartitionProducerState(resultId, partitionId, checkResult -> assertThat(remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult), is(false)));
promise.completeExceptionally(new PartitionProducerDisposedException(partitionId));
assertEquals(ExecutionState.CANCELING, task.getExecutionState());
}
{
// Reset latches
setup();
// Any other exception
final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
TestTaskBuilder.setTaskState(task, ExecutionState.RUNNING);
final CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.requestPartitionProducerState(resultId, partitionId, checkResult -> assertThat(remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult), is(false)));
promise.completeExceptionally(new RuntimeException("Any other exception"));
assertEquals(ExecutionState.FAILED, task.getExecutionState());
}
{
callCount.set(0);
// Reset latches
setup();
// TimeoutException handled special => retry
// Any other exception
final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
try {
task.startTaskThread();
awaitLatch.await();
CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.requestPartitionProducerState(resultId, partitionId, checkResult -> {
if (remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult)) {
callCount.incrementAndGet();
}
});
promise.completeExceptionally(new TimeoutException());
assertEquals(ExecutionState.RUNNING, task.getExecutionState());
assertEquals(1, callCount.get());
} finally {
task.getExecutingThread().interrupt();
task.getExecutingThread().join();
}
}
{
callCount.set(0);
// Reset latches
setup();
// Success
final Task task = createTaskBuilder().setInvokable(InvokableBlockingInInvoke.class).setConsumableNotifier(consumableNotifier).setPartitionProducerStateChecker(partitionChecker).setExecutor(Executors.directExecutor()).build();
try {
task.startTaskThread();
awaitLatch.await();
CompletableFuture<ExecutionState> promise = new CompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.requestPartitionProducerState(resultId, partitionId, checkResult -> {
if (remoteChannelStateChecker.isProducerReadyOrAbortConsumption(checkResult)) {
callCount.incrementAndGet();
}
});
promise.complete(ExecutionState.RUNNING);
assertEquals(ExecutionState.RUNNING, task.getExecutionState());
assertEquals(1, callCount.get());
} finally {
task.getExecutingThread().interrupt();
task.getExecutingThread().join();
}
}
}
use of org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier in project flink by apache.
the class TaskAsyncCallTest method createTask.
private Task createTask(Class<? extends AbstractInvokable> invokableClass) throws Exception {
final TestingClassLoaderLease classLoaderHandle = TestingClassLoaderLease.newBuilder().setGetOrResolveClassLoaderFunction((permanentBlobKeys, urls) -> TestingUserCodeClassLoader.newBuilder().setClassLoader(new TestUserCodeClassLoader()).build()).build();
ResultPartitionConsumableNotifier consumableNotifier = new NoOpResultPartitionConsumableNotifier();
PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
Executor executor = mock(Executor.class);
TaskMetricGroup taskMetricGroup = UnregisteredMetricGroups.createUnregisteredTaskMetricGroup();
JobInformation jobInformation = new JobInformation(new JobID(), "Job Name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.emptyList(), Collections.emptyList());
TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "Test Task", 1, 1, invokableClass.getName(), new Configuration());
return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), mock(MemoryManager.class), mock(IOManager.class), shuffleEnvironment, new KvStateService(new KvStateRegistry(), null, null), mock(BroadcastVariableManager.class), new TaskEventDispatcher(), ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES, new TestTaskStateManager(), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), new NoOpTaskOperatorEventGateway(), new TestGlobalAggregateManager(), classLoaderHandle, mock(FileCache.class), new TestingTaskManagerRuntimeInfo(), taskMetricGroup, consumableNotifier, partitionProducerStateChecker, executor);
}
use of org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier in project flink by apache.
the class TaskTest method testExecutionFailsInNetworkRegistration.
@Test
public void testExecutionFailsInNetworkRegistration() {
try {
// mock a working library cache
LibraryCacheManager libCache = mock(LibraryCacheManager.class);
when(libCache.getClassLoader(any(JobID.class))).thenReturn(getClass().getClassLoader());
// mock a network manager that rejects registration
ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
Executor executor = mock(Executor.class);
NetworkEnvironment network = mock(NetworkEnvironment.class);
when(network.getResultPartitionManager()).thenReturn(partitionManager);
when(network.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
doThrow(new RuntimeException("buffers")).when(network).registerTask(any(Task.class));
Task task = createTask(TestInvokableCorrect.class, libCache, network, consumableNotifier, partitionProducerStateChecker, executor);
task.registerExecutionListener(listener);
task.run();
assertEquals(ExecutionState.FAILED, task.getExecutionState());
assertTrue(task.isCanceledOrFailed());
assertTrue(task.getFailureCause().getMessage().contains("buffers"));
validateUnregisterTask(task.getExecutionId());
validateListenerMessage(ExecutionState.FAILED, task, true);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier in project flink by apache.
the class TaskExecutor method associateWithJobManager.
private JobManagerConnection associateWithJobManager(JobID jobID, ResourceID resourceID, JobMasterGateway jobMasterGateway, UUID jobManagerLeaderId, int blobPort) {
Preconditions.checkNotNull(jobID);
Preconditions.checkNotNull(resourceID);
Preconditions.checkNotNull(jobManagerLeaderId);
Preconditions.checkNotNull(jobMasterGateway);
Preconditions.checkArgument(blobPort > 0 || blobPort < MAX_BLOB_PORT, "Blob server port is out of range.");
TaskManagerActions taskManagerActions = new TaskManagerActionsImpl(jobManagerLeaderId, jobMasterGateway);
CheckpointResponder checkpointResponder = new RpcCheckpointResponder(jobMasterGateway);
InetSocketAddress blobServerAddress = new InetSocketAddress(jobMasterGateway.getHostname(), blobPort);
final LibraryCacheManager libraryCacheManager;
try {
final BlobCache blobCache = new BlobCache(blobServerAddress, taskManagerConfiguration.getConfiguration(), haServices);
libraryCacheManager = new BlobLibraryCacheManager(blobCache, taskManagerConfiguration.getCleanupInterval());
} catch (IOException e) {
// Can't pass the IOException up - we need a RuntimeException anyway
// two levels up where this is run asynchronously. Also, we don't
// know whether this is caught in the thread running this method.
final String message = "Could not create BLOB cache or library cache.";
log.error(message, e);
throw new RuntimeException(message, e);
}
ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = new RpcResultPartitionConsumableNotifier(jobManagerLeaderId, jobMasterGateway, getRpcService().getExecutor(), taskManagerConfiguration.getTimeout());
PartitionProducerStateChecker partitionStateChecker = new RpcPartitionStateChecker(jobManagerLeaderId, jobMasterGateway);
return new JobManagerConnection(jobID, resourceID, jobMasterGateway, jobManagerLeaderId, taskManagerActions, checkpointResponder, libraryCacheManager, resultPartitionConsumableNotifier, partitionStateChecker);
}
use of org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier in project flink by apache.
the class TaskExecutor method submitTask.
// ======================================================================
// RPC methods
// ======================================================================
// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@RpcMethod
public Acknowledge submitTask(TaskDeploymentDescriptor tdd, UUID jobManagerLeaderId) throws TaskSubmissionException {
// first, deserialize the pre-serialized information
final JobInformation jobInformation;
final TaskInformation taskInformation;
try {
jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
} catch (IOException | ClassNotFoundException e) {
throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
}
final JobID jobId = jobInformation.getJobId();
final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId);
if (jobManagerConnection == null) {
final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
if (!jobManagerConnection.getLeaderId().equals(jobManagerLeaderId)) {
final String message = "Rejecting the task submission because the job manager leader id " + jobManagerLeaderId + " does not match the expected job manager leader id " + jobManagerConnection.getLeaderId() + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
if (!taskSlotTable.existsActiveSlot(jobId, tdd.getAllocationId())) {
final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
TaskMetricGroup taskMetricGroup = taskManagerMetricGroup.addTaskForJob(jobInformation.getJobId(), jobInformation.getJobName(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getLeaderId(), jobManagerConnection.getJobManagerGateway(), jobInformation.getJobId(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getTimeout());
TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
LibraryCacheManager libraryCache = jobManagerConnection.getLibraryCacheManager();
ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
Task task = new Task(jobInformation, taskInformation, tdd.getExecutionAttemptId(), tdd.getAllocationId(), tdd.getSubtaskIndex(), tdd.getAttemptNumber(), tdd.getProducedPartitions(), tdd.getInputGates(), tdd.getTargetSlotNumber(), tdd.getTaskStateHandles(), memoryManager, ioManager, networkEnvironment, broadcastVariableManager, taskManagerActions, inputSplitProvider, checkpointResponder, libraryCache, fileCache, taskManagerConfiguration, taskMetricGroup, resultPartitionConsumableNotifier, partitionStateChecker, getRpcService().getExecutor());
log.info("Received task {}.", task.getTaskInfo().getTaskNameWithSubtasks());
boolean taskAdded;
try {
taskAdded = taskSlotTable.addTask(task);
} catch (SlotNotFoundException | SlotNotActiveException e) {
throw new TaskSubmissionException("Could not submit task.", e);
}
if (taskAdded) {
task.startTaskThread();
return Acknowledge.get();
} else {
final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
log.debug(message);
throw new TaskSubmissionException(message);
}
}
Aggregations