Example use of org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder in the Apache Flink project.
Class TaskExecutorPartitionLifecycleTest, method testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion.
/**
 * Shared scenario checking when the TaskExecutor terminates its JobMaster connection after
 * partitions were released or promoted through the gateway.
 *
 * <p>While the testing partition tracker reports that it still tracks partitions for the job, a
 * release/promote request must not lead to a disconnect. After the tracker is switched to
 * report no tracked partitions, the supplied call is expected to cause the JobMaster's
 * disconnect function to be invoked.
 *
 * @param releaseOrPromoteCall the gateway interaction under test; it receives the TaskExecutor
 *     gateway, the job id and the id of a BLOCKING result partition
 * @throws Exception if any step of the scenario fails
 */
private void testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(
        TriConsumer<TaskExecutorGateway, JobID, ResultPartitionID> releaseOrPromoteCall)
        throws Exception {
    // Tracker stub: the test toggles what it reports and observes the first release call.
    final AtomicBoolean trackerHasPartitions = new AtomicBoolean(false);
    final CompletableFuture<Collection<ResultPartitionID>> firstReleaseCall =
            new CompletableFuture<>();
    final TestingTaskExecutorPartitionTracker tracker =
            new TestingTaskExecutorPartitionTracker();
    tracker.setIsTrackingPartitionsForFunction(ignoredJobId -> trackerHasPartitions.get());
    tracker.setStopTrackingAndReleasePartitionsConsumer(
            releasedPartitions -> firstReleaseCall.complete(releasedPartitions));

    // JobMaster stub: completes the future as soon as the TaskExecutor disconnects from it.
    final CompletableFuture<Void> jobMasterDisconnected = new CompletableFuture<>();
    final JobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setDisconnectTaskManagerFunction(
                            disconnectedResourceId -> {
                                jobMasterDisconnected.complete(null);
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            })
                    .build();

    final DefaultJobTable jobTable = DefaultJobTable.create();
    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setJobTable(jobTable)
                    .setShuffleEnvironment(new NettyShuffleEnvironmentBuilder().build())
                    .setTaskSlotTable(createTaskSlotTable())
                    .build();

    // Id of a blocking partition that is later handed to the call under test.
    final ResultPartitionID resultPartitionId =
            PartitionTestUtils.createPartitionDeploymentDescriptor(ResultPartitionType.BLOCKING)
                    .getShuffleDescriptor()
                    .getResultPartitionID();

    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(services, tracker);
    try {
        taskExecutor.start();
        taskExecutor.waitUntilStarted();

        TaskSubmissionTestEnvironment.registerJobMasterConnection(
                jobTable,
                jobId,
                rpc,
                jobMasterGateway,
                new NoOpTaskManagerActions(),
                timeout,
                taskExecutor.getMainThreadExecutableForTesting());

        final TaskExecutorGateway gateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        // Phase 1: the tracker still reports partitions, so the connection must survive.
        trackerHasPartitions.set(true);
        assertThat(firstReleaseCall.isDone(), is(false));

        final ResultPartitionID unrelatedPartition = new ResultPartitionID();
        gateway.releaseOrPromotePartitions(
                jobId, Collections.singleton(unrelatedPartition), Collections.emptySet());

        // Once this future completes we only know that the TaskExecutor has entered the
        // release path; whether it already evaluated the disconnect condition is unknown.
        firstReleaseCall.get();

        // The tracker still holds partitions, hence no disconnect may have happened.
        assertThat(jobMasterDisconnected.isDone(), is(false));

        // Phase 2: the tracker reports nothing left; the call under test should make the
        // TaskExecutor re-check for stored partitions and then close the connection.
        trackerHasPartitions.set(false);
        releaseOrPromoteCall.accept(gateway, jobId, resultPartitionId);
        jobMasterDisconnected.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example use of org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder in the Apache Flink project.
Class TaskExecutorPartitionLifecycleTest, method testBlockingLocalPartitionReleaseDoesNotBlockTaskExecutor.
/**
 * Checks that the TaskExecutor keeps answering gateway calls while the release of a local
 * partition is blocked inside the {@link ResultPartitionManager}.
 *
 * <p>The partition manager used here blocks in {@code releasePartition} until the test
 * releases the {@link BlockerSync}; meanwhile {@code canBeReleased()} must complete within
 * five seconds.
 */
@Test
public void testBlockingLocalPartitionReleaseDoesNotBlockTaskExecutor() throws Exception {
    final BlockerSync releaseBlocker = new BlockerSync();

    // Partition manager whose release call parks until the blocker is released.
    final ResultPartitionManager stallingPartitionManager =
            new ResultPartitionManager() {
                @Override
                public void releasePartition(ResultPartitionID id, Throwable reason) {
                    releaseBlocker.blockNonInterruptible();
                    super.releasePartition(id, reason);
                }
            };

    final NettyShuffleEnvironment environment =
            new NettyShuffleEnvironmentBuilder()
                    .setResultPartitionManager(stallingPartitionManager)
                    .setIoExecutor(TEST_EXECUTOR_SERVICE_RESOURCE.getExecutor())
                    .build();

    // Completed with the partition id once the tracker has started tracking it.
    final CompletableFuture<ResultPartitionID> trackingStarted = new CompletableFuture<>();
    final TaskExecutorPartitionTracker tracker =
            new TaskExecutorPartitionTrackerImpl(environment) {
                @Override
                public void startTrackingPartition(
                        JobID producer, TaskExecutorPartitionInfo info) {
                    super.startTrackingPartition(producer, info);
                    trackingStarted.complete(info.getResultPartitionId());
                }
            };

    try {
        internalTestPartitionRelease(
                tracker,
                environment,
                trackingStarted,
                (ignoredJobId, descriptor, ignoredExecutor, gateway) -> {
                    // Kick off the (blocking) release of the cluster partition ...
                    gateway.releaseClusterPartitions(
                            Collections.singleton(descriptor.getResultId()), timeout);
                    // ... and probe with another call that the TaskExecutor still responds.
                    gateway.canBeReleased().get(5, TimeUnit.SECONDS);
                });
    } finally {
        // Always unblock the parked release call.
        releaseBlocker.releaseBlocker();
    }
}
Example use of org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder in the Apache Flink project.
Class TaskExecutorExecutionDeploymentReconciliationTest, method testDeployedExecutionReporting.
/**
 * Verifies the set of deployed executions that is captured in {@code deployedExecutionsQueue}
 * after each JobManager heartbeat: empty before a task is submitted, containing the execution
 * attempt while the task is blocked in its invokable, and empty again after the task finished.
 */
@Test
public void testDeployedExecutionReporting() throws Exception {
    final ResourceID jobManagerResourceId = ResourceID.generate();

    // Hooks that the testing JobMaster gateway completes/fills while the test runs.
    final OneShotLatch slotOffered = new OneShotLatch();
    final BlockingQueue<Set<ExecutionAttemptID>> reportedDeployments =
            new ArrayBlockingQueue<>(3);
    final CompletableFuture<Void> taskFinished = new CompletableFuture<>();
    final TestingJobMasterGateway jobMasterGateway =
            setupJobManagerGateway(
                    slotOffered, reportedDeployments, taskFinished, jobManagerResourceId);

    final CompletableFuture<SlotReport> initialSlotReport = new CompletableFuture<>();
    final TestingResourceManagerGateway resourceManagerGateway =
            setupResourceManagerGateway(initialSlotReport);

    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1, timeout))
                    .setShuffleEnvironment(new NettyShuffleEnvironmentBuilder().build())
                    .build();

    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(services);
    try {
        taskExecutor.start();
        taskExecutor.waitUntilStarted();

        final TaskExecutorGateway gateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        final TaskDeploymentDescriptor deployment = createTaskDeploymentDescriptor(jobId);

        connectComponentsAndRequestSlot(
                jobMasterGateway,
                resourceManagerGateway,
                gateway,
                services.getJobLeaderService(),
                initialSlotReport,
                deployment.getAllocationId());

        TestingInvokable.sync = new BlockerSync();

        // Waiting for the slot offer guarantees that the TM is registered at the JM.
        slotOffered.await();

        final AllocatedSlotInfo slotInfo =
                new AllocatedSlotInfo(0, deployment.getAllocationId());
        final AllocatedSlotReport slotReport =
                new AllocatedSlotReport(jobId, Collections.singleton(slotInfo));

        // Nothing has been deployed yet, so the deployment report must be empty.
        gateway.heartbeatFromJobManager(jobManagerResourceId, slotReport);
        final Set<ExecutionAttemptID> beforeSubmission = reportedDeployments.take();
        assertThat(beforeSubmission, hasSize(0));

        gateway.submitTask(deployment, jobMasterGateway.getFencingToken(), timeout).get();
        TestingInvokable.sync.awaitBlocker();

        // The task is deployed and blocked, so the report must contain its attempt id.
        gateway.heartbeatFromJobManager(jobManagerResourceId, slotReport);
        final Set<ExecutionAttemptID> whileRunning = reportedDeployments.take();
        assertThat(whileRunning, hasItem(deployment.getExecutionAttemptId()));

        TestingInvokable.sync.releaseBlocker();

        // The task has finished and was cleaned up, so the report must be empty again.
        taskFinished.get();
        gateway.heartbeatFromJobManager(jobManagerResourceId, slotReport);
        final Set<ExecutionAttemptID> afterCompletion = reportedDeployments.take();
        assertThat(afterCompletion, hasSize(0));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example use of org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder in the Apache Flink project.
Class SourceStreamTaskTest, method testTriggeringCheckpointAfterSourceThreadFinished.
/**
 * Triggers a checkpoint on a SourceStreamTask harness after the task's completion future is
 * done, and checks that the checkpoint future completes and that each additional output
 * partition ends up with three queued buffers.
 *
 * <p>The two result partitions are created from a NettyShuffleEnvironment that is sized to
 * {@code partitionWriters.length * 2} network buffers, and are closed in the outer
 * {@code finally} block.
 */
@Test
public void testTriggeringCheckpointAfterSourceThreadFinished() throws Exception {
ResultPartition[] partitionWriters = new ResultPartition[2];
try (NettyShuffleEnvironment env = new NettyShuffleEnvironmentBuilder().setNumNetworkBuffers(partitionWriters.length * 2).build()) {
// Create and set up one PIPELINED_BOUNDED partition per array slot.
// NOTE(review): the trailing argument 1 is presumably the subpartition count - confirm
// against PartitionTestUtils.createPartition.
for (int i = 0; i < partitionWriters.length; ++i) {
partitionWriters[i] = PartitionTestUtils.createPartition(env, ResultPartitionType.PIPELINED_BOUNDED, 1);
partitionWriters[i].setup();
}
// Completed with the checkpoint id as soon as the responder below sees an acknowledgement.
final CompletableFuture<Long> checkpointCompleted = new CompletableFuture<>();
// Harness for a SourceStreamTask with checkpointing enabled, a responder that records the
// acknowledged checkpoint, and the partitions above registered as additional outputs.
try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO).modifyStreamConfig(config -> config.setCheckpointingEnabled(true)).setCheckpointResponder(new TestCheckpointResponder() {
@Override
public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
checkpointCompleted.complete(checkpointId);
}
}).addAdditionalOutput(partitionWriters).setupOperatorChain(new StreamSource<>(new MockSource(0, 0, 1))).finishForSingletonOperatorChain(StringSerializer.INSTANCE).build()) {
testHarness.processAll();
CompletableFuture<Void> taskFinished = testHarness.getStreamTask().getCompletionFuture();
// Keep draining the harness until the task's completion future is done.
do {
testHarness.processAll();
} while (!taskFinished.isDone());
// NOTE(review): the argument 2 is presumably the checkpoint id; it matches the id passed
// to notifyCheckpointCompleteAsync below - confirm against triggerCheckpoint.
Future<Boolean> checkpointFuture = triggerCheckpoint(testHarness, 2);
// Notifies the result partition that all records are processed after the
// last checkpoint is triggered.
checkState(checkpointFuture instanceof CompletableFuture, "The trigger future should " + " be also CompletableFuture.");
((CompletableFuture<?>) checkpointFuture).thenAccept((ignored) -> {
for (ResultPartition resultPartition : partitionWriters) {
resultPartition.onSubpartitionAllDataProcessed(0);
}
});
// Once the checkpoint was acknowledged, tell the task that checkpoint 2 completed.
checkpointCompleted.whenComplete((id, error) -> testHarness.getStreamTask().notifyCheckpointCompleteAsync(2));
testHarness.finishProcessing();
assertTrue(checkpointFuture.isDone());
// Each partition is expected to hold exactly three queued buffers. The original comment
// here was truncated to "EndOfUserRecordEvent."; presumably the three buffers are the
// checkpoint barrier, a final watermark and the EndOfUserRecordEvent - TODO confirm.
for (ResultPartition resultPartition : partitionWriters) {
assertEquals(3, resultPartition.getNumberOfQueuedBuffers());
}
}
} finally {
// Close every partition that was actually created (slots stay null if creation failed).
for (ResultPartitionWriter writer : partitionWriters) {
if (writer != null) {
writer.close();
}
}
}
}
Example use of org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder in the Apache Flink project.
Class StreamTaskSystemExitTest, method createSystemExitTask.
/**
 * Builds a {@link Task} that runs the given invokable class with the given stream operator,
 * wired up with testing, no-op and mocked runtime services.
 *
 * @param invokableClassName name of the invokable class the task should execute
 * @param operator stream operator stored in the task's {@link StreamConfig}
 * @return the assembled task
 * @throws Exception if the task or one of its components cannot be created
 */
private Task createSystemExitTask(final String invokableClassName, StreamOperator<?> operator)
        throws Exception {
    // The stream config writes into the same Configuration that is handed to the task.
    final Configuration taskConfiguration = new Configuration();
    final StreamConfig streamConfig = new StreamConfig(taskConfiguration);
    streamConfig.setOperatorID(new OperatorID());
    streamConfig.setStreamOperator(operator);
    // Needed so that a source can run.
    streamConfig.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    final JobID testJobId = new JobID();
    final SerializedValue<ExecutionConfig> serializedExecutionConfig =
            new SerializedValue<>(new ExecutionConfig());
    final JobInformation jobInformation =
            new JobInformation(
                    testJobId,
                    "Test Job",
                    serializedExecutionConfig,
                    new Configuration(),
                    Collections.emptyList(),
                    Collections.emptyList());

    final JobVertexID testVertexId = new JobVertexID();
    final TaskInformation taskInformation =
            new TaskInformation(
                    testVertexId, "Test Task", 1, 1, invokableClassName, taskConfiguration);

    final TaskManagerRuntimeInfo taskManagerRuntimeInfo = new TestingTaskManagerRuntimeInfo();
    final ShuffleEnvironment<?, ?> shuffleEnvironment =
            new NettyShuffleEnvironmentBuilder().build();

    final ExecutionAttemptID executionAttemptId = new ExecutionAttemptID();
    final AllocationID allocationId = new AllocationID();

    return new Task(
            jobInformation,
            taskInformation,
            executionAttemptId,
            allocationId,
            0,
            0,
            Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
            Collections.<InputGateDeploymentDescriptor>emptyList(),
            MemoryManagerBuilder.newBuilder().setMemorySize(32L * 1024L).build(),
            new IOManagerAsync(),
            shuffleEnvironment,
            new KvStateService(new KvStateRegistry(), null, null),
            mock(BroadcastVariableManager.class),
            new TaskEventDispatcher(),
            ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES,
            new TestTaskStateManager(),
            mock(TaskManagerActions.class),
            mock(InputSplitProvider.class),
            mock(CheckpointResponder.class),
            new NoOpTaskOperatorEventGateway(),
            new TestGlobalAggregateManager(),
            TestingClassLoaderLease.newBuilder().build(),
            mock(FileCache.class),
            taskManagerRuntimeInfo,
            UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
            new NoOpResultPartitionConsumableNotifier(),
            mock(PartitionProducerStateChecker.class),
            Executors.directExecutor());
}
Aggregations