use of org.apache.flink.runtime.io.network.NetworkEnvironment in project flink by apache.
the class TaskManagerServices method createNetworkEnvironment.
/**
* Creates the {@link NetworkEnvironment} from the given {@link TaskManagerServicesConfiguration}.
*
* @param taskManagerServicesConfiguration to construct the network environment from
* @return Network environment
* @throws IOException
*/
private static NetworkEnvironment createNetworkEnvironment(TaskManagerServicesConfiguration taskManagerServicesConfiguration) throws IOException {
NetworkEnvironmentConfiguration networkEnvironmentConfiguration = taskManagerServicesConfiguration.getNetworkConfig();
NetworkBufferPool networkBufferPool = new NetworkBufferPool(networkEnvironmentConfiguration.numNetworkBuffers(), networkEnvironmentConfiguration.networkBufferSize(), networkEnvironmentConfiguration.memoryType());
ConnectionManager connectionManager;
if (networkEnvironmentConfiguration.nettyConfig() != null) {
connectionManager = new NettyConnectionManager(networkEnvironmentConfiguration.nettyConfig());
} else {
connectionManager = new LocalConnectionManager();
}
ResultPartitionManager resultPartitionManager = new ResultPartitionManager();
TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher();
KvStateRegistry kvStateRegistry = new KvStateRegistry();
KvStateServer kvStateServer;
if (taskManagerServicesConfiguration.getQueryableStateConfig().enabled()) {
QueryableStateConfiguration qsConfig = taskManagerServicesConfiguration.getQueryableStateConfig();
int numNetworkThreads = qsConfig.numServerThreads() == 0 ? taskManagerServicesConfiguration.getNumberOfSlots() : qsConfig.numServerThreads();
int numQueryThreads = qsConfig.numQueryThreads() == 0 ? taskManagerServicesConfiguration.getNumberOfSlots() : qsConfig.numQueryThreads();
kvStateServer = new KvStateServer(taskManagerServicesConfiguration.getTaskManagerAddress(), qsConfig.port(), numNetworkThreads, numQueryThreads, kvStateRegistry, new DisabledKvStateRequestStats());
} else {
kvStateServer = null;
}
// we start the network first, to make sure it can allocate its buffers first
return new NetworkEnvironment(networkBufferPool, connectionManager, resultPartitionManager, taskEventDispatcher, kvStateRegistry, kvStateServer, networkEnvironmentConfiguration.ioMode(), networkEnvironmentConfiguration.partitionRequestInitialBackoff(), networkEnvironmentConfiguration.partitionRequestMaxBackoff(), networkEnvironmentConfiguration.networkBuffersPerChannel(), networkEnvironmentConfiguration.extraNetworkBuffersPerGate());
}
use of org.apache.flink.runtime.io.network.NetworkEnvironment in project flink by apache.
the class TaskManagerServices method fromConfiguration.
// --------------------------------------------------------------------------------------------
// Static factory methods for task manager services
// --------------------------------------------------------------------------------------------
/**
* Creates and returns the task manager services.
*
* @param resourceID resource ID of the task manager
* @param taskManagerServicesConfiguration task manager configuration
* @return task manager components
* @throws Exception
*/
public static TaskManagerServices fromConfiguration(TaskManagerServicesConfiguration taskManagerServicesConfiguration, ResourceID resourceID) throws Exception {
// pre-start checks
checkTempDirs(taskManagerServicesConfiguration.getTmpDirPaths());
final NetworkEnvironment network = createNetworkEnvironment(taskManagerServicesConfiguration);
network.start();
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceID, taskManagerServicesConfiguration.getTaskManagerAddress(), network.getConnectionManager().getDataPort());
// this call has to happen strictly after the network stack has been initialized
final MemoryManager memoryManager = createMemoryManager(taskManagerServicesConfiguration);
// start the I/O manager, it will create some temp directories.
final IOManager ioManager = new IOManagerAsync(taskManagerServicesConfiguration.getTmpDirPaths());
final MetricRegistry metricRegistry = new MetricRegistry(taskManagerServicesConfiguration.getMetricRegistryConfiguration());
final TaskManagerMetricGroup taskManagerMetricGroup = new TaskManagerMetricGroup(metricRegistry, taskManagerLocation.getHostname(), taskManagerLocation.getResourceID().toString());
// Initialize the TM metrics
TaskExecutorMetricsInitializer.instantiateStatusMetrics(taskManagerMetricGroup, network);
final BroadcastVariableManager broadcastVariableManager = new BroadcastVariableManager();
final FileCache fileCache = new FileCache(taskManagerServicesConfiguration.getTmpDirPaths());
final List<ResourceProfile> resourceProfiles = new ArrayList<>(taskManagerServicesConfiguration.getNumberOfSlots());
for (int i = 0; i < taskManagerServicesConfiguration.getNumberOfSlots(); i++) {
resourceProfiles.add(new ResourceProfile(1.0, 42));
}
final TimerService<AllocationID> timerService = new TimerService<>(new ScheduledThreadPoolExecutor(1), taskManagerServicesConfiguration.getTimerServiceShutdownTimeout());
final TaskSlotTable taskSlotTable = new TaskSlotTable(resourceProfiles, timerService);
final JobManagerTable jobManagerTable = new JobManagerTable();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
return new TaskManagerServices(taskManagerLocation, memoryManager, ioManager, network, metricRegistry, taskManagerMetricGroup, broadcastVariableManager, fileCache, taskSlotTable, jobManagerTable, jobLeaderService);
}
use of org.apache.flink.runtime.io.network.NetworkEnvironment in project flink by apache.
the class StreamTaskTest method createTask.
public static Task createTask(Class<? extends AbstractInvokable> invokable, StreamConfig taskConfig, Configuration taskManagerConfig) throws Exception {
LibraryCacheManager libCache = mock(LibraryCacheManager.class);
when(libCache.getClassLoader(any(JobID.class))).thenReturn(StreamTaskTest.class.getClassLoader());
ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
Executor executor = mock(Executor.class);
NetworkEnvironment network = mock(NetworkEnvironment.class);
when(network.getResultPartitionManager()).thenReturn(partitionManager);
when(network.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
when(network.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mock(TaskKvStateRegistry.class));
JobInformation jobInformation = new JobInformation(new JobID(), "Job Name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "Test Task", 1, 1, invokable.getName(), taskConfig.getConfiguration());
return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), 0, new TaskStateHandles(), mock(MemoryManager.class), mock(IOManager.class), network, mock(BroadcastVariableManager.class), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), libCache, mock(FileCache.class), new TestingTaskManagerRuntimeInfo(taskManagerConfig, new String[] { System.getProperty("java.io.tmpdir") }), new UnregisteredTaskMetricsGroup(), consumableNotifier, partitionProducerStateChecker, executor);
}
use of org.apache.flink.runtime.io.network.NetworkEnvironment in project flink by apache.
the class TaskAsyncCallTest method createTask.
private static Task createTask() throws Exception {
LibraryCacheManager libCache = mock(LibraryCacheManager.class);
when(libCache.getClassLoader(any(JobID.class))).thenReturn(ClassLoader.getSystemClassLoader());
ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
Executor executor = mock(Executor.class);
NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
when(networkEnvironment.getResultPartitionManager()).thenReturn(partitionManager);
when(networkEnvironment.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
when(networkEnvironment.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mock(TaskKvStateRegistry.class));
JobInformation jobInformation = new JobInformation(new JobID(), "Job Name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "Test Task", 1, 1, CheckpointsInOrderInvokable.class.getName(), new Configuration());
return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), 0, new TaskStateHandles(), mock(MemoryManager.class), mock(IOManager.class), networkEnvironment, mock(BroadcastVariableManager.class), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), libCache, mock(FileCache.class), new TestingTaskManagerRuntimeInfo(), mock(TaskMetricGroup.class), consumableNotifier, partitionProducerStateChecker, executor);
}
use of org.apache.flink.runtime.io.network.NetworkEnvironment in project flink by apache.
the class TaskTest method testTriggerPartitionStateUpdate.
/**
* Tests the trigger partition state update future completions.
*/
@Test
public void testTriggerPartitionStateUpdate() throws Exception {
IntermediateDataSetID resultId = new IntermediateDataSetID();
ResultPartitionID partitionId = new ResultPartitionID();
LibraryCacheManager libCache = mock(LibraryCacheManager.class);
when(libCache.getClassLoader(any(JobID.class))).thenReturn(getClass().getClassLoader());
PartitionProducerStateChecker partitionChecker = mock(PartitionProducerStateChecker.class);
ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
NetworkEnvironment network = mock(NetworkEnvironment.class);
when(network.getResultPartitionManager()).thenReturn(mock(ResultPartitionManager.class));
when(network.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
when(network.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mock(TaskKvStateRegistry.class));
createTask(InvokableBlockingInInvoke.class, libCache, network, consumableNotifier, partitionChecker, Executors.directExecutor());
// Test all branches of trigger partition state check
{
// Reset latches
createQueuesAndActors();
// PartitionProducerDisposedException
Task task = createTask(InvokableBlockingInInvoke.class, libCache, network, consumableNotifier, partitionChecker, Executors.directExecutor());
FlinkCompletableFuture<ExecutionState> promise = new FlinkCompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.triggerPartitionProducerStateCheck(task.getJobID(), resultId, partitionId);
promise.completeExceptionally(new PartitionProducerDisposedException(partitionId));
assertEquals(ExecutionState.CANCELING, task.getExecutionState());
}
{
// Reset latches
createQueuesAndActors();
// Any other exception
Task task = createTask(InvokableBlockingInInvoke.class, libCache, network, consumableNotifier, partitionChecker, Executors.directExecutor());
FlinkCompletableFuture<ExecutionState> promise = new FlinkCompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.triggerPartitionProducerStateCheck(task.getJobID(), resultId, partitionId);
promise.completeExceptionally(new RuntimeException("Any other exception"));
assertEquals(ExecutionState.FAILED, task.getExecutionState());
}
{
// Reset latches
createQueuesAndActors();
// TimeoutException handled special => retry
Task task = createTask(InvokableBlockingInInvoke.class, libCache, network, consumableNotifier, partitionChecker, Executors.directExecutor());
SingleInputGate inputGate = mock(SingleInputGate.class);
when(inputGate.getConsumedResultId()).thenReturn(resultId);
try {
task.startTaskThread();
awaitLatch.await();
setInputGate(task, inputGate);
FlinkCompletableFuture<ExecutionState> promise = new FlinkCompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.triggerPartitionProducerStateCheck(task.getJobID(), resultId, partitionId);
promise.completeExceptionally(new TimeoutException());
assertEquals(ExecutionState.RUNNING, task.getExecutionState());
verify(inputGate, times(1)).retriggerPartitionRequest(eq(partitionId.getPartitionId()));
} finally {
task.getExecutingThread().interrupt();
task.getExecutingThread().join();
}
}
{
// Reset latches
createQueuesAndActors();
// Success
Task task = createTask(InvokableBlockingInInvoke.class, libCache, network, consumableNotifier, partitionChecker, Executors.directExecutor());
SingleInputGate inputGate = mock(SingleInputGate.class);
when(inputGate.getConsumedResultId()).thenReturn(resultId);
try {
task.startTaskThread();
awaitLatch.await();
setInputGate(task, inputGate);
FlinkCompletableFuture<ExecutionState> promise = new FlinkCompletableFuture<>();
when(partitionChecker.requestPartitionProducerState(eq(task.getJobID()), eq(resultId), eq(partitionId))).thenReturn(promise);
task.triggerPartitionProducerStateCheck(task.getJobID(), resultId, partitionId);
promise.complete(ExecutionState.RUNNING);
assertEquals(ExecutionState.RUNNING, task.getExecutionState());
verify(inputGate, times(1)).retriggerPartitionRequest(eq(partitionId.getPartitionId()));
} finally {
task.getExecutingThread().interrupt();
task.getExecutingThread().join();
}
}
}
Aggregations