Search in sources :

Example 1 with TaskEventDispatcher

use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.

the class TaskManagerServices method createNetworkEnvironment.

/**
	 * Builds the {@link NetworkEnvironment} described by the given
	 * {@link TaskManagerServicesConfiguration}.
	 *
	 * <p>A Netty-based connection manager is used when the network configuration carries a Netty
	 * config; otherwise a local connection manager is used. A {@link KvStateServer} is only
	 * created when queryable state is enabled; otherwise {@code null} is passed on to the
	 * network environment.
	 *
	 * @param taskManagerServicesConfiguration to construct the network environment from
	 * @return Network environment
	 * @throws IOException if constructing one of the network components fails with an I/O error
	 */
private static NetworkEnvironment createNetworkEnvironment(TaskManagerServicesConfiguration taskManagerServicesConfiguration) throws IOException {
    final NetworkEnvironmentConfiguration netConfig = taskManagerServicesConfiguration.getNetworkConfig();

    final NetworkBufferPool bufferPool = new NetworkBufferPool(
        netConfig.numNetworkBuffers(),
        netConfig.networkBufferSize(),
        netConfig.memoryType());

    // Netty-backed connections if a Netty config is present, purely local ones otherwise.
    final ConnectionManager connectionManager = netConfig.nettyConfig() != null
        ? new NettyConnectionManager(netConfig.nettyConfig())
        : new LocalConnectionManager();

    final ResultPartitionManager partitionManager = new ResultPartitionManager();
    final TaskEventDispatcher eventDispatcher = new TaskEventDispatcher();
    final KvStateRegistry stateRegistry = new KvStateRegistry();

    // The KvState server stays null unless queryable state is switched on.
    KvStateServer stateServer = null;
    final QueryableStateConfiguration queryableStateConfig = taskManagerServicesConfiguration.getQueryableStateConfig();
    if (queryableStateConfig.enabled()) {
        // A configured thread count of 0 means "use the number of slots".
        int serverThreads = queryableStateConfig.numServerThreads();
        if (serverThreads == 0) {
            serverThreads = taskManagerServicesConfiguration.getNumberOfSlots();
        }
        int queryThreads = queryableStateConfig.numQueryThreads();
        if (queryThreads == 0) {
            queryThreads = taskManagerServicesConfiguration.getNumberOfSlots();
        }
        stateServer = new KvStateServer(
            taskManagerServicesConfiguration.getTaskManagerAddress(),
            queryableStateConfig.port(),
            serverThreads,
            queryThreads,
            stateRegistry,
            new DisabledKvStateRequestStats());
    }

    // we start the network first, to make sure it can allocate its buffers first
    return new NetworkEnvironment(
        bufferPool,
        connectionManager,
        partitionManager,
        eventDispatcher,
        stateRegistry,
        stateServer,
        netConfig.ioMode(),
        netConfig.partitionRequestInitialBackoff(),
        netConfig.partitionRequestMaxBackoff(),
        netConfig.networkBuffersPerChannel(),
        netConfig.extraNetworkBuffersPerGate());
}
Also used : KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) KvStateServer(org.apache.flink.runtime.query.netty.KvStateServer) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) NetworkEnvironmentConfiguration(org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration) ConnectionManager(org.apache.flink.runtime.io.network.ConnectionManager) NettyConnectionManager(org.apache.flink.runtime.io.network.netty.NettyConnectionManager) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) NettyConnectionManager(org.apache.flink.runtime.io.network.netty.NettyConnectionManager) DisabledKvStateRequestStats(org.apache.flink.runtime.query.netty.DisabledKvStateRequestStats)

Example 2 with TaskEventDispatcher

use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.

the class SingleInputGateTest method testUpdateChannelBeforeRequest.

/**
	 * Verifies that updating an unknown input channel to a local one does not, by itself, issue a
	 * partition request while the UDF has not yet requested any partitions. A premature request
	 * could race with the registration of a listener at the gate (e.g. in UnionInputGate) and
	 * make the listener miss buffer notifications.
	 */
@Test
public void testUpdateChannelBeforeRequest() throws Exception {
    final SingleInputGate gate = new SingleInputGate(
        "t1",
        new JobID(),
        new IntermediateDataSetID(),
        ResultPartitionType.PIPELINED,
        0,
        1,
        mock(TaskActions.class),
        new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    final ResultPartitionManager mockedPartitionManager = mock(ResultPartitionManager.class);

    // Register a channel whose location is still unknown with the gate.
    final InputChannel unknownChannel = new UnknownInputChannel(
        gate,
        0,
        new ResultPartitionID(),
        mockedPartitionManager,
        new TaskEventDispatcher(),
        new LocalConnectionManager(),
        0,
        0,
        new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    gate.setInputChannel(unknownChannel.partitionId.getPartitionId(), unknownChannel);

    // Turn the unknown channel into a local one ...
    final InputChannelDeploymentDescriptor localDescriptor =
        new InputChannelDeploymentDescriptor(unknownChannel.partitionId, ResultPartitionLocation.createLocal());
    gate.updateInputChannel(localDescriptor);

    // ... and make sure the update did not ask the partition manager for a subpartition view.
    verify(mockedPartitionManager, never()).createSubpartitionView(
        any(ResultPartitionID.class),
        anyInt(),
        any(BufferProvider.class),
        any(BufferAvailabilityListener.class));
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) InputChannelDeploymentDescriptor(org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor) BufferAvailabilityListener(org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener) BufferProvider(org.apache.flink.runtime.io.network.buffer.BufferProvider) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 3 with TaskEventDispatcher

use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.

the class LocalInputChannelTest method testConcurrentConsumeMultiplePartitions.

/**
	 * Tests the consumption of multiple subpartitions via local input channels.
	 *
	 * <p>Multiple producer tasks produce pipelined partitions, which are consumed by multiple
	 * tasks via local input channels. The test passes when every submitted producer and consumer
	 * future completes without an exception.
	 *
	 * @throws Exception if any producer or consumer task fails (surfaced by {@code Future.get()})
	 */
@Test
public void testConcurrentConsumeMultiplePartitions() throws Exception {
    // Config
    final int parallelism = 32;
    final int producerBufferPoolSize = parallelism + 1;
    final int numberOfBuffersPerChannel = 1024;
    checkArgument(parallelism >= 1);
    checkArgument(producerBufferPoolSize >= parallelism);
    checkArgument(numberOfBuffersPerChannel >= 1);
    // Setup
    // One thread per produced partition and one per consumer
    final ExecutorService executor = Executors.newFixedThreadPool(2 * parallelism);
    // Sized for all producer pools (parallelism * producerBufferPoolSize) plus all consumer
    // pools (parallelism pools of `parallelism` buffers each) created below.
    final NetworkBufferPool networkBuffers = new NetworkBufferPool((parallelism * producerBufferPoolSize) + (parallelism * parallelism), TestBufferFactory.BUFFER_SIZE, MemoryType.HEAP);
    final ResultPartitionConsumableNotifier partitionConsumableNotifier = mock(ResultPartitionConsumableNotifier.class);
    final TaskActions taskActions = mock(TaskActions.class);
    final IOManager ioManager = mock(IOManager.class);
    final JobID jobId = new JobID();
    final ResultPartitionManager partitionManager = new ResultPartitionManager();
    final ResultPartitionID[] partitionIds = new ResultPartitionID[parallelism];
    final TestPartitionProducer[] partitionProducers = new TestPartitionProducer[parallelism];
    // Create all partitions
    for (int i = 0; i < parallelism; i++) {
        partitionIds[i] = new ResultPartitionID();
        final ResultPartition partition = new ResultPartition("Test Name", taskActions, jobId, partitionIds[i], ResultPartitionType.PIPELINED, parallelism, parallelism, partitionManager, partitionConsumableNotifier, ioManager, true);
        // Create a buffer pool for this partition
        partition.registerBufferPool(networkBuffers.createBufferPool(producerBufferPoolSize, producerBufferPoolSize));
        // Create the producer
        partitionProducers[i] = new TestPartitionProducer(partition, false, new TestPartitionProducerBufferSource(parallelism, partition.getBufferProvider(), numberOfBuffersPerChannel));
        // Register with the partition manager in order to allow the local input channels to
        // request their respective partitions.
        partitionManager.registerResultPartition(partition);
    }
    // Test
    try {
        // One future per producer plus one per consumer is collected below, so reserve room
        // for 2 * parallelism entries up front.
        final List<Future<?>> results = Lists.newArrayListWithCapacity(2 * parallelism);
        // Submit producer tasks
        for (int i = 0; i < parallelism; i++) {
            results.add(executor.submit(partitionProducers[i]));
        }
        // Submit consumers
        for (int i = 0; i < parallelism; i++) {
            results.add(executor.submit(new TestLocalInputChannelConsumer(i, parallelism, numberOfBuffersPerChannel, networkBuffers.createBufferPool(parallelism, parallelism), partitionManager, new TaskEventDispatcher(), partitionIds)));
        }
        // Wait for all to finish; get() rethrows any task failure and thereby fails the test
        for (Future<?> result : results) {
            result.get();
        }
    } finally {
        // NOTE(review): shutdown() neither cancels nor waits for running tasks, so after a
        // failed future the remaining tasks may still be running while the buffers are
        // destroyed -- confirm whether that is acceptable for this test.
        networkBuffers.destroy();
        executor.shutdown();
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TestPartitionProducer(org.apache.flink.runtime.io.network.util.TestPartitionProducer) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with TaskEventDispatcher

use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.

the class LocalInputChannelTest method testGetNextAfterPartitionReleased.

/**
	 * Tests that reading from a channel after its partition has been released is handled
	 * and does not lead to NPEs.
	 *
	 * <p>A {@code null} buffer from a view that is not released must surface as an
	 * {@link IllegalStateException}; a {@code null} buffer from a released view must surface as
	 * a {@link CancelTaskException}.
	 */
@Test
public void testGetNextAfterPartitionReleased() throws Exception {
    final ResultPartitionManager mockedPartitionManager = mock(ResultPartitionManager.class);
    final ResultSubpartitionView subpartitionView = mock(ResultSubpartitionView.class);
    final SingleInputGate inputGate = mock(SingleInputGate.class);

    // Every subpartition view request is answered with the mocked view.
    when(mockedPartitionManager.createSubpartitionView(
        any(ResultPartitionID.class),
        anyInt(),
        any(BufferProvider.class),
        any(BufferAvailabilityListener.class))).thenReturn(subpartitionView);

    final LocalInputChannel localChannel = new LocalInputChannel(
        inputGate,
        0,
        new ResultPartitionID(),
        mockedPartitionManager,
        new TaskEventDispatcher(),
        new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    localChannel.requestSubpartition(0);

    // Case 1: null buffer, view not released
    when(subpartitionView.getNextBuffer()).thenReturn(null);
    when(subpartitionView.isReleased()).thenReturn(false);
    boolean sawIllegalState = false;
    try {
        localChannel.getNextBuffer();
    } catch (IllegalStateException expected) {
        sawIllegalState = true;
    }
    if (!sawIllegalState) {
        fail("Did not throw expected IllegalStateException");
    }

    // Case 2: null buffer, view released
    when(subpartitionView.getNextBuffer()).thenReturn(null);
    when(subpartitionView.isReleased()).thenReturn(true);
    boolean sawCancelTask = false;
    try {
        localChannel.getNextBuffer();
    } catch (CancelTaskException expected) {
        sawCancelTask = true;
    }
    if (!sawCancelTask) {
        fail("Did not throw expected CancelTaskException");
    }
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) ResultSubpartitionView(org.apache.flink.runtime.io.network.partition.ResultSubpartitionView) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) BufferAvailabilityListener(org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener) BufferProvider(org.apache.flink.runtime.io.network.buffer.BufferProvider) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) Test(org.junit.Test)

Example 5 with TaskEventDispatcher

use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.

the class LocalInputChannelTest method testConcurrentReleaseAndRetriggerPartitionRequest.

/**
	 * Verifies that concurrent release via the SingleInputGate and re-triggering
	 * of a partition request works smoothly.
	 *
	 * - SingleInputGate acquires its request lock and tries to release all
	 * registered channels. When releasing a channel, it needs to acquire
	 * the channel's shared request-release lock.
	 * - If a LocalInputChannel concurrently retriggers a partition request via
	 * a Timer Thread it acquires the channel's request-release lock and calls
	 * the retrigger callback on the SingleInputGate, which again tries to
	 * acquire the gate's request lock.
	 *
	 * For certain timings this obviously leads to a deadlock. This test reliably
	 * reproduced such a timing (reported in FLINK-5228). This test is pretty much
	 * testing the buggy implementation and has not much more general value. If it
	 * becomes obsolete at some point (future greatness ;)), feel free to remove it.
	 *
	 * The fix in the end was to not acquire the channel's lock when releasing it
	 * and/or not doing any input gate callbacks while holding the channel's lock.
	 * I decided to do both.
	 *
	 * Note that the test contains no assertions: it succeeds when both threads
	 * terminate, and a deadlock shows up as the test hanging in the joins at the end.
	 */
@Test
public void testConcurrentReleaseAndRetriggerPartitionRequest() throws Exception {
    final SingleInputGate gate = new SingleInputGate("test task name", new JobID(), new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, 1, mock(TaskActions.class), new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
    // Every request for a subpartition view is delayed and then fails with a
    // PartitionNotFoundException.
    when(partitionManager.createSubpartitionView(any(ResultPartitionID.class), anyInt(), any(BufferProvider.class), any(BufferAvailabilityListener.class))).thenAnswer(new Answer<ResultSubpartitionView>() {

        @Override
        public ResultSubpartitionView answer(InvocationOnMock invocationOnMock) throws Throwable {
            // Sleep here a little to give the releaser Thread
            // time to acquire the input gate lock. We throw
            // the Exception to retrigger the request.
            Thread.sleep(100);
            throw new PartitionNotFoundException(new ResultPartitionID());
        }
    });
    // NOTE(review): the two 1s are presumably the initial/max partition request backoff --
    // confirm against the LocalInputChannel constructor.
    final LocalInputChannel channel = new LocalInputChannel(gate, 0, new ResultPartitionID(), partitionManager, new TaskEventDispatcher(), 1, 1, new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    gate.setInputChannel(new IntermediateResultPartitionID(), channel);
    // Thread that releases all resources of the gate.
    Thread releaser = new Thread() {

        @Override
        public void run() {
            try {
                gate.releaseAllResources();
            } catch (IOException ignored) {
                // Deliberately ignored: the test only checks that this thread terminates.
            }
        }
    };
    // Thread that requests the subpartition through the channel; the mocked partition
    // manager above answers that request with a PartitionNotFoundException.
    Thread requester = new Thread() {

        @Override
        public void run() {
            try {
                channel.requestSubpartition(0);
            } catch (IOException | InterruptedException ignored) {
                // Deliberately ignored: the test only checks that this thread terminates.
            }
        }
    };
    // The requester is started before the releaser; both joins are unbounded, so a
    // deadlock between the two threads makes the test hang here.
    requester.start();
    releaser.start();
    releaser.join();
    requester.join();
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) ResultSubpartitionView(org.apache.flink.runtime.io.network.partition.ResultSubpartitionView) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) IOException(java.io.IOException) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) InvocationOnMock(org.mockito.invocation.InvocationOnMock) BufferAvailabilityListener(org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener) BufferProvider(org.apache.flink.runtime.io.network.buffer.BufferProvider) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Aggregations

TaskEventDispatcher (org.apache.flink.runtime.io.network.TaskEventDispatcher)9 ResultPartitionManager (org.apache.flink.runtime.io.network.partition.ResultPartitionManager)9 Test (org.junit.Test)8 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)7 IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)7 JobID (org.apache.flink.api.common.JobID)6 LocalConnectionManager (org.apache.flink.runtime.io.network.LocalConnectionManager)6 UnregisteredTaskMetricsGroup (org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup)6 TaskActions (org.apache.flink.runtime.taskmanager.TaskActions)6 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)5 BufferProvider (org.apache.flink.runtime.io.network.buffer.BufferProvider)4 BufferAvailabilityListener (org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener)4 InputChannelDeploymentDescriptor (org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor)3 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)3 NetworkBufferPool (org.apache.flink.runtime.io.network.buffer.NetworkBufferPool)3 ResultSubpartitionView (org.apache.flink.runtime.io.network.partition.ResultSubpartitionView)3 IOException (java.io.IOException)2 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)2 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)2 ConnectionManager (org.apache.flink.runtime.io.network.ConnectionManager)2