use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.
the class TaskManagerServices method createNetworkEnvironment.
/**
 * Builds the {@link NetworkEnvironment} described by the given
 * {@link TaskManagerServicesConfiguration}.
 *
 * <p>A {@link NettyConnectionManager} is used when the network configuration carries a Netty
 * configuration, otherwise a {@link LocalConnectionManager} is used. A {@link KvStateServer}
 * is only created when queryable state is enabled; otherwise {@code null} is passed to the
 * network environment in its place.
 *
 * @param taskManagerServicesConfiguration to construct the network environment from
 * @return Network environment
 * @throws IOException propagated from the construction of the network components
 */
private static NetworkEnvironment createNetworkEnvironment(TaskManagerServicesConfiguration taskManagerServicesConfiguration) throws IOException {
    final NetworkEnvironmentConfiguration netConfig = taskManagerServicesConfiguration.getNetworkConfig();

    // The buffer pool is created before any of the other network components.
    final NetworkBufferPool bufferPool = new NetworkBufferPool(
            netConfig.numNetworkBuffers(),
            netConfig.networkBufferSize(),
            netConfig.memoryType());

    // Without a Netty configuration only local connections are possible.
    final ConnectionManager connections;
    if (netConfig.nettyConfig() == null) {
        connections = new LocalConnectionManager();
    } else {
        connections = new NettyConnectionManager(netConfig.nettyConfig());
    }

    final ResultPartitionManager partitions = new ResultPartitionManager();
    final TaskEventDispatcher eventDispatcher = new TaskEventDispatcher();
    final KvStateRegistry stateRegistry = new KvStateRegistry();

    final KvStateServer stateServer;
    if (!taskManagerServicesConfiguration.getQueryableStateConfig().enabled()) {
        // Queryable state is switched off: the environment runs without a server.
        stateServer = null;
    } else {
        final QueryableStateConfiguration queryableState = taskManagerServicesConfiguration.getQueryableStateConfig();

        // A configured thread count of 0 means "use the number of slots".
        final int serverThreads = queryableState.numServerThreads() != 0
                ? queryableState.numServerThreads()
                : taskManagerServicesConfiguration.getNumberOfSlots();
        final int queryThreads = queryableState.numQueryThreads() != 0
                ? queryableState.numQueryThreads()
                : taskManagerServicesConfiguration.getNumberOfSlots();

        stateServer = new KvStateServer(
                taskManagerServicesConfiguration.getTaskManagerAddress(),
                queryableState.port(),
                serverThreads,
                queryThreads,
                stateRegistry,
                new DisabledKvStateRequestStats());
    }

    return new NetworkEnvironment(
            bufferPool,
            connections,
            partitions,
            eventDispatcher,
            stateRegistry,
            stateServer,
            netConfig.ioMode(),
            netConfig.partitionRequestInitialBackoff(),
            netConfig.partitionRequestMaxBackoff(),
            netConfig.networkBuffersPerChannel(),
            netConfig.extraNetworkBuffersPerGate());
}
use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.
the class SingleInputGateTest method testUpdateChannelBeforeRequest.
/**
 * Checks that updating an input channel does not issue a partition request as long as the
 * UDF has not requested any partitions itself. A premature request can race with the
 * registration of a listener at the gate (e.g. in UnionInputGate) and make that listener
 * miss buffer notifications.
 */
@Test
public void testUpdateChannelBeforeRequest() throws Exception {
    final SingleInputGate gate = new SingleInputGate(
            "t1",
            new JobID(),
            new IntermediateDataSetID(),
            ResultPartitionType.PIPELINED,
            0,
            1,
            mock(TaskActions.class),
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());

    final ResultPartitionManager partitions = mock(ResultPartitionManager.class);

    // Start out with a channel whose location is not yet known.
    final InputChannel unknownChannel = new UnknownInputChannel(
            gate,
            0,
            new ResultPartitionID(),
            partitions,
            new TaskEventDispatcher(),
            new LocalConnectionManager(),
            0,
            0,
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    gate.setInputChannel(unknownChannel.partitionId.getPartitionId(), unknownChannel);

    // Turn it into a local channel ...
    final InputChannelDeploymentDescriptor localDescriptor =
            new InputChannelDeploymentDescriptor(unknownChannel.partitionId, ResultPartitionLocation.createLocal());
    gate.updateInputChannel(localDescriptor);

    // ... and make sure the update alone never asked for a subpartition view.
    verify(partitions, never()).createSubpartitionView(
            any(ResultPartitionID.class),
            anyInt(),
            any(BufferProvider.class),
            any(BufferAvailabilityListener.class));
}
use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.
the class LocalInputChannelTest method testConcurrentConsumeMultiplePartitions.
/**
 * Tests the consumption of multiple subpartitions via local input channels.
 *
 * <p>Multiple producer tasks produce pipelined partitions, which are consumed by multiple
 * tasks via local input channels. The test passes when every producer and consumer task
 * completes without throwing.
 */
@Test
public void testConcurrentConsumeMultiplePartitions() throws Exception {
    // Config
    final int parallelism = 32;
    final int producerBufferPoolSize = parallelism + 1;
    final int numberOfBuffersPerChannel = 1024;
    checkArgument(parallelism >= 1);
    checkArgument(producerBufferPoolSize >= parallelism);
    checkArgument(numberOfBuffersPerChannel >= 1);

    // Setup
    // One thread per produced partition and one per consumer
    final ExecutorService executor = Executors.newFixedThreadPool(2 * parallelism);
    final NetworkBufferPool networkBuffers = new NetworkBufferPool(
            (parallelism * producerBufferPoolSize) + (parallelism * parallelism),
            TestBufferFactory.BUFFER_SIZE,
            MemoryType.HEAP);
    final ResultPartitionConsumableNotifier partitionConsumableNotifier = mock(ResultPartitionConsumableNotifier.class);
    final TaskActions taskActions = mock(TaskActions.class);
    final IOManager ioManager = mock(IOManager.class);
    final JobID jobId = new JobID();
    final ResultPartitionManager partitionManager = new ResultPartitionManager();
    final ResultPartitionID[] partitionIds = new ResultPartitionID[parallelism];
    final TestPartitionProducer[] partitionProducers = new TestPartitionProducer[parallelism];

    // Create all partitions
    for (int i = 0; i < parallelism; i++) {
        partitionIds[i] = new ResultPartitionID();
        final ResultPartition partition = new ResultPartition(
                "Test Name",
                taskActions,
                jobId,
                partitionIds[i],
                ResultPartitionType.PIPELINED,
                parallelism,
                parallelism,
                partitionManager,
                partitionConsumableNotifier,
                ioManager,
                true);

        // Create a buffer pool for this partition
        partition.registerBufferPool(networkBuffers.createBufferPool(producerBufferPoolSize, producerBufferPoolSize));

        // Create the producer
        partitionProducers[i] = new TestPartitionProducer(
                partition,
                false,
                new TestPartitionProducerBufferSource(parallelism, partition.getBufferProvider(), numberOfBuffersPerChannel));

        // Register with the partition manager in order to allow the local input channels to
        // request their respective partitions.
        partitionManager.registerResultPartition(partition);
    }

    // Test
    try {
        // One future per producer plus one per consumer is collected below.
        List<Future<?>> results = Lists.newArrayListWithCapacity(2 * parallelism);

        // Submit producer tasks
        for (int i = 0; i < parallelism; i++) {
            results.add(executor.submit(partitionProducers[i]));
        }

        // Submit consumer
        for (int i = 0; i < parallelism; i++) {
            results.add(executor.submit(new TestLocalInputChannelConsumer(
                    i,
                    parallelism,
                    numberOfBuffersPerChannel,
                    networkBuffers.createBufferPool(parallelism, parallelism),
                    partitionManager,
                    new TaskEventDispatcher(),
                    partitionIds)));
        }

        // Wait for all to finish
        for (Future<?> result : results) {
            result.get();
        }
    } finally {
        // Keep the original order (buffers first, then executor), but make sure the
        // executor is shut down even if destroying the buffer pool throws.
        try {
            networkBuffers.destroy();
        } finally {
            executor.shutdown();
        }
    }
}
use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.
the class LocalInputChannelTest method testGetNextAfterPartitionReleased.
/**
 * Tests that reading from a channel after its partition has been released is handled
 * gracefully and does not lead to NPEs.
 *
 * <p>A {@code null} buffer from a view that is not released must surface as an
 * {@link IllegalStateException}; a {@code null} buffer from a released view must surface as
 * a {@link CancelTaskException}.
 */
@Test
public void testGetNextAfterPartitionReleased() throws Exception {
    final ResultSubpartitionView view = mock(ResultSubpartitionView.class);
    final SingleInputGate inputGate = mock(SingleInputGate.class);
    final ResultPartitionManager partitions = mock(ResultPartitionManager.class);

    // Every subpartition view request is answered with the mocked view.
    when(partitions.createSubpartitionView(
            any(ResultPartitionID.class),
            anyInt(),
            any(BufferProvider.class),
            any(BufferAvailabilityListener.class)))
        .thenReturn(view);

    final LocalInputChannel localChannel = new LocalInputChannel(
            inputGate,
            0,
            new ResultPartitionID(),
            partitions,
            new TaskEventDispatcher(),
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    localChannel.requestSubpartition(0);

    // Null buffer but not released
    when(view.getNextBuffer()).thenReturn(null);
    when(view.isReleased()).thenReturn(false);

    boolean sawIllegalState = false;
    try {
        localChannel.getNextBuffer();
    } catch (IllegalStateException expected) {
        // This is the outcome the test is looking for.
        sawIllegalState = true;
    }
    if (!sawIllegalState) {
        fail("Did not throw expected IllegalStateException");
    }

    // Null buffer and released
    when(view.getNextBuffer()).thenReturn(null);
    when(view.isReleased()).thenReturn(true);

    boolean sawCancelTask = false;
    try {
        localChannel.getNextBuffer();
    } catch (CancelTaskException expected) {
        // This is the outcome the test is looking for.
        sawCancelTask = true;
    }
    if (!sawCancelTask) {
        fail("Did not throw expected CancelTaskException");
    }
}
use of org.apache.flink.runtime.io.network.TaskEventDispatcher in project flink by apache.
the class LocalInputChannelTest method testConcurrentReleaseAndRetriggerPartitionRequest.
/**
 * Verifies that concurrent release via the SingleInputGate and re-triggering
 * of a partition request works smoothly.
 *
 * <ul>
 *   <li>SingleInputGate acquires its request lock and tries to release all
 *       registered channels. When releasing a channel, it needs to acquire
 *       the channel's shared request-release lock.</li>
 *   <li>If a LocalInputChannel concurrently retriggers a partition request via
 *       a Timer Thread it acquires the channel's request-release lock and calls
 *       the retrigger callback on the SingleInputGate, which again tries to
 *       acquire the gate's request lock.</li>
 * </ul>
 *
 * <p>For certain timings this leads to a deadlock. This test reliably
 * reproduced such a timing (reported in FLINK-5228). It is pretty much
 * testing the buggy implementation and has not much more general value. If it
 * becomes obsolete at some point, feel free to remove it.
 *
 * <p>The fix in the end was to not acquire the channel's lock when releasing it
 * and/or not doing any input gate callbacks while holding the channel's lock.
 * Both were applied.
 */
@Test
public void testConcurrentReleaseAndRetriggerPartitionRequest() throws Exception {
    final SingleInputGate gate = new SingleInputGate(
            "test task name",
            new JobID(),
            new IntermediateDataSetID(),
            ResultPartitionType.PIPELINED,
            0,
            1,
            mock(TaskActions.class),
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());

    final ResultPartitionManager partitions = mock(ResultPartitionManager.class);

    // Every view request stalls briefly and then reports the partition as missing.
    final Answer<ResultSubpartitionView> delayedPartitionNotFound = new Answer<ResultSubpartitionView>() {
        @Override
        public ResultSubpartitionView answer(InvocationOnMock invocationOnMock) throws Throwable {
            // Sleep here a little to give the releaser Thread
            // time to acquire the input gate lock. We throw
            // the Exception to retrigger the request.
            Thread.sleep(100);
            throw new PartitionNotFoundException(new ResultPartitionID());
        }
    };
    when(partitions.createSubpartitionView(
            any(ResultPartitionID.class),
            anyInt(),
            any(BufferProvider.class),
            any(BufferAvailabilityListener.class)))
        .thenAnswer(delayedPartitionNotFound);

    final LocalInputChannel channel = new LocalInputChannel(
            gate,
            0,
            new ResultPartitionID(),
            partitions,
            new TaskEventDispatcher(),
            1,
            1,
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    gate.setInputChannel(new IntermediateResultPartitionID(), channel);

    // Releases everything registered at the gate.
    final Thread releaser = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                gate.releaseAllResources();
            } catch (IOException ignored) {
            }
        }
    });

    // Requests the subpartition, which ends up in the stubbed answer above.
    final Thread requester = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                channel.requestSubpartition(0);
            } catch (IOException | InterruptedException ignored) {
            }
        }
    });

    requester.start();
    releaser.start();

    // The test only completes if neither thread gets stuck.
    releaser.join();
    requester.join();
}
Aggregations