Search in sources :

Example 16 with ResultPartitionManager

use of org.apache.flink.runtime.io.network.partition.ResultPartitionManager in project flink by apache.

the class LocalInputChannelTest method testConcurrentConsumeMultiplePartitions.

/**
 * Tests the consumption of multiple subpartitions via local input channels.
 *
 * <p>Multiple producer tasks produce pipelined partitions, which are consumed by multiple tasks
 * via local input channels.
 */
@Test
public void testConcurrentConsumeMultiplePartitions() throws Exception {
    // Config
    final int parallelism = 32;
    final int producerBufferPoolSize = parallelism + 1;
    final int numberOfBuffersPerChannel = 1024;
    // Setup
    // One thread per produced partition and one per consumer
    final ExecutorService executor = Executors.newFixedThreadPool(2 * parallelism);
    final NetworkBufferPool networkBuffers = new NetworkBufferPool((parallelism * producerBufferPoolSize) + (parallelism * parallelism), TestBufferFactory.BUFFER_SIZE);
    final ResultPartitionManager partitionManager = new ResultPartitionManager();
    final ResultPartitionID[] partitionIds = new ResultPartitionID[parallelism];
    final TestPartitionProducer[] partitionProducers = new TestPartitionProducer[parallelism];
    // Create all partitions
    for (int i = 0; i < parallelism; i++) {
        partitionIds[i] = new ResultPartitionID();
        final ResultPartition partition = new ResultPartitionBuilder().setResultPartitionId(partitionIds[i]).setNumberOfSubpartitions(parallelism).setNumTargetKeyGroups(parallelism).setResultPartitionManager(partitionManager).setBufferPoolFactory(() -> networkBuffers.createBufferPool(producerBufferPoolSize, producerBufferPoolSize, parallelism, Integer.MAX_VALUE)).build();
        // Create a buffer pool for this partition
        partition.setup();
        // Create the producer
        partitionProducers[i] = new TestPartitionProducer((BufferWritingResultPartition) partition, false, new TestPartitionProducerBufferSource(parallelism, TestBufferFactory.BUFFER_SIZE, numberOfBuffersPerChannel));
    }
    // Test
    try {
        // Submit producer tasks
        List<CompletableFuture<?>> results = Lists.newArrayListWithCapacity(parallelism + 1);
        for (int i = 0; i < parallelism; i++) {
            results.add(CompletableFuture.supplyAsync(CheckedSupplier.unchecked(partitionProducers[i]::call), executor));
        }
        // Submit consumer
        for (int i = 0; i < parallelism; i++) {
            final TestLocalInputChannelConsumer consumer = new TestLocalInputChannelConsumer(i, parallelism, numberOfBuffersPerChannel, networkBuffers.createBufferPool(parallelism, parallelism), partitionManager, new TaskEventDispatcher(), partitionIds);
            results.add(CompletableFuture.supplyAsync(CheckedSupplier.unchecked(consumer::call), executor));
        }
        FutureUtils.waitForAll(results).get();
    } finally {
        networkBuffers.destroyAllBufferPools();
        networkBuffers.destroy();
        executor.shutdown();
    }
}
Also used : TestPartitionProducer(org.apache.flink.runtime.io.network.util.TestPartitionProducer) ResultPartitionBuilder(org.apache.flink.runtime.io.network.partition.ResultPartitionBuilder) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) TestingResultPartitionManager(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateTest.TestingResultPartitionManager) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) BufferWritingResultPartition(org.apache.flink.runtime.io.network.partition.BufferWritingResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) CompletableFuture(java.util.concurrent.CompletableFuture) BufferWritingResultPartition(org.apache.flink.runtime.io.network.partition.BufferWritingResultPartition) ExecutorService(java.util.concurrent.ExecutorService) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) Test(org.junit.Test)

Example 17 with ResultPartitionManager

use of org.apache.flink.runtime.io.network.partition.ResultPartitionManager in project flink by apache.

the class LocalInputChannelTest method testUnblockReleasedChannel.

@Test(expected = IllegalStateException.class)
public void testUnblockReleasedChannel() throws Exception {
    SingleInputGate inputGate = createSingleInputGate(1);
    LocalInputChannel localChannel = createLocalInputChannel(inputGate, new ResultPartitionManager());
    localChannel.releaseAllResources();
    localChannel.resumeConsumption();
}
Also used : InputChannelTestUtils.createLocalInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) TestingResultPartitionManager(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateTest.TestingResultPartitionManager) InputChannelTestUtils.createSingleInputGate(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createSingleInputGate) Test(org.junit.Test)

Example 18 with ResultPartitionManager

use of org.apache.flink.runtime.io.network.partition.ResultPartitionManager in project flink by apache.

the class TaskManagerComponentsStartupShutdownTest method testComponentsStartupShutdown.

/**
	 * Makes sure that all components are shut down when the TaskManager
	 * actor is shut down.
	 */
@Test
public void testComponentsStartupShutdown() {
    final String[] TMP_DIR = new String[] { ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH };
    final Time timeout = Time.seconds(100);
    final int BUFFER_SIZE = 32 * 1024;
    Configuration config = new Configuration();
    config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "200 ms");
    config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "1 s");
    config.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 1);
    ActorSystem actorSystem = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(config);
        final ActorRef jobManager = JobManager.startJobManagerActors(config, actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
        FlinkResourceManager.startResourceManagerActors(config, actorSystem, LeaderRetrievalUtils.createLeaderRetrievalService(config, jobManager), StandaloneResourceManager.class);
        final int numberOfSlots = 1;
        // create the components for the TaskManager manually
        final TaskManagerConfiguration tmConfig = new TaskManagerConfiguration(numberOfSlots, TMP_DIR, timeout, null, Time.milliseconds(500), Time.seconds(30), Time.seconds(10), // cleanup interval
        1000000, config, // exit-jvm-on-fatal-error
        false);
        final NetworkEnvironmentConfiguration netConf = new NetworkEnvironmentConfiguration(32, BUFFER_SIZE, MemoryType.HEAP, IOManager.IOMode.SYNC, 0, 0, 2, 8, null);
        ResourceID taskManagerId = ResourceID.generate();
        final TaskManagerLocation connectionInfo = new TaskManagerLocation(taskManagerId, InetAddress.getLocalHost(), 10000);
        final MemoryManager memManager = new MemoryManager(32 * BUFFER_SIZE, 1, BUFFER_SIZE, MemoryType.HEAP, false);
        final IOManager ioManager = new IOManagerAsync(TMP_DIR);
        final NetworkEnvironment network = new NetworkEnvironment(new NetworkBufferPool(netConf.numNetworkBuffers(), netConf.networkBufferSize(), netConf.memoryType()), new LocalConnectionManager(), new ResultPartitionManager(), new TaskEventDispatcher(), new KvStateRegistry(), null, netConf.ioMode(), netConf.partitionRequestInitialBackoff(), netConf.partitionRequestMaxBackoff(), netConf.networkBuffersPerChannel(), netConf.extraNetworkBuffersPerGate());
        network.start();
        LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(jobManager.path().toString());
        MetricRegistryConfiguration metricRegistryConfiguration = MetricRegistryConfiguration.fromConfiguration(config);
        // create the task manager
        final Props tmProps = Props.create(TaskManager.class, tmConfig, taskManagerId, connectionInfo, memManager, ioManager, network, numberOfSlots, leaderRetrievalService, new MetricRegistry(metricRegistryConfiguration));
        final ActorRef taskManager = actorSystem.actorOf(tmProps);
        new JavaTestKit(actorSystem) {

            {
                // wait for the TaskManager to be registered
                new Within(new FiniteDuration(5000, TimeUnit.SECONDS)) {

                    @Override
                    protected void run() {
                        taskManager.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());
                        expectMsgEquals(TaskManagerMessages.getRegisteredAtJobManagerMessage());
                    }
                };
            }
        };
        // shut down all actors and the actor system
        // Kill the Task down the JobManager
        taskManager.tell(Kill.getInstance(), ActorRef.noSender());
        jobManager.tell(Kill.getInstance(), ActorRef.noSender());
        // shut down the actors and the actor system
        actorSystem.shutdown();
        actorSystem.awaitTermination();
        actorSystem = null;
        // now that the TaskManager is shut down, the components should be shut down as well
        assertTrue(network.isShutdown());
        assertTrue(ioManager.isProperlyShutDown());
        assertTrue(memManager.isShutdown());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) TaskManagerConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerConfiguration) ActorRef(akka.actor.ActorRef) Time(org.apache.flink.api.common.time.Time) JobManager(org.apache.flink.runtime.jobmanager.JobManager) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Props(akka.actor.Props) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerConfiguration) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FiniteDuration(scala.concurrent.duration.FiniteDuration) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 19 with ResultPartitionManager

use of org.apache.flink.runtime.io.network.partition.ResultPartitionManager in project flink by apache.

the class TaskTest method testExecutionFailsInNetworkRegistration.

@Test
public void testExecutionFailsInNetworkRegistration() {
    try {
        // mock a working library cache
        LibraryCacheManager libCache = mock(LibraryCacheManager.class);
        when(libCache.getClassLoader(any(JobID.class))).thenReturn(getClass().getClassLoader());
        // mock a network manager that rejects registration
        ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
        ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
        PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
        Executor executor = mock(Executor.class);
        NetworkEnvironment network = mock(NetworkEnvironment.class);
        when(network.getResultPartitionManager()).thenReturn(partitionManager);
        when(network.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
        doThrow(new RuntimeException("buffers")).when(network).registerTask(any(Task.class));
        Task task = createTask(TestInvokableCorrect.class, libCache, network, consumableNotifier, partitionProducerStateChecker, executor);
        task.registerExecutionListener(listener);
        task.run();
        assertEquals(ExecutionState.FAILED, task.getExecutionState());
        assertTrue(task.isCanceledOrFailed());
        assertTrue(task.getFailureCause().getMessage().contains("buffers"));
        validateUnregisterTask(task.getExecutionId());
        validateListenerMessage(ExecutionState.FAILED, task, true);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Executor(java.util.concurrent.Executor) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) JobID(org.apache.flink.api.common.JobID) TimeoutException(java.util.concurrent.TimeoutException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) IOException(java.io.IOException) Test(org.junit.Test)

Example 20 with ResultPartitionManager

use of org.apache.flink.runtime.io.network.partition.ResultPartitionManager in project flink by apache.

the class LocalInputChannelTest method testRetriggerPartitionRequestWhilePartitionNotFound.

/**
 * Tests that {@link SingleInputGate#retriggerPartitionRequest(IntermediateResultPartitionID)}
 * is triggered after {@link LocalInputChannel#requestSubpartition()} throws {@link
 * PartitionNotFoundException} within backoff.
 */
@Test
public void testRetriggerPartitionRequestWhilePartitionNotFound() throws Exception {
    final SingleInputGate inputGate = createSingleInputGate(1);
    final LocalInputChannel localChannel = createLocalInputChannel(inputGate, new ResultPartitionManager(), 1, 1);
    inputGate.setInputChannels(localChannel);
    localChannel.requestSubpartition();
    // The timer should be initialized at the first time of retriggering partition request.
    assertNotNull(inputGate.getRetriggerLocalRequestTimer());
}
Also used : InputChannelTestUtils.createLocalInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) TestingResultPartitionManager(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateTest.TestingResultPartitionManager) InputChannelTestUtils.createSingleInputGate(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createSingleInputGate) Test(org.junit.Test)

Aggregations

ResultPartitionManager (org.apache.flink.runtime.io.network.partition.ResultPartitionManager)27 Test (org.junit.Test)22 TaskEventDispatcher (org.apache.flink.runtime.io.network.TaskEventDispatcher)13 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)12 InputChannelID (org.apache.flink.runtime.io.network.partition.consumer.InputChannelID)9 InputChannelTestUtils.createLocalInputChannel (org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel)7 PartitionNotFoundException (org.apache.flink.runtime.io.network.partition.PartitionNotFoundException)7 TestingResultPartitionManager (org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateTest.TestingResultPartitionManager)7 IOException (java.io.IOException)6 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)6 NetworkBufferPool (org.apache.flink.runtime.io.network.buffer.NetworkBufferPool)6 BufferAvailabilityListener (org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener)6 InputChannelTestUtils.createSingleInputGate (org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createSingleInputGate)6 ResultPartition (org.apache.flink.runtime.io.network.partition.ResultPartition)6 ResultSubpartitionView (org.apache.flink.runtime.io.network.partition.ResultSubpartitionView)6 EmbeddedChannel (org.apache.flink.shaded.netty4.io.netty.channel.embedded.EmbeddedChannel)6 CompletableFuture (java.util.concurrent.CompletableFuture)5 JobID (org.apache.flink.api.common.JobID)5 IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)5 Executor (java.util.concurrent.Executor)4