Search in sources :

Example 6 with TaskEventDispatcher

Use of org.apache.flink.runtime.io.network.TaskEventDispatcher in the Apache Flink project.

In class SingleInputGateTest, method testBackwardsEventWithUninitializedChannel.

/**
 * Sends a task event backwards through an input gate that owns one local channel and one
 * still-unknown channel, and checks that the event is published a second time once the
 * unknown channel has been updated to a local one.
 */
@Test
public void testBackwardsEventWithUninitializedChannel() throws Exception {
    // --- mocked environment ---------------------------------------------------------------

    // Dispatcher that reports every publish as successful
    final TaskEventDispatcher dispatcher = mock(TaskEventDispatcher.class);
    when(dispatcher.publish(any(ResultPartitionID.class), any(TaskEvent.class)))
            .thenReturn(true);

    // Subpartition view that hands out a buffer backed by a 1024-byte unpooled segment
    final Buffer stubBuffer = new Buffer(
            MemorySegmentFactory.allocateUnpooledSegment(1024),
            mock(BufferRecycler.class));
    final ResultSubpartitionView subpartitionView = mock(ResultSubpartitionView.class);
    when(subpartitionView.getNextBuffer()).thenReturn(stubBuffer);

    // Partition manager that returns the stubbed view for any request
    final ResultPartitionManager partitions = mock(ResultPartitionManager.class);
    when(partitions.createSubpartitionView(
            any(ResultPartitionID.class),
            anyInt(),
            any(BufferProvider.class),
            any(BufferAvailabilityListener.class)))
            .thenReturn(subpartitionView);

    // --- input gate with two channels -----------------------------------------------------

    final IntermediateDataSetID consumedResultId = new IntermediateDataSetID();
    final SingleInputGate inputGate = new SingleInputGate(
            "Test Task Name",
            new JobID(),
            consumedResultId,
            ResultPartitionType.PIPELINED,
            0,
            2,
            mock(TaskActions.class),
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());

    final BufferPool pool = mock(BufferPool.class);
    when(pool.getNumberOfRequiredMemorySegments()).thenReturn(2);
    inputGate.setBufferPool(pool);

    // Channel 0: local
    final ResultPartitionID localPartitionId = new ResultPartitionID(
            new IntermediateResultPartitionID(),
            new ExecutionAttemptID());
    final InputChannel localChannel = new LocalInputChannel(
            inputGate,
            0,
            localPartitionId,
            partitions,
            dispatcher,
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());

    // Channel 1: unknown
    final ResultPartitionID unknownPartitionId = new ResultPartitionID(
            new IntermediateResultPartitionID(),
            new ExecutionAttemptID());
    final InputChannel unknownChannel = new UnknownInputChannel(
            inputGate,
            1,
            unknownPartitionId,
            partitions,
            dispatcher,
            mock(ConnectionManager.class),
            0,
            0,
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());

    inputGate.setInputChannel(localPartitionId.getPartitionId(), localChannel);
    inputGate.setInputChannel(unknownPartitionId.getPartitionId(), unknownChannel);

    // --- request partitions: only the local channel is expected to request a view ---------

    inputGate.requestPartitions();
    verify(partitions, times(1)).createSubpartitionView(
            any(ResultPartitionID.class),
            anyInt(),
            any(BufferProvider.class),
            any(BufferAvailabilityListener.class));

    // --- send the event before the unknown channel is initialized -------------------------

    final TaskEvent backwardsEvent = new TestTaskEvent();
    inputGate.sendTaskEvent(backwardsEvent);

    // Only the local channel is expected to have published so far
    verify(dispatcher, times(1)).publish(any(ResultPartitionID.class), any(TaskEvent.class));

    // --- turn the unknown channel into a local one ----------------------------------------

    final ResultPartitionID updatedPartitionId = new ResultPartitionID(
            unknownPartitionId.getPartitionId(),
            unknownPartitionId.getProducerId());
    inputGate.updateInputChannel(new InputChannelDeploymentDescriptor(
            updatedPartitionId,
            ResultPartitionLocation.createLocal()));

    // After the update a second view must have been requested and the pending event
    // must have been published through the newly created local channel
    verify(partitions, times(2)).createSubpartitionView(
            any(ResultPartitionID.class),
            anyInt(),
            any(BufferProvider.class),
            any(BufferAvailabilityListener.class));
    verify(dispatcher, times(2)).publish(any(ResultPartitionID.class), any(TaskEvent.class));
}
Also used : Buffer(org.apache.flink.runtime.io.network.buffer.Buffer) UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ResultSubpartitionView(org.apache.flink.runtime.io.network.partition.ResultSubpartitionView) TestTaskEvent(org.apache.flink.runtime.io.network.util.TestTaskEvent) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) BufferPool(org.apache.flink.runtime.io.network.buffer.BufferPool) ConnectionManager(org.apache.flink.runtime.io.network.ConnectionManager) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) TaskEvent(org.apache.flink.runtime.event.TaskEvent) TestTaskEvent(org.apache.flink.runtime.io.network.util.TestTaskEvent) BufferAvailabilityListener(org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener) InputChannelDeploymentDescriptor(org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor) BufferProvider(org.apache.flink.runtime.io.network.buffer.BufferProvider) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 7 with TaskEventDispatcher

Use of org.apache.flink.runtime.io.network.TaskEventDispatcher in the Apache Flink project.

In class TaskManagerComponentsStartupShutdownTest, method testComponentsStartupShutdown.

/**
 * Makes sure that all components are shut down when the TaskManager
 * actor is shut down.
 *
 * <p>The test starts JobManager and ResourceManager actors, builds the TaskManager
 * components (memory manager, I/O manager, network environment) by hand, starts a
 * TaskManager actor on top of them, waits for it to register, kills the actors and
 * finally asserts that each component reports itself as shut down.
 */
@Test
public void testComponentsStartupShutdown() {
    final String[] TMP_DIR = new String[] { ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH };
    final Time timeout = Time.seconds(100);
    final int BUFFER_SIZE = 32 * 1024;
    Configuration config = new Configuration();
    config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "200 ms");
    config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "1 s");
    config.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 1);
    ActorSystem actorSystem = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(config);
        final ActorRef jobManager = JobManager.startJobManagerActors(config, actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
        FlinkResourceManager.startResourceManagerActors(config, actorSystem, LeaderRetrievalUtils.createLeaderRetrievalService(config, jobManager), StandaloneResourceManager.class);
        final int numberOfSlots = 1;
        // create the components for the TaskManager manually
        final TaskManagerConfiguration tmConfig = new TaskManagerConfiguration(numberOfSlots, TMP_DIR, timeout, null, Time.milliseconds(500), Time.seconds(30), Time.seconds(10),
        // cleanup interval
        1000000, config,
        // exit-jvm-on-fatal-error
        false);
        final NetworkEnvironmentConfiguration netConf = new NetworkEnvironmentConfiguration(32, BUFFER_SIZE, MemoryType.HEAP, IOManager.IOMode.SYNC, 0, 0, 2, 8, null);
        ResourceID taskManagerId = ResourceID.generate();
        final TaskManagerLocation connectionInfo = new TaskManagerLocation(taskManagerId, InetAddress.getLocalHost(), 10000);
        final MemoryManager memManager = new MemoryManager(32 * BUFFER_SIZE, 1, BUFFER_SIZE, MemoryType.HEAP, false);
        final IOManager ioManager = new IOManagerAsync(TMP_DIR);
        final NetworkEnvironment network = new NetworkEnvironment(new NetworkBufferPool(netConf.numNetworkBuffers(), netConf.networkBufferSize(), netConf.memoryType()), new LocalConnectionManager(), new ResultPartitionManager(), new TaskEventDispatcher(), new KvStateRegistry(), null, netConf.ioMode(), netConf.partitionRequestInitialBackoff(), netConf.partitionRequestMaxBackoff(), netConf.networkBuffersPerChannel(), netConf.extraNetworkBuffersPerGate());
        network.start();
        LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(jobManager.path().toString());
        MetricRegistryConfiguration metricRegistryConfiguration = MetricRegistryConfiguration.fromConfiguration(config);
        // create the task manager
        final Props tmProps = Props.create(TaskManager.class, tmConfig, taskManagerId, connectionInfo, memManager, ioManager, network, numberOfSlots, leaderRetrievalService, new MetricRegistry(metricRegistryConfiguration));
        final ActorRef taskManager = actorSystem.actorOf(tmProps);
        new JavaTestKit(actorSystem) {

            {
                // wait for the TaskManager to be registered
                // NOTE(review): the window is 5000 *seconds*; this may have been meant as
                // milliseconds - confirm before changing.
                new Within(new FiniteDuration(5000, TimeUnit.SECONDS)) {

                    @Override
                    protected void run() {
                        taskManager.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());
                        expectMsgEquals(TaskManagerMessages.getRegisteredAtJobManagerMessage());
                    }
                };
            }
        };
        // kill the TaskManager and the JobManager actors
        taskManager.tell(Kill.getInstance(), ActorRef.noSender());
        jobManager.tell(Kill.getInstance(), ActorRef.noSender());
        // shut down the actor system and wait until it has terminated
        actorSystem.shutdown();
        actorSystem.awaitTermination();
        // already shut down; prevents a second shutdown in the finally block
        actorSystem = null;
        // now that the TaskManager is shut down, the components should be shut down as well
        assertTrue(network.isShutdown());
        assertTrue(ioManager.isProperlyShutDown());
        assertTrue(memManager.isShutdown());
    } catch (Exception e) {
        // Fail the test, but keep the original exception attached as the cause so that
        // its stack trace is part of the reported failure (and the message is never null).
        throw new AssertionError("Unexpected exception during test: " + e.getMessage(), e);
    } finally {
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) TaskManagerConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerConfiguration) ActorRef(akka.actor.ActorRef) Time(org.apache.flink.api.common.time.Time) JobManager(org.apache.flink.runtime.jobmanager.JobManager) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Props(akka.actor.Props) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerConfiguration) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FiniteDuration(scala.concurrent.duration.FiniteDuration) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 8 with TaskEventDispatcher

Use of org.apache.flink.runtime.io.network.TaskEventDispatcher in the Apache Flink project.

In class SingleInputGateTest, method testReleaseWhilePollingChannel.

/**
 * Checks that releasing the input gate is observed by a consumer that is
 * currently waiting on the gate for data: the consumer thread must terminate
 * with an {@link IllegalStateException}.
 */
@Test
public void testReleaseWhilePollingChannel() throws Exception {
    final AtomicReference<Exception> consumerFailure = new AtomicReference<>();

    // Input gate with exactly one channel; the channel never delivers anything
    final SingleInputGate inputGate = new SingleInputGate(
            "InputGate",
            new JobID(),
            new IntermediateDataSetID(),
            ResultPartitionType.PIPELINED,
            0,
            1,
            mock(TaskActions.class),
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    InputChannel idleChannel = new UnknownInputChannel(
            inputGate,
            0,
            new ResultPartitionID(),
            new ResultPartitionManager(),
            new TaskEventDispatcher(),
            new LocalConnectionManager(),
            0,
            0,
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    inputGate.setInputChannel(idleChannel.partitionId.getPartitionId(), idleChannel);

    // Consumer running on its own thread; any exception is handed back to the test
    final Runnable consumeOnce = new Runnable() {

        @Override
        public void run() {
            try {
                inputGate.getNextBufferOrEvent();
            } catch (Exception e) {
                consumerFailure.set(e);
            }
        }
    };
    Thread asyncConsumer = new Thread(consumeOnce);
    asyncConsumer.start();

    // Poll (at most 50 times, 100 ms apart) until the consumer thread is WAITING
    boolean consumerIsWaiting = false;
    for (int attempt = 0; attempt < 50 && !consumerIsWaiting; attempt++) {
        if (asyncConsumer.isAlive()) {
            consumerIsWaiting = Thread.State.WAITING == asyncConsumer.getState();
        }
        if (!consumerIsWaiting) {
            Thread.sleep(100);
        }
    }
    assertTrue("Did not trigger blocking buffer request.", consumerIsWaiting);

    // Pull the gate out from under the waiting consumer
    inputGate.releaseAllResources();

    // If the gate's released state is not checked while waiting, this join
    // never returns and the test hangs.
    asyncConsumer.join();

    final Exception observed = consumerFailure.get();
    assertNotNull(observed);
    assertEquals(IllegalStateException.class, observed.getClass());
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) IOException(java.io.IOException) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 9 with TaskEventDispatcher

Use of org.apache.flink.runtime.io.network.TaskEventDispatcher in the Apache Flink project.

In class SingleInputGateTest, method testRequestBackoffConfiguration.

/**
 * Verifies that the partition request backoff values configured on the network
 * environment reach every kind of input channel (local, remote, unknown) created
 * by {@code SingleInputGate.create(...)}.
 */
@Test
public void testRequestBackoffConfiguration() throws Exception {
    final int initialBackoff = 137;
    final int maxBackoff = 1001;

    // One partition per channel flavour
    final ResultPartitionID localId = new ResultPartitionID();
    final ResultPartitionID remoteId = new ResultPartitionID();
    final ResultPartitionID unknownId = new ResultPartitionID();

    final InputChannelDeploymentDescriptor localDesc =
            new InputChannelDeploymentDescriptor(localId, ResultPartitionLocation.createLocal());
    final ConnectionID remoteConnection =
            new ConnectionID(new InetSocketAddress("localhost", 5000), 0);
    final InputChannelDeploymentDescriptor remoteDesc =
            new InputChannelDeploymentDescriptor(remoteId, ResultPartitionLocation.createRemote(remoteConnection));
    final InputChannelDeploymentDescriptor unknownDesc =
            new InputChannelDeploymentDescriptor(unknownId, ResultPartitionLocation.createUnknown());

    final InputGateDeploymentDescriptor gateDesc = new InputGateDeploymentDescriptor(
            new IntermediateDataSetID(),
            ResultPartitionType.PIPELINED,
            0,
            new InputChannelDeploymentDescriptor[] { localDesc, remoteDesc, unknownDesc });

    // Network environment mock carrying the backoff settings under test
    final NetworkEnvironment netEnv = mock(NetworkEnvironment.class);
    when(netEnv.getResultPartitionManager()).thenReturn(new ResultPartitionManager());
    when(netEnv.getTaskEventDispatcher()).thenReturn(new TaskEventDispatcher());
    when(netEnv.getPartitionRequestInitialBackoff()).thenReturn(initialBackoff);
    when(netEnv.getPartitionRequestMaxBackoff()).thenReturn(maxBackoff);
    when(netEnv.getConnectionManager()).thenReturn(new LocalConnectionManager());

    final SingleInputGate gate = SingleInputGate.create(
            "TestTask",
            new JobID(),
            new ExecutionAttemptID(),
            gateDesc,
            netEnv,
            mock(TaskActions.class),
            new UnregisteredTaskMetricsGroup.DummyTaskIOMetricGroup());
    assertEquals(gateDesc.getConsumedPartitionType(), gate.getConsumedPartitionType());

    // Each descriptor must have produced a channel of the matching type
    final Map<IntermediateResultPartitionID, InputChannel> channelsById = gate.getInputChannels();
    assertEquals(3, channelsById.size());

    final InputChannel localChannel = channelsById.get(localId.getPartitionId());
    assertEquals(LocalInputChannel.class, localChannel.getClass());

    final InputChannel remoteChannel = channelsById.get(remoteId.getPartitionId());
    assertEquals(RemoteInputChannel.class, remoteChannel.getClass());

    final InputChannel unknownChannel = channelsById.get(unknownId.getPartitionId());
    assertEquals(UnknownInputChannel.class, unknownChannel.getClass());

    // Expected backoff after each successful increase: the initial value, doubled twice,
    // then the configured maximum
    final int[] expectedBackoffs = {
        initialBackoff,
        initialBackoff * 2,
        initialBackoff * 2 * 2,
        maxBackoff
    };

    for (InputChannel channel : new InputChannel[] { localChannel, remoteChannel, unknownChannel }) {
        assertEquals(0, channel.getCurrentBackoff());
        for (int expected : expectedBackoffs) {
            assertTrue(channel.increaseBackoff());
            assertEquals(expected, channel.getCurrentBackoff());
        }
        // Once at the maximum, a further increase must be refused
        assertFalse(channel.increaseBackoff());
    }
}
Also used : UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) InetSocketAddress(java.net.InetSocketAddress) TaskActions(org.apache.flink.runtime.taskmanager.TaskActions) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) ConnectionID(org.apache.flink.runtime.io.network.ConnectionID) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) InputChannelDeploymentDescriptor(org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Aggregations

TaskEventDispatcher (org.apache.flink.runtime.io.network.TaskEventDispatcher)9 ResultPartitionManager (org.apache.flink.runtime.io.network.partition.ResultPartitionManager)9 Test (org.junit.Test)8 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)7 IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)7 JobID (org.apache.flink.api.common.JobID)6 LocalConnectionManager (org.apache.flink.runtime.io.network.LocalConnectionManager)6 UnregisteredTaskMetricsGroup (org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup)6 TaskActions (org.apache.flink.runtime.taskmanager.TaskActions)6 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)5 BufferProvider (org.apache.flink.runtime.io.network.buffer.BufferProvider)4 BufferAvailabilityListener (org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener)4 InputChannelDeploymentDescriptor (org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor)3 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)3 NetworkBufferPool (org.apache.flink.runtime.io.network.buffer.NetworkBufferPool)3 ResultSubpartitionView (org.apache.flink.runtime.io.network.partition.ResultSubpartitionView)3 IOException (java.io.IOException)2 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)2 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)2 ConnectionManager (org.apache.flink.runtime.io.network.ConnectionManager)2