Search in sources :

Example 56 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class SingleInputGateTest method createInputGateWithLocalChannels.

/**
 * Creates {@code numberOfGates} input gates on the given shuffle environment and returns them
 * keyed by {@link InputGateID}.
 *
 * <p>Every gate is deployed with the same array of {@code numberOfLocalChannels} shuffle
 * descriptors. Each descriptor is built with a fresh {@link IntermediateResultPartitionID} and a
 * freshly generated {@link ResourceID}. All gates share a single consumer
 * {@link ExecutionAttemptID}.
 *
 * @param network shuffle environment used to create the gates
 * @param numberOfGates number of input gates to create
 * @param numberOfLocalChannels number of channel descriptors handed to every gate
 * @return the created gates, indexed by the ID built from their data set ID and the consumer ID
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private static Map<InputGateID, SingleInputGate> createInputGateWithLocalChannels(NettyShuffleEnvironment network, int numberOfGates, @SuppressWarnings("SameParameterValue") int numberOfLocalChannels) throws IOException {
    // One descriptor per channel; the very same array is passed to each gate descriptor below.
    final ShuffleDescriptor[] sharedDescriptors = new NettyShuffleDescriptor[numberOfLocalChannels];
    for (int channel = 0; channel < numberOfLocalChannels; channel++) {
        final IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
        sharedDescriptors[channel] = createRemoteWithIdAndLocation(partitionId, ResourceID.generate());
    }

    final InputGateDeploymentDescriptor[] gateDescriptors = new InputGateDeploymentDescriptor[numberOfGates];
    final IntermediateDataSetID[] dataSetIds = new IntermediateDataSetID[numberOfGates];
    for (int gate = 0; gate < numberOfGates; gate++) {
        final IntermediateDataSetID dataSetId = new IntermediateDataSetID();
        dataSetIds[gate] = dataSetId;
        gateDescriptors[gate] = new InputGateDeploymentDescriptor(dataSetId, ResultPartitionType.PIPELINED, 0, sharedDescriptors);
    }

    final ExecutionAttemptID consumerAttemptId = new ExecutionAttemptID();
    final SingleInputGate[] createdGates =
            network.createInputGates(
                            network.createShuffleIOOwnerContext("", consumerAttemptId, new UnregisteredMetricsGroup()),
                            SingleInputGateBuilder.NO_OP_PRODUCER_CHECKER,
                            asList(gateDescriptors))
                    .toArray(new SingleInputGate[0]);

    // Gates are matched to their data set IDs by position.
    final Map<InputGateID, SingleInputGate> gatesById = new HashMap<>();
    for (int gate = 0; gate < numberOfGates; gate++) {
        final InputGateID gateId = new InputGateID(dataSetIds[gate], consumerAttemptId);
        gatesById.put(gateId, createdGates[gate]);
    }
    return gatesById;
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) HashMap(java.util.HashMap) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) UnknownShuffleDescriptor(org.apache.flink.runtime.shuffle.UnknownShuffleDescriptor) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)

Example 57 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class SingleInputGateTest method testRequestBackoffConfiguration.

/**
 * Tests that the request backoff configuration is correctly forwarded to the channels.
 *
 * <p>A gate with three channels is created on an environment configured with an initial backoff
 * of 137 and a maximum backoff of 1001. For every channel the test expects the backoff to start
 * at 0, take the initial value on the first increase, double on each of the next two increases,
 * then be set to the maximum, after which a further increase is rejected.
 *
 * <p>The shuffle environment and the gate are registered with the {@link Closer} as soon as they
 * exist, so that they are also released when one of the setup steps throws.
 */
@Test
public void testRequestBackoffConfiguration() throws Exception {
    IntermediateResultPartitionID[] partitionIds = new IntermediateResultPartitionID[] { new IntermediateResultPartitionID(), new IntermediateResultPartitionID(), new IntermediateResultPartitionID() };
    int initialBackoff = 137;
    int maxBackoff = 1001;
    try (Closer closer = Closer.create()) {
        final NettyShuffleEnvironment netEnv = new NettyShuffleEnvironmentBuilder().setPartitionRequestInitialBackoff(initialBackoff).setPartitionRequestMaxBackoff(maxBackoff).build();
        // Register before any further setup so a failure below cannot leak the environment.
        closer.register(netEnv::close);
        SingleInputGate gate = createSingleInputGate(partitionIds, ResultPartitionType.PIPELINED, netEnv);
        // Registered after netEnv: Closer closes in reverse order, so the gate is closed first.
        closer.register(gate::close);

        gate.setChannelStateWriter(ChannelStateWriter.NO_OP);
        gate.finishReadRecoveredState();
        // Poll the gate until it reports that the recovered state has been consumed.
        while (!gate.getStateConsumedFuture().isDone()) {
            gate.pollNext();
        }
        gate.convertRecoveredInputChannels();

        assertEquals(ResultPartitionType.PIPELINED, gate.getConsumedPartitionType());
        Map<SubpartitionInfo, InputChannel> channelMap = gate.getInputChannels();
        assertEquals(3, channelMap.size());
        channelMap.values().forEach(channel -> {
            try {
                channel.checkError();
            } catch (IOException e) {
                // Lambdas cannot throw checked exceptions; keep the cause attached.
                throw new RuntimeException(e);
            }
        });
        InputChannel localChannel = channelMap.get(createSubpartitionInfo(partitionIds[0]));
        assertEquals(LocalInputChannel.class, localChannel.getClass());
        InputChannel remoteChannel = channelMap.get(createSubpartitionInfo(partitionIds[1]));
        assertEquals(RemoteInputChannel.class, remoteChannel.getClass());
        InputChannel unknownChannel = channelMap.get(createSubpartitionInfo(partitionIds[2]));
        assertEquals(UnknownInputChannel.class, unknownChannel.getClass());
        InputChannel[] channels = new InputChannel[] { localChannel, remoteChannel, unknownChannel };
        for (InputChannel ch : channels) {
            assertEquals(0, ch.getCurrentBackoff());
            assertTrue(ch.increaseBackoff());
            assertEquals(initialBackoff, ch.getCurrentBackoff());
            assertTrue(ch.increaseBackoff());
            assertEquals(initialBackoff * 2, ch.getCurrentBackoff());
            assertTrue(ch.increaseBackoff());
            assertEquals(initialBackoff * 2 * 2, ch.getCurrentBackoff());
            assertTrue(ch.increaseBackoff());
            // The next doubling would exceed the configured maximum, so the maximum is expected.
            assertEquals(maxBackoff, ch.getCurrentBackoff());
            assertFalse(ch.increaseBackoff());
        }
    }
}
Also used : Closer(org.apache.flink.shaded.guava30.com.google.common.io.Closer) SubpartitionInfo(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.SubpartitionInfo) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IOException(java.io.IOException) InputChannelTestUtils.createRemoteInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createRemoteInputChannel) InputChannelTestUtils.createLocalInputChannel(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 58 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class SingleInputGateTest method createSingleInputGate.

/**
 * Creates a {@link SingleInputGate} with three input channel descriptors built from the first
 * three entries of {@code partitionIds}.
 *
 * <ul>
 *   <li>index 0: descriptor located at {@code localLocation} (the "local" channel)
 *   <li>index 1: descriptor located at a freshly generated {@link ResourceID} (the "remote"
 *       channel)
 *   <li>index 2: an {@link UnknownShuffleDescriptor} (the "unknown" channel)
 * </ul>
 *
 * @param partitionIds partition IDs; entries 0, 1 and 2 are read
 * @param resultPartitionType partition type passed to the gate deployment descriptor
 * @param subpartitionIndexRange subpartition range passed to the gate deployment descriptor
 * @param netEnv environment supplying configuration, buffer pool, owner context and defaults
 * @param localLocation resource ID used for the factory and for the first descriptor
 * @param connectionManager connection manager to use, or {@code null} to use the one of
 *     {@code netEnv}
 * @param resultPartitionManager partition manager to use, or {@code null} to use the one of
 *     {@code netEnv}
 * @return the gate created by a new {@link SingleInputGateFactory}
 * @throws IOException declared by the signature; propagated from the calls made here
 */
static SingleInputGate createSingleInputGate(IntermediateResultPartitionID[] partitionIds, ResultPartitionType resultPartitionType, SubpartitionIndexRange subpartitionIndexRange, NettyShuffleEnvironment netEnv, ResourceID localLocation, ConnectionManager connectionManager, ResultPartitionManager resultPartitionManager) throws IOException {
    final ShuffleDescriptor localDescriptor = createRemoteWithIdAndLocation(partitionIds[0], localLocation);
    final ShuffleDescriptor remoteDescriptor = createRemoteWithIdAndLocation(partitionIds[1], ResourceID.generate());
    final ShuffleDescriptor unknownDescriptor = new UnknownShuffleDescriptor(new ResultPartitionID(partitionIds[2], new ExecutionAttemptID()));
    final ShuffleDescriptor[] channelDescriptors = { localDescriptor, remoteDescriptor, unknownDescriptor };

    final InputGateDeploymentDescriptor gateDescriptor =
            new InputGateDeploymentDescriptor(
                    new IntermediateDataSetID(),
                    resultPartitionType,
                    subpartitionIndexRange,
                    new TaskDeploymentDescriptor.NonOffloaded<>(CompressedSerializedValue.fromObject(channelDescriptors)));
    final TaskMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskMetricGroup();

    // Explicitly supplied managers win; otherwise use the ones owned by the environment.
    ConnectionManager effectiveConnectionManager = connectionManager;
    if (effectiveConnectionManager == null) {
        effectiveConnectionManager = netEnv.getConnectionManager();
    }
    ResultPartitionManager effectivePartitionManager = resultPartitionManager;
    if (effectivePartitionManager == null) {
        effectivePartitionManager = netEnv.getResultPartitionManager();
    }

    final SingleInputGateFactory gateFactory =
            new SingleInputGateFactory(
                    localLocation,
                    netEnv.getConfiguration(),
                    effectiveConnectionManager,
                    effectivePartitionManager,
                    new TaskEventDispatcher(),
                    netEnv.getNetworkBufferPool());
    return gateFactory.create(
            netEnv.createShuffleIOOwnerContext("TestTask", metricGroup.executionId(), metricGroup),
            0,
            gateDescriptor,
            SingleInputGateBuilder.NO_OP_PRODUCER_CHECKER);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) UnknownShuffleDescriptor(org.apache.flink.runtime.shuffle.UnknownShuffleDescriptor) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) UnknownShuffleDescriptor(org.apache.flink.runtime.shuffle.UnknownShuffleDescriptor)

Example 59 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class TaskExecutorSubmissionTest method testUpdateTaskInputPartitionsFailure.

/**
 * Verifies that the TaskManager fails a running task when updating its input partitions fails.
 *
 * <p>The shuffle environment is a mock that is stubbed to throw an {@link IOException} from
 * {@code updatePartitionInfo}. The test then expects the task to end up in
 * {@link ExecutionState#FAILED} with an {@link IOException} as failure cause.
 */
@Test
public void testUpdateTaskInputPartitionsFailure() throws Exception {
    final ExecutionAttemptID attemptId = new ExecutionAttemptID();
    final TaskDeploymentDescriptor deployment = createTestTaskDeploymentDescriptor("test task", attemptId, BlockingNoOpInvokable.class);
    final CompletableFuture<Void> runningSignal = new CompletableFuture<>();
    final CompletableFuture<Void> failedSignal = new CompletableFuture<>();
    final ShuffleEnvironment<?, ?> mockedShuffleEnv = mock(ShuffleEnvironment.class, Mockito.RETURNS_MOCKS);

    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setShuffleEnvironment(mockedShuffleEnv)
                    .setSlotSize(1)
                    .addTaskManagerActionListener(attemptId, ExecutionState.RUNNING, runningSignal)
                    .addTaskManagerActionListener(attemptId, ExecutionState.FAILED, failedSignal)
                    .build()) {
        final TaskExecutorGateway gateway = env.getTaskExecutorGateway();
        final TaskSlotTable<Task> slots = env.getTaskSlotTable();

        // Submit the task and block until the RUNNING listener fires.
        slots.allocateSlot(0, jobId, deployment.getAllocationId(), Time.seconds(60));
        gateway.submitTask(deployment, env.getJobMasterId(), timeout).get();
        runningSignal.get();

        final ResourceID taskExecutorLocation = env.getTaskExecutor().getResourceID();
        final NettyShuffleDescriptor descriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), taskExecutorLocation);
        final PartitionInfo failingUpdate = new PartitionInfo(new IntermediateDataSetID(), descriptor);
        // Make exactly this update blow up inside the shuffle environment.
        doThrow(new IOException()).when(mockedShuffleEnv).updatePartitionInfo(attemptId, failingUpdate);

        // Wait for the update call to complete, then for the FAILED listener to fire.
        gateway.updatePartitions(attemptId, Collections.singletonList(failingUpdate), timeout).get();
        failedSignal.get();

        final Task failedTask = slots.getTask(deployment.getExecutionAttemptId());
        assertThat(failedTask.getExecutionState(), is(ExecutionState.FAILED));
        assertThat(failedTask.getFailureCause(), instanceOf(IOException.class));
    }
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) IOException(java.io.IOException) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionInfo(org.apache.flink.runtime.executiongraph.PartitionInfo) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 60 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class TaskExecutorSubmissionTest method testLocalPartitionNotFound.

/**
 * Tests that repeated local {@link PartitionNotFoundException}s ultimately fail the receiver.
 *
 * <p>The receiver's shuffle descriptor points at the same {@link ResourceID} that the test
 * environment is configured with, and the partition request backoff is set to an initial value
 * of 100 and a maximum of 200. The test waits for the task to reach RUNNING and then FAILED,
 * and expects a {@link PartitionNotFoundException} as the failure cause.
 */
@Test
public void testLocalPartitionNotFound() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor shuffleDescriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
    TaskDeploymentDescriptor tdd = createReceiver(shuffleDescriptor);
    ExecutionAttemptID eid = tdd.getExecutionAttemptId();
    Configuration config = new Configuration();
    config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
    config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
    final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> taskFailedFuture = new CompletableFuture<>();
    // The environment uses producerLocation as its own resource ID, matching the descriptor above.
    try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId).setResourceID(producerLocation).setSlotSize(1).addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture).addTaskManagerActionListener(eid, ExecutionState.FAILED, taskFailedFuture).setConfiguration(config).useRealNonMockShuffleEnvironment().build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
        taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd, env.getJobMasterId(), timeout).get();
        taskRunningFuture.get();
        taskFailedFuture.get();
        // JUnit's contract is assertSame(expected, actual); keeping that order makes a failure
        // message report the expected and actual states the right way round.
        assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid).getExecutionState());
        assertThat(taskSlotTable.getTask(eid).getFailureCause(), instanceOf(PartitionNotFoundException.class));
    }
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Configuration(org.apache.flink.configuration.Configuration) CompletableFuture(java.util.concurrent.CompletableFuture) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Aggregations

IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)66 Test (org.junit.Test)41 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)18 ConsumedPartitionGroup (org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup)14 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)13 JobID (org.apache.flink.api.common.JobID)12 ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID)12 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 ArrayList (java.util.ArrayList)10 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)10 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)10 InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor)9 ResultPartitionDeploymentDescriptor (org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor)9 CompletableFuture (java.util.concurrent.CompletableFuture)8 Configuration (org.apache.flink.configuration.Configuration)8 ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor)8 IOException (java.io.IOException)7 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)7 Collection (java.util.Collection)6 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6