Search in sources :

Example 6 with NettyShuffleDescriptor

use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.

the class SingleInputGateTest method createInputGateWithLocalChannels.

/**
 * Creates {@code numberOfGates} input gates through the given shuffle environment and returns
 * them indexed by their {@link InputGateID}.
 *
 * <p>All gates are described with the same array of {@code numberOfLocalChannels} shuffle
 * descriptors and are registered for one freshly created consumer {@link ExecutionAttemptID}.
 *
 * @param network shuffle environment that creates the gates
 * @param numberOfGates number of input gates to create
 * @param numberOfLocalChannels number of shuffle descriptors handed to every gate
 * @return the created gates, keyed by (intermediate data set id, consumer attempt id)
 * @throws IOException declared by the signature; presumably surfaced by gate creation
 */
private static Map<InputGateID, SingleInputGate> createInputGateWithLocalChannels(NettyShuffleEnvironment network, int numberOfGates, @SuppressWarnings("SameParameterValue") int numberOfLocalChannels) throws IOException {
    // One descriptor per channel; the very same array is shared by all gate descriptors below.
    final ShuffleDescriptor[] sharedChannels = new NettyShuffleDescriptor[numberOfLocalChannels];
    int channel = 0;
    while (channel < numberOfLocalChannels) {
        sharedChannels[channel] = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
        channel++;
    }

    // One deployment descriptor (and one data set id) per gate.
    final IntermediateDataSetID[] dataSetIds = new IntermediateDataSetID[numberOfGates];
    final InputGateDeploymentDescriptor[] gateDescriptors = new InputGateDeploymentDescriptor[numberOfGates];
    int gate = 0;
    while (gate < numberOfGates) {
        final IntermediateDataSetID dataSetId = new IntermediateDataSetID();
        dataSetIds[gate] = dataSetId;
        gateDescriptors[gate] = new InputGateDeploymentDescriptor(dataSetId, ResultPartitionType.PIPELINED, 0, sharedChannels);
        gate++;
    }

    final ExecutionAttemptID consumerAttempt = new ExecutionAttemptID();
    final SingleInputGate[] createdGates =
            network.createInputGates(
                            network.createShuffleIOOwnerContext("", consumerAttempt, new UnregisteredMetricsGroup()),
                            SingleInputGateBuilder.NO_OP_PRODUCER_CHECKER,
                            asList(gateDescriptors))
                    .toArray(new SingleInputGate[0]);

    // Gates are matched to their data set ids by position.
    final Map<InputGateID, SingleInputGate> gatesById = new HashMap<>();
    int position = 0;
    for (IntermediateDataSetID dataSetId : dataSetIds) {
        gatesById.put(new InputGateID(dataSetId, consumerAttempt), createdGates[position]);
        position++;
    }
    return gatesById;
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) HashMap(java.util.HashMap) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) UnknownShuffleDescriptor(org.apache.flink.runtime.shuffle.UnknownShuffleDescriptor) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)

Example 7 with NettyShuffleDescriptor

use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.

the class TaskExecutorSubmissionTest method testUpdateTaskInputPartitionsFailure.

/**
 * Verifies that the TaskManager fails a running task when a partition update fails.
 *
 * <p>The mocked shuffle environment is stubbed to throw an {@link IOException} for the partition
 * update; afterwards the task must be {@code FAILED} with an {@link IOException} as its cause.
 */
@Test
public void testUpdateTaskInputPartitionsFailure() throws Exception {
    final ExecutionAttemptID attemptId = new ExecutionAttemptID();
    final TaskDeploymentDescriptor deployment = createTestTaskDeploymentDescriptor("test task", attemptId, BlockingNoOpInvokable.class);
    final CompletableFuture<Void> runningSignal = new CompletableFuture<>();
    final CompletableFuture<Void> failedSignal = new CompletableFuture<>();
    final ShuffleEnvironment<?, ?> mockedShuffleEnv = mock(ShuffleEnvironment.class, Mockito.RETURNS_MOCKS);
    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setShuffleEnvironment(mockedShuffleEnv)
                    .setSlotSize(1)
                    .addTaskManagerActionListener(attemptId, ExecutionState.RUNNING, runningSignal)
                    .addTaskManagerActionListener(attemptId, ExecutionState.FAILED, failedSignal)
                    .build()) {
        final TaskExecutorGateway gateway = env.getTaskExecutorGateway();
        final TaskSlotTable<Task> slots = env.getTaskSlotTable();

        // Deploy the task and wait until it is reported as running.
        slots.allocateSlot(0, jobId, deployment.getAllocationId(), Time.seconds(60));
        gateway.submitTask(deployment, env.getJobMasterId(), timeout).get();
        runningSignal.get();

        // Build a partition update and make the shuffle environment reject it.
        final ResourceID taskExecutorLocation = env.getTaskExecutor().getResourceID();
        final NettyShuffleDescriptor descriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), taskExecutorLocation);
        final PartitionInfo rejectedUpdate = new PartitionInfo(new IntermediateDataSetID(), descriptor);
        doThrow(new IOException()).when(mockedShuffleEnv).updatePartitionInfo(attemptId, rejectedUpdate);

        // The update call itself completes; the failure shows up on the task.
        gateway.updatePartitions(attemptId, Collections.singletonList(rejectedUpdate), timeout).get();
        failedSignal.get();

        final Task failedTask = slots.getTask(deployment.getExecutionAttemptId());
        assertThat(failedTask.getExecutionState(), is(ExecutionState.FAILED));
        assertThat(failedTask.getFailureCause(), instanceOf(IOException.class));
    }
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) IOException(java.io.IOException) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionInfo(org.apache.flink.runtime.executiongraph.PartitionInfo) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 8 with NettyShuffleDescriptor

use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.

the class TaskExecutorSubmissionTest method testRemotePartitionNotFound.

/**
 * Verifies that a receiver task is failed with a {@link PartitionNotFoundException} when the
 * partition it requests from a remote location cannot be found.
 */
@Test
public void testRemotePartitionNotFound() throws Exception {
    try (NetUtils.Port reservedPort = NetUtils.getAvailablePort()) {
        final int dataPort = reservedPort.getPort();

        final Configuration configuration = new Configuration();
        configuration.setInteger(NettyShuffleEnvironmentOptions.DATA_PORT, dataPort);
        configuration.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
        configuration.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);

        // The partition is addressed as remote, although it points at this very TM's data port.
        final NettyShuffleDescriptor remoteDescriptor = NettyShuffleDescriptorBuilder.newBuilder().setDataPort(dataPort).buildRemote();
        final TaskDeploymentDescriptor receiver = createReceiver(remoteDescriptor);
        final ExecutionAttemptID receiverId = receiver.getExecutionAttemptId();

        final CompletableFuture<Void> runningSignal = new CompletableFuture<>();
        final CompletableFuture<Void> failedSignal = new CompletableFuture<>();
        try (TaskSubmissionTestEnvironment env =
                new TaskSubmissionTestEnvironment.Builder(jobId)
                        .setSlotSize(2)
                        .addTaskManagerActionListener(receiverId, ExecutionState.RUNNING, runningSignal)
                        .addTaskManagerActionListener(receiverId, ExecutionState.FAILED, failedSignal)
                        .setConfiguration(configuration)
                        .setLocalCommunication(false)
                        .useRealNonMockShuffleEnvironment()
                        .build()) {
            final TaskExecutorGateway gateway = env.getTaskExecutorGateway();
            final TaskSlotTable<Task> slots = env.getTaskSlotTable();

            slots.allocateSlot(0, jobId, receiver.getAllocationId(), Time.seconds(60));
            gateway.submitTask(receiver, env.getJobMasterId(), timeout).get();

            // The task first reaches RUNNING and is failed afterwards.
            runningSignal.get();
            failedSignal.get();

            assertThat(slots.getTask(receiverId).getFailureCause(), instanceOf(PartitionNotFoundException.class));
        }
    }
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Configuration(org.apache.flink.configuration.Configuration) CompletableFuture(java.util.concurrent.CompletableFuture) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) NetUtils(org.apache.flink.util.NetUtils) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) Test(org.junit.Test)

Example 9 with NettyShuffleDescriptor

use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.

the class TaskExecutorSubmissionTest method testLocalPartitionNotFound.

/**
 * Tests that repeated local {@link PartitionNotFoundException}s ultimately fail the receiver.
 *
 * <p>The shuffle descriptor is created for the same {@link ResourceID} that the test environment
 * is configured with, and no task producing the partition is submitted. The receiver is
 * expected to reach {@code RUNNING} and then {@code FAILED} with a
 * {@link PartitionNotFoundException} as failure cause.
 */
@Test
public void testLocalPartitionNotFound() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor shuffleDescriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
    TaskDeploymentDescriptor tdd = createReceiver(shuffleDescriptor);
    ExecutionAttemptID eid = tdd.getExecutionAttemptId();
    Configuration config = new Configuration();
    config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
    config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
    final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> taskFailedFuture = new CompletableFuture<>();
    try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId).setResourceID(producerLocation).setSlotSize(1).addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture).addTaskManagerActionListener(eid, ExecutionState.FAILED, taskFailedFuture).setConfiguration(config).useRealNonMockShuffleEnvironment().build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
        taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd, env.getJobMasterId(), timeout).get();
        taskRunningFuture.get();
        taskFailedFuture.get();
        // JUnit's signature is assertSame(expected, actual); keeping that order makes a failure
        // message report the expected and the observed state the right way round.
        assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid).getExecutionState());
        assertThat(taskSlotTable.getTask(eid).getFailureCause(), instanceOf(PartitionNotFoundException.class));
    }
}
Also used : NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Configuration(org.apache.flink.configuration.Configuration) CompletableFuture(java.util.concurrent.CompletableFuture) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 10 with NettyShuffleDescriptor

use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.

the class TaskExecutorSubmissionTest method testCancellingDependentAndStateUpdateFails.

/**
 * Creates two tasks, a sender and a receiver. The job master gateway rejects the sender's
 * {@code RUNNING} state update, so the sender is expected to fail. The receiver keeps running
 * until the test cancels it.
 */
@Test
public void testCancellingDependentAndStateUpdateFails() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();
    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();
    final JobMasterId jobMasterId = JobMasterId.generate();
    // Gateway that fails only the RUNNING state update of the sender (eid1) and acknowledges
    // every other update.
    TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder().setFencingTokenSupplier(() -> jobMasterId).setUpdateTaskExecutionStateFunction(taskExecutionState -> {
        if (taskExecutionState != null && taskExecutionState.getID().equals(eid1) && taskExecutionState.getExecutionState() == ExecutionState.RUNNING) {
            // The message names eid1, the attempt whose update is rejected here.
            return FutureUtils.completedExceptionally(new ExecutionGraphException("The execution attempt " + eid1 + " was not found."));
        } else {
            return CompletableFuture.completedFuture(Acknowledge.get());
        }
    }).build();
    try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId).setResourceID(producerLocation).setSlotSize(2).addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture).addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture).addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture).addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture).setJobMasterId(jobMasterId).setJobMasterGateway(testingJobMasterGateway).useRealNonMockShuffleEnvironment().build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();
        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();
        task1FailedFuture.get();
        // assertSame takes (expected, actual); this order keeps failure messages accurate.
        assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid1).getExecutionState());
        tmGateway.cancelTask(eid2, timeout);
        task2CanceledFuture.get();
        assertSame(ExecutionState.CANCELED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) NettyShuffleDescriptorBuilder(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) URL(java.net.URL) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) PartitionDescriptorBuilder(org.apache.flink.runtime.shuffle.PartitionDescriptorBuilder) ExceptionUtils(org.apache.flink.util.ExceptionUtils) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) Assert.assertThat(org.junit.Assert.assertThat) Mockito.doThrow(org.mockito.Mockito.doThrow) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) NettyShuffleDescriptorBuilder.createRemoteWithIdAndLocation(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder.createRemoteWithIdAndLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExecutionGraphException(org.apache.flink.runtime.executiongraph.ExecutionGraphException) Time(org.apache.flink.api.common.time.Time) 
AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) Environment(org.apache.flink.runtime.execution.Environment) Mockito.mock(org.mockito.Mockito.mock) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Assert.assertSame(org.junit.Assert.assertSame) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) TestingAbstractInvokables(org.apache.flink.runtime.jobmaster.TestingAbstractInvokables) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) PartitionInfo(org.apache.flink.runtime.executiongraph.PartitionInfo) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito(org.mockito.Mockito) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) Task(org.apache.flink.runtime.taskmanager.Task) Rule(org.junit.Rule) 
PartitionDescriptor(org.apache.flink.runtime.shuffle.PartitionDescriptor) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) Collections(java.util.Collections) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionGraphException(org.apache.flink.runtime.executiongraph.ExecutionGraphException) NettyShuffleDescriptorBuilder(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder) PartitionDescriptorBuilder(org.apache.flink.runtime.shuffle.PartitionDescriptorBuilder) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Aggregations

NettyShuffleDescriptor (org.apache.flink.runtime.shuffle.NettyShuffleDescriptor)10 Test (org.junit.Test)8 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)7 ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor)7 CompletableFuture (java.util.concurrent.CompletableFuture)6 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)6 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)6 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)6 IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)6 Configuration (org.apache.flink.configuration.Configuration)5 PartitionNotFoundException (org.apache.flink.runtime.io.network.partition.PartitionNotFoundException)5 IOException (java.io.IOException)4 JobID (org.apache.flink.api.common.JobID)4 InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor)4 ExecutionState (org.apache.flink.runtime.execution.ExecutionState)4 PartitionInfo (org.apache.flink.runtime.executiongraph.PartitionInfo)4 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)4 Task (org.apache.flink.runtime.taskmanager.Task)4 URL (java.net.URL)3 Collection (java.util.Collection)3