Use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.
The class SingleInputGateTest, method createInputGateWithLocalChannels.
private static Map<InputGateID, SingleInputGate> createInputGateWithLocalChannels(
        NettyShuffleEnvironment network,
        int numberOfGates,
        @SuppressWarnings("SameParameterValue") int numberOfLocalChannels)
        throws IOException {
ShuffleDescriptor[] channelDescs = new NettyShuffleDescriptor[numberOfLocalChannels];
for (int i = 0; i < numberOfLocalChannels; i++) {
channelDescs[i] = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
}
InputGateDeploymentDescriptor[] gateDescs = new InputGateDeploymentDescriptor[numberOfGates];
IntermediateDataSetID[] ids = new IntermediateDataSetID[numberOfGates];
for (int i = 0; i < numberOfGates; i++) {
ids[i] = new IntermediateDataSetID();
gateDescs[i] = new InputGateDeploymentDescriptor(ids[i], ResultPartitionType.PIPELINED, 0, channelDescs);
}
ExecutionAttemptID consumerID = new ExecutionAttemptID();
SingleInputGate[] gates =
        network.createInputGates(
                        network.createShuffleIOOwnerContext("", consumerID, new UnregisteredMetricsGroup()),
                        SingleInputGateBuilder.NO_OP_PRODUCER_CHECKER,
                        asList(gateDescs))
                .toArray(new SingleInputGate[] {});
Map<InputGateID, SingleInputGate> inputGatesById = new HashMap<>();
for (int i = 0; i < numberOfGates; i++) {
inputGatesById.put(new InputGateID(ids[i], consumerID), gates[i]);
}
return inputGatesById;
}
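A minimal usage sketch for the helper above, assuming the NettyShuffleEnvironmentBuilder test utility is available to construct the shuffle environment; the gate and channel counts are illustrative, not taken from the snippet:
// Hypothetical usage: build a test shuffle environment and ask the helper for
// two gates, each backed by eight channel descriptors, keyed by InputGateID.
NettyShuffleEnvironment network = new NettyShuffleEnvironmentBuilder().build();
Map<InputGateID, SingleInputGate> gatesById = createInputGateWithLocalChannels(network, 2, 8);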
Use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.
The class TaskExecutorSubmissionTest, method testUpdateTaskInputPartitionsFailure.
/**
* Tests that the TaskManager fails the task if the partition update fails.
*/
@Test
public void testUpdateTaskInputPartitionsFailure() throws Exception {
final ExecutionAttemptID eid = new ExecutionAttemptID();
final TaskDeploymentDescriptor tdd = createTestTaskDeploymentDescriptor("test task", eid, BlockingNoOpInvokable.class);
final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> taskFailedFuture = new CompletableFuture<>();
final ShuffleEnvironment<?, ?> shuffleEnvironment = mock(ShuffleEnvironment.class, Mockito.RETURNS_MOCKS);
try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
                .setShuffleEnvironment(shuffleEnvironment)
                .setSlotSize(1)
                .addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture)
                .addTaskManagerActionListener(eid, ExecutionState.FAILED, taskFailedFuture)
                .build()) {
TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd, env.getJobMasterId(), timeout).get();
taskRunningFuture.get();
final ResourceID producerLocation = env.getTaskExecutor().getResourceID();
NettyShuffleDescriptor shuffleDescriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
final PartitionInfo partitionUpdate = new PartitionInfo(new IntermediateDataSetID(), shuffleDescriptor);
doThrow(new IOException()).when(shuffleEnvironment).updatePartitionInfo(eid, partitionUpdate);
final CompletableFuture<Acknowledge> updateFuture = tmGateway.updatePartitions(eid, Collections.singletonList(partitionUpdate), timeout);
updateFuture.get();
taskFailedFuture.get();
Task task = taskSlotTable.getTask(tdd.getExecutionAttemptId());
assertThat(task.getExecutionState(), is(ExecutionState.FAILED));
assertThat(task.getFailureCause(), instanceOf(IOException.class));
}
}
Use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.
The class TaskExecutorSubmissionTest, method testRemotePartitionNotFound.
/**
* Tests that repeated remote {@link PartitionNotFoundException}s ultimately fail the receiver.
*/
@Test
public void testRemotePartitionNotFound() throws Exception {
try (NetUtils.Port port = NetUtils.getAvailablePort()) {
final int dataPort = port.getPort();
Configuration config = new Configuration();
config.setInteger(NettyShuffleEnvironmentOptions.DATA_PORT, dataPort);
config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
// Remote location (on the same TM though) for the partition
NettyShuffleDescriptor sdd = NettyShuffleDescriptorBuilder.newBuilder().setDataPort(dataPort).buildRemote();
TaskDeploymentDescriptor tdd = createReceiver(sdd);
ExecutionAttemptID eid = tdd.getExecutionAttemptId();
final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> taskFailedFuture = new CompletableFuture<>();
try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
                .setSlotSize(2)
                .addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture)
                .addTaskManagerActionListener(eid, ExecutionState.FAILED, taskFailedFuture)
                .setConfiguration(config)
                .setLocalCommunication(false)
                .useRealNonMockShuffleEnvironment()
                .build()) {
TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd, env.getJobMasterId(), timeout).get();
taskRunningFuture.get();
taskFailedFuture.get();
assertThat(taskSlotTable.getTask(eid).getFailureCause(), instanceOf(PartitionNotFoundException.class));
}
}
}
Use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.
The class TaskExecutorSubmissionTest, method testLocalPartitionNotFound.
/**
* Tests that repeated local {@link PartitionNotFoundException}s ultimately fail the receiver.
*/
@Test
public void testLocalPartitionNotFound() throws Exception {
ResourceID producerLocation = ResourceID.generate();
NettyShuffleDescriptor shuffleDescriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
TaskDeploymentDescriptor tdd = createReceiver(shuffleDescriptor);
ExecutionAttemptID eid = tdd.getExecutionAttemptId();
Configuration config = new Configuration();
config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
config.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> taskFailedFuture = new CompletableFuture<>();
try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
                .setResourceID(producerLocation)
                .setSlotSize(1)
                .addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture)
                .addTaskManagerActionListener(eid, ExecutionState.FAILED, taskFailedFuture)
                .setConfiguration(config)
                .useRealNonMockShuffleEnvironment()
                .build()) {
TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd, env.getJobMasterId(), timeout).get();
taskRunningFuture.get();
taskFailedFuture.get();
assertSame(taskSlotTable.getTask(eid).getExecutionState(), ExecutionState.FAILED);
assertThat(taskSlotTable.getTask(eid).getFailureCause(), instanceOf(PartitionNotFoundException.class));
}
}
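Note on naming: the descriptor above is still produced by createRemoteWithIdAndLocation, but because the test environment is built with setResourceID(producerLocation), the consuming TaskExecutor's ResourceID matches the descriptor's producer location, so the shuffle environment treats the channel as local and the failure surfaces as a local PartitionNotFoundException. A rough sketch of that distinction, reusing only builder calls that appear in these snippets (variable names are illustrative):
// Treated as a local input channel when the consuming TaskExecutor's ResourceID
// equals producerLocation (as arranged via setResourceID(producerLocation) above).
NettyShuffleDescriptor localToThisTaskExecutor =
        createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
// Treated as a remote input channel: the producer location is an unrelated ResourceID
// and the connection goes through the configured data port (as in testRemotePartitionNotFound).
NettyShuffleDescriptor remoteToThisTaskExecutor =
        NettyShuffleDescriptorBuilder.newBuilder().setDataPort(dataPort).buildRemote();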
Use of org.apache.flink.runtime.shuffle.NettyShuffleDescriptor in project flink by apache.
The class TaskExecutorSubmissionTest, method testCancellingDependentAndStateUpdateFails.
/**
* This test creates two tasks. The sender sends data but fails to send its state update back
* to the job manager; the second task blocks until it is canceled.
*/
@Test
public void testCancellingDependentAndStateUpdateFails() throws Exception {
ResourceID producerLocation = ResourceID.generate();
NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
TaskDeploymentDescriptor tdd1 = createSender(sdd);
TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();
final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();
final JobMasterId jobMasterId = JobMasterId.generate();
TestingJobMasterGateway testingJobMasterGateway =
        new TestingJobMasterGatewayBuilder()
                .setFencingTokenSupplier(() -> jobMasterId)
                .setUpdateTaskExecutionStateFunction(
                        taskExecutionState -> {
                            if (taskExecutionState != null
                                    && taskExecutionState.getID().equals(eid1)
                                    && taskExecutionState.getExecutionState() == ExecutionState.RUNNING) {
                                return FutureUtils.completedExceptionally(
                                        new ExecutionGraphException(
                                                "The execution attempt " + eid2 + " was not found."));
                            } else {
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            }
                        })
                .build();
try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
                .setResourceID(producerLocation)
                .setSlotSize(2)
                .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
                .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
                .addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
                .addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
                .setJobMasterId(jobMasterId)
                .setJobMasterGateway(testingJobMasterGateway)
                .useRealNonMockShuffleEnvironment()
                .build()) {
TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
task1RunningFuture.get();
taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
task2RunningFuture.get();
task1FailedFuture.get();
assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FAILED);
tmGateway.cancelTask(eid2, timeout);
task2CanceledFuture.get();
assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.CANCELED);
}
}
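The createSender and createReceiver helpers used in these tests are not shown here. As a hypothetical sketch of the receiver-side wiring (not the actual helper from TaskExecutorSubmissionTest), the shuffle descriptor would be wrapped in a single-channel InputGateDeploymentDescriptor, mirroring the constructor call in the first snippet, before being handed to the test's TaskDeploymentDescriptor factory:
// Hypothetical receiver-side wiring: a single-channel input gate built around the
// shuffle descriptor; a helper like createReceiver would pass this gate, together
// with an invokable class, to the factory that assembles the TaskDeploymentDescriptor.
InputGateDeploymentDescriptor receiverGate =
        new InputGateDeploymentDescriptor(
                new IntermediateDataSetID(),
                ResultPartitionType.PIPELINED,
                0, // consumed subpartition index
                new ShuffleDescriptor[] {shuffleDescriptor});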