use of org.apache.flink.runtime.io.network.ConnectionID in project flink by apache.
the class InputChannelDeploymentDescriptorTest method testMixedLocalRemoteUnknownDeployment.
/**
 * Checks the descriptors built by {@code InputChannelDeploymentDescriptor.fromEdges}
 * for three producers at once: one on the consumer's resource (local), one on a
 * different resource (remote), and one without an assigned resource (unknown).
 * Lazy deployment is enabled and the check is repeated for every
 * {@link ExecutionState} of the producers.
 */
@Test
public void testMixedLocalRemoteUnknownDeployment() throws Exception {
    final boolean lazyDeploymentAllowed = true;

    final ResourceID consumerTaskManagerId = ResourceID.generate();
    final ExecutionVertex consumerVertex = mock(ExecutionVertex.class);
    final SimpleSlot slotOfConsumer = mockSlot(consumerTaskManagerId);

    // Local and remote channels are only expected for some producer states;
    // every other state must yield an unknown location.
    for (ExecutionState producerState : ExecutionState.values()) {
        // Producer sharing the consumer's resource ID => local partition
        final ExecutionVertex producerOnSameTm = mockExecutionVertex(producerState, consumerTaskManagerId);
        final IntermediateResultPartition partitionOnSameTm = mockPartition(producerOnSameTm);
        final ResultPartitionID expectedLocalId = new ResultPartitionID(
                partitionOnSameTm.getPartitionId(),
                producerOnSameTm.getCurrentExecutionAttempt().getAttemptId());
        final ExecutionEdge edgeToLocal = new ExecutionEdge(partitionOnSameTm, consumerVertex, 0);

        // Producer with a freshly generated resource ID => remote partition
        final ExecutionVertex producerOnOtherTm = mockExecutionVertex(producerState, ResourceID.generate());
        final IntermediateResultPartition partitionOnOtherTm = mockPartition(producerOnOtherTm);
        final ResultPartitionID expectedRemoteId = new ResultPartitionID(
                partitionOnOtherTm.getPartitionId(),
                producerOnOtherTm.getCurrentExecutionAttempt().getAttemptId());
        final ConnectionID expectedConnection = new ConnectionID(
                producerOnOtherTm.getCurrentAssignedResource().getTaskManagerLocation(), 0);
        final ExecutionEdge edgeToRemote = new ExecutionEdge(partitionOnOtherTm, consumerVertex, 1);

        // Producer without any assigned resource => unknown partition
        final ExecutionVertex unassignedProducer = mockExecutionVertex(producerState, null);
        final IntermediateResultPartition unassignedPartition = mockPartition(unassignedProducer);
        final ResultPartitionID expectedUnknownId = new ResultPartitionID(
                unassignedPartition.getPartitionId(),
                unassignedProducer.getCurrentExecutionAttempt().getAttemptId());
        final ExecutionEdge edgeToUnknown = new ExecutionEdge(unassignedPartition, consumerVertex, 2);

        final ExecutionEdge[] inputEdges = new ExecutionEdge[] { edgeToLocal, edgeToRemote, edgeToUnknown };
        final InputChannelDeploymentDescriptor[] descriptors =
                InputChannelDeploymentDescriptor.fromEdges(inputEdges, slotOfConsumer, lazyDeploymentAllowed);

        assertEquals(3, descriptors.length);

        switch (producerState) {
            case RUNNING:
            case FINISHED:
            case SCHEDULED:
            case DEPLOYING:
                // Allowed states: concrete local and remote channels are created
                assertEquals(expectedLocalId, descriptors[0].getConsumedPartitionId());
                assertTrue(descriptors[0].getConsumedPartitionLocation().isLocal());
                assertNull(descriptors[0].getConsumedPartitionLocation().getConnectionId());

                assertEquals(expectedRemoteId, descriptors[1].getConsumedPartitionId());
                assertTrue(descriptors[1].getConsumedPartitionLocation().isRemote());
                assertEquals(expectedConnection, descriptors[1].getConsumedPartitionLocation().getConnectionId());
                break;

            default:
                // Any other state: unknown location (lazy deployment allowed)
                assertEquals(expectedLocalId, descriptors[0].getConsumedPartitionId());
                assertTrue(descriptors[0].getConsumedPartitionLocation().isUnknown());
                assertNull(descriptors[0].getConsumedPartitionLocation().getConnectionId());

                assertEquals(expectedRemoteId, descriptors[1].getConsumedPartitionId());
                assertTrue(descriptors[1].getConsumedPartitionLocation().isUnknown());
                assertNull(descriptors[1].getConsumedPartitionLocation().getConnectionId());
                break;
        }

        // The producer without a resource is unknown regardless of its state
        assertEquals(expectedUnknownId, descriptors[2].getConsumedPartitionId());
        assertTrue(descriptors[2].getConsumedPartitionLocation().isUnknown());
        assertNull(descriptors[2].getConsumedPartitionLocation().getConnectionId());
    }
}
use of org.apache.flink.runtime.io.network.ConnectionID in project flink by apache.
the class PartitionRequestClientFactoryTest method testResourceReleaseAfterInterruptedConnect.
/**
 * Tests that interrupting a thread while it creates a partition request client
 * leaves no active clients in the factory, and that the resulting error reaches
 * the uncaught exception handler installed on that thread instead of being
 * swallowed.
 */
@Test
public void testResourceReleaseAfterInterruptedConnect() throws Exception {
    // Latch to synchronize on the connect call.
    final CountDownLatch syncOnConnect = new CountDownLatch(1);

    final Tuple2<NettyServer, NettyClient> netty = createNettyServerAndClient(new NettyProtocol() {
        @Override
        public ChannelHandler[] getServerChannelHandlers() {
            return new ChannelHandler[0];
        }

        @Override
        public ChannelHandler[] getClientChannelHandlers() {
            return new ChannelHandler[] { new CountDownLatchOnConnectHandler(syncOnConnect) };
        }
    });

    final NettyServer server = netty.f0;
    final NettyClient client = netty.f1;

    final UncaughtTestExceptionHandler exceptionHandler = new UncaughtTestExceptionHandler();

    try {
        final PartitionRequestClientFactory factory = new PartitionRequestClientFactory(client);

        final Thread connect = new Thread(new Runnable() {
            @Override
            public void run() {
                ConnectionID serverAddress = null;
                try {
                    serverAddress = createServerConnectionID(0);
                    // This triggers a connect
                    factory.createPartitionRequestClient(serverAddress);
                } catch (Throwable t) {
                    if (serverAddress != null) {
                        factory.closeOpenChannelConnections(serverAddress);
                        // Report through the handler installed on THIS thread (set via
                        // setUncaughtExceptionHandler below). The JVM-wide default handler
                        // may be null, or may be a different handler than the one the test
                        // asserts on, so it must not be used here.
                        final Thread self = Thread.currentThread();
                        self.getUncaughtExceptionHandler().uncaughtException(self, t);
                    } else {
                        t.printStackTrace();
                        fail("Could not create RemoteAddress for server.");
                    }
                }
            }
        });

        connect.setUncaughtExceptionHandler(exceptionHandler);
        connect.start();

        // Wait on the connect
        syncOnConnect.await();

        connect.interrupt();
        connect.join();

        // Make sure that after a failed connect all resources are cleared.
        assertEquals(0, factory.getNumberOfActiveClients());

        // Make sure that the interrupt exception is not swallowed
        assertTrue(exceptionHandler.getErrors().size() > 0);
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (client != null) {
            client.shutdown();
        }
    }
}
use of org.apache.flink.runtime.io.network.ConnectionID in project flink by apache.
the class TaskManagerTest method testRemotePartitionNotFound.
/**
* Tests that repeated remote {@link PartitionNotFoundException}s ultimately fail the receiver.
*
* <p>A receiver task is submitted with a single input channel that points at a
* remote location on the TaskManager's own data port, for a freshly created
* partition ID. The test then expects the task to report {@code FAILED} with a
* {@link PartitionNotFoundException} as its error.
*/
@Test
public void testRemotePartitionNotFound() throws Exception {
new JavaTestKit(system) {
{
// Declared outside the try block so the finally clause can stop them.
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final IntermediateDataSetID resultId = new IntermediateDataSetID();
// Create the JM
ActorRef jm = system.actorOf(Props.create(new SimplePartitionStateLookupJobManagerCreator(leaderSessionID, getTestActor())));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
// Bind the TaskManager's data port to a known free port so the remote
// location below can point back at it.
final int dataPort = NetUtils.getAvailablePort();
Configuration config = new Configuration();
config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, dataPort);
// Request backoff bounds (initial 100, max 200; presumably milliseconds — confirm).
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
taskManager = TestingUtils.createTaskManager(system, jobManager, config, false, true);
// ---------------------------------------------------------------------------------
// Final copy: taskManager is reassigned above, so the anonymous Within class
// below cannot capture it directly.
final ActorGateway tm = taskManager;
final JobID jid = new JobID();
final JobVertexID vid = new JobVertexID();
final ExecutionAttemptID eid = new ExecutionAttemptID();
// Fresh partition ID; nothing in this test produces a partition under it.
final ResultPartitionID partitionId = new ResultPartitionID();
// Remote location (on the same TM though) for the partition
final ResultPartitionLocation loc = ResultPartitionLocation.createRemote(new ConnectionID(new InetSocketAddress("localhost", dataPort), 0));
final InputChannelDeploymentDescriptor[] icdd = new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(partitionId, loc) };
final InputGateDeploymentDescriptor igdd = new InputGateDeploymentDescriptor(resultId, ResultPartitionType.PIPELINED, 0, icdd);
// Receiver task with no produced partitions and the single input gate built above.
final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, new SerializedValue<>(new ExecutionConfig()), "Receiver", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.AgnosticReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(igdd), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
// NOTE(review): d is defined outside this method; presumably the time limit for the expectations — confirm.
new Within(d) {
@Override
protected void run() {
// Submit the task
tm.tell(new SubmitTask(tdd), testActorGateway);
expectMsgClass(Acknowledge.get().getClass());
// Wait to be notified about the final execution state by the mock JM
TaskExecutionState msg = expectMsgClass(TaskExecutionState.class);
// The task should fail after repeated requests
assertEquals(ExecutionState.FAILED, msg.getExecutionState());
Throwable t = msg.getError(ClassLoader.getSystemClassLoader());
// Compare exact classes so the failure message can show what was thrown instead.
assertEquals("Thrown exception was not a PartitionNotFoundException: " + t.getMessage(), PartitionNotFoundException.class, t.getClass());
}
};
} catch (Exception e) {
// Any exception during setup or submission fails the test with its message.
e.printStackTrace();
fail(e.getMessage());
} finally {
// Stop both actors whether the test passed or failed.
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
use of org.apache.flink.runtime.io.network.ConnectionID in project flink by apache.
the class PartitionRequestClientFactoryTest method testExceptionsAreNotCached.
/**
 * Tests that a failed attempt to create a partition request client is not
 * remembered by the factory: the first request must fail with a
 * {@link RemoteTransportException}, and a second request for the same
 * connection must then complete without throwing.
 */
@Test
public void testExceptionsAreNotCached() throws Exception {
    NettyTestUtil.NettyServerAndClient serverAndClient = createNettyServerAndClient();

    try {
        // NOTE(review): UnstableNettyClient(…, 1) presumably fails exactly the first connect — confirm.
        final PartitionRequestClientFactory clientFactory = new PartitionRequestClientFactory(
                new UnstableNettyClient(serverAndClient.client(), 1),
                connectionReuseEnabled);

        final ConnectionID target = serverAndClient.getConnectionID(0);

        boolean firstRequestFailed = false;
        try {
            clientFactory.createPartitionRequestClient(target);
        } catch (RemoteTransportException expected) {
            // expected
            firstRequestFailed = true;
        }
        if (!firstRequestFailed) {
            fail("Expected the first request to fail.");
        }

        // The earlier failure must not be replayed for the same connection ID.
        clientFactory.createPartitionRequestClient(target);
    } finally {
        serverAndClient.client().shutdown();
        serverAndClient.server().shutdown();
    }
}
use of org.apache.flink.runtime.io.network.ConnectionID in project flink by apache.
the class InputChannelDeploymentDescriptor method fromEdges.
// ------------------------------------------------------------------------
/**
 * Creates an input channel deployment descriptor for each partition.
 *
 * <p>For every edge the location of the consumed partition is resolved as follows:
 * <ul>
 *   <li>If the partition is consumable, the producer has an assigned slot, and the
 *       producer is RUNNING, FINISHED, SCHEDULED or DEPLOYING, the location is local
 *       when producer and consumer share a TaskManager and remote otherwise.</li>
 *   <li>Otherwise, with lazy deployment allowed, the location is unknown.</li>
 *   <li>Otherwise an {@link ExecutionGraphException} is thrown.</li>
 * </ul>
 *
 * @param edges the edges whose source partitions are consumed
 * @param consumerSlot the slot of the consuming task
 * @param allowLazyDeployment whether an unresolvable location may be reported as unknown
 * @return one descriptor per edge, in the order of {@code edges}
 * @throws ExecutionGraphException if a location cannot be resolved and lazy deployment is not allowed
 */
public static InputChannelDeploymentDescriptor[] fromEdges(ExecutionEdge[] edges, SimpleSlot consumerSlot, boolean allowLazyDeployment) throws ExecutionGraphException {
    final ResourceID consumerTmId = consumerSlot.getTaskManagerID();
    final InputChannelDeploymentDescriptor[] descriptors = new InputChannelDeploymentDescriptor[edges.length];

    // Each edge is connected to a different result partition
    int slotIndex = 0;
    for (ExecutionEdge edge : edges) {
        final IntermediateResultPartition partition = edge.getSource();
        final Execution producerAttempt = partition.getProducer().getCurrentExecutionAttempt();
        final ExecutionState stateOfProducer = producerAttempt.getState();
        final SimpleSlot slotOfProducer = producerAttempt.getAssignedResource();

        final boolean producerStateAccepted =
                stateOfProducer == ExecutionState.RUNNING
                        || stateOfProducer == ExecutionState.FINISHED
                        || stateOfProducer == ExecutionState.SCHEDULED
                        || stateOfProducer == ExecutionState.DEPLOYING;
        final boolean locationResolvable =
                partition.isConsumable() && slotOfProducer != null && producerStateAccepted;

        final ResultPartitionLocation location;
        if (!locationResolvable) {
            if (allowLazyDeployment) {
                // The producing task might not have registered the partition yet
                location = ResultPartitionLocation.createUnknown();
            } else if (stateOfProducer == ExecutionState.CANCELING
                    || stateOfProducer == ExecutionState.CANCELED
                    || stateOfProducer == ExecutionState.FAILED) {
                throw new ExecutionGraphException(
                        "Trying to schedule a task whose inputs were canceled or failed. " + "The producer is in state " + stateOfProducer + ".");
            } else {
                throw new ExecutionGraphException(String.format(
                        "Trying to eagerly schedule a task whose inputs " + "are not ready (partition consumable? %s, producer state: %s, producer slot: %s).",
                        partition.isConsumable(), stateOfProducer, slotOfProducer));
            }
        } else {
            final TaskManagerLocation producerTmLocation = slotOfProducer.getTaskManagerLocation();
            final ResourceID producerTmId = producerTmLocation.getResourceID();

            if (producerTmId.equals(consumerTmId)) {
                // Producer and consumer run on the same TaskManager => local
                location = ResultPartitionLocation.createLocal();
            } else {
                // Different TaskManagers => remote
                location = ResultPartitionLocation.createRemote(
                        new ConnectionID(producerTmLocation, partition.getIntermediateResult().getConnectionIndex()));
            }
        }

        final ResultPartitionID partitionId =
                new ResultPartitionID(partition.getPartitionId(), producerAttempt.getAttemptId());
        descriptors[slotIndex++] = new InputChannelDeploymentDescriptor(partitionId, location);
    }

    return descriptors;
}
Aggregations