use of org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor in project flink by apache.
the class Execution method scheduleOrUpdateConsumers.
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
final int numConsumers = allConsumers.size();
if (numConsumers > 1) {
fail(new IllegalStateException("Currently, only a single consumer group per partition is supported."));
} else if (numConsumers == 0) {
return;
}
for (ExecutionEdge edge : allConsumers.get(0)) {
final ExecutionVertex consumerVertex = edge.getTarget();
final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
final ExecutionState consumerState = consumer.getState();
final IntermediateResultPartition partition = edge.getSource();
// ----------------------------------------------------------------
if (consumerState == CREATED) {
final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();
consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));
// When deploying a consuming task, its task deployment descriptor will contain all
// deployment information available at the respective time. It is possible that some
// of the partitions to be consumed have not been created yet. These are updated
// runtime via the update messages.
//
// TODO The current approach may send many update messages even though the consuming
// task has already been deployed with all necessary information. We have to check
// whether this is a problem and fix it, if it is.
FlinkFuture.supplyAsync(new Callable<Void>() {
@Override
public Void call() throws Exception {
try {
consumerVertex.scheduleForExecution(consumerVertex.getExecutionGraph().getSlotProvider(), consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
} catch (Throwable t) {
consumerVertex.fail(new IllegalStateException("Could not schedule consumer " + "vertex " + consumerVertex, t));
}
return null;
}
}, executor);
// double check to resolve race conditions
if (consumerVertex.getExecutionState() == RUNNING) {
consumerVertex.sendPartitionInfos();
}
} else // ----------------------------------------------------------------
// Consumer is running => send update message now
// ----------------------------------------------------------------
{
if (consumerState == RUNNING) {
final SimpleSlot consumerSlot = consumer.getAssignedResource();
if (consumerSlot == null) {
// The consumer has been reset concurrently
continue;
}
final TaskManagerLocation partitionTaskManagerLocation = partition.getProducer().getCurrentAssignedResource().getTaskManagerLocation();
final ResourceID partitionTaskManager = partitionTaskManagerLocation.getResourceID();
final ResourceID consumerTaskManager = consumerSlot.getTaskManagerID();
final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), attemptId);
final ResultPartitionLocation partitionLocation;
if (consumerTaskManager.equals(partitionTaskManager)) {
// Consuming task is deployed to the same instance as the partition => local
partitionLocation = ResultPartitionLocation.createLocal();
} else {
// Different instances => remote
final ConnectionID connectionId = new ConnectionID(partitionTaskManagerLocation, partition.getIntermediateResult().getConnectionIndex());
partitionLocation = ResultPartitionLocation.createRemote(connectionId);
}
final InputChannelDeploymentDescriptor descriptor = new InputChannelDeploymentDescriptor(partitionId, partitionLocation);
consumer.sendUpdatePartitionInfoRpcCall(Collections.singleton(new PartitionInfo(partition.getIntermediateResult().getId(), descriptor)));
} else // ----------------------------------------------------------------
if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();
consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));
// double check to resolve race conditions
if (consumerVertex.getExecutionState() == RUNNING) {
consumerVertex.sendPartitionInfos();
}
}
}
}
}
use of org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor in project flink by apache.
the class SingleInputGate method create.
// ------------------------------------------------------------------------
/**
* Creates an input gate and all of its input channels.
*/
public static SingleInputGate create(String owningTaskName, JobID jobId, ExecutionAttemptID executionId, InputGateDeploymentDescriptor igdd, NetworkEnvironment networkEnvironment, TaskActions taskActions, TaskIOMetricGroup metrics) {
final IntermediateDataSetID consumedResultId = checkNotNull(igdd.getConsumedResultId());
final ResultPartitionType consumedPartitionType = checkNotNull(igdd.getConsumedPartitionType());
final int consumedSubpartitionIndex = igdd.getConsumedSubpartitionIndex();
checkArgument(consumedSubpartitionIndex >= 0);
final InputChannelDeploymentDescriptor[] icdd = checkNotNull(igdd.getInputChannelDeploymentDescriptors());
final SingleInputGate inputGate = new SingleInputGate(owningTaskName, jobId, consumedResultId, consumedPartitionType, consumedSubpartitionIndex, icdd.length, taskActions, metrics);
// Create the input channels. There is one input channel for each consumed partition.
final InputChannel[] inputChannels = new InputChannel[icdd.length];
int numLocalChannels = 0;
int numRemoteChannels = 0;
int numUnknownChannels = 0;
for (int i = 0; i < inputChannels.length; i++) {
final ResultPartitionID partitionId = icdd[i].getConsumedPartitionId();
final ResultPartitionLocation partitionLocation = icdd[i].getConsumedPartitionLocation();
if (partitionLocation.isLocal()) {
inputChannels[i] = new LocalInputChannel(inputGate, i, partitionId, networkEnvironment.getResultPartitionManager(), networkEnvironment.getTaskEventDispatcher(), networkEnvironment.getPartitionRequestInitialBackoff(), networkEnvironment.getPartitionRequestMaxBackoff(), metrics);
numLocalChannels++;
} else if (partitionLocation.isRemote()) {
inputChannels[i] = new RemoteInputChannel(inputGate, i, partitionId, partitionLocation.getConnectionId(), networkEnvironment.getConnectionManager(), networkEnvironment.getPartitionRequestInitialBackoff(), networkEnvironment.getPartitionRequestMaxBackoff(), metrics);
numRemoteChannels++;
} else if (partitionLocation.isUnknown()) {
inputChannels[i] = new UnknownInputChannel(inputGate, i, partitionId, networkEnvironment.getResultPartitionManager(), networkEnvironment.getTaskEventDispatcher(), networkEnvironment.getConnectionManager(), networkEnvironment.getPartitionRequestInitialBackoff(), networkEnvironment.getPartitionRequestMaxBackoff(), metrics);
numUnknownChannels++;
} else {
throw new IllegalStateException("Unexpected partition location.");
}
inputGate.setInputChannel(partitionId.getPartitionId(), inputChannels[i]);
}
LOG.debug("Created {} input channels (local: {}, remote: {}, unknown: {}).", inputChannels.length, numLocalChannels, numRemoteChannels, numUnknownChannels);
return inputGate;
}
use of org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor in project flink by apache.
the class TaskManagerTest method testUpdateTaskInputPartitionsFailure.
/**
* Tests that the TaskManager sends a proper exception back to the sender if the trigger stack
* trace message fails.
*/
@Test
public void testUpdateTaskInputPartitionsFailure() throws Exception {
ActorGateway jobManager = null;
ActorGateway taskManager = null;
try {
final ExecutionAttemptID executionAttemptId = new ExecutionAttemptID();
ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(new JobID(), "test job", new JobVertexID(), executionAttemptId, new SerializedValue<>(new ExecutionConfig()), "test task", 1, 0, 1, 0, new Configuration(), new Configuration(), BlockingNoOpInvokable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
Future<Object> submitResponse = taskManager.ask(new SubmitTask(tdd), timeout);
Await.result(submitResponse, timeout);
Future<Object> partitionUpdateResponse = taskManager.ask(new TaskMessages.UpdateTaskSinglePartitionInfo(executionAttemptId, new IntermediateDataSetID(), new InputChannelDeploymentDescriptor(new ResultPartitionID(), ResultPartitionLocation.createLocal())), timeout);
try {
Await.result(partitionUpdateResponse, timeout);
fail("The update task input partitions message should have failed.");
} catch (Exception e) {
// expected
}
} finally {
TestingUtils.stopActor(jobManager);
TestingUtils.stopActor(taskManager);
}
}
use of org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor in project flink by apache.
the class TaskManagerTest method testRunJobWithForwardChannel.
@Test
public void testRunJobWithForwardChannel() {
new JavaTestKit(system) {
{
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final JobID jid = new JobID();
JobVertexID vid1 = new JobVertexID();
JobVertexID vid2 = new JobVertexID();
final ExecutionAttemptID eid1 = new ExecutionAttemptID();
final ExecutionAttemptID eid2 = new ExecutionAttemptID();
ActorRef jm = system.actorOf(Props.create(new SimpleLookupJobManagerCreator(leaderSessionID)));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
final ActorGateway tm = taskManager;
IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
List<ResultPartitionDeploymentDescriptor> irpdd = new ArrayList<ResultPartitionDeploymentDescriptor>();
irpdd.add(new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), partitionId, ResultPartitionType.PIPELINED, 1, 1, true));
InputGateDeploymentDescriptor ircdd = new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(new ResultPartitionID(partitionId, eid1), ResultPartitionLocation.createLocal()) });
final TaskDeploymentDescriptor tdd1 = createTaskDeploymentDescriptor(jid, "TestJob", vid1, eid1, new SerializedValue<>(new ExecutionConfig()), "Sender", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.Sender.class.getName(), irpdd, Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
final TaskDeploymentDescriptor tdd2 = createTaskDeploymentDescriptor(jid, "TestJob", vid2, eid2, new SerializedValue<>(new ExecutionConfig()), "Receiver", 7, 2, 7, 0, new Configuration(), new Configuration(), Tasks.Receiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(ircdd), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
new Within(d) {
@Override
protected void run() {
try {
Future<Object> t1Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid1), timeout);
Future<Object> t2Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid2), timeout);
// submit the sender task
tm.tell(new SubmitTask(tdd1), testActorGateway);
expectMsgEquals(Acknowledge.get());
// wait until the sender task is running
Await.ready(t1Running, d);
// only now (after the sender is running), submit the receiver task
tm.tell(new SubmitTask(tdd2), testActorGateway);
expectMsgEquals(Acknowledge.get());
// wait until the receiver task is running
Await.ready(t2Running, d);
tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
Map<ExecutionAttemptID, Task> tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
Task t1 = tasks.get(eid1);
Task t2 = tasks.get(eid2);
// we get to the check, so we need to guard the check
if (t1 != null) {
Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid1), timeout);
Await.ready(response, d);
}
if (t2 != null) {
Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid2), timeout);
Await.ready(response, d);
assertEquals(ExecutionState.FINISHED, t2.getExecutionState());
}
tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
assertEquals(0, tasks.size());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
// shut down the actors
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
use of org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor in project flink by apache.
the class TaskManagerTest method testLocalPartitionNotFound.
/**
* Tests that repeated local {@link PartitionNotFoundException}s ultimately fail the receiver.
*/
@Test
public void testLocalPartitionNotFound() throws Exception {
new JavaTestKit(system) {
{
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final IntermediateDataSetID resultId = new IntermediateDataSetID();
// Create the JM
ActorRef jm = system.actorOf(Props.create(new SimplePartitionStateLookupJobManagerCreator(leaderSessionID, getTestActor())));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
final Configuration config = new Configuration();
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
taskManager = TestingUtils.createTaskManager(system, jobManager, config, true, true);
// ---------------------------------------------------------------------------------
final ActorGateway tm = taskManager;
final JobID jid = new JobID();
final JobVertexID vid = new JobVertexID();
final ExecutionAttemptID eid = new ExecutionAttemptID();
final ResultPartitionID partitionId = new ResultPartitionID();
// Local location (on the same TM though) for the partition
final ResultPartitionLocation loc = ResultPartitionLocation.createLocal();
final InputChannelDeploymentDescriptor[] icdd = new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(partitionId, loc) };
final InputGateDeploymentDescriptor igdd = new InputGateDeploymentDescriptor(resultId, ResultPartitionType.PIPELINED, 0, icdd);
final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, new SerializedValue<>(new ExecutionConfig()), "Receiver", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.AgnosticReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(igdd), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
new Within(new FiniteDuration(120, TimeUnit.SECONDS)) {
@Override
protected void run() {
// Submit the task
tm.tell(new SubmitTask(tdd), testActorGateway);
expectMsgClass(Acknowledge.get().getClass());
// Wait to be notified about the final execution state by the mock JM
TaskExecutionState msg = expectMsgClass(TaskExecutionState.class);
// The task should fail after repeated requests
assertEquals(msg.getExecutionState(), ExecutionState.FAILED);
Throwable error = msg.getError(getClass().getClassLoader());
if (error.getClass() != PartitionNotFoundException.class) {
error.printStackTrace();
fail("Wrong exception: " + error.getMessage());
}
}
};
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
Aggregations