use of org.apache.flink.runtime.deployment.ResultPartitionLocation in project flink by apache.
the class TaskManagerTest method testRemotePartitionNotFound.
/**
* Tests that repeated remote {@link PartitionNotFoundException}s ultimately fail the receiver.
*/
@Test
public void testRemotePartitionNotFound() throws Exception {
new JavaTestKit(system) {
{
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final IntermediateDataSetID resultId = new IntermediateDataSetID();
// Create the JM
ActorRef jm = system.actorOf(Props.create(new SimplePartitionStateLookupJobManagerCreator(leaderSessionID, getTestActor())));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
final int dataPort = NetUtils.getAvailablePort();
Configuration config = new Configuration();
config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, dataPort);
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
taskManager = TestingUtils.createTaskManager(system, jobManager, config, false, true);
// ---------------------------------------------------------------------------------
final ActorGateway tm = taskManager;
final JobID jid = new JobID();
final JobVertexID vid = new JobVertexID();
final ExecutionAttemptID eid = new ExecutionAttemptID();
final ResultPartitionID partitionId = new ResultPartitionID();
// Remote location (on the same TM though) for the partition
final ResultPartitionLocation loc = ResultPartitionLocation.createRemote(new ConnectionID(new InetSocketAddress("localhost", dataPort), 0));
final InputChannelDeploymentDescriptor[] icdd = new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(partitionId, loc) };
final InputGateDeploymentDescriptor igdd = new InputGateDeploymentDescriptor(resultId, ResultPartitionType.PIPELINED, 0, icdd);
final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, new SerializedValue<>(new ExecutionConfig()), "Receiver", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.AgnosticReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(igdd), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
new Within(d) {
@Override
protected void run() {
// Submit the task
tm.tell(new SubmitTask(tdd), testActorGateway);
expectMsgClass(Acknowledge.get().getClass());
// Wait to be notified about the final execution state by the mock JM
TaskExecutionState msg = expectMsgClass(TaskExecutionState.class);
// The task should fail after repeated requests
assertEquals(ExecutionState.FAILED, msg.getExecutionState());
Throwable t = msg.getError(ClassLoader.getSystemClassLoader());
assertEquals("Thrown exception was not a PartitionNotFoundException: " + t.getMessage(), PartitionNotFoundException.class, t.getClass());
}
};
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
use of org.apache.flink.runtime.deployment.ResultPartitionLocation in project flink by apache.
the class SingleInputGate method updateInputChannel.
public void updateInputChannel(InputChannelDeploymentDescriptor icdd) throws IOException, InterruptedException {
synchronized (requestLock) {
if (isReleased) {
// There was a race with a task failure/cancel
return;
}
final IntermediateResultPartitionID partitionId = icdd.getConsumedPartitionId().getPartitionId();
InputChannel current = inputChannels.get(partitionId);
if (current.getClass() == UnknownInputChannel.class) {
UnknownInputChannel unknownChannel = (UnknownInputChannel) current;
InputChannel newChannel;
ResultPartitionLocation partitionLocation = icdd.getConsumedPartitionLocation();
if (partitionLocation.isLocal()) {
newChannel = unknownChannel.toLocalInputChannel();
} else if (partitionLocation.isRemote()) {
newChannel = unknownChannel.toRemoteInputChannel(partitionLocation.getConnectionId());
} else {
throw new IllegalStateException("Tried to update unknown channel with unknown channel.");
}
LOG.debug("Updated unknown input channel to {}.", newChannel);
inputChannels.put(partitionId, newChannel);
if (requestedPartitionsFlag) {
newChannel.requestSubpartition(consumedSubpartitionIndex);
}
for (TaskEvent event : pendingEvents) {
newChannel.sendTaskEvent(event);
}
if (--numberOfUninitializedChannels == 0) {
pendingEvents.clear();
}
}
}
}
use of org.apache.flink.runtime.deployment.ResultPartitionLocation in project flink by apache.
the class Execution method scheduleOrUpdateConsumers.
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
final int numConsumers = allConsumers.size();
if (numConsumers > 1) {
fail(new IllegalStateException("Currently, only a single consumer group per partition is supported."));
} else if (numConsumers == 0) {
return;
}
for (ExecutionEdge edge : allConsumers.get(0)) {
final ExecutionVertex consumerVertex = edge.getTarget();
final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
final ExecutionState consumerState = consumer.getState();
final IntermediateResultPartition partition = edge.getSource();
// ----------------------------------------------------------------
if (consumerState == CREATED) {
final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();
consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));
// When deploying a consuming task, its task deployment descriptor will contain all
// deployment information available at the respective time. It is possible that some
// of the partitions to be consumed have not been created yet. These are updated
// runtime via the update messages.
//
// TODO The current approach may send many update messages even though the consuming
// task has already been deployed with all necessary information. We have to check
// whether this is a problem and fix it, if it is.
FlinkFuture.supplyAsync(new Callable<Void>() {
@Override
public Void call() throws Exception {
try {
consumerVertex.scheduleForExecution(consumerVertex.getExecutionGraph().getSlotProvider(), consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
} catch (Throwable t) {
consumerVertex.fail(new IllegalStateException("Could not schedule consumer " + "vertex " + consumerVertex, t));
}
return null;
}
}, executor);
// double check to resolve race conditions
if (consumerVertex.getExecutionState() == RUNNING) {
consumerVertex.sendPartitionInfos();
}
} else // ----------------------------------------------------------------
// Consumer is running => send update message now
// ----------------------------------------------------------------
{
if (consumerState == RUNNING) {
final SimpleSlot consumerSlot = consumer.getAssignedResource();
if (consumerSlot == null) {
// The consumer has been reset concurrently
continue;
}
final TaskManagerLocation partitionTaskManagerLocation = partition.getProducer().getCurrentAssignedResource().getTaskManagerLocation();
final ResourceID partitionTaskManager = partitionTaskManagerLocation.getResourceID();
final ResourceID consumerTaskManager = consumerSlot.getTaskManagerID();
final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), attemptId);
final ResultPartitionLocation partitionLocation;
if (consumerTaskManager.equals(partitionTaskManager)) {
// Consuming task is deployed to the same instance as the partition => local
partitionLocation = ResultPartitionLocation.createLocal();
} else {
// Different instances => remote
final ConnectionID connectionId = new ConnectionID(partitionTaskManagerLocation, partition.getIntermediateResult().getConnectionIndex());
partitionLocation = ResultPartitionLocation.createRemote(connectionId);
}
final InputChannelDeploymentDescriptor descriptor = new InputChannelDeploymentDescriptor(partitionId, partitionLocation);
consumer.sendUpdatePartitionInfoRpcCall(Collections.singleton(new PartitionInfo(partition.getIntermediateResult().getId(), descriptor)));
} else // ----------------------------------------------------------------
if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();
consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));
// double check to resolve race conditions
if (consumerVertex.getExecutionState() == RUNNING) {
consumerVertex.sendPartitionInfos();
}
}
}
}
}
use of org.apache.flink.runtime.deployment.ResultPartitionLocation in project flink by apache.
the class SingleInputGate method create.
// ------------------------------------------------------------------------
/**
* Creates an input gate and all of its input channels.
*/
public static SingleInputGate create(String owningTaskName, JobID jobId, ExecutionAttemptID executionId, InputGateDeploymentDescriptor igdd, NetworkEnvironment networkEnvironment, TaskActions taskActions, TaskIOMetricGroup metrics) {
final IntermediateDataSetID consumedResultId = checkNotNull(igdd.getConsumedResultId());
final ResultPartitionType consumedPartitionType = checkNotNull(igdd.getConsumedPartitionType());
final int consumedSubpartitionIndex = igdd.getConsumedSubpartitionIndex();
checkArgument(consumedSubpartitionIndex >= 0);
final InputChannelDeploymentDescriptor[] icdd = checkNotNull(igdd.getInputChannelDeploymentDescriptors());
final SingleInputGate inputGate = new SingleInputGate(owningTaskName, jobId, consumedResultId, consumedPartitionType, consumedSubpartitionIndex, icdd.length, taskActions, metrics);
// Create the input channels. There is one input channel for each consumed partition.
final InputChannel[] inputChannels = new InputChannel[icdd.length];
int numLocalChannels = 0;
int numRemoteChannels = 0;
int numUnknownChannels = 0;
for (int i = 0; i < inputChannels.length; i++) {
final ResultPartitionID partitionId = icdd[i].getConsumedPartitionId();
final ResultPartitionLocation partitionLocation = icdd[i].getConsumedPartitionLocation();
if (partitionLocation.isLocal()) {
inputChannels[i] = new LocalInputChannel(inputGate, i, partitionId, networkEnvironment.getResultPartitionManager(), networkEnvironment.getTaskEventDispatcher(), networkEnvironment.getPartitionRequestInitialBackoff(), networkEnvironment.getPartitionRequestMaxBackoff(), metrics);
numLocalChannels++;
} else if (partitionLocation.isRemote()) {
inputChannels[i] = new RemoteInputChannel(inputGate, i, partitionId, partitionLocation.getConnectionId(), networkEnvironment.getConnectionManager(), networkEnvironment.getPartitionRequestInitialBackoff(), networkEnvironment.getPartitionRequestMaxBackoff(), metrics);
numRemoteChannels++;
} else if (partitionLocation.isUnknown()) {
inputChannels[i] = new UnknownInputChannel(inputGate, i, partitionId, networkEnvironment.getResultPartitionManager(), networkEnvironment.getTaskEventDispatcher(), networkEnvironment.getConnectionManager(), networkEnvironment.getPartitionRequestInitialBackoff(), networkEnvironment.getPartitionRequestMaxBackoff(), metrics);
numUnknownChannels++;
} else {
throw new IllegalStateException("Unexpected partition location.");
}
inputGate.setInputChannel(partitionId.getPartitionId(), inputChannels[i]);
}
LOG.debug("Created {} input channels (local: {}, remote: {}, unknown: {}).", inputChannels.length, numLocalChannels, numRemoteChannels, numUnknownChannels);
return inputGate;
}
use of org.apache.flink.runtime.deployment.ResultPartitionLocation in project flink by apache.
the class TaskManagerTest method testLocalPartitionNotFound.
/**
* Tests that repeated local {@link PartitionNotFoundException}s ultimately fail the receiver.
*/
@Test
public void testLocalPartitionNotFound() throws Exception {
new JavaTestKit(system) {
{
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final IntermediateDataSetID resultId = new IntermediateDataSetID();
// Create the JM
ActorRef jm = system.actorOf(Props.create(new SimplePartitionStateLookupJobManagerCreator(leaderSessionID, getTestActor())));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
final Configuration config = new Configuration();
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
taskManager = TestingUtils.createTaskManager(system, jobManager, config, true, true);
// ---------------------------------------------------------------------------------
final ActorGateway tm = taskManager;
final JobID jid = new JobID();
final JobVertexID vid = new JobVertexID();
final ExecutionAttemptID eid = new ExecutionAttemptID();
final ResultPartitionID partitionId = new ResultPartitionID();
// Local location (on the same TM though) for the partition
final ResultPartitionLocation loc = ResultPartitionLocation.createLocal();
final InputChannelDeploymentDescriptor[] icdd = new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(partitionId, loc) };
final InputGateDeploymentDescriptor igdd = new InputGateDeploymentDescriptor(resultId, ResultPartitionType.PIPELINED, 0, icdd);
final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, new SerializedValue<>(new ExecutionConfig()), "Receiver", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.AgnosticReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(igdd), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
new Within(new FiniteDuration(120, TimeUnit.SECONDS)) {
@Override
protected void run() {
// Submit the task
tm.tell(new SubmitTask(tdd), testActorGateway);
expectMsgClass(Acknowledge.get().getClass());
// Wait to be notified about the final execution state by the mock JM
TaskExecutionState msg = expectMsgClass(TaskExecutionState.class);
// The task should fail after repeated requests
assertEquals(msg.getExecutionState(), ExecutionState.FAILED);
Throwable error = msg.getError(getClass().getClassLoader());
if (error.getClass() != PartitionNotFoundException.class) {
error.printStackTrace();
fail("Wrong exception: " + error.getMessage());
}
}
};
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
Aggregations