Use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
The class InputChannelDeploymentDescriptor, method fromEdges.
// ------------------------------------------------------------------------
/**
* Creates an input channel deployment descriptor for each partition.
*/
public static InputChannelDeploymentDescriptor[] fromEdges(
		ExecutionEdge[] edges,
		SimpleSlot consumerSlot,
		boolean allowLazyDeployment) throws ExecutionGraphException {

	final ResourceID consumerTaskManager = consumerSlot.getTaskManagerID();

	final InputChannelDeploymentDescriptor[] icdd = new InputChannelDeploymentDescriptor[edges.length];

	// Each edge is connected to a different result partition
	for (int i = 0; i < edges.length; i++) {
		final IntermediateResultPartition consumedPartition = edges[i].getSource();
		final Execution producer = consumedPartition.getProducer().getCurrentExecutionAttempt();

		final ExecutionState producerState = producer.getState();
		final SimpleSlot producerSlot = producer.getAssignedResource();

		final ResultPartitionLocation partitionLocation;

		// The producing task needs to be RUNNING or already FINISHED
		if (consumedPartition.isConsumable() && producerSlot != null &&
				(producerState == ExecutionState.RUNNING ||
					producerState == ExecutionState.FINISHED ||
					producerState == ExecutionState.SCHEDULED ||
					producerState == ExecutionState.DEPLOYING)) {

			final TaskManagerLocation partitionTaskManagerLocation = producerSlot.getTaskManagerLocation();
			final ResourceID partitionTaskManager = partitionTaskManagerLocation.getResourceID();

			if (partitionTaskManager.equals(consumerTaskManager)) {
				// Consuming task is deployed to the same TaskManager as the partition => local
				partitionLocation = ResultPartitionLocation.createLocal();
			} else {
				// Different instances => remote
				final ConnectionID connectionId = new ConnectionID(
					partitionTaskManagerLocation,
					consumedPartition.getIntermediateResult().getConnectionIndex());

				partitionLocation = ResultPartitionLocation.createRemote(connectionId);
			}
		} else if (allowLazyDeployment) {
			// The producing task might not have registered the partition yet
			partitionLocation = ResultPartitionLocation.createUnknown();
		} else if (producerState == ExecutionState.CANCELING ||
				producerState == ExecutionState.CANCELED ||
				producerState == ExecutionState.FAILED) {

			String msg = "Trying to schedule a task whose inputs were canceled or failed. " +
				"The producer is in state " + producerState + ".";

			throw new ExecutionGraphException(msg);
		} else {
			String msg = String.format("Trying to eagerly schedule a task whose inputs " +
					"are not ready (partition consumable? %s, producer state: %s, producer slot: %s).",
				consumedPartition.isConsumable(), producerState, producerSlot);

			throw new ExecutionGraphException(msg);
		}

		final ResultPartitionID consumedPartitionId = new ResultPartitionID(
			consumedPartition.getPartitionId(), producer.getAttemptId());

		icdd[i] = new InputChannelDeploymentDescriptor(consumedPartitionId, partitionLocation);
	}

	return icdd;
}
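For context, here is a caller-side sketch (not taken from the Flink sources) of how fromEdges could be used to build the channel descriptors for one input gate of a consuming task. The consumerVertex variable, the getInputEdges(0) call, and the lazy-deployment flag are assumptions for illustration only.

// Hypothetical caller sketch; consumerVertex and the input-gate index are assumed.
ExecutionEdge[] inputEdges = consumerVertex.getInputEdges(0);
SimpleSlot consumerSlot = consumerVertex.getCurrentExecutionAttempt().getAssignedResource();
boolean allowLazyDeployment = true; // permit UNKNOWN locations for partitions not yet registered

InputChannelDeploymentDescriptor[] channels =
	InputChannelDeploymentDescriptor.fromEdges(inputEdges, consumerSlot, allowLazyDeployment);
// Each descriptor pairs the consumed ResultPartitionID with a LOCAL, REMOTE, or UNKNOWN location.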
Use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
The class Execution, method triggerCheckpoint.
/**
* Trigger a new checkpoint on the task of this execution.
*
* @param checkpointId of the checkpoint to trigger
* @param timestamp of the checkpoint to trigger
* @param checkpointOptions of the checkpoint to trigger
*/
public void triggerCheckpoint(long checkpointId, long timestamp, CheckpointOptions checkpointOptions) {
	final SimpleSlot slot = assignedResource;

	if (slot != null) {
		final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();

		taskManagerGateway.triggerCheckpoint(attemptId, getVertex().getJobId(), checkpointId, timestamp, checkpointOptions);
	} else {
		LOG.debug("The execution has no slot assigned. This indicates that the execution is no longer running.");
	}
}
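A minimal coordinator-side sketch (assumed, not from the Flink sources) of how a checkpoint could be fanned out over the executions that need to receive the trigger message; tasksToTrigger, checkpointId, timestamp, and checkpointOptions are assumed inputs.

// Hypothetical sketch: fan the trigger out to every execution that must receive it.
// Executions whose slot was already released simply log and skip (see the null check above).
for (Execution execution : tasksToTrigger) {
	execution.triggerCheckpoint(checkpointId, timestamp, checkpointOptions);
}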
Use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
The class Execution, method scheduleOrUpdateConsumers.
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
	final int numConsumers = allConsumers.size();

	if (numConsumers > 1) {
		fail(new IllegalStateException("Currently, only a single consumer group per partition is supported."));
	} else if (numConsumers == 0) {
		return;
	}

	for (ExecutionEdge edge : allConsumers.get(0)) {
		final ExecutionVertex consumerVertex = edge.getTarget();
		final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
		final ExecutionState consumerState = consumer.getState();

		final IntermediateResultPartition partition = edge.getSource();

		// ----------------------------------------------------------------
		// Consumer is created => schedule it and cache the partition info,
		// since the partition may not be registered at deployment time
		// ----------------------------------------------------------------
		if (consumerState == CREATED) {
			final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

			consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

			// When deploying a consuming task, its task deployment descriptor will contain all
			// deployment information available at the respective time. It is possible that some
			// of the partitions to be consumed have not been created yet. These are updated at
			// runtime via the update messages.
			//
			// TODO The current approach may send many update messages even though the consuming
			// task has already been deployed with all necessary information. We have to check
			// whether this is a problem and fix it, if it is.
			FlinkFuture.supplyAsync(new Callable<Void>() {

				@Override
				public Void call() throws Exception {
					try {
						consumerVertex.scheduleForExecution(
							consumerVertex.getExecutionGraph().getSlotProvider(),
							consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
					} catch (Throwable t) {
						consumerVertex.fail(new IllegalStateException(
							"Could not schedule consumer vertex " + consumerVertex, t));
					}

					return null;
				}
			}, executor);

			// double check to resolve race conditions
			if (consumerVertex.getExecutionState() == RUNNING) {
				consumerVertex.sendPartitionInfos();
			}
		}
		// ----------------------------------------------------------------
		// Consumer is running => send update message now
		// ----------------------------------------------------------------
		else if (consumerState == RUNNING) {
			final SimpleSlot consumerSlot = consumer.getAssignedResource();

			if (consumerSlot == null) {
				// The consumer has been reset concurrently
				continue;
			}

			final TaskManagerLocation partitionTaskManagerLocation =
				partition.getProducer().getCurrentAssignedResource().getTaskManagerLocation();
			final ResourceID partitionTaskManager = partitionTaskManagerLocation.getResourceID();

			final ResourceID consumerTaskManager = consumerSlot.getTaskManagerID();

			final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), attemptId);

			final ResultPartitionLocation partitionLocation;

			if (consumerTaskManager.equals(partitionTaskManager)) {
				// Consuming task is deployed to the same instance as the partition => local
				partitionLocation = ResultPartitionLocation.createLocal();
			} else {
				// Different instances => remote
				final ConnectionID connectionId = new ConnectionID(
					partitionTaskManagerLocation,
					partition.getIntermediateResult().getConnectionIndex());

				partitionLocation = ResultPartitionLocation.createRemote(connectionId);
			}

			final InputChannelDeploymentDescriptor descriptor =
				new InputChannelDeploymentDescriptor(partitionId, partitionLocation);

			consumer.sendUpdatePartitionInfoRpcCall(
				Collections.singleton(new PartitionInfo(partition.getIntermediateResult().getId(), descriptor)));
		}
		// ----------------------------------------------------------------
		// Consumer is scheduled or deploying => cache the partition info
		// and send it as soon as the consumer is running
		// ----------------------------------------------------------------
		else if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
			final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

			consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

			// double check to resolve race conditions
			if (consumerVertex.getExecutionState() == RUNNING) {
				consumerVertex.sendPartitionInfos();
			}
		}
	}
}
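A producer-side sketch (assumed, not taken from the Flink sources) of how this method could be invoked once one of the execution's pipelined partitions has produced data or finished; producerExecution and partition are assumed to belong to the same producing vertex.

// Hypothetical sketch: getConsumers() yields one List<ExecutionEdge> per consumer group,
// and the method above currently supports exactly one such group.
List<List<ExecutionEdge>> consumers = partition.getConsumers();
producerExecution.scheduleOrUpdateConsumers(consumers);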
Use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
The class Execution, method sendFailIntermediateResultPartitionsRpcCall.
private void sendFailIntermediateResultPartitionsRpcCall() {
	final SimpleSlot slot = assignedResource;

	if (slot != null) {
		final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();

		// TODO For some tests this could be a problem when querying too early if all resources were released
		taskManagerGateway.failPartition(attemptId);
	}
}
Use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
The class Scheduler, method getNewSlotForSharingGroup.
/**
* Tries to allocate a new slot for a vertex that is part of a slot sharing group. If one
* of the instances has a slot available, the method will allocate it as a shared slot, add that
* shared slot to the sharing group, and allocate a simple slot from that shared slot.
*
* <p>This method will try to allocate a slot from one of the local instances, and fall back to
* non-local instances, if permitted.</p>
*
* @param vertex The vertex to allocate the slot for.
* @param requestedLocations The locations that are considered local. May be null or empty, if the
* vertex has no location preferences.
* @param groupAssignment The slot sharing group of the vertex. Mandatory parameter.
* @param constraint The co-location constraint of the vertex. May be null.
* @param localOnly Flag to indicate whether only local instances are acceptable (if true, non-local choices are rejected).
*
* @return A sub-slot for the given vertex, or {@code null}, if no slot is available.
*/
protected SimpleSlot getNewSlotForSharingGroup(
		ExecutionVertex vertex,
		Iterable<TaskManagerLocation> requestedLocations,
		SlotSharingGroupAssignment groupAssignment,
		CoLocationConstraint constraint,
		boolean localOnly) {

	// we may need to loop multiple times, because there can be false positives
	// in the set-with-available-instances
	while (true) {
		Pair<Instance, Locality> instanceLocalityPair = findInstance(requestedLocations, localOnly);

		if (instanceLocalityPair == null) {
			// nothing is available
			return null;
		}

		final Instance instanceToUse = instanceLocalityPair.getLeft();
		final Locality locality = instanceLocalityPair.getRight();

		try {
			JobVertexID groupID = vertex.getJobvertexId();

			// allocate a shared slot from the instance
			SharedSlot sharedSlot = instanceToUse.allocateSharedSlot(vertex.getJobId(), groupAssignment);

			// if the instance has further available slots, re-add it to the set of available resources
			if (instanceToUse.hasResourcesAvailable()) {
				this.instancesWithAvailableResources.put(instanceToUse.getTaskManagerID(), instanceToUse);
			}

			if (sharedSlot != null) {
				// add the shared slot to the assignment group and allocate a sub-slot
				SimpleSlot slot = constraint == null ?
						groupAssignment.addSharedSlotAndAllocateSubSlot(sharedSlot, locality, groupID) :
						groupAssignment.addSharedSlotAndAllocateSubSlot(sharedSlot, locality, constraint);

				if (slot != null) {
					return slot;
				} else {
					// could not add and allocate the sub-slot, so release the shared slot
					sharedSlot.releaseSlot();
				}
			}
		} catch (InstanceDiedException e) {
			// the instance died, but its death has not yet been propagated to this scheduler;
			// remove the instance from the set of available instances
			removeInstance(instanceToUse);
		}

		// if we failed to get a slot, fall through the loop and try the next instance
	}
}
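A scheduler-internal sketch (assumed, not from the Flink sources) of the two-pass pattern a caller could use with this method: accept only local instances first, then retry allowing non-local placement. The variables vertex, preferredLocations, groupAssignment, and coLocationConstraint are assumed to be in scope.

// Hypothetical sketch: the first pass accepts only local instances (localOnly = true).
SimpleSlot subSlot = getNewSlotForSharingGroup(
	vertex, preferredLocations, groupAssignment, coLocationConstraint, true);

if (subSlot == null) {
	// no local instance had a free slot; retry and accept non-local instances as well
	subSlot = getNewSlotForSharingGroup(
		vertex, preferredLocations, groupAssignment, coLocationConstraint, false);
}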