use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.
the class Execution method scheduleOrUpdateConsumers.
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
final int numConsumers = allConsumers.size();
if (numConsumers > 1) {
fail(new IllegalStateException("Currently, only a single consumer group per partition is supported."));
} else if (numConsumers == 0) {
return;
}
for (ExecutionEdge edge : allConsumers.get(0)) {
final ExecutionVertex consumerVertex = edge.getTarget();
final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
final ExecutionState consumerState = consumer.getState();
final IntermediateResultPartition partition = edge.getSource();
// ----------------------------------------------------------------
// Consumer is created => schedule the consumer and cache the partition info
// ----------------------------------------------------------------
if (consumerState == CREATED) {
final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();
consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));
// When deploying a consuming task, its task deployment descriptor will contain all
// deployment information available at the respective time. It is possible that some
// of the partitions to be consumed have not been created yet. These are updated
// at runtime via the update messages.
//
// TODO The current approach may send many update messages even though the consuming
// task has already been deployed with all necessary information. We have to check
// whether this is a problem and fix it, if it is.
FlinkFuture.supplyAsync(new Callable<Void>() {
@Override
public Void call() throws Exception {
try {
consumerVertex.scheduleForExecution(consumerVertex.getExecutionGraph().getSlotProvider(), consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
} catch (Throwable t) {
consumerVertex.fail(new IllegalStateException("Could not schedule consumer " + "vertex " + consumerVertex, t));
}
return null;
}
}, executor);
// double check to resolve race conditions
if (consumerVertex.getExecutionState() == RUNNING) {
consumerVertex.sendPartitionInfos();
}
}
// ----------------------------------------------------------------
// Consumer is running => send update message now
// ----------------------------------------------------------------
else {
if (consumerState == RUNNING) {
final SimpleSlot consumerSlot = consumer.getAssignedResource();
if (consumerSlot == null) {
// The consumer has been reset concurrently
continue;
}
final TaskManagerLocation partitionTaskManagerLocation = partition.getProducer().getCurrentAssignedResource().getTaskManagerLocation();
final ResourceID partitionTaskManager = partitionTaskManagerLocation.getResourceID();
final ResourceID consumerTaskManager = consumerSlot.getTaskManagerID();
final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), attemptId);
final ResultPartitionLocation partitionLocation;
if (consumerTaskManager.equals(partitionTaskManager)) {
// Consuming task is deployed to the same instance as the partition => local
partitionLocation = ResultPartitionLocation.createLocal();
} else {
// Different instances => remote
final ConnectionID connectionId = new ConnectionID(partitionTaskManagerLocation, partition.getIntermediateResult().getConnectionIndex());
partitionLocation = ResultPartitionLocation.createRemote(connectionId);
}
final InputChannelDeploymentDescriptor descriptor = new InputChannelDeploymentDescriptor(partitionId, partitionLocation);
consumer.sendUpdatePartitionInfoRpcCall(Collections.singleton(new PartitionInfo(partition.getIntermediateResult().getId(), descriptor)));
}
// ----------------------------------------------------------------
// Consumer is scheduled or deploying => cache the partition info and
// send the update once the consumer turns to RUNNING
// ----------------------------------------------------------------
else if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();
consumerVertex.cachePartitionInfo(PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));
// double check to resolve race conditions
if (consumerVertex.getExecutionState() == RUNNING) {
consumerVertex.sendPartitionInfos();
}
}
}
}
}
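The local-versus-remote decision above hinges on ResourceID equality: the consumer's slot and the producing partition's TaskManager are compared by their ResourceIDs, and only the result of that comparison decides between createLocal() and createRemote(). A minimal sketch of that comparison in isolation (the class name, isLocal helper, and main method are illustrative, not part of Flink):

import org.apache.flink.runtime.clusterframework.types.ResourceID;

public class PartitionLocalityCheck {

    // A partition is "local" to a consumer iff both run on the TaskManager
    // with the same ResourceID; ResourceID compares by value, not identity.
    static boolean isLocal(ResourceID producerTaskManager, ResourceID consumerTaskManager) {
        return consumerTaskManager.equals(producerTaskManager);
    }

    public static void main(String[] args) {
        ResourceID tmA = ResourceID.generate();
        ResourceID tmB = ResourceID.generate();

        System.out.println(isLocal(tmA, tmA)); // true  -> ResultPartitionLocation.createLocal()
        System.out.println(isLocal(tmA, tmB)); // false -> ResultPartitionLocation.createRemote(connectionId)
    }
}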
use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.
the class SlotPool method offerSlot.
/**
* Slot offering by a TaskManager, identified by its AllocationID. The AllocationID is originally generated
* by this pool and travels through the ResourceManager to the TaskManager; we use it to distinguish the
* different allocations we issued. A slot offering may be rejected if something does not match or there is
* no pending request waiting for this slot (it may have been fulfilled by some other returned slot).
*
* @param slot The offered slot
* @return True if we accept the offering
*/
@RpcMethod
public boolean offerSlot(final AllocatedSlot slot) {
validateRunsInMainThread();
// check if this TaskManager is valid
final ResourceID resourceID = slot.getTaskManagerId();
final AllocationID allocationID = slot.getSlotAllocationId();
if (!registeredTaskManagers.contains(resourceID)) {
LOG.debug("Received outdated slot offering [{}] from unregistered TaskManager: {}", slot.getSlotAllocationId(), slot);
return false;
}
// check whether we are already using this slot
if (allocatedSlots.contains(allocationID) || availableSlots.contains(allocationID)) {
LOG.debug("Received repeated offer for slot [{}]. Ignoring.", allocationID);
// acknowledge the repeated offer so that the sender marks the offering as a success
return true;
}
// check whether we have a request waiting for this slot
PendingRequest pendingRequest = pendingRequests.remove(allocationID);
if (pendingRequest != null) {
// we were waiting for this!
SimpleSlot resultSlot = createSimpleSlot(slot, Locality.UNKNOWN);
pendingRequest.future().complete(resultSlot);
allocatedSlots.add(resultSlot);
} else {
// we were actually not waiting for this:
// - could be that this request had been fulfilled
// - we are receiving the slots from TaskManagers after becoming the leader
availableSlots.add(slot, clock.relativeTimeMillis());
}
// the slot is accepted in any case; if it stays unused for too long, it times out and is released
return true;
}
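The checks above boil down to two pieces of ResourceID/AllocationID bookkeeping: an offer is only valid if the offering TaskManager's ResourceID is registered, and a repeated offer for an already-known AllocationID is acknowledged rather than re-added. A hypothetical, stripped-down version of that bookkeeping (MiniSlotPool and its fields are illustrative, not Flink classes):

import java.util.HashSet;
import java.util.Set;

import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;

class MiniSlotPool {

    private final Set<ResourceID> registeredTaskManagers = new HashSet<>();
    private final Set<AllocationID> knownAllocations = new HashSet<>();

    void registerTaskManager(ResourceID taskManagerId) {
        registeredTaskManagers.add(taskManagerId);
    }

    // Mirrors the order of checks in offerSlot: reject offers from unregistered
    // TaskManagers, acknowledge repeated offers, accept everything else.
    boolean offer(ResourceID taskManagerId, AllocationID allocationId) {
        if (!registeredTaskManagers.contains(taskManagerId)) {
            return false;
        }
        if (!knownAllocations.add(allocationId)) {
            return true;
        }
        // a real pool would now match the slot against pending requests
        // or park it in the set of available slots
        return true;
    }
}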
use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.
the class SlotSharingGroupAssignment method releaseSimpleSlot.
// ------------------------------------------------------------------------
// Slot releasing
// ------------------------------------------------------------------------
/**
* Releases the simple slot from the assignment group.
*
* @param simpleSlot The SimpleSlot to be released
*/
void releaseSimpleSlot(SimpleSlot simpleSlot) {
synchronized (lock) {
// try to transition the slot to the cancelled state, marking that the releasing is in progress
if (simpleSlot.markCancelled()) {
// sanity checks
if (simpleSlot.isAlive()) {
throw new IllegalStateException("slot is still alive");
}
// check whether the slot is already released
if (simpleSlot.markReleased()) {
LOG.debug("Release simple slot {}.", simpleSlot);
AbstractID groupID = simpleSlot.getGroupID();
SharedSlot parent = simpleSlot.getParent();
// if we have a group ID, then our parent slot is tracked here
if (groupID != null && !allSlots.contains(parent)) {
throw new IllegalArgumentException("Slot was not associated with this SlotSharingGroup before.");
}
int parentRemaining = parent.removeDisposedChildSlot(simpleSlot);
if (parentRemaining > 0) {
if (groupID != null) {
// if we have a group ID, then our parent becomes available
// for that group again. otherwise, the slot is part of a
// co-location group and nothing becomes immediately available
Map<ResourceID, List<SharedSlot>> slotsForJid = availableSlotsPerJid.get(groupID);
// sanity check
if (slotsForJid == null) {
throw new IllegalStateException("Trying to return a slot for group " + groupID + " when available slots indicated that all slots were available.");
}
putIntoMultiMap(slotsForJid, parent.getTaskManagerID(), parent);
}
} else {
// the parent shared slot is now empty and can be released
parent.markCancelled();
internalDisposeEmptySharedSlot(parent);
}
}
}
}
}
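putIntoMultiMap is not part of this excerpt; it maintains the per-group availability map, which is keyed by the parent slot's TaskManager ResourceID. Assuming the helper simply appends to a per-ResourceID list, a plausible minimal sketch looks like this:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.flink.runtime.clusterframework.types.ResourceID;

final class MultiMapSketch {

    // Appends a slot to the list kept under the TaskManager's ResourceID,
    // creating the list on first use.
    static <S> void putIntoMultiMap(Map<ResourceID, List<S>> map, ResourceID location, S slot) {
        map.computeIfAbsent(location, k -> new ArrayList<>()).add(slot);
    }
}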
use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.
the class SlotSharingGroupAssignment method getSlotForTaskInternal.
private Tuple2<SharedSlot, Locality> getSlotForTaskInternal(AbstractID groupId, Iterable<TaskManagerLocation> preferredLocations, boolean localOnly) {
// check if there is anything at all in this group assignment
if (allSlots.isEmpty()) {
return null;
}
// get the available slots for the group
Map<ResourceID, List<SharedSlot>> slotsForGroup = availableSlotsPerJid.get(groupId);
if (slotsForGroup == null) {
// we have a new group, so all slots are available
slotsForGroup = new LinkedHashMap<>();
availableSlotsPerJid.put(groupId, slotsForGroup);
for (SharedSlot availableSlot : allSlots) {
putIntoMultiMap(slotsForGroup, availableSlot.getTaskManagerID(), availableSlot);
}
} else if (slotsForGroup.isEmpty()) {
// the group exists, but nothing is available for that group
return null;
}
// check whether we can schedule the task to a preferred location
boolean didNotGetPreferred = false;
if (preferredLocations != null) {
for (TaskManagerLocation location : preferredLocations) {
// set the flag that we failed to get a preferred location. If one is found,
// we return early anyway and skip the flag evaluation.
didNotGetPreferred = true;
SharedSlot slot = removeFromMultiMap(slotsForGroup, location.getResourceID());
if (slot != null && slot.isAlive()) {
return new Tuple2<>(slot, Locality.LOCAL);
}
}
}
// if we want only local assignments, exit now with a "not found" result
if (didNotGetPreferred && localOnly) {
return null;
}
Locality locality = didNotGetPreferred ? Locality.NON_LOCAL : Locality.UNCONSTRAINED;
// schedule the task to any available location
SharedSlot slot;
while ((slot = pollFromMultiMap(slotsForGroup)) != null) {
if (slot.isAlive()) {
return new Tuple2<>(slot, locality);
}
}
// nothing available after all, all slots were dead
return null;
}
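The lookup relies on two more helpers that are not shown in this excerpt: removeFromMultiMap takes a slot registered under a specific ResourceID (the preferred-location probe), and pollFromMultiMap takes a slot from any location. Assuming they behave as their call sites suggest, they might look roughly like this:

import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.flink.runtime.clusterframework.types.ResourceID;

final class MultiMapLookupSketch {

    // Removes and returns one slot registered under the given ResourceID,
    // or null if that TaskManager has nothing available for the group.
    static <S> S removeFromMultiMap(Map<ResourceID, List<S>> map, ResourceID location) {
        List<S> slots = map.get(location);
        if (slots == null || slots.isEmpty()) {
            return null;
        }
        S slot = slots.remove(slots.size() - 1);
        if (slots.isEmpty()) {
            map.remove(location);
        }
        return slot;
    }

    // Removes and returns a slot from any TaskManager, or null if nothing is available.
    static <S> S pollFromMultiMap(Map<ResourceID, List<S>> map) {
        Iterator<Map.Entry<ResourceID, List<S>>> it = map.entrySet().iterator();
        while (it.hasNext()) {
            List<S> slots = it.next().getValue();
            if (slots.isEmpty()) {
                it.remove();
                continue;
            }
            S slot = slots.remove(slots.size() - 1);
            if (slots.isEmpty()) {
                it.remove();
            }
            return slot;
        }
        return null;
    }
}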
use of org.apache.flink.runtime.clusterframework.types.ResourceID in project flink by apache.
the class SlotSharingGroupAssignment method addSharedSlotAndAllocateSubSlot.
private SimpleSlot addSharedSlotAndAllocateSubSlot(SharedSlot sharedSlot, Locality locality, JobVertexID groupId, CoLocationConstraint constraint) {
// sanity checks
if (!sharedSlot.isRootAndEmpty()) {
throw new IllegalArgumentException("The given slot is not an empty root slot.");
}
final ResourceID location = sharedSlot.getTaskManagerID();
synchronized (lock) {
// early out in case that the slot died (instance disappeared)
if (!sharedSlot.isAlive()) {
return null;
}
// add to the total bookkeeping
if (!allSlots.add(sharedSlot)) {
throw new IllegalArgumentException("Slot was already contained in the assignment group");
}
SimpleSlot subSlot;
AbstractID groupIdForMap;
if (constraint == null) {
// allocate us a sub slot to return
subSlot = sharedSlot.allocateSubSlot(groupId);
groupIdForMap = groupId;
} else {
// sanity check
if (constraint.isAssignedAndAlive()) {
throw new IllegalStateException("Trying to add a shared slot to a co-location constraint that has a life slot.");
}
// we need a co-location slot --> a SimpleSlot nested in a SharedSlot to
// host other co-located tasks
SharedSlot constraintGroupSlot = sharedSlot.allocateSharedSlot(constraint.getGroupId());
groupIdForMap = constraint.getGroupId();
if (constraintGroupSlot != null) {
// the sub-slots in the co-location constraint slot have no group IDs of their own
subSlot = constraintGroupSlot.allocateSubSlot(null);
if (subSlot != null) {
// all went well, we can give the constraint its slot
constraint.setSharedSlot(constraintGroupSlot);
// NOTE: Do not lock the location constraint, because we don't yet know whether we will
// take the slot here
} else {
// if we could not create a sub slot, release the co-location slot
// note that this does implicitly release the slot we have just added
// as well, because we release its last child slot. That is expected
// and desired.
constraintGroupSlot.releaseSlot();
}
} else {
// this should not happen, as we are under the lock that also
// guards slot disposals. Keep the check to be on the safe side
subSlot = null;
}
}
if (subSlot != null) {
// preserve the locality information
subSlot.setLocality(locality);
// let the other groups know that this slot exists and that they
// can place a task into this slot.
boolean entryForNewJidExists = false;
for (Map.Entry<AbstractID, Map<ResourceID, List<SharedSlot>>> entry : availableSlotsPerJid.entrySet()) {
// there is already an entry for this groupID
if (entry.getKey().equals(groupIdForMap)) {
entryForNewJidExists = true;
continue;
}
Map<ResourceID, List<SharedSlot>> available = entry.getValue();
putIntoMultiMap(available, location, sharedSlot);
}
// make sure an empty entry exists for this group, if no other entry exists
if (!entryForNewJidExists) {
availableSlotsPerJid.put(groupIdForMap, new LinkedHashMap<ResourceID, List<SharedSlot>>());
}
return subSlot;
} else {
// This should be a rare case, since this method is called with a fresh slot.
return null;
}
}
// end synchronized (lock)
}
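The loop over availableSlotsPerJid above is what makes the new shared slot visible to every other group, registered under its TaskManager's ResourceID. A condensed sketch of that step (advertiseToOtherGroups is an illustrative name; the real method inlines this logic inside the lock):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.util.AbstractID;

final class SlotAdvertisingSketch {

    // Registers the slot under its TaskManager's ResourceID for every group
    // except the one that just received a sub-slot from it.
    static <S> void advertiseToOtherGroups(
            Map<AbstractID, Map<ResourceID, List<S>>> availableSlotsPerJid,
            AbstractID owningGroup,
            ResourceID location,
            S slot) {

        for (Map.Entry<AbstractID, Map<ResourceID, List<S>>> entry : availableSlotsPerJid.entrySet()) {
            if (!entry.getKey().equals(owningGroup)) {
                entry.getValue().computeIfAbsent(location, k -> new ArrayList<>()).add(slot);
            }
        }
        // make sure the owning group has at least an (empty) entry
        availableSlotsPerJid.putIfAbsent(owningGroup, new LinkedHashMap<ResourceID, List<S>>());
    }
}

The owning group is skipped on purpose: the slot was just handed to it via the allocated sub-slot, so advertising it there again would let the same group place a second task into the slot.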