Use of org.apache.flink.runtime.instance.InstanceDiedException in the Apache Flink project.
Class Scheduler, method handleNewSlot.
/**
 * Reacts to an instance that has been reported as having a newly available slot.
 *
 * <p>Polls the next instance from {@code newlyAvailableInstances}. If a task is waiting at the
 * head of {@code taskQueue}, a simple slot is allocated from that instance, the task is removed
 * from the queue and its allocation future (if present) is completed with the slot. If no task
 * is waiting, the instance is registered in {@code instancesWithAvailableResources}. The whole
 * operation runs while holding {@code globalLock}.
 */
private void handleNewSlot() {
    synchronized (globalLock) {
        final Instance freedInstance = this.newlyAvailableInstances.poll();
        if (!(freedInstance != null && freedInstance.hasResourcesAvailable())) {
            // nothing was polled, or the instance no longer has resources to offer
            return;
        }

        final QueuedTask head = taskQueue.peek();
        if (head == null) {
            // no task is waiting: remember the instance for later slot requests
            this.instancesWithAvailableResources.put(freedInstance.getTaskManagerID(), freedInstance);
            return;
        }

        final ScheduledUnit unit = head.getTask();
        final ExecutionVertex vertex = unit.getTaskToExecute().getVertex();
        try {
            final SimpleSlot allocated = freedInstance.allocateSimpleSlot(vertex.getJobId());
            if (allocated == null) {
                // allocation yielded nothing; the task stays at the head of the queue
                return;
            }

            // the head task is served: dequeue it, then hand the slot to its future (if any)
            taskQueue.poll();
            if (head.getFuture() == null) {
                return;
            }
            try {
                head.getFuture().complete(allocated);
            } catch (Throwable t) {
                // a failure while completing the future is reported and fails the execution
                LOG.error("Error calling allocation future for task " + vertex.getSimpleName(), t);
                unit.getTaskToExecute().fail(t);
            }
        } catch (InstanceDiedException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Instance " + freedInstance + " was marked dead asynchronously.");
            }
            // the instance is dead; drop it from this scheduler's bookkeeping
            removeInstance(freedInstance);
        }
    }
}
Use of org.apache.flink.runtime.instance.InstanceDiedException in the Apache Flink project.
Class Scheduler, method getNewSlotForSharingGroup.
/**
 * Attempts to obtain a new slot for a vertex that belongs to a slot sharing group.
 *
 * <p>An instance is picked via {@code findInstance}; a shared slot is allocated on it, added to
 * the sharing group, and a simple sub-slot is allocated from that shared slot. If the sub-slot
 * cannot be allocated, the shared slot is released again and the next instance is tried. An
 * instance that turns out to be dead is removed from the scheduler and the search continues.</p>
 *
 * @param vertex The vertex to allocate the slot for.
 * @param requestedLocations The locations that are considered local. May be null or empty, if
 *                           the vertex has no location preferences.
 * @param groupAssignment The slot sharing group of the vertex. Mandatory parameter.
 * @param constraint The co-location constraint of the vertex. May be null, in which case the
 *                   sub-slot is allocated for the vertex's job vertex ID instead.
 * @param localOnly Flag to indicate if non-local choices are acceptable.
 *
 * @return A sub-slot for the given vertex, or {@code null}, if no slot is available.
 */
protected SimpleSlot getNewSlotForSharingGroup(ExecutionVertex vertex, Iterable<TaskManagerLocation> requestedLocations, SlotSharingGroupAssignment groupAssignment, CoLocationConstraint constraint, boolean localOnly) {
    // a candidate instance can still fail to yield a slot (null allocation or a dead instance),
    // so keep asking for candidates until one works or none is left
    for (;;) {
        final Pair<Instance, Locality> candidate = findInstance(requestedLocations, localOnly);
        if (candidate == null) {
            // no candidate instance left
            return null;
        }

        final Instance target = candidate.getLeft();
        final Locality targetLocality = candidate.getRight();
        try {
            final JobVertexID vertexGroupId = vertex.getJobvertexId();

            // take a shared slot from the chosen instance
            final SharedSlot shared = target.allocateSharedSlot(vertex.getJobId(), groupAssignment);

            // an instance with remaining capacity goes back into the set of available resources
            if (target.hasResourcesAvailable()) {
                this.instancesWithAvailableResources.put(target.getTaskManagerID(), target);
            }

            if (shared == null) {
                // nothing allocated on this instance; try the next candidate
                continue;
            }

            // register the shared slot with the group and carve out the sub-slot
            final SimpleSlot subSlot;
            if (constraint == null) {
                subSlot = groupAssignment.addSharedSlotAndAllocateSubSlot(shared, targetLocality, vertexGroupId);
            } else {
                subSlot = groupAssignment.addSharedSlotAndAllocateSubSlot(shared, targetLocality, constraint);
            }
            if (subSlot != null) {
                return subSlot;
            }

            // the sub-slot could not be allocated, so give the shared slot back
            shared.releaseSlot();
        } catch (InstanceDiedException e) {
            // the instance died, but this scheduler had not been told yet:
            // drop it from the set of available instances and try the next candidate
            removeInstance(target);
        }
    }
}
Use of org.apache.flink.runtime.instance.InstanceDiedException in the Apache Flink project.
Class Scheduler, method getFreeSlotForTask.
/**
 * Picks a suitable instance and allocates a simple slot on it for the vertex execution.
 *
 * <p>Instances are chosen via {@code findInstance}. An instance that still has resources after
 * the allocation is put back into {@code instancesWithAvailableResources}; an instance that
 * turns out to be dead is removed from the scheduler and the next candidate is tried.
 *
 * <p>NOTE: This method is not thread-safe; it needs to be synchronized by the caller.
 *
 * @param vertex The task to run.
 * @param requestedLocations The location preferences handed to {@code findInstance}.
 * @param localOnly Locality flag handed to {@code findInstance}.
 * @return The allocated slot, tagged with the locality of the chosen instance, or {@code null}
 *         if no instance is available.
 */
protected SimpleSlot getFreeSlotForTask(ExecutionVertex vertex, Iterable<TaskManagerLocation> requestedLocations, boolean localOnly) {
    // a candidate instance can still fail to yield a slot (null allocation or a dead instance),
    // so keep asking for candidates until one works or none is left
    for (;;) {
        final Pair<Instance, Locality> candidate = findInstance(requestedLocations, localOnly);
        if (candidate == null) {
            return null;
        }

        final Instance target = candidate.getLeft();
        final Locality targetLocality = candidate.getRight();
        try {
            final SimpleSlot allocated = target.allocateSimpleSlot(vertex.getJobId());

            // an instance with remaining capacity goes back into the set of available resources
            if (target.hasResourcesAvailable()) {
                this.instancesWithAvailableResources.put(target.getTaskManagerID(), target);
            }

            if (allocated == null) {
                // nothing allocated on this instance; try the next candidate
                continue;
            }
            allocated.setLocality(targetLocality);
            return allocated;
        } catch (InstanceDiedException e) {
            // the instance died, but this scheduler had not been told yet:
            // drop it from the set of available instances and try the next candidate
            removeInstance(target);
        }
    }
}
Aggregations