Examples with InstanceDiedException - org.apache.flink.runtime.instance.InstanceDiedException

Example 1 with InstanceDiedException

use of org.apache.flink.runtime.instance.InstanceDiedException in project flink by apache.

the class Scheduler method handleNewSlot.

/**
 * Processes one entry of {@code newlyAvailableInstances} while holding {@code globalLock}.
 *
 * <p>The polled instance is ignored if it is {@code null} or reports no available resources.
 * If no task is waiting in {@code taskQueue}, the instance is registered in
 * {@code instancesWithAvailableResources}. Otherwise a simple slot is requested from the
 * instance for the task at the head of the queue; on success the task is removed from the
 * queue and its future (if any) is completed with the new slot.
 *
 * <p>NOTE(review): when {@code allocateSimpleSlot} returns {@code null}, the instance is not
 * put back into {@code instancesWithAvailableResources} here - confirm this is intended.
 */
private void handleNewSlot() {
    synchronized (globalLock) {
        final Instance freedInstance = this.newlyAvailableInstances.poll();
        if (freedInstance == null || !freedInstance.hasResourcesAvailable()) {
            // nothing to hand out: either no instance was queued or its resources are gone
            return;
        }

        final QueuedTask head = taskQueue.peek();
        if (head == null) {
            // no task is waiting, so just remember that this instance has capacity
            this.instancesWithAvailableResources.put(freedInstance.getTaskManagerID(), freedInstance);
            return;
        }

        final ScheduledUnit unit = head.getTask();
        final ExecutionVertex vertex = unit.getTaskToExecute().getVertex();
        try {
            final SimpleSlot allocated = freedInstance.allocateSimpleSlot(vertex.getJobId());
            if (allocated == null) {
                // allocation did not succeed; the task stays at the head of the queue
                return;
            }

            // the waiting task is served: dequeue it before notifying its future
            taskQueue.poll();
            if (head.getFuture() == null) {
                return;
            }
            try {
                head.getFuture().complete(allocated);
            } catch (Throwable t) {
                // a failure while completing the future is reported and fails the execution
                LOG.error("Error calling allocation future for task " + vertex.getSimpleName(), t);
                unit.getTaskToExecute().fail(t);
            }
        } catch (InstanceDiedException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Instance " + freedInstance + " was marked dead asynchronously.");
            }
            // the instance died in the meantime; drop it from the scheduler's bookkeeping
            removeInstance(freedInstance);
        }
    }
}

Also used : Instance(org.apache.flink.runtime.instance.Instance) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) InstanceDiedException(org.apache.flink.runtime.instance.InstanceDiedException)

Example 2 with InstanceDiedException

use of org.apache.flink.runtime.instance.InstanceDiedException in project flink by apache.

the class Scheduler method getNewSlotForSharingGroup.

/**
 * Tries to allocate a new slot for a vertex that is part of a slot sharing group. If one
 * of the instances has a slot available, the method allocates it as a shared slot, adds that
 * shared slot to the sharing group, and allocates a simple slot from that shared slot.
 *
 * <p>Candidate instances are obtained from {@code findInstance}, which receives the requested
 * locations and the {@code localOnly} flag. The method keeps asking for candidates until a
 * sub-slot could be allocated or {@code findInstance} returns {@code null}.
 *
 * @param vertex The vertex to allocate the slot for.
 * @param requestedLocations The locations that are considered local. May be null or empty, if
 *                           the vertex has no location preferences.
 * @param groupAssignment The slot sharing group of the vertex. Mandatory parameter.
 * @param constraint The co-location constraint of the vertex. May be null.
 * @param localOnly Flag to indicate if non-local choices are acceptable.
 *
 * @return A sub-slot for the given vertex, or {@code null}, if no slot is available.
 */
protected SimpleSlot getNewSlotForSharingGroup(
        ExecutionVertex vertex,
        Iterable<TaskManagerLocation> requestedLocations,
        SlotSharingGroupAssignment groupAssignment,
        CoLocationConstraint constraint,
        boolean localOnly) {
    // A candidate returned by findInstance may turn out to be unusable (no slot could be
    // allocated, or the instance died), so keep trying until one works or none is left.
    for (;;) {
        final Pair<Instance, Locality> candidate = findInstance(requestedLocations, localOnly);
        if (candidate == null) {
            // no instance is available at all
            return null;
        }

        final Instance chosenInstance = candidate.getLeft();
        final Locality chosenLocality = candidate.getRight();
        try {
            final JobVertexID groupID = vertex.getJobvertexId();

            // request a shared slot from the chosen instance
            final SharedSlot shared = chosenInstance.allocateSharedSlot(vertex.getJobId(), groupAssignment);

            // an instance that still has capacity goes back into the set of available resources
            if (chosenInstance.hasResourcesAvailable()) {
                this.instancesWithAvailableResources.put(chosenInstance.getTaskManagerID(), chosenInstance);
            }

            if (shared == null) {
                // this instance could not provide a slot; try the next candidate
                continue;
            }

            // register the shared slot with the sharing group and carve out a sub-slot,
            // keyed either by the vertex's group ID or by the co-location constraint
            final SimpleSlot subSlot;
            if (constraint == null) {
                subSlot = groupAssignment.addSharedSlotAndAllocateSubSlot(shared, chosenLocality, groupID);
            } else {
                subSlot = groupAssignment.addSharedSlotAndAllocateSubSlot(shared, chosenLocality, constraint);
            }
            if (subSlot != null) {
                return subSlot;
            }

            // the sub-slot could not be added and allocated, so give the shared slot back
            shared.releaseSlot();
        } catch (InstanceDiedException e) {
            // the instance died, but that has not yet been propagated to this scheduler;
            // remove it from the set of available instances and try again
            removeInstance(chosenInstance);
        }
    }
}

Also used : SharedSlot(org.apache.flink.runtime.instance.SharedSlot) Instance(org.apache.flink.runtime.instance.Instance) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) InstanceDiedException(org.apache.flink.runtime.instance.InstanceDiedException)

Example 3 with InstanceDiedException

use of org.apache.flink.runtime.instance.InstanceDiedException in project flink by apache.

the class Scheduler method getFreeSlotForTask.

/**
 * Allocates a simple slot for the given vertex on a suitable instance.
 *
 * <p>Candidate instances are obtained from {@code findInstance}; the method keeps asking for
 * candidates until a slot could be allocated or {@code findInstance} returns {@code null}.
 * The locality reported for the chosen instance is set on the returned slot.
 *
 * <p>NOTE: This method is not thread-safe, it needs to be synchronized by the caller.
 *
 * @param vertex The task to run.
 * @param requestedLocations The location preferences handed to {@code findInstance}.
 * @param localOnly Flag handed to {@code findInstance}; presumably restricts the search to
 *                  the requested locations - confirm against {@code findInstance}.
 * @return The slot to run the vertex in, or {@code null}, if no instance is available.
 */
protected SimpleSlot getFreeSlotForTask(
        ExecutionVertex vertex,
        Iterable<TaskManagerLocation> requestedLocations,
        boolean localOnly) {
    // A candidate returned by findInstance may turn out to be unusable (no slot could be
    // allocated, or the instance died), so keep trying until one works or none is left.
    for (;;) {
        final Pair<Instance, Locality> candidate = findInstance(requestedLocations, localOnly);
        if (candidate == null) {
            return null;
        }

        final Instance chosenInstance = candidate.getLeft();
        final Locality chosenLocality = candidate.getRight();
        try {
            final SimpleSlot allocated = chosenInstance.allocateSimpleSlot(vertex.getJobId());

            // an instance that still has capacity goes back into the set of available resources
            if (chosenInstance.hasResourcesAvailable()) {
                this.instancesWithAvailableResources.put(chosenInstance.getTaskManagerID(), chosenInstance);
            }

            if (allocated == null) {
                // this instance could not provide a slot; try the next candidate
                continue;
            }
            allocated.setLocality(chosenLocality);
            return allocated;
        } catch (InstanceDiedException e) {
            // the instance died, but that has not yet been propagated to this scheduler;
            // remove it from the set of available instances and try again
            removeInstance(chosenInstance);
        }
    }
}

Also used : Instance(org.apache.flink.runtime.instance.Instance) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) InstanceDiedException(org.apache.flink.runtime.instance.InstanceDiedException)

Aggregations

Instance (org.apache.flink.runtime.instance.Instance)3 InstanceDiedException (org.apache.flink.runtime.instance.InstanceDiedException)3 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)3 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)1 SharedSlot (org.apache.flink.runtime.instance.SharedSlot)1 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)1