Search in sources :

Example 1 with TaskSlot

use of org.apache.flink.runtime.taskexecutor.slot.TaskSlot in project flink by apache.

the class TaskExecutor method offerSlotsToJobManager.

// ------------------------------------------------------------------------
//  Internal job manager connection methods
// ------------------------------------------------------------------------
/**
 * Offers the slots that are currently allocated for the given job to the job's JobManager.
 *
 * <p>If no JobManager connection is registered for the job, or the job has no allocated
 * slots, the method only logs and returns. Otherwise every allocated slot is marked active
 * and collected into a single offer. A slot that cannot be marked active is reported to the
 * JobManager via {@code failSlot} and is left out of the offer.
 *
 * <p>The response is handled on the main thread executor:
 * <ul>
 *   <li>success: if the JobManager connection is still valid, every offered slot that is
 *       not contained in the response is freed; otherwise the response is discarded;</li>
 *   <li>{@link TimeoutException}: the offer is retried by calling this method again;</li>
 *   <li>any other failure: all offered slots are freed.</li>
 * </ul>
 *
 * @param jobId id of the job whose allocated slots are offered
 */
private void offerSlotsToJobManager(final JobID jobId) {
    final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId);
    if (jobManagerConnection == null) {
        log.debug("There is no job manager connection to the leader of job {}.", jobId);
        return;
    }
    if (!taskSlotTable.hasAllocatedSlots(jobId)) {
        log.debug("There are no unassigned slots for the job {}.", jobId);
        return;
    }

    log.info("Offer reserved slots to the leader of job {}.", jobId);
    final JobMasterGateway jobMasterGateway = jobManagerConnection.getJobManagerGateway();
    final Iterator<TaskSlot> reservedSlotsIterator = taskSlotTable.getAllocatedSlots(jobId);
    final UUID leaderId = jobManagerConnection.getLeaderId();
    // Mutated by the success callback below, which strips the accepted offers from it.
    final Collection<SlotOffer> reservedSlots = new HashSet<>(2);

    while (reservedSlotsIterator.hasNext()) {
        final SlotOffer offer = reservedSlotsIterator.next().generateSlotOffer();
        try {
            if (taskSlotTable.markSlotActive(offer.getAllocationId())) {
                reservedSlots.add(offer);
            } else {
                // the slot is either free or releasing at the moment; it has just been
                // reported as failed, so it must not be offered as well
                final String message = "Could not mark slot " + offer.getAllocationId()
                    + " of job " + jobId + " active.";
                log.debug(message);
                jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), leaderId, new Exception(message));
            }
        } catch (SlotNotFoundException e) {
            // Keep the original exception as the cause so the JobManager can see why.
            final String message = "Could not mark slot " + offer.getAllocationId()
                + " of job " + jobId + " active.";
            log.debug(message, e);
            jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), leaderId, new Exception(message, e));
        }
    }

    Future<Iterable<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots(getResourceID(), reservedSlots, leaderId, taskManagerConfiguration.getTimeout());

    acceptedSlotsFuture.thenAcceptAsync(new AcceptFunction<Iterable<SlotOffer>>() {

        @Override
        public void accept(Iterable<SlotOffer> acceptedSlots) {
            // check if the response is still valid
            if (isJobManagerConnectionValid(jobId, leaderId)) {
                // drop the accepted offers; whatever remains was rejected
                for (SlotOffer acceptedSlot : acceptedSlots) {
                    reservedSlots.remove(acceptedSlot);
                }
                final Exception e = new Exception("The slot was rejected by the JobManager.");
                for (SlotOffer rejectedSlot : reservedSlots) {
                    freeSlot(rejectedSlot.getAllocationId(), e);
                }
            } else {
                // discard the response since there is a new leader for the job
                log.debug("Discard offer slot response since there is a new leader for the job {}.", jobId);
            }
        }
    }, getMainThreadExecutor());

    acceptedSlotsFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {

        @Override
        public Void apply(Throwable throwable) {
            if (throwable instanceof TimeoutException) {
                // We ran into a timeout. Try again.
                offerSlotsToJobManager(jobId);
            } else {
                // We encountered an exception. Free the slots and return them to the RM.
                for (SlotOffer reservedSlot : reservedSlots) {
                    freeSlot(reservedSlot.getAllocationId(), throwable);
                }
            }
            return null;
        }
    }, getMainThreadExecutor());
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) TaskSlot(org.apache.flink.runtime.taskexecutor.slot.TaskSlot) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) TimeoutException(java.util.concurrent.TimeoutException) PartitionException(org.apache.flink.runtime.taskexecutor.exceptions.PartitionException) CheckpointException(org.apache.flink.runtime.taskexecutor.exceptions.CheckpointException) SlotAllocationException(org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) TaskException(org.apache.flink.runtime.taskexecutor.exceptions.TaskException) SlotNotActiveException(org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) IOException(java.io.IOException) UUID(java.util.UUID) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)

Example 2 with TaskSlot

use of org.apache.flink.runtime.taskexecutor.slot.TaskSlot in project flink by apache.

the class TaskExecutor method internalOfferSlotsToJobManager.

/**
 * Offers every slot currently allocated for the connection's job to its JobMaster.
 *
 * <p>When the job has no allocated slots the method only logs and returns. Otherwise it
 * collects one {@link SlotOffer} per allocated slot, records a fresh random offer id for
 * the job in {@code currentSlotOfferPerJob}, sends the offers, and hands the response to
 * {@code handleAcceptedSlotOffers} on the main thread executor.
 *
 * @param jobManagerConnection connection to the JobMaster that should receive the offers
 */
private void internalOfferSlotsToJobManager(JobTable.Connection jobManagerConnection) {
    final JobID jobId = jobManagerConnection.getJobId();

    if (!taskSlotTable.hasAllocatedSlots(jobId)) {
        log.debug("There are no unassigned slots for the job {}.", jobId);
        return;
    }

    log.info("Offer reserved slots to the leader of job {}.", jobId);

    final JobMasterGateway gateway = jobManagerConnection.getJobManagerGateway();
    final Iterator<TaskSlot<Task>> allocatedSlots = taskSlotTable.getAllocatedSlots(jobId);
    final JobMasterId masterId = jobManagerConnection.getJobMasterId();

    // Build the offer set from all slots allocated for this job.
    final Collection<SlotOffer> offers = new HashSet<>(2);
    allocatedSlots.forEachRemaining(slot -> offers.add(slot.generateSlotOffer()));

    // Register the id of this offer round for the job before the offer is sent.
    final UUID offerId = UUID.randomUUID();
    currentSlotOfferPerJob.put(jobId, offerId);

    final CompletableFuture<Collection<SlotOffer>> response =
        gateway.offerSlots(getResourceID(), offers, taskManagerConfiguration.getRpcTimeout());
    response.whenCompleteAsync(
        handleAcceptedSlotOffers(jobId, gateway, masterId, offers, offerId),
        getMainThreadExecutor());
}
Also used : SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) TaskSlot(org.apache.flink.runtime.taskexecutor.slot.TaskSlot) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Collection(java.util.Collection) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) HashSet(java.util.HashSet)

Example 3 with TaskSlot

use of org.apache.flink.runtime.taskexecutor.slot.TaskSlot in project flink by apache.

the class TaskExecutor method freeInactiveSlots.

/**
 * Frees every slot that the task slot table reports as allocated for the given job.
 *
 * <p>Each slot is released through {@code freeSlotInternal} with a {@link FlinkException}
 * stating that the slot was re-claimed by the resource manager.
 *
 * @param jobId id of the job whose slots are freed
 * @param timeout not used by this implementation
 */
@Override
public void freeInactiveSlots(JobID jobId, Time timeout) {
    log.debug("Freeing inactive slots for job {}.", jobId);

    // Work on a snapshot so that freeing a slot cannot disturb the iteration
    // (guards against ConcurrentModificationException).
    final ImmutableList<TaskSlot<Task>> slotsToFree =
        ImmutableList.copyOf(taskSlotTable.getAllocatedSlots(jobId));

    slotsToFree.forEach(
        taskSlot ->
            freeSlotInternal(
                taskSlot.getAllocationId(),
                new FlinkException("Slot was re-claimed by resource manager.")));
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) TaskSlot(org.apache.flink.runtime.taskexecutor.slot.TaskSlot) FlinkException(org.apache.flink.util.FlinkException)

Aggregations

TaskSlot (org.apache.flink.runtime.taskexecutor.slot.TaskSlot)3 HashSet (java.util.HashSet)2 UUID (java.util.UUID)2 JobMasterGateway (org.apache.flink.runtime.jobmaster.JobMasterGateway)2 SlotOffer (org.apache.flink.runtime.taskexecutor.slot.SlotOffer)2 IOException (java.io.IOException)1 Collection (java.util.Collection)1 TimeoutException (java.util.concurrent.TimeoutException)1 JobID (org.apache.flink.api.common.JobID)1 JobMasterId (org.apache.flink.runtime.jobmaster.JobMasterId)1 CheckpointException (org.apache.flink.runtime.taskexecutor.exceptions.CheckpointException)1 PartitionException (org.apache.flink.runtime.taskexecutor.exceptions.PartitionException)1 SlotAllocationException (org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException)1 TaskException (org.apache.flink.runtime.taskexecutor.exceptions.TaskException)1 TaskSubmissionException (org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException)1 SlotNotActiveException (org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException)1 SlotNotFoundException (org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException)1 Task (org.apache.flink.runtime.taskmanager.Task)1 FlinkException (org.apache.flink.util.FlinkException)1