Search in sources :

Example 1 with TaskClusterAttempt

use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.

In the class JobExecutor, the method notifyTaskFailure:

/**
     * Indicates that a single task attempt has encountered a failure.
     *
     * @param ta Failed Task Attempt
     * @param exceptions exceptions thrown during the failure
     */
public void notifyTaskFailure(TaskAttempt ta, List<Exception> exceptions) {
    try {
        TaskAttemptId taId = ta.getTaskAttemptId();
        // Guard fine-level logging to avoid building the message string when it is
        // not going to be emitted (matches the pattern used elsewhere in this class).
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Received failure notification for TaskAttempt " + taId);
        }
        TaskCluster tc = ta.getTask().getTaskCluster();
        TaskClusterAttempt lastAttempt = findLastTaskClusterAttempt(tc);
        // Only act on failures that belong to the cluster's most recent attempt;
        // anything else is a stale notification from a superseded attempt.
        if (lastAttempt != null && taId.getAttempt() == lastAttempt.getAttempt()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Marking TaskAttempt " + taId + " as failed");
            }
            ta.setStatus(TaskAttempt.TaskStatus.FAILED, exceptions);
            // Abort the whole task-cluster attempt, then any clusters that can no
            // longer make progress because of it.
            abortTaskCluster(lastAttempt, TaskClusterAttempt.TaskClusterStatus.FAILED);
            abortDoomedTaskClusters();
            // Give up on the job once the re-attempt budget is exhausted or the job
            // has been cancelled; otherwise try to schedule more work.
            if (lastAttempt.getAttempt() >= jobRun.getActivityClusterGraph().getMaxReattempts() || isCancelled()) {
                abortJob(exceptions);
                return;
            }
            startRunnableActivityClusters();
        } else {
            LOGGER.warning("Ignoring task failure notification: " + taId + " -- Current last attempt = " + lastAttempt);
        }
    } catch (Exception e) {
        // Any failure while handling the failure itself dooms the job.
        abortJob(Collections.singletonList(e));
    }
}
Also used : TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) TaskAttemptId(org.apache.hyracks.api.dataflow.TaskAttemptId) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) HyracksException(org.apache.hyracks.api.exceptions.HyracksException)

Example 2 with TaskClusterAttempt

use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.

In the class JobExecutor, the method startRunnableTaskClusters:

/**
 * Starts every currently-runnable task cluster reachable from the given roots,
 * assigning task locations in ascending rank (priority) order.
 *
 * @param tcRoots root task clusters from which runnability is evaluated
 * @throws HyracksException if task-location assignment or task startup fails
 */
private void startRunnableTaskClusters(Set<TaskCluster> tcRoots) throws HyracksException {
    Map<TaskCluster, Runnability> runnabilityMap = new HashMap<>();
    for (TaskCluster tc : tcRoots) {
        assignRunnabilityRank(tc, runnabilityMap);
    }
    PriorityQueue<RankedRunnableTaskCluster> queue = new PriorityQueue<>();
    for (Map.Entry<TaskCluster, Runnability> e : runnabilityMap.entrySet()) {
        TaskCluster tc = e.getKey();
        Runnability runnability = e.getValue();
        if (runnability.getTag() != Runnability.Tag.RUNNABLE) {
            continue;
        }
        int priority = runnability.getPriority();
        // Only finite, non-negative ranks are schedulable here.
        if (priority >= 0 && priority < Integer.MAX_VALUE) {
            queue.add(new RankedRunnableTaskCluster(priority, tc));
        }
    }
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("Ranked TCs: " + queue);
    }
    Map<String, List<TaskAttemptDescriptor>> taskAttemptMap = new HashMap<>();
    // Drain with poll() so clusters are processed in actual rank order: iterating a
    // PriorityQueue with for-each traverses the backing heap in no particular order,
    // which would defeat the ranking computed above.
    for (RankedRunnableTaskCluster rrtc = queue.poll(); rrtc != null; rrtc = queue.poll()) {
        TaskCluster tc = rrtc.getTaskCluster();
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Found runnable TC: " + tc);
            List<TaskClusterAttempt> attempts = tc.getAttempts();
            LOGGER.fine("Attempts so far:" + attempts.size());
            for (TaskClusterAttempt tcAttempt : attempts) {
                LOGGER.fine("Status: " + tcAttempt.getStatus());
            }
        }
        assignTaskLocations(tc, taskAttemptMap);
    }
    if (taskAttemptMap.isEmpty()) {
        return;
    }
    startTasks(taskAttemptMap);
}
Also used : HashMap(java.util.HashMap) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) PriorityQueue(java.util.PriorityQueue) Constraint(org.apache.hyracks.api.constraints.Constraint) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with TaskClusterAttempt

use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.

In the class JobExecutor, the method findDoomedTaskClusters:

/**
 * Determines whether the given task cluster is "doomed" (cannot make progress),
 * recursively collecting every doomed cluster into {@code doomedTaskClusters}.
 *
 * @param tc the task cluster to examine
 * @param doomedTaskClusters accumulator of clusters found to be doomed
 * @return {@code true} if {@code tc} is doomed
 */
private boolean findDoomedTaskClusters(TaskCluster tc, Set<TaskCluster> doomedTaskClusters) {
    // Already classified as doomed on an earlier visit.
    if (doomedTaskClusters.contains(tc)) {
        return true;
    }
    TaskClusterAttempt mostRecentAttempt = findLastTaskClusterAttempt(tc);
    if (mostRecentAttempt != null) {
        TaskClusterAttempt.TaskClusterStatus status = mostRecentAttempt.getStatus();
        // A cluster whose latest attempt has already terminated is never doomed.
        if (status == TaskClusterAttempt.TaskClusterStatus.ABORTED
                || status == TaskClusterAttempt.TaskClusterStatus.FAILED
                || status == TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
            return false;
        }
    }
    Map<ConnectorDescriptorId, IConnectorPolicy> policies = jobRun.getConnectorPolicyMap();
    PartitionMatchMaker matchMaker = jobRun.getPartitionMatchMaker();
    boolean doomed = false;
    // Doomed if any dependency cluster is doomed. Every dependency is visited
    // (no short-circuit) so the accumulator set is fully populated.
    for (TaskCluster dependency : tc.getDependencyTaskClusters()) {
        doomed |= findDoomedTaskClusters(dependency, doomedTaskClusters);
    }
    // Also doomed if a required partition is unavailable and its producing
    // cluster is itself doomed.
    for (PartitionId partition : tc.getRequiredPartitions()) {
        IConnectorPolicy policy = policies.get(partition.getConnectorDescriptorId());
        PartitionState availableState = matchMaker.getMaximumAvailableState(partition);
        // The policy is only consulted when some partition state exists, mirroring
        // the short-circuit on the null check.
        boolean partitionUnavailable = availableState == null
                || (policy.consumerWaitsForProducerToFinish() && availableState != PartitionState.COMMITTED);
        if (partitionUnavailable
                && findDoomedTaskClusters(partitionProducingTaskClusterMap.get(partition), doomedTaskClusters)) {
            doomed = true;
        }
    }
    if (doomed) {
        doomedTaskClusters.add(tc);
    }
    return doomed;
}
Also used : TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) IConnectorPolicy(org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) PartitionState(org.apache.hyracks.control.common.job.PartitionState) PartitionMatchMaker(org.apache.hyracks.control.cc.partitions.PartitionMatchMaker) PartitionId(org.apache.hyracks.api.partitions.PartitionId)

Example 4 with TaskClusterAttempt

use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.

In the class JobExecutor, the method abortOngoingTaskClusters:

/**
 * Aborts ongoing task clusters across all planned activity clusters.
 *
 * @param taskFilter
 *            selects task attempts that should be directly marked as failed without the aborting RPC.
 * @param exceptionGenerator
 *            produces the exception attached to each directly-failed task attempt.
 */
private void abortOngoingTaskClusters(ITaskFilter taskFilter, IExceptionGenerator exceptionGenerator) throws HyracksException {
    for (ActivityCluster ac : jobRun.getActivityClusterGraph().getActivityClusterMap().values()) {
        if (!isPlanned(ac)) {
            continue;
        }
        TaskCluster[] taskClusters = getActivityClusterPlan(ac).getTaskClusters();
        if (taskClusters == null) {
            continue;
        }
        for (TaskCluster tc : taskClusters) {
            TaskClusterAttempt latestAttempt = findLastTaskClusterAttempt(tc);
            if (latestAttempt == null) {
                continue;
            }
            TaskClusterAttempt.TaskClusterStatus status = latestAttempt.getStatus();
            // Only COMPLETED or RUNNING cluster attempts are considered ongoing.
            boolean ongoing = status == TaskClusterAttempt.TaskClusterStatus.COMPLETED
                    || status == TaskClusterAttempt.TaskClusterStatus.RUNNING;
            if (!ongoing) {
                continue;
            }
            boolean needsAbort = false;
            for (TaskAttempt ta : latestAttempt.getTaskAttempts().values()) {
                assert ta.getStatus() == TaskAttempt.TaskStatus.COMPLETED || ta.getStatus() == TaskAttempt.TaskStatus.RUNNING;
                if (taskFilter.directlyMarkAsFailed(ta)) {
                    // Fail the attempt in place; no abort RPC is issued for it.
                    ta.setStatus(TaskAttempt.TaskStatus.FAILED, Collections.singletonList(exceptionGenerator.getException(ta)));
                    ta.setEndTime(System.currentTimeMillis());
                    needsAbort = true;
                }
            }
            if (needsAbort) {
                abortTaskCluster(latestAttempt, TaskClusterAttempt.TaskClusterStatus.ABORTED);
            }
        }
        abortDoomedTaskClusters();
    }
}
Also used : TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster)

Example 5 with TaskClusterAttempt

use of org.apache.hyracks.control.cc.job.TaskClusterAttempt in project asterixdb by apache.

In the class JobExecutor, the method findTaskLocation:

/**
 * Resolves the node on which the given task is (or was last) running.
 *
 * @param tid the task identifier
 * @return the node id of the task's attempt in the most recent task-cluster attempt,
 *         or {@code null} if no such attempt exists
 */
private String findTaskLocation(TaskId tid) {
    ActivityId aid = tid.getActivityId();
    ActivityCluster ac = jobRun.getActivityClusterGraph().getActivityMap().get(aid);
    Task task = getActivityClusterPlan(ac).getActivityPlanMap().get(aid).getTasks()[tid.getPartition()];
    List<TaskClusterAttempt> attempts = task.getTaskCluster().getAttempts();
    if (attempts == null || attempts.isEmpty()) {
        return null;
    }
    // Only the latest cluster attempt is authoritative for the task's location.
    TaskClusterAttempt latest = attempts.get(attempts.size() - 1);
    TaskAttempt attempt = latest.getTaskAttempts().get(tid);
    if (attempt == null) {
        return null;
    }
    return attempt.getNodeId();
}
Also used : Task(org.apache.hyracks.control.cc.job.Task) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster)

Aggregations

TaskClusterAttempt (org.apache.hyracks.control.cc.job.TaskClusterAttempt)10 TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster)7 TaskAttempt (org.apache.hyracks.control.cc.job.TaskAttempt)5 ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)4 ActivityId (org.apache.hyracks.api.dataflow.ActivityId)3 ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId)3 TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId)3 IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy)3 Task (org.apache.hyracks.control.cc.job.Task)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Constraint (org.apache.hyracks.api.constraints.Constraint)2 TaskId (org.apache.hyracks.api.dataflow.TaskId)2 PartitionId (org.apache.hyracks.api.partitions.PartitionId)2 PartitionMatchMaker (org.apache.hyracks.control.cc.partitions.PartitionMatchMaker)2 PartitionState (org.apache.hyracks.control.common.job.PartitionState)2 PriorityQueue (java.util.PriorityQueue)1 NetworkAddress (org.apache.hyracks.api.comm.NetworkAddress)1