
Example 1 with TaskAttemptId

Use of org.apache.hyracks.api.dataflow.TaskAttemptId in project asterixdb by apache.

From class CCNCFunctions, method readTaskAttemptId.

private static TaskAttemptId readTaskAttemptId(DataInputStream dis) throws IOException {
    // Wire format: four ints, in order: operator descriptor id, activity id,
    // partition, attempt number.
    int odid = dis.readInt();
    int aid = dis.readInt();
    int partition = dis.readInt();
    int attempt = dis.readInt();
    return new TaskAttemptId(new TaskId(new ActivityId(new OperatorDescriptorId(odid), aid), partition), attempt);
}
Also used: OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId), TaskId (org.apache.hyracks.api.dataflow.TaskId), TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId), ActivityId (org.apache.hyracks.api.dataflow.ActivityId)
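
The read side implies a simple four-int wire format: operator descriptor id, activity id, partition, attempt. For illustration, a matching write-side helper might look like the sketch below; the method name writeTaskAttemptId and the getters getOperatorDescriptorId(), getLocalId(), and getId() are assumptions, since only getTaskId(), getActivityId(), getPartition(), and getAttempt() appear in the examples on this page.

private static void writeTaskAttemptId(DataOutputStream dos, TaskAttemptId taId) throws IOException {
    // Hypothetical counterpart to readTaskAttemptId: same four ints, same order.
    TaskId tid = taId.getTaskId();
    ActivityId aid = tid.getActivityId();
    dos.writeInt(aid.getOperatorDescriptorId().getId()); // assumed getter chain
    dos.writeInt(aid.getLocalId()); // assumed getter
    dos.writeInt(tid.getPartition());
    dos.writeInt(taId.getAttempt());
}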

Example 2 with TaskAttemptId

Use of org.apache.hyracks.api.dataflow.TaskAttemptId in project asterixdb by apache.

From class AbortTasksWork, method run.

@Override
public void run() {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Aborting Tasks: " + jobId + ":" + tasks);
    }
    IDatasetPartitionManager dpm = ncs.getDatasetPartitionManager();
    if (dpm != null) {
        // Abort any result readers for this job before aborting its tasks.
        dpm.abortReader(jobId);
    }
    Joblet ji = ncs.getJobletMap().get(jobId);
    if (ji != null) {
        Map<TaskAttemptId, Task> taskMap = ji.getTaskMap();
        for (TaskAttemptId taId : tasks) {
            Task task = taskMap.get(taId);
            if (task != null) {
                task.abort();
            }
        }
    } else {
        LOGGER.log(Level.WARNING, "Joblet couldn't be found. Tasks of job " + jobId + " have all either completed or failed");
    }
}
Also used: IDatasetPartitionManager (org.apache.hyracks.api.dataset.IDatasetPartitionManager), Task (org.apache.hyracks.control.nc.Task), TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId), Joblet (org.apache.hyracks.control.nc.Joblet)
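
The taskMap.get(taId) lookup above depends on TaskAttemptId behaving as a value: an id that arrives over the wire must hash and compare equal to the id under which the Task was registered. A minimal sketch of that property, using only the constructors shown in Example 1 (the String value stands in for a real Task):

Map<TaskAttemptId, String> taskMap = new HashMap<>();
TaskAttemptId registered = new TaskAttemptId(
        new TaskId(new ActivityId(new OperatorDescriptorId(1), 0), 2), 0);
TaskAttemptId received = new TaskAttemptId(
        new TaskId(new ActivityId(new OperatorDescriptorId(1), 0), 2), 0);
taskMap.put(registered, "task");
assert taskMap.get(received) != null; // value equality, not reference identity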

Example 3 with TaskAttemptId

Use of org.apache.hyracks.api.dataflow.TaskAttemptId in project asterixdb by apache.

From class JobExecutor, method abortTaskCluster.

private void abortTaskCluster(TaskClusterAttempt tcAttempt, TaskClusterAttempt.TaskClusterStatus failedOrAbortedStatus) {
    LOGGER.fine("Aborting task cluster: " + tcAttempt.getAttempt());
    Set<TaskAttemptId> abortTaskIds = new HashSet<>();
    Map<String, List<TaskAttemptId>> abortTaskAttemptMap = new HashMap<>();
    for (TaskAttempt ta : tcAttempt.getTaskAttempts().values()) {
        TaskAttemptId taId = ta.getTaskAttemptId();
        TaskAttempt.TaskStatus status = ta.getStatus();
        abortTaskIds.add(taId);
        LOGGER.fine("Checking " + taId + ": " + ta.getStatus());
        if (status == TaskAttempt.TaskStatus.RUNNING || status == TaskAttempt.TaskStatus.COMPLETED) {
            ta.setStatus(TaskAttempt.TaskStatus.ABORTED, null);
            ta.setEndTime(System.currentTimeMillis());
            // Only RUNNING attempts still need an abort request sent to their node;
            // COMPLETED attempts are just marked ABORTED locally.
            if (status == TaskAttempt.TaskStatus.RUNNING) {
                abortTaskAttemptMap.computeIfAbsent(ta.getNodeId(), k -> new ArrayList<>()).add(taId);
            }
        }
    }
    final JobId jobId = jobRun.getJobId();
    LOGGER.fine("Abort map for job: " + jobId + ": " + abortTaskAttemptMap);
    INodeManager nodeManager = ccs.getNodeManager();
    for (Map.Entry<String, List<TaskAttemptId>> entry : abortTaskAttemptMap.entrySet()) {
        final NodeControllerState node = nodeManager.getNodeControllerState(entry.getKey());
        final List<TaskAttemptId> abortTaskAttempts = entry.getValue();
        if (node != null) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Aborting: " + abortTaskAttempts + " at " + entry.getKey());
            }
            try {
                node.getNodeController().abortTasks(jobId, abortTaskAttempts);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, e.getMessage(), e);
            }
        }
    }
    inProgressTaskClusters.remove(tcAttempt.getTaskCluster());
    TaskCluster tc = tcAttempt.getTaskCluster();
    PartitionMatchMaker pmm = jobRun.getPartitionMatchMaker();
    pmm.removeUncommittedPartitions(tc.getProducedPartitions(), abortTaskIds);
    pmm.removePartitionRequests(tc.getRequiredPartitions(), abortTaskIds);
    tcAttempt.setStatus(failedOrAbortedStatus);
    tcAttempt.setEndTime(System.currentTimeMillis());
}
Also used: INodeManager (org.apache.hyracks.control.cc.cluster.INodeManager), HashMap (java.util.HashMap), TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId), PartitionMatchMaker (org.apache.hyracks.control.cc.partitions.PartitionMatchMaker), HyracksException (org.apache.hyracks.api.exceptions.HyracksException), TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster), ArrayList (java.util.ArrayList), List (java.util.List), TaskAttempt (org.apache.hyracks.control.cc.job.TaskAttempt), NodeControllerState (org.apache.hyracks.control.cc.NodeControllerState), Map (java.util.Map), JobId (org.apache.hyracks.api.job.JobId), HashSet (java.util.HashSet)
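
abortTaskCluster groups abortable attempts by node id with Map.computeIfAbsent, which creates the per-node list on first use; it is equivalent to the classic get, create-if-null, put sequence. A self-contained demonstration of the idiom:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByNodeDemo {
    public static void main(String[] args) {
        Map<String, List<Integer>> byNode = new HashMap<>();
        // Same effect as: get the list, create and put it if null, then add.
        byNode.computeIfAbsent("nc1", k -> new ArrayList<>()).add(42);
        byNode.computeIfAbsent("nc1", k -> new ArrayList<>()).add(43);
        System.out.println(byNode); // prints {nc1=[42, 43]}
    }
}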

Example 4 with TaskAttemptId

Use of org.apache.hyracks.api.dataflow.TaskAttemptId in project asterixdb by apache.

From class JobExecutor, method notifyTaskFailure.

/**
 * Indicates that a single task attempt has encountered a failure.
 * @param ta the failed task attempt
 * @param exceptions the exceptions thrown during the failure
 */
public void notifyTaskFailure(TaskAttempt ta, List<Exception> exceptions) {
    try {
        LOGGER.fine("Received failure notification for TaskAttempt " + ta.getTaskAttemptId());
        TaskAttemptId taId = ta.getTaskAttemptId();
        TaskCluster tc = ta.getTask().getTaskCluster();
        TaskClusterAttempt lastAttempt = findLastTaskClusterAttempt(tc);
        if (lastAttempt != null && taId.getAttempt() == lastAttempt.getAttempt()) {
            LOGGER.fine("Marking TaskAttempt " + ta.getTaskAttemptId() + " as failed");
            ta.setStatus(TaskAttempt.TaskStatus.FAILED, exceptions);
            abortTaskCluster(lastAttempt, TaskClusterAttempt.TaskClusterStatus.FAILED);
            abortDoomedTaskClusters();
            if (lastAttempt.getAttempt() >= jobRun.getActivityClusterGraph().getMaxReattempts() || isCancelled()) {
                abortJob(exceptions);
                return;
            }
            startRunnableActivityClusters();
        } else {
            LOGGER.warning("Ignoring task failure notification: " + taId + " -- Current last attempt = " + lastAttempt);
        }
    } catch (Exception e) {
        abortJob(Collections.singletonList(e));
    }
}
Also used: TaskClusterAttempt (org.apache.hyracks.control.cc.job.TaskClusterAttempt), TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId), TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster), HyracksException (org.apache.hyracks.api.exceptions.HyracksException)
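
Stripped of the Hyracks types, notifyTaskFailure makes a three-way decision: ignore a stale notification (the failure belongs to an older attempt), retry (abort the cluster attempt and reschedule), or give up once the budget from getMaxReattempts() is exhausted or the job is cancelled. A self-contained sketch of that decision, with plain ints standing in for the attempt objects (all names here are hypothetical):

enum FailureAction { IGNORE_STALE, RETRY, ABORT_JOB }

final class RetryPolicy {
    private final int maxReattempts;

    RetryPolicy(int maxReattempts) {
        this.maxReattempts = maxReattempts;
    }

    FailureAction onTaskFailure(int failedAttempt, int currentAttempt, boolean cancelled) {
        if (failedAttempt != currentAttempt) {
            return FailureAction.IGNORE_STALE; // notification raced with a newer attempt
        }
        if (currentAttempt >= maxReattempts || cancelled) {
            return FailureAction.ABORT_JOB; // retry budget spent or job cancelled
        }
        return FailureAction.RETRY; // abort this cluster attempt and reschedule
    }
}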

Example 5 with TaskAttemptId

Use of org.apache.hyracks.api.dataflow.TaskAttemptId in project asterixdb by apache.

From class StartTasksWork, method run.

@Override
public void run() {
    Task task = null;
    try {
        NCServiceContext serviceCtx = ncs.getContext();
        Joblet joblet = getOrCreateLocalJoblet(deploymentId, jobId, serviceCtx, acgBytes);
        final ActivityClusterGraph acg = joblet.getActivityClusterGraph();
        // Resolves input/output record descriptors through the activity
        // cluster's connector maps.
        IRecordDescriptorProvider rdp = new IRecordDescriptorProvider() {

            @Override
            public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityOutputMap().get(aid).get(outputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }

            @Override
            public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityInputMap().get(aid).get(inputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }
        };
        // Create, wire up, and start one Task per attempt assigned to this node.
        for (TaskAttemptDescriptor td : taskDescriptors) {
            TaskAttemptId taId = td.getTaskAttemptId();
            TaskId tid = taId.getTaskId();
            ActivityId aid = tid.getActivityId();
            ActivityCluster ac = acg.getActivityMap().get(aid);
            IActivity han = ac.getActivityMap().get(aid);
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info("Initializing " + taId + " -> " + han);
            }
            final int partition = tid.getPartition();
            List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(aid);
            task = new Task(joblet, taId, han.getClass().getName(), ncs.getExecutor(), ncs, createInputChannels(td, inputs));
            IOperatorNodePushable operator = han.createPushRuntime(task, rdp, partition, td.getPartitionCount());
            List<IPartitionCollector> collectors = new ArrayList<>();
            if (inputs != null) {
                for (int i = 0; i < inputs.size(); ++i) {
                    IConnectorDescriptor conn = inputs.get(i);
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("input: " + i + ": " + conn.getConnectorId());
                    }
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IPartitionCollector collector = createPartitionCollector(td, partition, task, i, conn, recordDesc, cPolicy);
                    collectors.add(collector);
                }
            }
            List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(aid);
            if (outputs != null) {
                for (int i = 0; i < outputs.size(); ++i) {
                    final IConnectorDescriptor conn = outputs.get(i);
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    IPartitionWriterFactory pwFactory = createPartitionWriterFactory(task, cPolicy, jobId, conn, partition, taId, flags);
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("output: " + i + ": " + conn.getConnectorId());
                    }
                    IFrameWriter writer = conn.createPartitioner(task, recordDesc, pwFactory, partition, td.getPartitionCount(), td.getOutputPartitionCounts()[i]);
                    operator.setOutputFrameWriter(i, writer, recordDesc);
                }
            }
            task.setTaskRuntime(collectors.toArray(new IPartitionCollector[collectors.size()]), operator);
            joblet.addTask(task);
            task.start();
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Failure starting a task", e);
        // notify cc of start task failure
        List<Exception> exceptions = new ArrayList<>();
        ExceptionUtils.setNodeIds(exceptions, ncs.getId());
        ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, task, exceptions));
    }
}
Also used: IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter), Task (org.apache.hyracks.control.nc.Task), TaskId (org.apache.hyracks.api.dataflow.TaskId), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), ActivityId (org.apache.hyracks.api.dataflow.ActivityId), ArrayList (java.util.ArrayList), IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider), Joblet (org.apache.hyracks.control.nc.Joblet), IActivity (org.apache.hyracks.api.dataflow.IActivity), NCServiceContext (org.apache.hyracks.control.nc.application.NCServiceContext), INCServiceContext (org.apache.hyracks.api.application.INCServiceContext), List (java.util.List), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), IPartitionCollector (org.apache.hyracks.api.comm.IPartitionCollector), TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId), IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy), IPartitionWriterFactory (org.apache.hyracks.api.comm.IPartitionWriterFactory), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), UnknownHostException (java.net.UnknownHostException), HyracksException (org.apache.hyracks.api.exceptions.HyracksException), ActivityCluster (org.apache.hyracks.api.job.ActivityCluster), TaskAttemptDescriptor (org.apache.hyracks.control.common.job.TaskAttemptDescriptor), ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph), IOperatorNodePushable (org.apache.hyracks.api.dataflow.IOperatorNodePushable)
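
StartTasksWork unpacks the nested identity (taId to tid to aid) to locate the activity in the graph. A runnable sketch of that decomposition, using only constructors and getters that appear in the examples on this page:

import org.apache.hyracks.api.dataflow.ActivityId;
import org.apache.hyracks.api.dataflow.OperatorDescriptorId;
import org.apache.hyracks.api.dataflow.TaskAttemptId;
import org.apache.hyracks.api.dataflow.TaskId;

public class TaskAttemptIdDemo {
    public static void main(String[] args) {
        // Operator 7, activity 0 of that operator, partition 3, attempt 1.
        TaskAttemptId taId = new TaskAttemptId(
                new TaskId(new ActivityId(new OperatorDescriptorId(7), 0), 3), 1);
        TaskId tid = taId.getTaskId(); // as in StartTasksWork.run
        ActivityId aid = tid.getActivityId();
        System.out.println(aid + " partition=" + tid.getPartition()
                + " attempt=" + taId.getAttempt());
    }
}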

Aggregations

TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId): 9 usages
ActivityId (org.apache.hyracks.api.dataflow.ActivityId): 4 usages
TaskId (org.apache.hyracks.api.dataflow.TaskId): 4 usages
HyracksException (org.apache.hyracks.api.exceptions.HyracksException): 4 usages
ArrayList (java.util.ArrayList): 3 usages
List (java.util.List): 3 usages
OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId): 3 usages
TaskAttempt (org.apache.hyracks.control.cc.job.TaskAttempt): 3 usages
TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster): 3 usages
TaskClusterAttempt (org.apache.hyracks.control.cc.job.TaskClusterAttempt): 3 usages
HashMap (java.util.HashMap): 2 usages
Map (java.util.Map): 2 usages
INCServiceContext (org.apache.hyracks.api.application.INCServiceContext): 2 usages
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 2 usages
IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy): 2 usages
ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph): 2 usages
JobId (org.apache.hyracks.api.job.JobId): 2 usages
INodeManager (org.apache.hyracks.control.cc.cluster.INodeManager): 2 usages
TaskAttemptDescriptor (org.apache.hyracks.control.common.job.TaskAttemptDescriptor): 2 usages
Joblet (org.apache.hyracks.control.nc.Joblet): 2 usages