
Example 6 with ActivityCluster

use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.

the class ActivityClusterGraphBuilder method inferActivityClusters.

public ActivityClusterGraph inferActivityClusters(JobId jobId, JobActivityGraph jag) {
    /*
     * Build the initial equivalence-set map: for each activity id a, create the singleton set a -> { a }.
     */
    Map<ActivityId, Set<ActivityId>> stageMap = new HashMap<ActivityId, Set<ActivityId>>();
    Set<Set<ActivityId>> stages = new HashSet<Set<ActivityId>>();
    for (ActivityId taskId : jag.getActivityMap().keySet()) {
        Set<ActivityId> eqSet = new HashSet<ActivityId>();
        eqSet.add(taskId);
        stageMap.put(taskId, eqSet);
        stages.add(eqSet);
    }
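    // Merge equivalence sets until no merge candidate remains (see the findMergePair/merge
    // sketch after this example); each surviving set becomes one activity cluster.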
    boolean changed = true;
    while (changed) {
        changed = false;
        Pair<ActivityId, ActivityId> pair = findMergePair(jag, stages);
        if (pair != null) {
            merge(stageMap, stages, pair.getLeft(), pair.getRight());
            changed = true;
        }
    }
    ActivityClusterGraph acg = new ActivityClusterGraph();
    Map<ActivityId, ActivityCluster> acMap = new HashMap<ActivityId, ActivityCluster>();
    int acCounter = 0;
    Map<ActivityId, IActivity> activityNodeMap = jag.getActivityMap();
    List<ActivityCluster> acList = new ArrayList<ActivityCluster>();
    for (Set<ActivityId> stage : stages) {
        ActivityCluster ac = new ActivityCluster(acg, new ActivityClusterId(jobId, acCounter++));
        acList.add(ac);
        for (ActivityId aid : stage) {
            IActivity activity = activityNodeMap.get(aid);
            ac.addActivity(activity);
            acMap.put(aid, ac);
        }
    }
    for (Set<ActivityId> stage : stages) {
        for (ActivityId aid : stage) {
            IActivity activity = activityNodeMap.get(aid);
            ActivityCluster ac = acMap.get(aid);
            List<IConnectorDescriptor> aOutputs = jag.getActivityOutputMap().get(aid);
            if (aOutputs == null || aOutputs.isEmpty()) {
                ac.addRoot(activity);
            } else {
                int nActivityOutputs = aOutputs.size();
                for (int i = 0; i < nActivityOutputs; ++i) {
                    IConnectorDescriptor conn = aOutputs.get(i);
                    ac.addConnector(conn);
                    Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> pcPair = jag.getConnectorActivityMap().get(conn.getConnectorId());
                    ac.connect(conn, activity, i, pcPair.getRight().getLeft(), pcPair.getRight().getRight(), jag.getConnectorRecordDescriptorMap().get(conn.getConnectorId()));
                }
            }
        }
    }
    Map<ActivityId, Set<ActivityId>> blocked2BlockerMap = jag.getBlocked2BlockerMap();
    for (ActivityCluster s : acList) {
        Map<ActivityId, Set<ActivityId>> acBlocked2BlockerMap = s.getBlocked2BlockerMap();
        Set<ActivityCluster> blockerStages = new HashSet<ActivityCluster>();
        for (ActivityId t : s.getActivityMap().keySet()) {
            Set<ActivityId> blockerTasks = blocked2BlockerMap.get(t);
            acBlocked2BlockerMap.put(t, blockerTasks);
            if (blockerTasks != null) {
                for (ActivityId bt : blockerTasks) {
                    blockerStages.add(acMap.get(bt));
                }
            }
        }
        for (ActivityCluster bs : blockerStages) {
            s.getDependencies().add(bs);
        }
    }
    acg.addActivityClusters(acList);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine(acg.toJSON().asText());
    }
    return acg;
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), Set (java.util.Set), HashSet (java.util.HashSet), HashMap (java.util.HashMap), ActivityId (org.apache.hyracks.api.dataflow.ActivityId), ArrayList (java.util.ArrayList), ActivityCluster (org.apache.hyracks.api.job.ActivityCluster), IActivity (org.apache.hyracks.api.dataflow.IActivity), ActivityClusterId (org.apache.hyracks.api.job.ActivityClusterId), ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph), Pair (org.apache.commons.lang3.tuple.Pair)
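
The helpers findMergePair and merge are called above but not shown on this page. A minimal sketch of what they might look like, assuming findMergePair returns the first pair of connector-joined activities whose equivalence sets still differ and merge unions those sets (illustrative only, not the project's actual code; IActivity.getActivityId() is assumed available):

// Illustrative sketch only -- the real asterixdb helpers are not shown in this example.
private static Pair<ActivityId, ActivityId> findMergePair(JobActivityGraph jag, Set<Set<ActivityId>> stages) {
    for (Set<ActivityId> stage : stages) {
        for (ActivityId aid : stage) {
            List<IConnectorDescriptor> outputs = jag.getActivityOutputMap().get(aid);
            if (outputs == null) {
                continue;
            }
            for (IConnectorDescriptor conn : outputs) {
                // Connector map value is Pair<producer, consumer>, each a Pair<IActivity, port index>.
                IActivity consumer = jag.getConnectorActivityMap().get(conn.getConnectorId()).getRight().getLeft();
                ActivityId consumerId = consumer.getActivityId();
                if (!stage.contains(consumerId)) {
                    return Pair.of(aid, consumerId);
                }
            }
        }
    }
    // Fixed point: every connector is internal to some equivalence set.
    return null;
}

private static void merge(Map<ActivityId, Set<ActivityId>> stageMap, Set<Set<ActivityId>> stages, ActivityId t1, ActivityId t2) {
    Set<ActivityId> stage1 = stageMap.get(t1);
    Set<ActivityId> stage2 = stageMap.get(t2);
    if (stage1 == stage2) {
        return; // already in the same equivalence set
    }
    Set<ActivityId> merged = new HashSet<>(stage1);
    merged.addAll(stage2);
    stages.remove(stage1);
    stages.remove(stage2);
    stages.add(merged);
    for (ActivityId aid : merged) {
        stageMap.put(aid, merged);
    }
}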

Example 7 with ActivityCluster

use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.

the class ActivityClusterPlanner method getTaskCluster.

private TaskCluster getTaskCluster(TaskId tid) {
    JobRun run = executor.getJobRun();
    ActivityCluster ac = run.getActivityClusterGraph().getActivityMap().get(tid.getActivityId());
    ActivityClusterPlan acp = run.getActivityClusterPlanMap().get(ac.getId());
    Task[] tasks = acp.getActivityPlanMap().get(tid.getActivityId()).getTasks();
    Task task = tasks[tid.getPartition()];
    assert task.getTaskId().equals(tid);
    return task.getTaskCluster();
}
Also used: Task (org.apache.hyracks.control.cc.job.Task), ActivityClusterPlan (org.apache.hyracks.control.cc.job.ActivityClusterPlan), JobRun (org.apache.hyracks.control.cc.job.JobRun), ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)
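
The assert above only runs when the JVM is started with -ea; otherwise the lookup silently relies on the planner invariant that the task for partition i sits at index i (established in Example 8 below). A hypothetical bounds-checked variant could look like this:

// Hypothetical defensive variant; not part of ActivityClusterPlanner.
private TaskCluster getTaskClusterChecked(TaskId tid) {
    JobRun run = executor.getJobRun();
    ActivityCluster ac = run.getActivityClusterGraph().getActivityMap().get(tid.getActivityId());
    ActivityClusterPlan acp = run.getActivityClusterPlanMap().get(ac.getId());
    Task[] tasks = acp.getActivityPlanMap().get(tid.getActivityId()).getTasks();
    int partition = tid.getPartition();
    if (partition < 0 || partition >= tasks.length || !tasks[partition].getTaskId().equals(tid)) {
        throw new IllegalStateException("No planned task matching " + tid);
    }
    return tasks[partition].getTaskCluster();
}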

Example 8 with ActivityCluster

use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.

the class ActivityClusterPlanner method buildActivityPlanMap.

private Map<ActivityId, ActivityPlan> buildActivityPlanMap(ActivityCluster ac, JobRun jobRun, Map<ActivityId, ActivityPartitionDetails> pcMap) {
    Map<ActivityId, ActivityPlan> activityPlanMap = new HashMap<>();
    Set<ActivityId> depAnIds = new HashSet<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        depAnIds.clear();
        getDependencyActivityIds(depAnIds, anId, ac);
        ActivityPartitionDetails apd = pcMap.get(anId);
        Task[] tasks = new Task[apd.getPartitionCount()];
        ActivityPlan activityPlan = new ActivityPlan(apd);
        for (int i = 0; i < tasks.length; ++i) {
            TaskId tid = new TaskId(anId, i);
            tasks[i] = new Task(tid, activityPlan);
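            // Wire task-level dependencies partition-by-partition: partition i of this activity
            // depends on partition i of every blocker activity (the asserts below enforce that
            // blocker and dependent are partitioned identically).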
            for (ActivityId danId : depAnIds) {
                ActivityCluster dAC = ac.getActivityClusterGraph().getActivityMap().get(danId);
                ActivityClusterPlan dACP = jobRun.getActivityClusterPlanMap().get(dAC.getId());
                assert dACP != null : "IllegalStateEncountered: Dependent AC is being planned without a plan for " + "dependency AC: Encountered no plan for ActivityID " + danId;
                Task[] dATasks = dACP.getActivityPlanMap().get(danId).getTasks();
                assert dATasks != null : "IllegalStateEncountered: Dependent AC is being planned without a plan for" + " dependency AC: Encountered no plan for ActivityID " + danId;
                assert dATasks.length == tasks.length : "Dependency activity partitioned differently from " + "dependent: " + dATasks.length + " != " + tasks.length;
                Task dTask = dATasks[i];
                TaskId dTaskId = dTask.getTaskId();
                tasks[i].getDependencies().add(dTaskId);
                dTask.getDependents().add(tid);
            }
        }
        activityPlan.setTasks(tasks);
        activityPlanMap.put(anId, activityPlan);
    }
    return activityPlanMap;
}
Also used: Task (org.apache.hyracks.control.cc.job.Task), TaskId (org.apache.hyracks.api.dataflow.TaskId), HashMap (java.util.HashMap), ActivityId (org.apache.hyracks.api.dataflow.ActivityId), ActivityPlan (org.apache.hyracks.control.cc.job.ActivityPlan), ActivityCluster (org.apache.hyracks.api.job.ActivityCluster), ActivityClusterPlan (org.apache.hyracks.control.cc.job.ActivityClusterPlan), HashSet (java.util.HashSet)
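
getDependencyActivityIds is invoked above but not shown on this page. A minimal sketch, assuming it collects the direct blockers of an activity from the cluster's blocked-to-blocker map (populated in Example 6); the real helper may differ:

// Illustrative sketch only -- the project's actual helper is not shown in this example.
private static void getDependencyActivityIds(Set<ActivityId> depAnIds, ActivityId anId, ActivityCluster ac) {
    Set<ActivityId> blockers = ac.getBlocked2BlockerMap().get(anId);
    if (blockers != null) {
        depAnIds.addAll(blockers);
    }
}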

Example 9 with ActivityCluster

use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.

the class JobExecutor method findRunnableTaskClusterRoots.

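// Depth-first walk over activity-cluster dependencies: descend into any dependency that is
// unplanned or whose root task clusters have not all completed; only once every dependency is
// satisfied is the candidate itself planned (if needed) and its incomplete roots added to the frontier.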
private void findRunnableTaskClusterRoots(Set<TaskCluster> frontier, ActivityCluster candidate) throws HyracksException {
    boolean depsComplete = true;
    for (ActivityCluster depAC : candidate.getDependencies()) {
        if (!isPlanned(depAC)) {
            depsComplete = false;
            findRunnableTaskClusterRoots(frontier, depAC);
        } else {
            boolean tcRootsComplete = true;
            for (TaskCluster tc : getActivityClusterPlan(depAC).getTaskClusters()) {
                if (!tc.getProducedPartitions().isEmpty()) {
                    continue;
                }
                TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
                if (tca == null || tca.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
                    tcRootsComplete = false;
                    break;
                }
            }
            if (!tcRootsComplete) {
                depsComplete = false;
                findRunnableTaskClusterRoots(frontier, depAC);
            }
        }
    }
    if (!depsComplete) {
        return;
    }
    if (!isPlanned(candidate)) {
        ActivityClusterPlanner acp = new ActivityClusterPlanner(this);
        ActivityClusterPlan acPlan = acp.planActivityCluster(candidate);
        jobRun.getActivityClusterPlanMap().put(candidate.getId(), acPlan);
        partitionProducingTaskClusterMap.putAll(acp.getPartitionProducingTaskClusterMap());
    }
    for (TaskCluster tc : getActivityClusterPlan(candidate).getTaskClusters()) {
        if (!tc.getProducedPartitions().isEmpty()) {
            continue;
        }
        TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
        if (tca == null || tca.getStatus() != TaskClusterAttempt.TaskClusterStatus.COMPLETED) {
            frontier.add(tc);
        }
    }
}
Also used: ActivityClusterPlan (org.apache.hyracks.control.cc.job.ActivityClusterPlan), TaskClusterAttempt (org.apache.hyracks.control.cc.job.TaskClusterAttempt), TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster), ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)
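
isPlanned and getActivityClusterPlan are likewise not shown. Since the plan is registered via jobRun.getActivityClusterPlanMap().put(...) above, plausible one-line implementations would be (assumptions, not the project's actual code):

// Illustrative sketches; the real JobExecutor helpers are not shown in this example.
private boolean isPlanned(ActivityCluster ac) {
    return jobRun.getActivityClusterPlanMap().get(ac.getId()) != null;
}

private ActivityClusterPlan getActivityClusterPlan(ActivityCluster ac) {
    return jobRun.getActivityClusterPlanMap().get(ac.getId());
}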

Example 10 with ActivityCluster

use of org.apache.hyracks.api.job.ActivityCluster in project asterixdb by apache.

the class StartTasksWork method run.

@Override
public void run() {
    Task task = null;
    try {
        NCServiceContext serviceCtx = ncs.getContext();
        Joblet joblet = getOrCreateLocalJoblet(deploymentId, jobId, serviceCtx, acgBytes);
        final ActivityClusterGraph acg = joblet.getActivityClusterGraph();
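        // Record descriptor provider backed by the ACG: resolves an activity's input/output
        // record descriptors through its cluster's connector maps.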
        IRecordDescriptorProvider rdp = new IRecordDescriptorProvider() {

            @Override
            public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityOutputMap().get(aid).get(outputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }

            @Override
            public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityInputMap().get(aid).get(inputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }
        };
        for (TaskAttemptDescriptor td : taskDescriptors) {
            TaskAttemptId taId = td.getTaskAttemptId();
            TaskId tid = taId.getTaskId();
            ActivityId aid = tid.getActivityId();
            ActivityCluster ac = acg.getActivityMap().get(aid);
            IActivity han = ac.getActivityMap().get(aid);
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info("Initializing " + taId + " -> " + han);
            }
            final int partition = tid.getPartition();
            List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(aid);
            task = new Task(joblet, taId, han.getClass().getName(), ncs.getExecutor(), ncs, createInputChannels(td, inputs));
            IOperatorNodePushable operator = han.createPushRuntime(task, rdp, partition, td.getPartitionCount());
            List<IPartitionCollector> collectors = new ArrayList<>();
            if (inputs != null) {
                for (int i = 0; i < inputs.size(); ++i) {
                    IConnectorDescriptor conn = inputs.get(i);
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("input: " + i + ": " + conn.getConnectorId());
                    }
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IPartitionCollector collector = createPartitionCollector(td, partition, task, i, conn, recordDesc, cPolicy);
                    collectors.add(collector);
                }
            }
            List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(aid);
            if (outputs != null) {
                for (int i = 0; i < outputs.size(); ++i) {
                    final IConnectorDescriptor conn = outputs.get(i);
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    IPartitionWriterFactory pwFactory = createPartitionWriterFactory(task, cPolicy, jobId, conn, partition, taId, flags);
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("output: " + i + ": " + conn.getConnectorId());
                    }
                    IFrameWriter writer = conn.createPartitioner(task, recordDesc, pwFactory, partition, td.getPartitionCount(), td.getOutputPartitionCounts()[i]);
                    operator.setOutputFrameWriter(i, writer, recordDesc);
                }
            }
            task.setTaskRuntime(collectors.toArray(new IPartitionCollector[collectors.size()]), operator);
            joblet.addTask(task);
            task.start();
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Failure starting a task", e);
        // notify cc of start task failure
        List<Exception> exceptions = new ArrayList<>();
        // Include the caught exception so the CC receives the actual failure cause,
        // not an empty list.
        exceptions.add(e);
        ExceptionUtils.setNodeIds(exceptions, ncs.getId());
        ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, task, exceptions));
    }
}
Also used: IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter), Task (org.apache.hyracks.control.nc.Task), TaskId (org.apache.hyracks.api.dataflow.TaskId), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), ActivityId (org.apache.hyracks.api.dataflow.ActivityId), ArrayList (java.util.ArrayList), IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider), Joblet (org.apache.hyracks.control.nc.Joblet), IActivity (org.apache.hyracks.api.dataflow.IActivity), NCServiceContext (org.apache.hyracks.control.nc.application.NCServiceContext), INCServiceContext (org.apache.hyracks.api.application.INCServiceContext), List (java.util.List), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), IPartitionCollector (org.apache.hyracks.api.comm.IPartitionCollector), TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId), IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy), IPartitionWriterFactory (org.apache.hyracks.api.comm.IPartitionWriterFactory), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), UnknownHostException (java.net.UnknownHostException), HyracksException (org.apache.hyracks.api.exceptions.HyracksException), ActivityCluster (org.apache.hyracks.api.job.ActivityCluster), TaskAttemptDescriptor (org.apache.hyracks.control.common.job.TaskAttemptDescriptor), ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph), IOperatorNodePushable (org.apache.hyracks.api.dataflow.IOperatorNodePushable)
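
The setup above resolves the same id chain repeatedly. A hypothetical helper (not part of StartTasksWork), built only from calls that appear in the snippet, makes the chain explicit:

// Hypothetical helper: TaskAttemptId -> TaskId -> ActivityId -> ActivityCluster -> IActivity.
private static IActivity resolveActivity(ActivityClusterGraph acg, TaskAttemptId taId) {
    ActivityId aid = taId.getTaskId().getActivityId();
    ActivityCluster ac = acg.getActivityMap().get(aid);
    return ac.getActivityMap().get(aid);
}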

Aggregations

ActivityCluster (org.apache.hyracks.api.job.ActivityCluster): 14 uses
ActivityId (org.apache.hyracks.api.dataflow.ActivityId): 11 uses
HashMap (java.util.HashMap): 7 uses
TaskId (org.apache.hyracks.api.dataflow.TaskId): 6 uses
IActivity (org.apache.hyracks.api.dataflow.IActivity): 5 uses
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 5 uses
ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph): 5 uses
Task (org.apache.hyracks.control.cc.job.Task): 5 uses
ArrayList (java.util.ArrayList): 3 uses
HashSet (java.util.HashSet): 3 uses
List (java.util.List): 3 uses
Pair (org.apache.commons.lang3.tuple.Pair): 3 uses
HyracksException (org.apache.hyracks.api.exceptions.HyracksException): 3 uses
ActivityClusterPlan (org.apache.hyracks.control.cc.job.ActivityClusterPlan): 3 uses
TaskAttempt (org.apache.hyracks.control.cc.job.TaskAttempt): 3 uses
TaskClusterAttempt (org.apache.hyracks.control.cc.job.TaskClusterAttempt): 3 uses
Set (java.util.Set): 2 uses
ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId): 2 uses
IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider): 2 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 2 uses