Search in sources :

Example 6 with Task

use of org.apache.hyracks.control.cc.job.Task in project asterixdb by apache.

the class ActivityClusterPlanner method buildConnectorPolicyAwareTaskClusters.

/**
 * Partitions the tasks of an activity cluster into task clusters, merging tasks that are linked
 * (directly or transitively) by a connector whose policy requires producer/consumer co-scheduling.
 *
 * As a side effect, every task reached through {@code activityPlanMap} has its task cluster set
 * to the newly created {@link TaskCluster} it belongs to.
 *
 * @param ac the activity cluster whose tasks are being grouped
 * @param activityPlanMap activity plans, looked up by activity id to obtain the tasks
 * @param taskConnectivity for each key task, the (target task, connector) pairs it is connected to
 * @return the created task clusters, numbered from 0 in creation order
 */
private TaskCluster[] buildConnectorPolicyAwareTaskClusters(ActivityCluster ac, Map<ActivityId, ActivityPlan> activityPlanMap, Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> taskConnectivity) {
    // Start with one singleton set per task; each set always contains the task itself.
    Map<TaskId, Set<TaskId>> taskClusterMap = new HashMap<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        ActivityPlan ap = activityPlanMap.get(anId);
        Task[] tasks = ap.getTasks();
        for (Task t : tasks) {
            Set<TaskId> cluster = new HashSet<>();
            TaskId tid = t.getTaskId();
            cluster.add(tid);
            taskClusterMap.put(tid, cluster);
        }
    }
    JobRun jobRun = executor.getJobRun();
    Map<ConnectorDescriptorId, IConnectorPolicy> connectorPolicies = jobRun.getConnectorPolicyMap();
    // Add each directly connected target task whose connector policy demands co-scheduling.
    // NOTE(review): cPolicy and cluster are dereferenced without null checks; this presumably relies
    // on every connector having a policy and every key task belonging to ac -- confirm with callers.
    for (Map.Entry<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> e : taskConnectivity.entrySet()) {
        Set<TaskId> cluster = taskClusterMap.get(e.getKey());
        for (Pair<TaskId, ConnectorDescriptorId> p : e.getValue()) {
            IConnectorPolicy cPolicy = connectorPolicies.get(p.getRight());
            if (cPolicy.requiresProducerConsumerCoscheduling()) {
                cluster.add(p.getLeft());
            }
        }
    }
    /*
         * We compute the transitive closure of this (producer-consumer) relation to find the largest set of
         * tasks that need to be co-scheduled.
         */
    // Give every task a dense ordinal so the relation can be stored as one BitSet row per task.
    // The numbering follows the iteration order of taskClusterMap.keySet().
    int counter = 0;
    TaskId[] ordinalList = new TaskId[taskClusterMap.size()];
    Map<TaskId, Integer> ordinalMap = new HashMap<>();
    for (TaskId tid : taskClusterMap.keySet()) {
        ordinalList[counter] = tid;
        ordinalMap.put(tid, counter);
        ++counter;
    }
    int n = ordinalList.length;
    // paths[i] has bit j set when tasks i and j must share a cluster; rows are created lazily.
    BitSet[] paths = new BitSet[n];
    for (Map.Entry<TaskId, Set<TaskId>> e : taskClusterMap.entrySet()) {
        int i = ordinalMap.get(e.getKey());
        BitSet bsi = paths[i];
        if (bsi == null) {
            bsi = new BitSet(n);
            paths[i] = bsi;
        }
        for (TaskId ttid : e.getValue()) {
            int j = ordinalMap.get(ttid);
            // Record the relation in both directions so the matrix is symmetric.
            paths[i].set(j);
            BitSet bsj = paths[j];
            if (bsj == null) {
                bsj = new BitSet(n);
                paths[j] = bsj;
            }
            bsj.set(i);
        }
    }
    // Closure step: for every i related to k, each j not yet related to i becomes related to i
    // exactly when it is related to k; the mirrored bit in paths[j] is updated to match.
    for (int k = 0; k < n; ++k) {
        for (int i = paths[k].nextSetBit(0); i >= 0; i = paths[k].nextSetBit(i + 1)) {
            for (int j = paths[i].nextClearBit(0); j < n && j >= 0; j = paths[i].nextClearBit(j + 1)) {
                paths[i].set(j, paths[k].get(j));
                paths[j].set(i, paths[i].get(j));
            }
        }
    }
    // Peel clusters off the set of still-unassigned ordinals, lowest ordinal first.
    BitSet pending = new BitSet(n);
    pending.set(0, n);
    List<List<TaskId>> clusters = new ArrayList<>();
    // The update expression re-queries from i (not i + 1): the loop advances only because row i
    // contains bit i (each task's initial set includes itself), so i is cleared from pending below.
    for (int i = pending.nextSetBit(0); i >= 0; i = pending.nextSetBit(i)) {
        List<TaskId> cluster = new ArrayList<>();
        for (int j = paths[i].nextSetBit(0); j >= 0; j = paths[i].nextSetBit(j + 1)) {
            cluster.add(ordinalList[j]);
            pending.clear(j);
        }
        clusters.add(cluster);
    }
    // Materialize a TaskCluster per group and point each member task back at it.
    List<TaskCluster> tcSet = new ArrayList<>();
    counter = 0;
    for (List<TaskId> cluster : clusters) {
        List<Task> taskStates = new ArrayList<>();
        for (TaskId tid : cluster) {
            taskStates.add(activityPlanMap.get(tid.getActivityId()).getTasks()[tid.getPartition()]);
        }
        TaskCluster tc = new TaskCluster(new TaskClusterId(ac.getId(), counter++), ac, taskStates.toArray(new Task[taskStates.size()]));
        tcSet.add(tc);
        for (TaskId tid : cluster) {
            activityPlanMap.get(tid.getActivityId()).getTasks()[tid.getPartition()].setTaskCluster(tc);
        }
    }
    return tcSet.toArray(new TaskCluster[tcSet.size()]);
}
Also used : Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) HashSet(java.util.HashSet) Set(java.util.Set) BitSet(java.util.BitSet) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) IConnectorPolicy(org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy) ActivityPlan(org.apache.hyracks.control.cc.job.ActivityPlan) BitSet(java.util.BitSet) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) HashMap(java.util.HashMap) Map(java.util.Map) TaskClusterId(org.apache.hyracks.control.cc.job.TaskClusterId) JobRun(org.apache.hyracks.control.cc.job.JobRun)

Example 7 with Task

use of org.apache.hyracks.control.cc.job.Task in project asterixdb by apache.

the class ActivityClusterPlanner method computeTaskClusters.

/**
 * Builds the task clusters of an activity cluster and links them to one another.
 *
 * The clusters come from the connector-policy-aware or the policy-unaware builder, chosen by the
 * activity cluster graph's {@code isUseConnectorPolicyForScheduling()} flag. For every connection
 * that crosses a cluster boundary, a {@link PartitionId} is added to the producing cluster's
 * produced partitions and the consuming cluster's required partitions, and is registered in
 * {@code partitionProducingTaskClusterMap}. Task dependencies are mirrored as dependency and
 * dependent links between the corresponding clusters.
 *
 * @param ac the activity cluster being planned
 * @param jobRun the job run supplying the job id and, through the connectivity computation, the graph
 * @param activityPlanMap activity plans, looked up by activity id to obtain the tasks
 * @return the task clusters, in the order returned by the selected builder
 */
private TaskCluster[] computeTaskClusters(ActivityCluster ac, JobRun jobRun, Map<ActivityId, ActivityPlan> activityPlanMap) {
    Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> connectivity = computeTaskConnectivity(jobRun, activityPlanMap, ac.getActivityMap().keySet());
    TaskCluster[] result;
    if (ac.getActivityClusterGraph().isUseConnectorPolicyForScheduling()) {
        result = buildConnectorPolicyAwareTaskClusters(ac, activityPlanMap, connectivity);
    } else {
        result = buildConnectorPolicyUnawareTaskClusters(ac, activityPlanMap);
    }
    for (TaskCluster producerTC : result) {
        Set<TaskCluster> prerequisites = producerTC.getDependencyTaskClusters();
        for (Task producer : producerTC.getTasks()) {
            TaskId producerId = producer.getTaskId();
            List<Pair<TaskId, ConnectorDescriptorId>> outgoing = connectivity.get(producerId);
            if (outgoing != null) {
                for (Pair<TaskId, ConnectorDescriptorId> edge : outgoing) {
                    TaskId consumerId = edge.getLeft();
                    Task consumer = activityPlanMap.get(consumerId.getActivityId()).getTasks()[consumerId.getPartition()];
                    TaskCluster consumerTC = consumer.getTaskCluster();
                    if (consumerTC == producerTC) {
                        // Both ends live in the same cluster, so no partition crosses a boundary.
                        continue;
                    }
                    PartitionId pid = new PartitionId(jobRun.getJobId(), edge.getRight(), producerId.getPartition(), consumerId.getPartition());
                    producerTC.getProducedPartitions().add(pid);
                    consumerTC.getRequiredPartitions().add(pid);
                    partitionProducingTaskClusterMap.put(pid, producerTC);
                }
            }
            // Record each task-level dependency on both clusters involved.
            for (TaskId dependencyId : producer.getDependencies()) {
                TaskCluster dependencyTC = getTaskCluster(dependencyId);
                dependencyTC.getDependentTaskClusters().add(producerTC);
                prerequisites.add(dependencyTC);
            }
        }
    }
    return result;
}
Also used : Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) PartitionId(org.apache.hyracks.api.partitions.PartitionId) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) ArrayList(java.util.ArrayList) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair)

Example 8 with Task

use of org.apache.hyracks.control.cc.job.Task in project asterixdb by apache.

the class ActivityClusterPlanner method computeTaskConnectivity.

/**
 * Computes, for every producer task of the given activities, the (consumer task, connector) pairs
 * that the task is connected to through the activity's output connectors.
 *
 * Each producer task owns its own list, and the pairs contributed by successive output connectors
 * are appended to it. Previously the all-producers-to-all-consumers branch stored one shared list
 * with {@code put}, which discarded pairs recorded for an earlier connector of the same activity and
 * let a later connector's per-producer additions leak into every producer's list.
 *
 * @param jobRun the job run whose activity cluster graph is consulted
 * @param activityPlanMap activity plans, looked up by activity id to obtain producer and consumer tasks
 * @param activities the producer activities to inspect
 * @return a map from producer task id to its connected (consumer task id, connector id) pairs;
 *         tasks of activities without output connectors have no entry
 */
private Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> computeTaskConnectivity(JobRun jobRun, Map<ActivityId, ActivityPlan> activityPlanMap, Set<ActivityId> activities) {
    Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> taskConnectivity = new HashMap<>();
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    // Scratch bitmap handed to the connector for each producer partition.
    BitSet targetBitmap = new BitSet();
    for (ActivityId ac1 : activities) {
        ActivityCluster ac = acg.getActivityMap().get(ac1);
        Task[] ac1TaskStates = activityPlanMap.get(ac1).getTasks();
        int nProducers = ac1TaskStates.length;
        List<IConnectorDescriptor> outputConns = ac.getActivityOutputMap().get(ac1);
        if (outputConns == null) {
            continue;
        }
        for (IConnectorDescriptor c : outputConns) {
            ConnectorDescriptorId cdId = c.getConnectorId();
            ActivityId ac2 = ac.getConsumerActivity(cdId);
            Task[] ac2TaskStates = activityPlanMap.get(ac2).getTasks();
            int nConsumers = ac2TaskStates.length;
            if (c.allProducersToAllConsumers()) {
                // Build the full target list once; the Pair objects are shared, the lists are not.
                List<Pair<TaskId, ConnectorDescriptorId>> allTargets = new ArrayList<>(nConsumers);
                for (int j = 0; j < nConsumers; j++) {
                    TaskId targetTID = ac2TaskStates[j].getTaskId();
                    allTargets.add(Pair.of(targetTID, cdId));
                }
                for (int i = 0; i < nProducers; ++i) {
                    TaskId producerTID = ac1TaskStates[i].getTaskId();
                    List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(producerTID);
                    if (cInfoList == null) {
                        cInfoList = new ArrayList<>(nConsumers);
                        taskConnectivity.put(producerTID, cInfoList);
                    }
                    // Append rather than replace so pairs from other connectors survive.
                    cInfoList.addAll(allTargets);
                }
                continue;
            }
            for (int i = 0; i < nProducers; ++i) {
                // Ask the connector which consumer partitions producer partition i targets.
                c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
                TaskId producerTID = ac1TaskStates[i].getTaskId();
                List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(producerTID);
                if (cInfoList == null) {
                    cInfoList = new ArrayList<>();
                    taskConnectivity.put(producerTID, cInfoList);
                }
                for (int j = targetBitmap.nextSetBit(0); j >= 0; j = targetBitmap.nextSetBit(j + 1)) {
                    TaskId targetTID = ac2TaskStates[j].getTaskId();
                    cInfoList.add(Pair.of(targetTID, cdId));
                }
            }
        }
    }
    return taskConnectivity;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) BitSet(java.util.BitSet) ArrayList(java.util.ArrayList) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) ArrayList(java.util.ArrayList) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair)

Example 9 with Task

use of org.apache.hyracks.control.cc.job.Task in project asterixdb by apache.

the class AbstractTaskLifecycleWork method runWork.

/**
 * Resolves the task attempt identified by {@code jobId} and {@code taId} and, if it can be found,
 * passes it to {@code performEvent}.
 *
 * The method returns without doing anything when the job run, the activity cluster, the task
 * array slot, the task cluster attempt or the task attempt itself is missing.
 */
@Override
public final void runWork() {
    JobRun run = ccs.getJobManager().get(jobId);
    if (run == null) {
        return;
    }
    TaskId tid = taId.getTaskId();
    ActivityCluster ac = run.getActivityClusterGraph().getActivityMap().get(tid.getActivityId());
    if (ac == null) {
        return;
    }
    Task[] tasks = run.getActivityClusterPlanMap().get(ac.getId()).getActivityPlanMap().get(tid.getActivityId()).getTasks();
    if (tasks == null) {
        return;
    }
    int partition = tid.getPartition();
    if (partition >= tasks.length) {
        // The partition index does not address an existing task.
        return;
    }
    List<TaskClusterAttempt> clusterAttempts = tasks[partition].getTaskCluster().getAttempts();
    if (clusterAttempts == null) {
        return;
    }
    int attemptIndex = taId.getAttempt();
    if (attemptIndex >= clusterAttempts.size()) {
        // No cluster attempt has been recorded for this attempt number.
        return;
    }
    TaskAttempt ta = clusterAttempts.get(attemptIndex).getTaskAttempts().get(tid);
    if (ta != null) {
        performEvent(ta);
    }
}
Also used : Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ActivityPlan(org.apache.hyracks.control.cc.job.ActivityPlan) IJobManager(org.apache.hyracks.control.cc.job.IJobManager) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) JobRun(org.apache.hyracks.control.cc.job.JobRun)

Example 10 with Task

use of org.apache.hyracks.control.cc.job.Task in project asterixdb by apache.

the class JobExecutor method assignTaskLocations.

/**
 * Starts a new attempt of the given task cluster: creates one task attempt per task, resolves a
 * node for each of them, and records a {@link TaskAttemptDescriptor} per task under its node id.
 *
 * Afterwards the input partition locations of every descriptor currently in {@code taskAttemptMap}
 * are filled in, the new cluster attempt is marked RUNNING and the cluster is added to
 * {@code inProgressTaskClusters}.
 *
 * @param tc the task cluster to start
 * @param taskAttemptMap node id to task attempt descriptors; new descriptors are appended to it
 * @throws HyracksException declared by the method; which of the invoked helpers raises it is not
 *         visible from this block
 */
private void assignTaskLocations(TaskCluster tc, Map<String, List<TaskAttemptDescriptor>> taskAttemptMap) throws HyracksException {
    ActivityClusterGraph graph = jobRun.getActivityClusterGraph();
    Task[] clusterTasks = tc.getTasks();
    List<TaskClusterAttempt> clusterAttempts = tc.getAttempts();
    // The new attempt's number is the count of attempts made so far.
    int attemptNumber = clusterAttempts.size();
    TaskClusterAttempt clusterAttempt = new TaskClusterAttempt(tc, attemptNumber);
    Map<TaskId, TaskAttempt> attemptsByTask = new HashMap<>();
    Map<TaskId, LValueConstraintExpression> locationExprs = new HashMap<>();
    for (Task task : clusterTasks) {
        TaskId taskId = task.getTaskId();
        TaskId attemptTaskId = new TaskId(taskId.getActivityId(), taskId.getPartition());
        TaskAttempt attempt = new TaskAttempt(clusterAttempt, new TaskAttemptId(attemptTaskId, attemptNumber), task);
        attempt.setStatus(TaskAttempt.TaskStatus.INITIALIZED, null);
        locationExprs.put(taskId, new PartitionLocationExpression(taskId.getActivityId().getOperatorDescriptorId(), taskId.getPartition()));
        attemptsByTask.put(taskId, attempt);
    }
    clusterAttempt.setTaskAttempts(attemptsByTask);
    solver.solve(locationExprs.values());
    for (Task task : clusterTasks) {
        TaskId taskId = task.getTaskId();
        TaskAttempt attempt = attemptsByTask.get(taskId);
        String assignedNode = assignLocation(graph, locationExprs, taskId, attempt);
        attempt.setNodeId(assignedNode);
        attempt.setStatus(TaskAttempt.TaskStatus.RUNNING, null);
        attempt.setStartTime(System.currentTimeMillis());
        List<TaskAttemptDescriptor> nodeDescriptors = taskAttemptMap.get(assignedNode);
        if (nodeDescriptors == null) {
            nodeDescriptors = new ArrayList<>();
            taskAttemptMap.put(assignedNode, nodeDescriptors);
        }
        jobRun.registerOperatorLocation(taskId.getActivityId().getOperatorDescriptorId(), taskId.getPartition(), assignedNode);
        ActivityPartitionDetails details = task.getActivityPlan().getActivityPartitionDetails();
        nodeDescriptors.add(new TaskAttemptDescriptor(attempt.getTaskAttemptId(), details.getPartitionCount(), details.getInputPartitionCounts(), details.getOutputPartitionCounts()));
    }
    clusterAttempt.initializePendingTaskCounter();
    clusterAttempts.add(clusterAttempt);
    /*
     * To reduce master/slave message traffic, each TaskAttemptDescriptor carries a
     * NetworkAddress[][] of partition locations: one row per incoming connector descriptor and
     * one column per input channel of that connector.
     */
    INodeManager nodeManager = ccs.getNodeManager();
    for (List<TaskAttemptDescriptor> descriptors : taskAttemptMap.values()) {
        for (TaskAttemptDescriptor descriptor : descriptors) {
            TaskAttemptId attemptId = descriptor.getTaskAttemptId();
            int attemptNo = attemptId.getAttempt();
            ActivityId consumerAid = attemptId.getTaskId().getActivityId();
            List<IConnectorDescriptor> inputConnectors = graph.getActivityInputs(consumerAid);
            int[] inputCounts = descriptor.getInputPartitionCounts();
            if (inputCounts == null) {
                continue;
            }
            NetworkAddress[][] senderAddresses = new NetworkAddress[inputCounts.length][];
            for (int connIdx = 0; connIdx < inputCounts.length; ++connIdx) {
                ConnectorDescriptorId cdId = inputConnectors.get(connIdx).getConnectorId();
                IConnectorPolicy policy = jobRun.getConnectorPolicyMap().get(cdId);
                /*
                 * Sender locations are carried into the task unless this is a re-attempt and the
                 * connector is send-side materialized and blocking; in that case the row stays null.
                 */
                boolean skipSenderLocations = attemptNo > 0 && policy.materializeOnSendSide() && policy.consumerWaitsForProducerToFinish();
                if (!skipSenderLocations) {
                    ActivityId producerAid = graph.getProducerActivity(cdId);
                    int channelCount = inputCounts[connIdx];
                    NetworkAddress[] row = new NetworkAddress[channelCount];
                    for (int channel = 0; channel < channelCount; ++channel) {
                        String producerNode = findTaskLocation(new TaskId(producerAid, channel));
                        row[channel] = nodeManager.getNodeControllerState(producerNode).getDataPort();
                    }
                    senderAddresses[connIdx] = row;
                }
            }
            descriptor.setInputPartitionLocations(senderAddresses);
        }
    }
    clusterAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.RUNNING);
    clusterAttempt.setStartTime(System.currentTimeMillis());
    inProgressTaskClusters.add(tc);
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) NetworkAddress(org.apache.hyracks.api.comm.NetworkAddress) ArrayList(java.util.ArrayList) List(java.util.List) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) PartitionLocationExpression(org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) TaskAttemptId(org.apache.hyracks.api.dataflow.TaskAttemptId) IConnectorPolicy(org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy) Constraint(org.apache.hyracks.api.constraints.Constraint) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) TaskAttemptDescriptor(org.apache.hyracks.control.common.job.TaskAttemptDescriptor) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

Task (org.apache.hyracks.control.cc.job.Task)10 ActivityId (org.apache.hyracks.api.dataflow.ActivityId)9 TaskId (org.apache.hyracks.api.dataflow.TaskId)6 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)5 ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId)5 ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)5 List (java.util.List)4 ActivityPlan (org.apache.hyracks.control.cc.job.ActivityPlan)4 TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster)4 BitSet (java.util.BitSet)3 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)3 IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy)3 JobRun (org.apache.hyracks.control.cc.job.JobRun)3 TaskAttempt (org.apache.hyracks.control.cc.job.TaskAttempt)3 TaskClusterAttempt (org.apache.hyracks.control.cc.job.TaskClusterAttempt)3 HashSet (java.util.HashSet)2 Map (java.util.Map)2 Pair (org.apache.commons.lang3.tuple.Pair)2 ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph)2