Search in sources :

Example 31 with ActivityId

use of org.apache.hyracks.api.dataflow.ActivityId in project asterixdb by apache.

From the class ActivityClusterPlanner, the method computeTaskClusters:

/**
 * Builds the task clusters for the given activity cluster and wires up the
 * inter-cluster edges: produced/required partitions for cross-cluster data
 * connections, and dependent/dependency links for task-level dependencies.
 *
 * @param ac              the activity cluster being planned
 * @param jobRun          the job run (supplies the job id for partition ids)
 * @param activityPlanMap per-activity plans holding the materialized tasks
 * @return the task clusters computed for this activity cluster
 */
private TaskCluster[] computeTaskClusters(ActivityCluster ac, JobRun jobRun, Map<ActivityId, ActivityPlan> activityPlanMap) {
    Set<ActivityId> activities = ac.getActivityMap().keySet();
    Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> taskConnectivity =
            computeTaskConnectivity(jobRun, activityPlanMap, activities);
    // Connector-policy-aware clustering groups tasks by pipelining constraints;
    // the unaware variant ignores connector policies entirely.
    boolean policyAware = ac.getActivityClusterGraph().isUseConnectorPolicyForScheduling();
    TaskCluster[] taskClusters = policyAware
            ? buildConnectorPolicyAwareTaskClusters(ac, activityPlanMap, taskConnectivity)
            : buildConnectorPolicyUnawareTaskClusters(ac, activityPlanMap);
    for (TaskCluster tc : taskClusters) {
        Set<TaskCluster> dependencyClusters = tc.getDependencyTaskClusters();
        for (Task task : tc.getTasks()) {
            TaskId taskId = task.getTaskId();
            List<Pair<TaskId, ConnectorDescriptorId>> connections = taskConnectivity.get(taskId);
            if (connections != null) {
                for (Pair<TaskId, ConnectorDescriptorId> connection : connections) {
                    TaskId consumerId = connection.getLeft();
                    Task consumerTask =
                            activityPlanMap.get(consumerId.getActivityId()).getTasks()[consumerId.getPartition()];
                    TaskCluster consumerCluster = consumerTask.getTaskCluster();
                    // Only edges that cross task-cluster boundaries become
                    // materialized partitions; intra-cluster edges are pipelined.
                    if (consumerCluster != tc) {
                        PartitionId pid = new PartitionId(jobRun.getJobId(), connection.getRight(),
                                taskId.getPartition(), consumerId.getPartition());
                        tc.getProducedPartitions().add(pid);
                        consumerCluster.getRequiredPartitions().add(pid);
                        partitionProducingTaskClusterMap.put(pid, tc);
                    }
                }
            }
            // Record task-level dependencies as bidirectional cluster links.
            for (TaskId dependencyTaskId : task.getDependencies()) {
                TaskCluster dependencyCluster = getTaskCluster(dependencyTaskId);
                dependencyCluster.getDependentTaskClusters().add(tc);
                dependencyClusters.add(dependencyCluster);
            }
        }
    }
    return taskClusters;
}
Also used : Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) PartitionId(org.apache.hyracks.api.partitions.PartitionId) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) ArrayList(java.util.ArrayList) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair)

Example 32 with ActivityId

use of org.apache.hyracks.api.dataflow.ActivityId in project asterixdb by apache.

From the class ActivityClusterPlanner, the method computePartitionCounts:

/**
 * Resolves the partition count for every activity in the cluster by solving the
 * partition-count constraints, then packages each activity's own count together
 * with the partition counts of its input producers and output consumers.
 *
 * @param ac the activity cluster whose partition counts are being computed
 * @return a map from each activity id to its partition details
 * @throws HyracksException if the solver yields no value, a non-numeric value,
 *                          or a non-positive partition count for any activity
 */
private Map<ActivityId, ActivityPartitionDetails> computePartitionCounts(ActivityCluster ac) throws HyracksException {
    PartitionConstraintSolver solver = executor.getSolver();
    // One partition-count lvalue per operator backing an activity in this cluster.
    Set<LValueConstraintExpression> lValues = new HashSet<>();
    for (ActivityId activityId : ac.getActivityMap().keySet()) {
        lValues.add(new PartitionCountExpression(activityId.getOperatorDescriptorId()));
    }
    solver.solve(lValues);
    // Validate each solved value and index it by operator descriptor id.
    Map<OperatorDescriptorId, Integer> nPartMap = new HashMap<>();
    for (LValueConstraintExpression lv : lValues) {
        Object value = solver.getValue(lv);
        if (value == null) {
            throw new HyracksException("No value found for " + lv);
        }
        if (!(value instanceof Number)) {
            throw new HyracksException("Unexpected type of value bound to " + lv + ": " + value.getClass() + "(" + value + ")");
        }
        int nParts = ((Number) value).intValue();
        if (nParts <= 0) {
            throw new HyracksException("Unsatisfiable number of partitions for " + lv + ": " + nParts);
        }
        nPartMap.put(((PartitionCountExpression) lv).getOperatorDescriptorId(), nParts);
    }
    // Assemble per-activity details: own count plus producer/consumer counts
    // for each input and output connector (null arrays when there are none).
    Map<ActivityId, ActivityPartitionDetails> activityPartsMap = new HashMap<>();
    for (ActivityId activityId : ac.getActivityMap().keySet()) {
        int nParts = nPartMap.get(activityId.getOperatorDescriptorId());
        int[] nInputPartitions = null;
        List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(activityId);
        if (inputs != null) {
            int nInputs = inputs.size();
            nInputPartitions = new int[nInputs];
            for (int i = 0; i < nInputs; ++i) {
                ActivityId producer = ac.getProducerActivity(inputs.get(i).getConnectorId());
                nInputPartitions[i] = nPartMap.get(producer.getOperatorDescriptorId());
            }
        }
        int[] nOutputPartitions = null;
        List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(activityId);
        if (outputs != null) {
            int nOutputs = outputs.size();
            nOutputPartitions = new int[nOutputs];
            for (int i = 0; i < nOutputs; ++i) {
                ActivityId consumer = ac.getConsumerActivity(outputs.get(i).getConnectorId());
                nOutputPartitions[i] = nPartMap.get(consumer.getOperatorDescriptorId());
            }
        }
        activityPartsMap.put(activityId, new ActivityPartitionDetails(nParts, nInputPartitions, nOutputPartitions));
    }
    return activityPartsMap;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) PartitionCountExpression(org.apache.hyracks.api.constraints.expressions.PartitionCountExpression) HashSet(java.util.HashSet)

Example 33 with ActivityId

use of org.apache.hyracks.api.dataflow.ActivityId in project asterixdb by apache.

From the class ActivityClusterPlanner, the method computeTaskConnectivity:

/**
 * Computes, for every producer task in the given activities, the list of
 * (consumer task, connector) pairs it sends data to.
 *
 * Fix: the all-producers-to-all-consumers fast path previously did an
 * unconditional {@code put} of a SINGLE shared list for every producer task.
 * That (1) clobbered connectivity already accumulated for a producer by an
 * earlier output connector of the same activity, and (2) aliased one list
 * across all producers, so a later append for one producer (via the
 * partition-targeted path) leaked into every producer's list. Both paths now
 * merge into a per-task list via {@code computeIfAbsent(...).addAll(...)}.
 *
 * @param jobRun          the job run supplying the activity cluster graph
 * @param activityPlanMap per-activity plans holding the materialized tasks
 * @param activities      the activities whose output connectivity to compute
 * @return map from producer task id to its (consumer task id, connector id) pairs
 */
private Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> computeTaskConnectivity(JobRun jobRun, Map<ActivityId, ActivityPlan> activityPlanMap, Set<ActivityId> activities) {
    Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> taskConnectivity = new HashMap<>();
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    BitSet targetBitmap = new BitSet();
    for (ActivityId ac1 : activities) {
        ActivityCluster ac = acg.getActivityMap().get(ac1);
        Task[] ac1TaskStates = activityPlanMap.get(ac1).getTasks();
        int nProducers = ac1TaskStates.length;
        List<IConnectorDescriptor> outputConns = ac.getActivityOutputMap().get(ac1);
        if (outputConns == null) {
            // Sink activity: no outgoing connectors, nothing to record.
            continue;
        }
        for (IConnectorDescriptor c : outputConns) {
            ConnectorDescriptorId cdId = c.getConnectorId();
            ActivityId ac2 = ac.getConsumerActivity(cdId);
            Task[] ac2TaskStates = activityPlanMap.get(ac2).getTasks();
            int nConsumers = ac2TaskStates.length;
            if (c.allProducersToAllConsumers()) {
                // Every producer feeds every consumer; build the consumer list
                // once, but merge a copy into each producer's own list so lists
                // are never shared or overwritten across connectors.
                List<Pair<TaskId, ConnectorDescriptorId>> allConsumers = new ArrayList<>(nConsumers);
                for (int j = 0; j < nConsumers; j++) {
                    allConsumers.add(Pair.of(ac2TaskStates[j].getTaskId(), cdId));
                }
                for (int i = 0; i < nProducers; ++i) {
                    taskConnectivity.computeIfAbsent(ac1TaskStates[i].getTaskId(), k -> new ArrayList<>())
                            .addAll(allConsumers);
                }
                continue;
            }
            for (int i = 0; i < nProducers; ++i) {
                // Ask the connector which consumer partitions producer i targets.
                c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
                List<Pair<TaskId, ConnectorDescriptorId>> cInfoList =
                        taskConnectivity.computeIfAbsent(ac1TaskStates[i].getTaskId(), k -> new ArrayList<>());
                for (int j = targetBitmap.nextSetBit(0); j >= 0; j = targetBitmap.nextSetBit(j + 1)) {
                    cInfoList.add(Pair.of(ac2TaskStates[j].getTaskId(), cdId));
                }
            }
        }
    }
    return taskConnectivity;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) BitSet(java.util.BitSet) ArrayList(java.util.ArrayList) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) ArrayList(java.util.ArrayList) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair)

Example 34 with ActivityId

use of org.apache.hyracks.api.dataflow.ActivityId in project asterixdb by apache.

From the class AbstractTaskLifecycleWork, the method runWork:

/**
 * Locates the {@link TaskAttempt} identified by {@code taId} inside the job's
 * cluster plan and dispatches {@code performEvent} on it. Silently returns if
 * any link in the lookup chain (job run, activity cluster, task states,
 * cluster attempt, task attempt) is missing or out of range.
 */
@Override
public final void runWork() {
    IJobManager jobManager = ccs.getJobManager();
    JobRun run = jobManager.get(jobId);
    if (run == null) {
        return;
    }
    TaskId tid = taId.getTaskId();
    Map<ActivityId, ActivityCluster> activityClusterMap = run.getActivityClusterGraph().getActivityMap();
    ActivityCluster ac = activityClusterMap.get(tid.getActivityId());
    if (ac == null) {
        return;
    }
    Map<ActivityId, ActivityPlan> taskStateMap = run.getActivityClusterPlanMap().get(ac.getId()).getActivityPlanMap();
    Task[] taskStates = taskStateMap.get(tid.getActivityId()).getTasks();
    if (taskStates == null || taskStates.length <= tid.getPartition()) {
        return;
    }
    TaskCluster tc = taskStates[tid.getPartition()].getTaskCluster();
    List<TaskClusterAttempt> taskClusterAttempts = tc.getAttempts();
    if (taskClusterAttempts == null || taskClusterAttempts.size() <= taId.getAttempt()) {
        return;
    }
    TaskAttempt ta = taskClusterAttempts.get(taId.getAttempt()).getTaskAttempts().get(tid);
    if (ta != null) {
        performEvent(ta);
    }
}
Also used : Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ActivityPlan(org.apache.hyracks.control.cc.job.ActivityPlan) IJobManager(org.apache.hyracks.control.cc.job.IJobManager) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) TaskCluster(org.apache.hyracks.control.cc.job.TaskCluster) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) JobRun(org.apache.hyracks.control.cc.job.JobRun)

Example 35 with ActivityId

use of org.apache.hyracks.api.dataflow.ActivityId in project asterixdb by apache.

From the class JobRun, the method toJSON:

/**
 * Serializes this job run — its activity clusters, per-activity plans, task
 * clusters, and task-cluster attempts (including failure stack traces) — into
 * a Jackson {@link ObjectNode} for the REST/admin API.
 *
 * Fix: the per-task serialization statements ({@code task.set("dependents", ...)},
 * the dependencies loop, {@code tasks.add(task)}) and the per-entry statements
 * ({@code entry.set("tasks", ...)}, {@code acTasks.add(entry)}) were nested
 * inside the {@code for (TaskId dependent : ...)} loop. A task with no
 * dependents was silently omitted from the output, and {@code entry} was
 * appended once per dependent, producing duplicates. The loops are now
 * properly nested: one "tasks" array per activity plan, one task node per task.
 *
 * @return the JSON representation of this job run
 */
public ObjectNode toJSON() {
    ObjectMapper om = new ObjectMapper();
    ObjectNode result = om.createObjectNode();
    result.put("job-id", jobId.toString());
    result.putPOJO("status", getStatus());
    result.put("create-time", getCreateTime());
    result.put("start-time", getStartTime());
    result.put("end-time", getEndTime());
    ArrayNode aClusters = om.createArrayNode();
    for (ActivityCluster ac : acg.getActivityClusterMap().values()) {
        ObjectNode acJSON = om.createObjectNode();
        acJSON.put("activity-cluster-id", String.valueOf(ac.getId()));
        ArrayNode activitiesJSON = om.createArrayNode();
        for (ActivityId aid : ac.getActivityMap().keySet()) {
            activitiesJSON.addPOJO(aid);
        }
        acJSON.set("activities", activitiesJSON);
        ArrayNode dependenciesJSON = om.createArrayNode();
        for (ActivityCluster dependency : ac.getDependencies()) {
            dependenciesJSON.add(String.valueOf(dependency.getId()));
        }
        acJSON.set("dependencies", dependenciesJSON);
        // The plan is only present once the cluster has been scheduled.
        ActivityClusterPlan acp = activityClusterPlanMap.get(ac.getId());
        if (acp == null) {
            acJSON.putNull("plan");
        } else {
            ObjectNode planJSON = om.createObjectNode();
            ArrayNode acTasks = om.createArrayNode();
            for (Map.Entry<ActivityId, ActivityPlan> e : acp.getActivityPlanMap().entrySet()) {
                ActivityPlan acPlan = e.getValue();
                ObjectNode entry = om.createObjectNode();
                entry.put("activity-id", e.getKey().toString());
                ActivityPartitionDetails apd = acPlan.getActivityPartitionDetails();
                entry.put("partition-count", apd.getPartitionCount());
                ArrayNode inPartCountsJSON = om.createArrayNode();
                int[] inPartCounts = apd.getInputPartitionCounts();
                if (inPartCounts != null) {
                    for (int i : inPartCounts) {
                        inPartCountsJSON.add(i);
                    }
                }
                entry.set("input-partition-counts", inPartCountsJSON);
                ArrayNode outPartCountsJSON = om.createArrayNode();
                int[] outPartCounts = apd.getOutputPartitionCounts();
                if (outPartCounts != null) {
                    for (int o : outPartCounts) {
                        outPartCountsJSON.add(o);
                    }
                }
                entry.set("output-partition-counts", outPartCountsJSON);
                // One node per task, each carrying its dependents/dependencies.
                ArrayNode tasks = om.createArrayNode();
                for (Task t : acPlan.getTasks()) {
                    ObjectNode task = om.createObjectNode();
                    task.put("task-id", t.getTaskId().toString());
                    ArrayNode dependentTasksJSON = om.createArrayNode();
                    for (TaskId dependent : t.getDependents()) {
                        dependentTasksJSON.add(dependent.toString());
                    }
                    task.set("dependents", dependentTasksJSON);
                    ArrayNode dependencyTasksJSON = om.createArrayNode();
                    for (TaskId dependency : t.getDependencies()) {
                        dependencyTasksJSON.add(dependency.toString());
                    }
                    task.set("dependencies", dependencyTasksJSON);
                    tasks.add(task);
                }
                entry.set("tasks", tasks);
                acTasks.add(entry);
            }
            planJSON.set("activities", acTasks);
            ArrayNode tClusters = om.createArrayNode();
            for (TaskCluster tc : acp.getTaskClusters()) {
                ObjectNode c = om.createObjectNode();
                c.put("task-cluster-id", String.valueOf(tc.getTaskClusterId()));
                ArrayNode tasksAry = om.createArrayNode();
                for (Task t : tc.getTasks()) {
                    tasksAry.add(t.getTaskId().toString());
                }
                c.set("tasks", tasksAry);
                ArrayNode prodParts = om.createArrayNode();
                for (PartitionId p : tc.getProducedPartitions()) {
                    prodParts.add(p.toString());
                }
                c.set("produced-partitions", prodParts);
                ArrayNode reqdParts = om.createArrayNode();
                for (PartitionId p : tc.getRequiredPartitions()) {
                    reqdParts.add(p.toString());
                }
                c.set("required-partitions", reqdParts);
                ArrayNode attempts = om.createArrayNode();
                List<TaskClusterAttempt> tcAttempts = tc.getAttempts();
                if (tcAttempts != null) {
                    for (TaskClusterAttempt tca : tcAttempts) {
                        ObjectNode attempt = om.createObjectNode();
                        attempt.put("attempt", tca.getAttempt());
                        attempt.putPOJO("status", tca.getStatus());
                        attempt.put("start-time", tca.getStartTime());
                        attempt.put("end-time", tca.getEndTime());
                        ArrayNode taskAttempts = om.createArrayNode();
                        for (TaskAttempt ta : tca.getTaskAttempts().values()) {
                            ObjectNode taskAttempt = om.createObjectNode();
                            taskAttempt.putPOJO("task-id", ta.getTaskAttemptId().getTaskId());
                            taskAttempt.putPOJO("task-attempt-id", ta.getTaskAttemptId());
                            taskAttempt.putPOJO("status", ta.getStatus());
                            taskAttempt.put("node-id", ta.getNodeId());
                            taskAttempt.put("start-time", ta.getStartTime());
                            taskAttempt.put("end-time", ta.getEndTime());
                            // Attach stack traces of the root-cause exceptions only.
                            List<Exception> exceptions = ta.getExceptions();
                            if (exceptions != null && !exceptions.isEmpty()) {
                                List<Exception> filteredExceptions = ExceptionUtils.getActualExceptions(exceptions);
                                for (Exception exception : filteredExceptions) {
                                    StringWriter exceptionWriter = new StringWriter();
                                    exception.printStackTrace(new PrintWriter(exceptionWriter));
                                    taskAttempt.put("failure-details", exceptionWriter.toString());
                                }
                            }
                            taskAttempts.add(taskAttempt);
                        }
                        attempt.set("task-attempts", taskAttempts);
                        attempts.add(attempt);
                    }
                }
                c.set("attempts", attempts);
                tClusters.add(c);
            }
            planJSON.set("task-clusters", tClusters);
            acJSON.set("plan", planJSON);
        }
        aClusters.add(acJSON);
    }
    result.set("activity-clusters", aClusters);
    result.set("profile", profile.toJSON());
    return result;
}
Also used : TaskId(org.apache.hyracks.api.dataflow.TaskId) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) PartitionId(org.apache.hyracks.api.partitions.PartitionId) Constraint(org.apache.hyracks.api.constraints.Constraint) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) StringWriter(java.io.StringWriter) ActivityPartitionDetails(org.apache.hyracks.control.cc.executor.ActivityPartitionDetails) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) HashMap(java.util.HashMap) Map(java.util.Map) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) PrintWriter(java.io.PrintWriter)

Aggregations

ActivityId (org.apache.hyracks.api.dataflow.ActivityId)46 TaskId (org.apache.hyracks.api.dataflow.TaskId)12 HashMap (java.util.HashMap)11 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)11 ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)11 IActivity (org.apache.hyracks.api.dataflow.IActivity)9 Task (org.apache.hyracks.control.cc.job.Task)9 ArrayList (java.util.ArrayList)8 ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId)7 OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId)7 HashSet (java.util.HashSet)6 List (java.util.List)6 Pair (org.apache.commons.lang3.tuple.Pair)6 ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph)6 HyracksException (org.apache.hyracks.api.exceptions.HyracksException)5 ActivityPlan (org.apache.hyracks.control.cc.job.ActivityPlan)5 TaskCluster (org.apache.hyracks.control.cc.job.TaskCluster)5 IOperatorNodePushable (org.apache.hyracks.api.dataflow.IOperatorNodePushable)4 TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId)4 IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy)4