Search in sources :

Example 6 with ActivityClusterGraph

use of org.apache.hyracks.api.job.ActivityClusterGraph in project asterixdb by apache.

the class DistributeJobWork method run.

/**
 * Deserializes the activity cluster graph carried in {@code acgBytes} and stores it on the
 * node controller service under {@code jobId}.
 *
 * If any of those steps throws a {@link HyracksException}, the cluster controller is told that
 * distribution of this job failed on this node. The caught exception itself is neither logged
 * nor forwarded; only the job id and node id are reported.
 */
@Override
public void run() {
    try {
        // Presumably rejects a job id that is already stored on this node -- confirm in ncs.
        ncs.checkForDuplicateDistributedJob(jobId);
        ActivityClusterGraph graph =
                (ActivityClusterGraph) DeploymentUtils.deserialize(acgBytes, null, ncs.getContext());
        ncs.storeActivityClusterGraph(jobId, graph);
    } catch (HyracksException distributionFailure) {
        try {
            ncs.getClusterController().notifyDistributedJobFailure(jobId, ncs.getId());
        } catch (Exception notificationFailure) {
            // Best effort: a failed notification is only dumped to stderr.
            notificationFailure.printStackTrace();
        }
    }
}
Also used : ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) HyracksException(org.apache.hyracks.api.exceptions.HyracksException)

Example 7 with ActivityClusterGraph

use of org.apache.hyracks.api.job.ActivityClusterGraph in project asterixdb by apache.

the class SuperActivity method createPushRuntime.

/**
 * Creates the push runtime for this super activity.
 *
 * Two things are prepared before the runtime is constructed:
 * the set of start activities (activities with no input connectors inside this super
 * activity), and a record-descriptor provider that resolves descriptors for inner activities.
 *
 * The wrapped provider resolves a descriptor in this order:
 * 1. a boundary channel of this super activity, delegated to {@code recordDescProvider};
 * 2. an internal connector of this super activity;
 * 3. a connector or boundary channel of another super activity in the activity cluster graph.
 * It returns {@code null} when none of these match.
 *
 * @param ctx the task context handed to the created runtime
 * @param recordDescProvider provider used for descriptors at super-activity boundaries
 * @param partition the partition passed through to the created runtime
 * @param nPartitions the partition count passed through to the created runtime
 * @return a {@code SuperActivityOperatorNodePushable} wrapping the start activities
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final Map<ActivityId, IActivity> startActivities = new HashMap<ActivityId, IActivity>();
    Map<ActivityId, IActivity> activities = getActivityMap();
    for (Entry<ActivityId, IActivity> entry : activities.entrySet()) {
        // extract start activities: those without any input connector in this super activity
        List<IConnectorDescriptor> conns = getActivityInputMap().get(entry.getKey());
        if (conns == null || conns.isEmpty()) {
            startActivities.put(entry.getKey(), entry.getValue());
        }
    }
    // wrap a RecordDescriptorProvider for the super activity
    IRecordDescriptorProvider wrappedRecDescProvider = new IRecordDescriptorProvider() {

        @Override
        public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
            if (startActivities.get(aid) != null) {
                // the activity is a start (input boundary) activity
                int superActivityInputChannel = SuperActivity.this.getClusterInputIndex(Pair.of(aid, inputIndex));
                if (superActivityInputChannel >= 0) {
                    return recordDescProvider.getInputRecordDescriptor(activityId, superActivityInputChannel);
                }
            }
            if (SuperActivity.this.getActivityMap().get(aid) != null) {
                // the activity is an internal activity of the super activity
                IConnectorDescriptor conn = getActivityInputMap().get(aid).get(inputIndex);
                return getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }
            // the following is for the case where the activity is in other SuperActivities
            ActivityClusterGraph acg = SuperActivity.this.getActivityClusterGraph();
            for (Entry<ActivityClusterId, ActivityCluster> entry : acg.getActivityClusterMap().entrySet()) {
                ActivityCluster ac = entry.getValue();
                for (Entry<ActivityId, IActivity> saEntry : ac.getActivityMap().entrySet()) {
                    SuperActivity sa = (SuperActivity) saEntry.getValue();
                    if (sa.getActivityMap().get(aid) != null) {
                        List<IConnectorDescriptor> conns = sa.getActivityInputMap().get(aid);
                        // Strictly greater: inputIndex == size() is out of range for get() and
                        // must fall through to the boundary-channel lookup instead of throwing.
                        if (conns != null && conns.size() > inputIndex) {
                            IConnectorDescriptor conn = conns.get(inputIndex);
                            return sa.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                        } else {
                            int superActivityInputChannel = sa.getClusterInputIndex(Pair.of(aid, inputIndex));
                            if (superActivityInputChannel >= 0) {
                                return recordDescProvider.getInputRecordDescriptor(sa.getActivityId(), superActivityInputChannel);
                            }
                        }
                    }
                }
            }
            return null;
        }

        @Override
        public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
            // the activity is an output-boundary activity
            int superActivityOutputChannel = SuperActivity.this.getClusterOutputIndex(Pair.of(aid, outputIndex));
            if (superActivityOutputChannel >= 0) {
                return recordDescProvider.getOutputRecordDescriptor(activityId, superActivityOutputChannel);
            }
            if (SuperActivity.this.getActivityMap().get(aid) != null) {
                // the activity is an internal activity of the super activity
                IConnectorDescriptor conn = getActivityOutputMap().get(aid).get(outputIndex);
                return getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }
            // the following is for the case where the activity is in other SuperActivities
            ActivityClusterGraph acg = SuperActivity.this.getActivityClusterGraph();
            for (Entry<ActivityClusterId, ActivityCluster> entry : acg.getActivityClusterMap().entrySet()) {
                ActivityCluster ac = entry.getValue();
                for (Entry<ActivityId, IActivity> saEntry : ac.getActivityMap().entrySet()) {
                    SuperActivity sa = (SuperActivity) saEntry.getValue();
                    if (sa.getActivityMap().get(aid) != null) {
                        List<IConnectorDescriptor> conns = sa.getActivityOutputMap().get(aid);
                        // Strictly greater: outputIndex == size() is out of range for get() and
                        // must fall through to the boundary-channel lookup instead of throwing.
                        if (conns != null && conns.size() > outputIndex) {
                            IConnectorDescriptor conn = conns.get(outputIndex);
                            return sa.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                        } else {
                            superActivityOutputChannel = sa.getClusterOutputIndex(Pair.of(aid, outputIndex));
                            if (superActivityOutputChannel >= 0) {
                                return recordDescProvider.getOutputRecordDescriptor(sa.getActivityId(), superActivityOutputChannel);
                            }
                        }
                    }
                }
            }
            return null;
        }
    };
    return new SuperActivityOperatorNodePushable(this, startActivities, ctx, wrappedRecDescProvider, partition, nPartitions);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) IRecordDescriptorProvider(org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) OneToOneConnectedActivityCluster(org.apache.hyracks.api.rewriter.OneToOneConnectedActivityCluster) IActivity(org.apache.hyracks.api.dataflow.IActivity) ActivityClusterId(org.apache.hyracks.api.job.ActivityClusterId) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph)

Example 8 with ActivityClusterGraph

use of org.apache.hyracks.api.job.ActivityClusterGraph in project asterixdb by apache.

the class ActivityClusterPlanner method computeTaskConnectivity.

/**
 * Computes, for every producer task of the given activities, the list of
 * (consumer task, connector) pairs it sends data to.
 *
 * Targets from all output connectors of an activity are accumulated into one list per
 * producer task. Each producer task owns its own list, so appending targets for one producer
 * never affects another producer's entry.
 *
 * @param jobRun the job run supplying the activity cluster graph
 * @param activityPlanMap plans giving the tasks of each activity; must contain every producer
 *        and consumer activity reached here
 * @param activities the producer activities to process
 * @return a map from producer task id to its (consumer task id, connector id) targets;
 *         producer tasks of activities without output connectors have no entry
 */
private Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> computeTaskConnectivity(JobRun jobRun, Map<ActivityId, ActivityPlan> activityPlanMap, Set<ActivityId> activities) {
    Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> taskConnectivity = new HashMap<>();
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    BitSet targetBitmap = new BitSet();
    for (ActivityId ac1 : activities) {
        ActivityCluster ac = acg.getActivityMap().get(ac1);
        Task[] ac1TaskStates = activityPlanMap.get(ac1).getTasks();
        int nProducers = ac1TaskStates.length;
        List<IConnectorDescriptor> outputConns = ac.getActivityOutputMap().get(ac1);
        if (outputConns == null) {
            continue;
        }
        for (IConnectorDescriptor c : outputConns) {
            ConnectorDescriptorId cdId = c.getConnectorId();
            ActivityId ac2 = ac.getConsumerActivity(cdId);
            Task[] ac2TaskStates = activityPlanMap.get(ac2).getTasks();
            int nConsumers = ac2TaskStates.length;
            if (c.allProducersToAllConsumers()) {
                // Every producer targets every consumer: build the target pairs once.
                List<Pair<TaskId, ConnectorDescriptorId>> allTargets = new ArrayList<>(nConsumers);
                for (int j = 0; j < nConsumers; j++) {
                    TaskId targetTID = ac2TaskStates[j].getTaskId();
                    allTargets.add(Pair.of(targetTID, cdId));
                }
                for (int i = 0; i < nProducers; ++i) {
                    TaskId producerTID = ac1TaskStates[i].getTaskId();
                    List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(producerTID);
                    if (cInfoList == null) {
                        // Give each producer its own copy; a shared list would be mutated for
                        // all producers when a later connector appends to one of them.
                        cInfoList = new ArrayList<>(allTargets);
                        taskConnectivity.put(producerTID, cInfoList);
                    } else {
                        // Keep targets already recorded for earlier output connectors.
                        cInfoList.addAll(allTargets);
                    }
                }
                continue;
            }
            for (int i = 0; i < nProducers; ++i) {
                // NOTE(review): assumes indicateTargetPartitions resets targetBitmap before
                // setting bits, since the same BitSet is reused across calls -- confirm.
                c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
                List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(ac1TaskStates[i].getTaskId());
                if (cInfoList == null) {
                    cInfoList = new ArrayList<>();
                    taskConnectivity.put(ac1TaskStates[i].getTaskId(), cInfoList);
                }
                for (int j = targetBitmap.nextSetBit(0); j >= 0; j = targetBitmap.nextSetBit(j + 1)) {
                    TaskId targetTID = ac2TaskStates[j].getTaskId();
                    cInfoList.add(Pair.of(targetTID, cdId));
                }
            }
        }
    }
    return taskConnectivity;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) BitSet(java.util.BitSet) ArrayList(java.util.ArrayList) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair)

Example 9 with ActivityClusterGraph

use of org.apache.hyracks.api.job.ActivityClusterGraph in project asterixdb by apache.

the class JobExecutor method startTasks.

/**
 * Sends each node its list of task attempts to start.
 *
 * The activity cluster graph is serialized once, unless the job is predistributed, and is
 * attached only to the first start request sent to a node for this job run; later requests
 * to the same node carry {@code null} instead. Nodes that the node manager does not know are
 * skipped.
 *
 * @param taskAttemptMap task attempt descriptors keyed by node id
 * @throws HyracksException wrapping any exception raised while serializing or dispatching
 */
private void startTasks(Map<String, List<TaskAttemptDescriptor>> taskAttemptMap) throws HyracksException {
    final DeploymentId deploymentId = jobRun.getDeploymentId();
    final JobId jobId = jobRun.getJobId();
    final ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    // Copy the policy map so the value sent to nodes is a snapshot taken at this point.
    final Map<ConnectorDescriptorId, IConnectorPolicy> connectorPolicies = new HashMap<>(jobRun.getConnectorPolicyMap());
    INodeManager nodeManager = ccs.getNodeManager();
    try {
        byte[] serializedGraph = null;
        if (!predistributed) {
            serializedGraph = JavaSerializationUtils.serialize(acg);
        }
        for (Map.Entry<String, List<TaskAttemptDescriptor>> perNode : taskAttemptMap.entrySet()) {
            String nodeId = perNode.getKey();
            final List<TaskAttemptDescriptor> taskDescriptors = perNode.getValue();
            final NodeControllerState node = nodeManager.getNodeControllerState(nodeId);
            if (node == null) {
                continue;
            }
            node.getActiveJobIds().add(jobRun.getJobId());
            // add() returns true only when the node id was not yet recorded
            // (presumably a Set -- confirm against JobRun).
            boolean firstRequestToNode = jobRun.getParticipatingNodeIds().add(nodeId);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Starting: " + taskDescriptors + " at " + nodeId);
            }
            byte[] graphPayload;
            if (firstRequestToNode) {
                graphPayload = serializedGraph;
            } else {
                graphPayload = null;
            }
            node.getNodeController().startTasks(deploymentId, jobId, graphPayload, taskDescriptors, connectorPolicies, jobRun.getFlags());
        }
    } catch (Exception e) {
        throw new HyracksException(e);
    }
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) DeploymentId(org.apache.hyracks.api.deployment.DeploymentId) HashMap(java.util.HashMap) IConnectorPolicy(org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) TaskAttemptDescriptor(org.apache.hyracks.control.common.job.TaskAttemptDescriptor) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) ArrayList(java.util.ArrayList) List(java.util.List) NodeControllerState(org.apache.hyracks.control.cc.NodeControllerState) Map(java.util.Map) JobId(org.apache.hyracks.api.job.JobId)

Example 10 with ActivityClusterGraph

use of org.apache.hyracks.api.job.ActivityClusterGraph in project asterixdb by apache.

the class JobExecutor method assignTaskLocations.

/**
 * Creates a new attempt for the given task cluster, assigns a node to every task in it, and
 * adds a {@link TaskAttemptDescriptor} for each task to {@code taskAttemptMap}.
 *
 * After the assignment, every descriptor currently in {@code taskAttemptMap} that has input
 * partition counts is given the network addresses of its producers. The task cluster is then
 * marked running and added to the in-progress set.
 *
 * @param tc the task cluster to schedule
 * @param taskAttemptMap descriptors keyed by node id; new entries are added in place
 * @throws HyracksException declared for the location-assignment calls made here
 */
private void assignTaskLocations(TaskCluster tc, Map<String, List<TaskAttemptDescriptor>> taskAttemptMap) throws HyracksException {
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    Task[] tasks = tc.getTasks();
    List<TaskClusterAttempt> tcAttempts = tc.getAttempts();
    // The number of earlier attempts doubles as the index of the new attempt.
    int attempts = tcAttempts.size();
    TaskClusterAttempt tcAttempt = new TaskClusterAttempt(tc, attempts);
    Map<TaskId, TaskAttempt> taskAttempts = new HashMap<>();
    Map<TaskId, LValueConstraintExpression> locationMap = new HashMap<>();

    // Phase 1: create an INITIALIZED attempt and a location expression for every task.
    for (Task task : tasks) {
        TaskId taskId = task.getTaskId();
        TaskId attemptTaskId = new TaskId(taskId.getActivityId(), taskId.getPartition());
        TaskAttempt created = new TaskAttempt(tcAttempt, new TaskAttemptId(attemptTaskId, attempts), task);
        created.setStatus(TaskAttempt.TaskStatus.INITIALIZED, null);
        locationMap.put(taskId, new PartitionLocationExpression(taskId.getActivityId().getOperatorDescriptorId(), taskId.getPartition()));
        taskAttempts.put(taskId, created);
    }
    tcAttempt.setTaskAttempts(taskAttempts);
    solver.solve(locationMap.values());

    // Phase 2: pick a node for every task and register its descriptor under that node.
    for (Task task : tasks) {
        TaskId taskId = task.getTaskId();
        TaskAttempt current = taskAttempts.get(taskId);
        String assignedNode = assignLocation(acg, locationMap, taskId, current);
        current.setNodeId(assignedNode);
        current.setStatus(TaskAttempt.TaskStatus.RUNNING, null);
        current.setStartTime(System.currentTimeMillis());
        List<TaskAttemptDescriptor> nodeDescriptors = taskAttemptMap.get(assignedNode);
        if (nodeDescriptors == null) {
            nodeDescriptors = new ArrayList<>();
            taskAttemptMap.put(assignedNode, nodeDescriptors);
        }
        OperatorDescriptorId operatorId = taskId.getActivityId().getOperatorDescriptorId();
        jobRun.registerOperatorLocation(operatorId, taskId.getPartition(), assignedNode);
        ActivityPartitionDetails details = task.getActivityPlan().getActivityPartitionDetails();
        nodeDescriptors.add(new TaskAttemptDescriptor(current.getTaskAttemptId(), details.getPartitionCount(), details.getInputPartitionCounts(), details.getOutputPartitionCounts()));
    }
    tcAttempt.initializePendingTaskCounter();
    tcAttempts.add(tcAttempt);

    // Phase 3: to reduce master/slave message communication, give each TaskAttemptDescriptor
    // a NetworkAddress[][] of partition locations, in which each row is for an incoming
    // connector descriptor and each column is for an input channel of that connector.
    INodeManager nodeManager = ccs.getNodeManager();
    for (List<TaskAttemptDescriptor> descriptors : taskAttemptMap.values()) {
        for (TaskAttemptDescriptor descriptor : descriptors) {
            TaskAttemptId attemptId = descriptor.getTaskAttemptId();
            int attemptNumber = attemptId.getAttempt();
            TaskId consumerTaskId = attemptId.getTaskId();
            ActivityId consumerActivity = consumerTaskId.getActivityId();
            List<IConnectorDescriptor> inConnectors = acg.getActivityInputs(consumerActivity);
            int[] inPartitionCounts = descriptor.getInputPartitionCounts();
            if (inPartitionCounts == null) {
                continue;
            }
            NetworkAddress[][] partitionLocations = new NetworkAddress[inPartitionCounts.length][];
            for (int input = 0; input < inPartitionCounts.length; ++input) {
                ConnectorDescriptorId connectorId = inConnectors.get(input).getConnectorId();
                IConnectorPolicy policy = jobRun.getConnectorPolicyMap().get(connectorId);
                // Sender locations are carried into the task unless this is a re-attempt and
                // the send side is materialized and blocking; in that case the row stays null.
                boolean skipSenderLocations = attemptNumber > 0 && policy.materializeOnSendSide() && policy.consumerWaitsForProducerToFinish();
                if (skipSenderLocations) {
                    continue;
                }
                ActivityId producerActivity = acg.getProducerActivity(connectorId);
                int channelCount = inPartitionCounts[input];
                NetworkAddress[] row = new NetworkAddress[channelCount];
                partitionLocations[input] = row;
                for (int channel = 0; channel < channelCount; ++channel) {
                    TaskId producerTaskId = new TaskId(producerActivity, channel);
                    String producerNode = findTaskLocation(producerTaskId);
                    row[channel] = nodeManager.getNodeControllerState(producerNode).getDataPort();
                }
            }
            descriptor.setInputPartitionLocations(partitionLocations);
        }
    }
    tcAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.RUNNING);
    tcAttempt.setStartTime(System.currentTimeMillis());
    inProgressTaskClusters.add(tc);
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) NetworkAddress(org.apache.hyracks.api.comm.NetworkAddress) ArrayList(java.util.ArrayList) List(java.util.List) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) PartitionLocationExpression(org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) TaskAttemptId(org.apache.hyracks.api.dataflow.TaskAttemptId) IConnectorPolicy(org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy) Constraint(org.apache.hyracks.api.constraints.Constraint) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) TaskAttemptDescriptor(org.apache.hyracks.control.common.job.TaskAttemptDescriptor) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) Map(java.util.Map)

Aggregations

ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph)12 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)7 HashMap (java.util.HashMap)6 ActivityId (org.apache.hyracks.api.dataflow.ActivityId)6 ArrayList (java.util.ArrayList)5 List (java.util.List)5 ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)5 ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId)4 IActivity (org.apache.hyracks.api.dataflow.IActivity)4 HyracksException (org.apache.hyracks.api.exceptions.HyracksException)4 Pair (org.apache.commons.lang3.tuple.Pair)3 TaskId (org.apache.hyracks.api.dataflow.TaskId)3 IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy)3 INodeManager (org.apache.hyracks.control.cc.cluster.INodeManager)3 TaskAttemptDescriptor (org.apache.hyracks.control.common.job.TaskAttemptDescriptor)3 HashSet (java.util.HashSet)2 Map (java.util.Map)2 Constraint (org.apache.hyracks.api.constraints.Constraint)2 TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId)2 IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider)2