Search in sources :

Example 61 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class ActivityClusterPlanner method computePartitionCounts.

private Map<ActivityId, ActivityPartitionDetails> computePartitionCounts(ActivityCluster ac) throws HyracksException {
    PartitionConstraintSolver solver = executor.getSolver();
    Set<LValueConstraintExpression> lValues = new HashSet<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        lValues.add(new PartitionCountExpression(anId.getOperatorDescriptorId()));
    }
    solver.solve(lValues);
    Map<OperatorDescriptorId, Integer> nPartMap = new HashMap<>();
    for (LValueConstraintExpression lv : lValues) {
        Object value = solver.getValue(lv);
        if (value == null) {
            throw new HyracksException("No value found for " + lv);
        }
        if (!(value instanceof Number)) {
            throw new HyracksException("Unexpected type of value bound to " + lv + ": " + value.getClass() + "(" + value + ")");
        }
        int nParts = ((Number) value).intValue();
        if (nParts <= 0) {
            throw new HyracksException("Unsatisfiable number of partitions for " + lv + ": " + nParts);
        }
        nPartMap.put(((PartitionCountExpression) lv).getOperatorDescriptorId(), nParts);
    }
    Map<ActivityId, ActivityPartitionDetails> activityPartsMap = new HashMap<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        int nParts = nPartMap.get(anId.getOperatorDescriptorId());
        int[] nInputPartitions = null;
        List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(anId);
        if (inputs != null) {
            nInputPartitions = new int[inputs.size()];
            for (int i = 0; i < nInputPartitions.length; ++i) {
                ConnectorDescriptorId cdId = inputs.get(i).getConnectorId();
                ActivityId aid = ac.getProducerActivity(cdId);
                Integer nPartInt = nPartMap.get(aid.getOperatorDescriptorId());
                nInputPartitions[i] = nPartInt;
            }
        }
        int[] nOutputPartitions = null;
        List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(anId);
        if (outputs != null) {
            nOutputPartitions = new int[outputs.size()];
            for (int i = 0; i < nOutputPartitions.length; ++i) {
                ConnectorDescriptorId cdId = outputs.get(i).getConnectorId();
                ActivityId aid = ac.getConsumerActivity(cdId);
                Integer nPartInt = nPartMap.get(aid.getOperatorDescriptorId());
                nOutputPartitions[i] = nPartInt;
            }
        }
        ActivityPartitionDetails apd = new ActivityPartitionDetails(nParts, nInputPartitions, nOutputPartitions);
        activityPartsMap.put(anId, apd);
    }
    return activityPartsMap;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) PartitionCountExpression(org.apache.hyracks.api.constraints.expressions.PartitionCountExpression) HashSet(java.util.HashSet)

Example 62 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class ActivityClusterPlanner method computeTaskConnectivity.

private Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> computeTaskConnectivity(JobRun jobRun, Map<ActivityId, ActivityPlan> activityPlanMap, Set<ActivityId> activities) {
    Map<TaskId, List<Pair<TaskId, ConnectorDescriptorId>>> taskConnectivity = new HashMap<>();
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    BitSet targetBitmap = new BitSet();
    for (ActivityId ac1 : activities) {
        ActivityCluster ac = acg.getActivityMap().get(ac1);
        Task[] ac1TaskStates = activityPlanMap.get(ac1).getTasks();
        int nProducers = ac1TaskStates.length;
        List<IConnectorDescriptor> outputConns = ac.getActivityOutputMap().get(ac1);
        if (outputConns == null) {
            continue;
        }
        for (IConnectorDescriptor c : outputConns) {
            ConnectorDescriptorId cdId = c.getConnectorId();
            ActivityId ac2 = ac.getConsumerActivity(cdId);
            Task[] ac2TaskStates = activityPlanMap.get(ac2).getTasks();
            int nConsumers = ac2TaskStates.length;
            if (c.allProducersToAllConsumers()) {
                List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = new ArrayList<>();
                for (int j = 0; j < nConsumers; j++) {
                    TaskId targetTID = ac2TaskStates[j].getTaskId();
                    cInfoList.add(Pair.of(targetTID, cdId));
                }
                for (int i = 0; i < nProducers; ++i) {
                    taskConnectivity.put(ac1TaskStates[i].getTaskId(), cInfoList);
                }
                continue;
            }
            for (int i = 0; i < nProducers; ++i) {
                c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
                List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(ac1TaskStates[i].getTaskId());
                if (cInfoList == null) {
                    cInfoList = new ArrayList<>();
                    taskConnectivity.put(ac1TaskStates[i].getTaskId(), cInfoList);
                }
                for (int j = targetBitmap.nextSetBit(0); j >= 0; j = targetBitmap.nextSetBit(j + 1)) {
                    TaskId targetTID = ac2TaskStates[j].getTaskId();
                    cInfoList.add(Pair.of(targetTID, cdId));
                }
            }
        }
    }
    return taskConnectivity;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) BitSet(java.util.BitSet) ArrayList(java.util.ArrayList) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) ArrayList(java.util.ArrayList) List(java.util.List) Pair(org.apache.commons.lang3.tuple.Pair)

Example 63 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class JobExecutor method assignTaskLocations.

private void assignTaskLocations(TaskCluster tc, Map<String, List<TaskAttemptDescriptor>> taskAttemptMap) throws HyracksException {
    ActivityClusterGraph acg = jobRun.getActivityClusterGraph();
    Task[] tasks = tc.getTasks();
    List<TaskClusterAttempt> tcAttempts = tc.getAttempts();
    int attempts = tcAttempts.size();
    TaskClusterAttempt tcAttempt = new TaskClusterAttempt(tc, attempts);
    Map<TaskId, TaskAttempt> taskAttempts = new HashMap<>();
    Map<TaskId, LValueConstraintExpression> locationMap = new HashMap<>();
    for (int i = 0; i < tasks.length; ++i) {
        Task ts = tasks[i];
        TaskId tid = ts.getTaskId();
        TaskAttempt taskAttempt = new TaskAttempt(tcAttempt, new TaskAttemptId(new TaskId(tid.getActivityId(), tid.getPartition()), attempts), ts);
        taskAttempt.setStatus(TaskAttempt.TaskStatus.INITIALIZED, null);
        locationMap.put(tid, new PartitionLocationExpression(tid.getActivityId().getOperatorDescriptorId(), tid.getPartition()));
        taskAttempts.put(tid, taskAttempt);
    }
    tcAttempt.setTaskAttempts(taskAttempts);
    solver.solve(locationMap.values());
    for (int i = 0; i < tasks.length; ++i) {
        Task ts = tasks[i];
        TaskId tid = ts.getTaskId();
        TaskAttempt taskAttempt = taskAttempts.get(tid);
        String nodeId = assignLocation(acg, locationMap, tid, taskAttempt);
        taskAttempt.setNodeId(nodeId);
        taskAttempt.setStatus(TaskAttempt.TaskStatus.RUNNING, null);
        taskAttempt.setStartTime(System.currentTimeMillis());
        List<TaskAttemptDescriptor> tads = taskAttemptMap.get(nodeId);
        if (tads == null) {
            tads = new ArrayList<>();
            taskAttemptMap.put(nodeId, tads);
        }
        OperatorDescriptorId opId = tid.getActivityId().getOperatorDescriptorId();
        jobRun.registerOperatorLocation(opId, tid.getPartition(), nodeId);
        ActivityPartitionDetails apd = ts.getActivityPlan().getActivityPartitionDetails();
        TaskAttemptDescriptor tad = new TaskAttemptDescriptor(taskAttempt.getTaskAttemptId(), apd.getPartitionCount(), apd.getInputPartitionCounts(), apd.getOutputPartitionCounts());
        tads.add(tad);
    }
    tcAttempt.initializePendingTaskCounter();
    tcAttempts.add(tcAttempt);
    /**
         * Improvement for reducing master/slave message communications, for each TaskAttemptDescriptor,
         * we set the NetworkAddress[][] partitionLocations, in which each row is for an incoming connector descriptor
         * and each column is for an input channel of the connector.
         */
    INodeManager nodeManager = ccs.getNodeManager();
    for (Map.Entry<String, List<TaskAttemptDescriptor>> e : taskAttemptMap.entrySet()) {
        List<TaskAttemptDescriptor> tads = e.getValue();
        for (TaskAttemptDescriptor tad : tads) {
            TaskAttemptId taid = tad.getTaskAttemptId();
            int attempt = taid.getAttempt();
            TaskId tid = taid.getTaskId();
            ActivityId aid = tid.getActivityId();
            List<IConnectorDescriptor> inConnectors = acg.getActivityInputs(aid);
            int[] inPartitionCounts = tad.getInputPartitionCounts();
            if (inPartitionCounts == null) {
                continue;
            }
            NetworkAddress[][] partitionLocations = new NetworkAddress[inPartitionCounts.length][];
            for (int i = 0; i < inPartitionCounts.length; ++i) {
                ConnectorDescriptorId cdId = inConnectors.get(i).getConnectorId();
                IConnectorPolicy policy = jobRun.getConnectorPolicyMap().get(cdId);
                /**
                     * carry sender location information into a task
                     * when it is not the case that it is an re-attempt and the send-side
                     * is materialized blocking.
                     */
                if (attempt > 0 && policy.materializeOnSendSide() && policy.consumerWaitsForProducerToFinish()) {
                    continue;
                }
                ActivityId producerAid = acg.getProducerActivity(cdId);
                partitionLocations[i] = new NetworkAddress[inPartitionCounts[i]];
                for (int j = 0; j < inPartitionCounts[i]; ++j) {
                    TaskId producerTaskId = new TaskId(producerAid, j);
                    String nodeId = findTaskLocation(producerTaskId);
                    partitionLocations[i][j] = nodeManager.getNodeControllerState(nodeId).getDataPort();
                }
            }
            tad.setInputPartitionLocations(partitionLocations);
        }
    }
    tcAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.RUNNING);
    tcAttempt.setStartTime(System.currentTimeMillis());
    inProgressTaskClusters.add(tc);
}
Also used : INodeManager(org.apache.hyracks.control.cc.cluster.INodeManager) Task(org.apache.hyracks.control.cc.job.Task) TaskId(org.apache.hyracks.api.dataflow.TaskId) TaskClusterAttempt(org.apache.hyracks.control.cc.job.TaskClusterAttempt) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) NetworkAddress(org.apache.hyracks.api.comm.NetworkAddress) ArrayList(java.util.ArrayList) List(java.util.List) TaskAttempt(org.apache.hyracks.control.cc.job.TaskAttempt) PartitionLocationExpression(org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) TaskAttemptId(org.apache.hyracks.api.dataflow.TaskAttemptId) IConnectorPolicy(org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy) Constraint(org.apache.hyracks.api.constraints.Constraint) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) TaskAttemptDescriptor(org.apache.hyracks.control.common.job.TaskAttemptDescriptor) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) HashMap(java.util.HashMap) Map(java.util.Map)

Example 64 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class HashPartitionExchangePOperator method createConnectorDescriptor.

@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int[] keys = new int[hashFields.size()];
    IBinaryHashFunctionFactory[] hashFunctionFactories = new IBinaryHashFunctionFactory[hashFields.size()];
    int i = 0;
    IBinaryHashFunctionFactoryProvider hashFunProvider = context.getBinaryHashFunctionFactoryProvider();
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    for (LogicalVariable v : hashFields) {
        keys[i] = opSchema.findVariable(v);
        hashFunctionFactories[i] = hashFunProvider.getBinaryHashFunctionFactory(env.getVarType(v));
        ++i;
    }
    ITuplePartitionComputerFactory tpcf = new FieldHashPartitionComputerFactory(keys, hashFunctionFactories);
    IConnectorDescriptor conn = new MToNPartitioningConnectorDescriptor(spec, tpcf);
    return new Pair<>(conn, null);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) ITuplePartitionComputerFactory(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IBinaryHashFunctionFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) TargetConstraint(org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 65 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class HashPartitionMergeExchangePOperator method createConnectorDescriptor.

@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int[] keys = new int[partitionFields.size()];
    IBinaryHashFunctionFactory[] hashFunctionFactories = new IBinaryHashFunctionFactory[partitionFields.size()];
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    {
        int i = 0;
        IBinaryHashFunctionFactoryProvider hashFunProvider = context.getBinaryHashFunctionFactoryProvider();
        for (LogicalVariable v : partitionFields) {
            keys[i] = opSchema.findVariable(v);
            hashFunctionFactories[i] = hashFunProvider.getBinaryHashFunctionFactory(env.getVarType(v));
            ++i;
        }
    }
    ITuplePartitionComputerFactory tpcf = new FieldHashPartitionComputerFactory(keys, hashFunctionFactories);
    int n = orderColumns.size();
    int[] sortFields = new int[n];
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[n];
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    INormalizedKeyComputerFactory nkcf = null;
    int j = 0;
    for (OrderColumn oc : orderColumns) {
        LogicalVariable var = oc.getColumn();
        sortFields[j] = opSchema.findVariable(var);
        Object type = env.getVarType(var);
        IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
        comparatorFactories[j] = bcfp.getBinaryComparatorFactory(type, oc.getOrder() == OrderKind.ASC);
        if (j == 0 && nkcfProvider != null && type != null) {
            nkcf = nkcfProvider.getNormalizedKeyComputerFactory(type, oc.getOrder() == OrderKind.ASC);
        }
        j++;
    }
    IConnectorDescriptor conn = new MToNPartitioningMergingConnectorDescriptor(spec, tpcf, sortFields, comparatorFactories, nkcf);
    return new Pair<IConnectorDescriptor, TargetConstraint>(conn, null);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) ITuplePartitionComputerFactory(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IBinaryHashFunctionFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) OrderColumn(org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) TargetConstraint(org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) INormalizedKeyComputerFactoryProvider(org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)72 JobSpecification (org.apache.hyracks.api.job.JobSpecification)45 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)40 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)40 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)37 Test (org.junit.Test)35 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)33 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)32 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)31 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)27 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)25 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)24 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)22 FileSplit (org.apache.hyracks.api.io.FileSplit)21 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)20 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)19 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)19 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)18