Search in sources :

Example 11 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in the Apache AsterixDB project.

In class JobSpecificationActivityClusterGraphGeneratorFactory, method createActivityClusterGraphGenerator.

/**
 * Creates the generator that exposes the activity cluster graph and the scheduling
 * constraints derived from this factory's job specification ({@code spec}).
 *
 * <p>The work happens in three steps: the job activity graph is assembled from the
 * connectors and operators of the specification, activity clusters are inferred from
 * that graph and configured from the specification's settings, and finally the
 * scheduling constraints contributed by operators, connectors and the user are gathered.
 *
 * @param jobId        id of the job the activity clusters are created for
 * @param ccServiceCtx service context handed to operators and connectors when they
 *                     contribute scheduling constraints
 * @param jobFlags     flags passed on to the job activity graph builder
 * @return a generator whose {@code initialize()} rewrites and returns the inferred
 *         cluster graph and whose {@code getConstraints()} returns the gathered constraints
 * @throws HyracksException if visiting the specification fails
 */
@Override
public IActivityClusterGraphGenerator createActivityClusterGraphGenerator(JobId jobId, final ICCServiceContext ccServiceCtx, Set<JobFlag> jobFlags) throws HyracksException {
    // Step 1: connectors are registered first, then operators contribute their activities.
    final JobActivityGraphBuilder graphBuilder = new JobActivityGraphBuilder(spec, jobFlags);
    final IConnectorDescriptorVisitor connectorRegistrar = new IConnectorDescriptorVisitor() {

        @Override
        public void visit(IConnectorDescriptor connector) throws HyracksException {
            graphBuilder.addConnector(connector);
        }
    };
    PlanUtils.visit(spec, connectorRegistrar);
    final IOperatorDescriptorVisitor activityContributor = new IOperatorDescriptorVisitor() {

        @Override
        public void visit(IOperatorDescriptor operator) {
            operator.contributeActivities(graphBuilder);
        }
    };
    PlanUtils.visit(spec, activityContributor);
    graphBuilder.finish();
    final JobActivityGraph activityGraph = graphBuilder.getActivityGraph();

    // Step 2: infer the clusters and copy the job-level settings onto the cluster graph.
    final ActivityClusterGraph clusterGraph = new ActivityClusterGraphBuilder().inferActivityClusters(jobId, activityGraph);
    clusterGraph.setFrameSize(spec.getFrameSize());
    clusterGraph.setMaxReattempts(spec.getMaxReattempts());
    clusterGraph.setJobletEventListenerFactory(spec.getJobletEventListenerFactory());
    clusterGraph.setGlobalJobDataFactory(spec.getGlobalJobDataFactory());
    clusterGraph.setConnectorPolicyAssignmentPolicy(spec.getConnectorPolicyAssignmentPolicy());
    clusterGraph.setUseConnectorPolicyForScheduling(spec.isUseConnectorPolicyForScheduling());

    // Step 3: gather constraints from operators, then connectors, then the user.
    final Set<Constraint> gathered = new HashSet<>();
    final IConstraintAcceptor gatherer = new IConstraintAcceptor() {

        @Override
        public void addConstraint(Constraint constraint) {
            gathered.add(constraint);
        }
    };
    final IOperatorDescriptorVisitor operatorConstraints = new IOperatorDescriptorVisitor() {

        @Override
        public void visit(IOperatorDescriptor operator) {
            operator.contributeSchedulingConstraints(gatherer, ccServiceCtx);
        }
    };
    PlanUtils.visit(spec, operatorConstraints);
    final IConnectorDescriptorVisitor connectorConstraints = new IConnectorDescriptorVisitor() {

        @Override
        public void visit(IConnectorDescriptor connector) {
            // The connector is given the activity cluster registered under its own id.
            connector.contributeSchedulingConstraints(gatherer, clusterGraph.getConnectorMap().get(connector.getConnectorId()), ccServiceCtx);
        }
    };
    PlanUtils.visit(spec, connectorConstraints);
    gathered.addAll(spec.getUserConstraints());

    return new IActivityClusterGraphGenerator() {

        @Override
        public ActivityClusterGraph initialize() {
            // A fresh rewriter is applied to the same graph instance on every call.
            new ActivityClusterGraphRewriter().rewrite(clusterGraph);
            return clusterGraph;
        }

        @Override
        public Set<Constraint> getConstraints() {
            return gathered;
        }
    };
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) Constraint(org.apache.hyracks.api.constraints.Constraint) IConstraintAcceptor(org.apache.hyracks.api.constraints.IConstraintAcceptor) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) IActivityClusterGraphGenerator(org.apache.hyracks.api.job.IActivityClusterGraphGenerator) JobActivityGraph(org.apache.hyracks.api.job.JobActivityGraph) ActivityClusterGraphRewriter(org.apache.hyracks.api.rewriter.ActivityClusterGraphRewriter) HashSet(java.util.HashSet)

Example 12 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in the Apache AsterixDB project.

In class ActivityClusterGraphBuilder, method findMergePair.

/**
 * Looks for an activity whose connector-linked neighbour is not in the same equivalence set.
 *
 * <p>For every member of every set, the producers of its input connectors are checked
 * first, then the consumers of its output connectors. The first neighbour found outside
 * the member's own set ends the search.
 *
 * @param jag    job activity graph supplying the input/output connector lists and the
 *               producer/consumer activity of each connector
 * @param eqSets the current equivalence sets of activity ids
 * @return a pair of (member, neighbour outside the member's set), or {@code null} when
 *         no such neighbour exists
 */
private static Pair<ActivityId, ActivityId> findMergePair(JobActivityGraph jag, Set<Set<ActivityId>> eqSets) {
    for (Set<ActivityId> group : eqSets) {
        for (ActivityId member : group) {
            // Upstream neighbours: producers of the connectors feeding this activity.
            List<IConnectorDescriptor> incoming = jag.getActivityInputMap().get(member);
            if (incoming != null) {
                for (IConnectorDescriptor inConn : incoming) {
                    ActivityId producer = jag.getProducerActivity(inConn.getConnectorId());
                    if (group.contains(producer)) {
                        continue;
                    }
                    return Pair.of(member, producer);
                }
            }
            // Downstream neighbours: consumers of the connectors this activity feeds.
            List<IConnectorDescriptor> outgoing = jag.getActivityOutputMap().get(member);
            if (outgoing != null) {
                for (IConnectorDescriptor outConn : outgoing) {
                    ActivityId consumer = jag.getConsumerActivity(outConn.getConnectorId());
                    if (group.contains(consumer)) {
                        continue;
                    }
                    return Pair.of(member, consumer);
                }
            }
        }
    }
    return null;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) ActivityId(org.apache.hyracks.api.dataflow.ActivityId)

Example 13 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in the Apache AsterixDB project.

In class ActivityClusterGraphBuilder, method inferActivityClusters.

/**
 * Groups the activities of a job activity graph into activity clusters and wires the
 * clusters into a new {@link ActivityClusterGraph}.
 *
 * <p>Every activity starts in its own equivalence set; sets are merged for as long as
 * {@code findMergePair} reports a pair to merge. One cluster is then created per
 * remaining set, the output connectors of each activity are attached to its cluster,
 * and the clusters of an activity's blockers are added as cluster dependencies.
 *
 * @param jobId id of the job, used to build the cluster ids
 * @param jag   the job activity graph to partition
 * @return the populated activity cluster graph
 */
public ActivityClusterGraph inferActivityClusters(JobId jobId, JobActivityGraph jag) {
    // Seed the partition: each activity id maps to a singleton set containing only itself.
    Map<ActivityId, Set<ActivityId>> groupOf = new HashMap<>();
    Set<Set<ActivityId>> groups = new HashSet<>();
    for (ActivityId id : jag.getActivityMap().keySet()) {
        Set<ActivityId> singleton = new HashSet<>();
        singleton.add(id);
        groupOf.put(id, singleton);
        groups.add(singleton);
    }

    // Keep merging until no further merge pair is reported.
    Pair<ActivityId, ActivityId> toMerge = findMergePair(jag, groups);
    while (toMerge != null) {
        merge(groupOf, groups, toMerge.getLeft(), toMerge.getRight());
        toMerge = findMergePair(jag, groups);
    }

    // One cluster per group; cluster ids are numbered in group iteration order.
    ActivityClusterGraph acg = new ActivityClusterGraph();
    Map<ActivityId, ActivityCluster> clusterOf = new HashMap<>();
    Map<ActivityId, IActivity> activities = jag.getActivityMap();
    List<ActivityCluster> clusters = new ArrayList<>();
    int nextClusterIndex = 0;
    for (Set<ActivityId> group : groups) {
        ActivityClusterId clusterId = new ActivityClusterId(jobId, nextClusterIndex);
        nextClusterIndex++;
        ActivityCluster cluster = new ActivityCluster(acg, clusterId);
        clusters.add(cluster);
        for (ActivityId id : group) {
            cluster.addActivity(activities.get(id));
            clusterOf.put(id, cluster);
        }
    }

    // Attach output connectors; an activity with no outputs becomes a root of its cluster.
    for (Set<ActivityId> group : groups) {
        for (ActivityId id : group) {
            IActivity producer = activities.get(id);
            ActivityCluster cluster = clusterOf.get(id);
            List<IConnectorDescriptor> outputs = jag.getActivityOutputMap().get(id);
            if (outputs == null || outputs.isEmpty()) {
                cluster.addRoot(producer);
                continue;
            }
            for (int port = 0, portCount = outputs.size(); port < portCount; port++) {
                IConnectorDescriptor conn = outputs.get(port);
                cluster.addConnector(conn);
                Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endpoints = jag.getConnectorActivityMap().get(conn.getConnectorId());
                // Only the right-hand endpoint is read; the producing side is this activity at this port.
                Pair<IActivity, Integer> consumerEnd = endpoints.getRight();
                cluster.connect(conn, producer, port, consumerEnd.getLeft(), consumerEnd.getRight(), jag.getConnectorRecordDescriptorMap().get(conn.getConnectorId()));
            }
        }
    }

    // Copy blocker information into each cluster and derive cluster-level dependencies.
    Map<ActivityId, Set<ActivityId>> jobBlockers = jag.getBlocked2BlockerMap();
    for (ActivityCluster cluster : clusters) {
        Map<ActivityId, Set<ActivityId>> clusterBlockers = cluster.getBlocked2BlockerMap();
        Set<ActivityCluster> blockingClusters = new HashSet<>();
        for (ActivityId blocked : cluster.getActivityMap().keySet()) {
            Set<ActivityId> blockers = jobBlockers.get(blocked);
            // The entry is stored even when there are no blockers (the value is then null).
            clusterBlockers.put(blocked, blockers);
            if (blockers == null) {
                continue;
            }
            for (ActivityId blocker : blockers) {
                blockingClusters.add(clusterOf.get(blocker));
            }
        }
        for (ActivityCluster blocking : blockingClusters) {
            cluster.getDependencies().add(blocking);
        }
    }

    acg.addActivityClusters(clusters);
    if (LOGGER.isLoggable(Level.FINE)) {
        // NOTE(review): if toJSON() returns a Jackson container node, asText() yields an
        // empty string and toString() may be what was intended — confirm against ActivityClusterGraph.
        LOGGER.fine(acg.toJSON().asText());
    }
    return acg;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ArrayList(java.util.ArrayList) ActivityCluster(org.apache.hyracks.api.job.ActivityCluster) IActivity(org.apache.hyracks.api.dataflow.IActivity) ActivityClusterId(org.apache.hyracks.api.job.ActivityClusterId) ActivityClusterGraph(org.apache.hyracks.api.job.ActivityClusterGraph) HashSet(java.util.HashSet) Pair(org.apache.commons.lang3.tuple.Pair)

Example 14 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in the Apache AsterixDB project.

In class SuperActivityOperatorNodePushable, method init.

/**
 * Creates the push runtimes of all activities reachable from the start activities and
 * connects each producer output to its consumer input.
 *
 * <p>Start activities are instantiated first and their input arities are added to
 * {@code inputArity}. The remaining activities are then discovered breadth-first by
 * following output connectors; every runtime is recorded in
 * {@code operatorNodePushablesBFSOrder} in creation order and in
 * {@code operatorNodePushables} under its activity id.
 *
 * @throws HyracksDataException declared by this method; presumably raised when creating
 *         or wiring a runtime fails — confirm against the called APIs
 */
private void init() throws HyracksDataException {
    // FIFO of (producer, output port) -> (consumer, input port) edges still to be wired.
    Queue<Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> pendingEdges = new LinkedList<>();

    // Instantiate the start activities and enqueue their outgoing edges.
    for (Entry<ActivityId, IActivity> start : startActivities.entrySet()) {
        IOperatorNodePushable startRuntime = start.getValue().createPushRuntime(ctx, recordDescProvider, partition, nPartitions);
        operatorNodePushablesBFSOrder.add(startRuntime);
        operatorNodePushables.put(start.getKey(), startRuntime);
        inputArity += startRuntime.getInputArity();
        List<IConnectorDescriptor> startOutputs = MapUtils.getObject(parent.getActivityOutputMap(), start.getKey(), Collections.emptyList());
        for (IConnectorDescriptor conn : startOutputs) {
            pendingEdges.add(parent.getConnectorActivityMap().get(conn.getConnectorId()));
        }
    }

    // Breadth-first expansion of the runtime execution DAG.
    while (!pendingEdges.isEmpty()) {
        Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> edge = pendingEdges.poll();

        // Decode both endpoints of the edge.
        Pair<IActivity, Integer> producerEnd = edge.getLeft();
        ActivityId sourceId = producerEnd.getLeft().getActivityId();
        int outputChannel = producerEnd.getRight();
        Pair<IActivity, Integer> consumerEnd = edge.getRight();
        ActivityId destId = consumerEnd.getLeft().getActivityId();
        int inputChannel = consumerEnd.getRight();

        // The producer runtime already exists; the consumer runtime is created on first visit.
        IOperatorNodePushable sourceOp = operatorNodePushables.get(sourceId);
        IOperatorNodePushable destOp = operatorNodePushables.get(destId);
        if (destOp == null) {
            destOp = consumerEnd.getLeft().createPushRuntime(ctx, recordDescProvider, partition, nPartitions);
            operatorNodePushablesBFSOrder.add(destOp);
            operatorNodePushables.put(destId, destOp);
        }

        // Connect the producer's output channel to the consumer's input frame writer.
        sourceOp.setOutputFrameWriter(outputChannel, destOp.getInputFrameWriter(inputChannel), recordDescProvider.getInputRecordDescriptor(destId, inputChannel));

        // Continue downstream from the consumer; null connectors are skipped here.
        List<IConnectorDescriptor> destOutputs = MapUtils.getObject(parent.getActivityOutputMap(), destId, Collections.emptyList());
        for (IConnectorDescriptor conn : destOutputs) {
            if (conn == null) {
                continue;
            }
            pendingEdges.add(parent.getConnectorActivityMap().get(conn.getConnectorId()));
        }
    }
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IActivity(org.apache.hyracks.api.dataflow.IActivity) IOperatorNodePushable(org.apache.hyracks.api.dataflow.IOperatorNodePushable) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) LinkedList(java.util.LinkedList) Pair(org.apache.commons.lang3.tuple.Pair)

Example 15 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in the Apache AsterixDB project.

In class RangePartitionExchangePOperator, method createConnectorDescriptor.

/**
 * Builds the M-to-N partitioning connector that range-partitions tuples on this
 * operator's {@code partitioningFields} using {@code rangeMap}.
 *
 * <p>For every partitioning column, the column's position in the operator schema becomes
 * a sort field and a binary comparator factory is obtained for the column's type, in
 * ascending order when the column's order kind is {@code ASC}.
 *
 * @param spec     registry the new connector descriptor is created against
 * @param op       logical operator whose type environment supplies the column types
 * @param opSchema schema used to resolve each partitioning variable to a field index
 * @param context  job generation context supplying the type environment and the
 *                 comparator factory provider
 * @return the connector paired with a {@code null} target constraint
 * @throws AlgebricksException declared by the overridden method; presumably raised when
 *         a type or schema lookup fails — confirm against the called APIs
 */
@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int n = partitioningFields.size();
    int[] sortFields = new int[n];
    IBinaryComparatorFactory[] comps = new IBinaryComparatorFactory[n];
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    // The provider does not depend on the column, so it is looked up once.
    IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
    int i = 0;
    for (OrderColumn oc : partitioningFields) {
        LogicalVariable partitionVar = oc.getColumn();
        sortFields[i] = opSchema.findVariable(partitionVar);
        Object type = env.getVarType(partitionVar);
        comps[i] = bcfp.getBinaryComparatorFactory(type, oc.getOrder() == OrderKind.ASC);
        i++;
    }
    // No normalized key computer is built here: the range partition computer takes only
    // the sort fields, comparators and range map.
    ITuplePartitionComputerFactory tpcf = new FieldRangePartitionComputerFactory(sortFields, comps, rangeMap);
    IConnectorDescriptor conn = new MToNPartitioningConnectorDescriptor(spec, tpcf);
    return new Pair<IConnectorDescriptor, TargetConstraint>(conn, null);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) ITuplePartitionComputerFactory(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) FieldRangePartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.range.FieldRangePartitionComputerFactory) OrderColumn(org.apache.hyracks.algebricks.core.algebra.properties.OrderColumn) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) TargetConstraint(org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) OrderKind(org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder.OrderKind) INormalizedKeyComputerFactoryProvider(org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)72 JobSpecification (org.apache.hyracks.api.job.JobSpecification)45 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)40 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)40 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)37 Test (org.junit.Test)35 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)33 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)32 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)31 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)27 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)25 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)24 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)22 FileSplit (org.apache.hyracks.api.io.FileSplit)21 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)20 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)19 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)19 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)18