Search in sources :

Example 6 with OperatorDescriptorId

use of org.apache.hyracks.api.dataflow.OperatorDescriptorId in project asterixdb by apache.

the class FeedOperations method combineIntakeCollectJobs.

private static JobSpecification combineIntakeCollectJobs(MetadataProvider metadataProvider, Feed feed, JobSpecification intakeJob, List<JobSpecification> jobsList, List<FeedConnection> feedConnections, String[] intakeLocations) throws AlgebricksException, HyracksDataException {
    JobSpecification jobSpec = new JobSpecification(intakeJob.getFrameSize());
    // copy ingestor
    FeedIntakeOperatorDescriptor firstOp = (FeedIntakeOperatorDescriptor) intakeJob.getOperatorMap().get(new OperatorDescriptorId(0));
    FeedIntakeOperatorDescriptor ingestionOp;
    if (firstOp.getAdaptorFactory() == null) {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorLibraryName(), firstOp.getAdaptorFactoryClassName(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
    } else {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorFactory(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
    }
    // create replicator
    ReplicateOperatorDescriptor replicateOp = new ReplicateOperatorDescriptor(jobSpec, ingestionOp.getOutputRecordDescriptors()[0], jobsList.size());
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), ingestionOp, 0, replicateOp, 0);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, ingestionOp, intakeLocations);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, replicateOp, intakeLocations);
    // Loop over the jobs to copy operators and connections
    Map<OperatorDescriptorId, OperatorDescriptorId> operatorIdMapping = new HashMap<>();
    Map<ConnectorDescriptorId, ConnectorDescriptorId> connectorIdMapping = new HashMap<>();
    Map<OperatorDescriptorId, List<LocationConstraint>> operatorLocations = new HashMap<>();
    Map<OperatorDescriptorId, Integer> operatorCounts = new HashMap<>();
    List<JobId> jobIds = new ArrayList<>();
    FeedMetaOperatorDescriptor metaOp;
    for (int iter1 = 0; iter1 < jobsList.size(); iter1++) {
        FeedConnection curFeedConnection = feedConnections.get(iter1);
        JobSpecification subJob = jobsList.get(iter1);
        operatorIdMapping.clear();
        Map<OperatorDescriptorId, IOperatorDescriptor> operatorsMap = subJob.getOperatorMap();
        String datasetName = feedConnections.get(iter1).getDatasetName();
        FeedConnectionId feedConnectionId = new FeedConnectionId(ingestionOp.getEntityId(), datasetName);
        FeedPolicyEntity feedPolicyEntity = FeedMetadataUtil.validateIfPolicyExists(curFeedConnection.getDataverseName(), curFeedConnection.getPolicyName(), metadataProvider.getMetadataTxnContext());
        for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> entry : operatorsMap.entrySet()) {
            IOperatorDescriptor opDesc = entry.getValue();
            OperatorDescriptorId oldId = opDesc.getOperatorId();
            OperatorDescriptorId opId = null;
            if (opDesc instanceof LSMTreeInsertDeleteOperatorDescriptor && ((LSMTreeInsertDeleteOperatorDescriptor) opDesc).isPrimary()) {
                metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc, feedPolicyEntity.getProperties(), FeedRuntimeType.STORE);
                opId = metaOp.getOperatorId();
                opDesc.setOperatorId(opId);
            } else {
                if (opDesc instanceof AlgebricksMetaOperatorDescriptor) {
                    AlgebricksMetaOperatorDescriptor algOp = (AlgebricksMetaOperatorDescriptor) opDesc;
                    IPushRuntimeFactory[] runtimeFactories = algOp.getPipeline().getRuntimeFactories();
                    // Tweak AssignOp to work with messages
                    if (runtimeFactories[0] instanceof AssignRuntimeFactory && runtimeFactories.length > 1) {
                        IConnectorDescriptor connectorDesc = subJob.getOperatorInputMap().get(opDesc.getOperatorId()).get(0);
                        // anything on the network interface needs to be message compatible
                        if (connectorDesc instanceof MToNPartitioningConnectorDescriptor) {
                            metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc, feedPolicyEntity.getProperties(), FeedRuntimeType.COMPUTE);
                            opId = metaOp.getOperatorId();
                            opDesc.setOperatorId(opId);
                        }
                    }
                }
                if (opId == null) {
                    opId = jobSpec.createOperatorDescriptorId(opDesc);
                }
            }
            operatorIdMapping.put(oldId, opId);
        }
        // copy connectors
        connectorIdMapping.clear();
        for (Entry<ConnectorDescriptorId, IConnectorDescriptor> entry : subJob.getConnectorMap().entrySet()) {
            IConnectorDescriptor connDesc = entry.getValue();
            ConnectorDescriptorId newConnId;
            if (connDesc instanceof MToNPartitioningConnectorDescriptor) {
                MToNPartitioningConnectorDescriptor m2nConn = (MToNPartitioningConnectorDescriptor) connDesc;
                connDesc = new MToNPartitioningWithMessageConnectorDescriptor(jobSpec, m2nConn.getTuplePartitionComputerFactory());
                newConnId = connDesc.getConnectorId();
            } else {
                newConnId = jobSpec.createConnectorDescriptor(connDesc);
            }
            connectorIdMapping.put(entry.getKey(), newConnId);
        }
        // make connections between operators
        for (Entry<ConnectorDescriptorId, Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>>> entry : subJob.getConnectorOperatorMap().entrySet()) {
            ConnectorDescriptorId newId = connectorIdMapping.get(entry.getKey());
            IConnectorDescriptor connDesc = jobSpec.getConnectorMap().get(newId);
            Pair<IOperatorDescriptor, Integer> leftOp = entry.getValue().getLeft();
            Pair<IOperatorDescriptor, Integer> rightOp = entry.getValue().getRight();
            IOperatorDescriptor leftOpDesc = jobSpec.getOperatorMap().get(leftOp.getLeft().getOperatorId());
            IOperatorDescriptor rightOpDesc = jobSpec.getOperatorMap().get(rightOp.getLeft().getOperatorId());
            if (leftOp.getLeft() instanceof FeedCollectOperatorDescriptor) {
                jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), replicateOp, iter1, leftOpDesc, leftOp.getRight());
            }
            jobSpec.connect(connDesc, leftOpDesc, leftOp.getRight(), rightOpDesc, rightOp.getRight());
        }
        // prepare for setting partition constraints
        operatorLocations.clear();
        operatorCounts.clear();
        for (Constraint constraint : subJob.getUserConstraints()) {
            LValueConstraintExpression lexpr = constraint.getLValue();
            ConstraintExpression cexpr = constraint.getRValue();
            OperatorDescriptorId opId;
            switch(lexpr.getTag()) {
                case PARTITION_COUNT:
                    opId = ((PartitionCountExpression) lexpr).getOperatorDescriptorId();
                    operatorCounts.put(operatorIdMapping.get(opId), (int) ((ConstantExpression) cexpr).getValue());
                    break;
                case PARTITION_LOCATION:
                    opId = ((PartitionLocationExpression) lexpr).getOperatorDescriptorId();
                    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(operatorIdMapping.get(opId));
                    List<LocationConstraint> locations = operatorLocations.get(opDesc.getOperatorId());
                    if (locations == null) {
                        locations = new ArrayList<>();
                        operatorLocations.put(opDesc.getOperatorId(), locations);
                    }
                    String location = (String) ((ConstantExpression) cexpr).getValue();
                    LocationConstraint lc = new LocationConstraint(location, ((PartitionLocationExpression) lexpr).getPartition());
                    locations.add(lc);
                    break;
                default:
                    break;
            }
        }
        // set absolute location constraints
        for (Entry<OperatorDescriptorId, List<LocationConstraint>> entry : operatorLocations.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            // why do we need to sort?
            Collections.sort(entry.getValue(), (LocationConstraint o1, LocationConstraint o2) -> {
                return o1.partition - o2.partition;
            });
            String[] locations = new String[entry.getValue().size()];
            for (int j = 0; j < locations.length; ++j) {
                locations[j] = entry.getValue().get(j).location;
            }
            PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, opDesc, locations);
        }
        // set count constraints
        for (Entry<OperatorDescriptorId, Integer> entry : operatorCounts.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            if (!operatorLocations.keySet().contains(entry.getKey())) {
                PartitionConstraintHelper.addPartitionCountConstraint(jobSpec, opDesc, entry.getValue());
            }
        }
        // roots
        for (OperatorDescriptorId root : subJob.getRoots()) {
            jobSpec.addRoot(jobSpec.getOperatorMap().get(operatorIdMapping.get(root)));
        }
        jobIds.add(((JobEventListenerFactory) subJob.getJobletEventListenerFactory()).getJobId());
    }
    // jobEventListenerFactory
    jobSpec.setJobletEventListenerFactory(new MultiTransactionJobletEventListenerFactory(jobIds, true));
    // useConnectorSchedulingPolicy
    jobSpec.setUseConnectorPolicyForScheduling(jobsList.get(0).isUseConnectorPolicyForScheduling());
    // connectorAssignmentPolicy
    jobSpec.setConnectorPolicyAssignmentPolicy(jobsList.get(0).getConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
Also used : HashMap(java.util.HashMap) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Constraint(org.apache.hyracks.api.constraints.Constraint) LocationConstraint(org.apache.asterix.metadata.feeds.LocationConstraint) ConstantExpression(org.apache.hyracks.api.constraints.expressions.ConstantExpression) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) ArrayList(java.util.ArrayList) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) LocationConstraint(org.apache.asterix.metadata.feeds.LocationConstraint) List(java.util.List) ArrayList(java.util.ArrayList) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) ReplicateOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ReplicateOperatorDescriptor) Map(java.util.Map) HashMap(java.util.HashMap) FeedCollectOperatorDescriptor(org.apache.asterix.external.operators.FeedCollectOperatorDescriptor) FeedPolicyEntity(org.apache.asterix.metadata.entities.FeedPolicyEntity) JobSpecification(org.apache.hyracks.api.job.JobSpecification) FeedIntakeOperatorDescriptor(org.apache.asterix.external.operators.FeedIntakeOperatorDescriptor) JobId(org.apache.asterix.common.transactions.JobId) Pair(org.apache.commons.lang3.tuple.Pair) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MToNPartitioningWithMessageConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningWithMessageConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) ConstraintExpression(org.apache.hyracks.api.constraints.expressions.ConstraintExpression) FeedConnection(org.apache.asterix.metadata.entities.FeedConnection) MultiTransactionJobletEventListenerFactory(org.apache.asterix.runtime.job.listener.MultiTransactionJobletEventListenerFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) AssignRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Constraint(org.apache.hyracks.api.constraints.Constraint) LocationConstraint(org.apache.asterix.metadata.feeds.LocationConstraint) FeedMetaOperatorDescriptor(org.apache.asterix.external.operators.FeedMetaOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FeedConnectionId(org.apache.asterix.external.feed.management.FeedConnectionId) LSMTreeInsertDeleteOperatorDescriptor(org.apache.asterix.common.dataflow.LSMTreeInsertDeleteOperatorDescriptor)

Example 7 with OperatorDescriptorId

use of org.apache.hyracks.api.dataflow.OperatorDescriptorId in project asterixdb by apache.

the class FramewriterTest method createWriters.

/**
     * @return a list of writers to test. these writers can be of the same type but behave differently based on included mocks
     * @throws HyracksDataException
     * @throws IndexException
     */
public IFrameWriter[] createWriters() throws HyracksDataException {
    ArrayList<BTreeSearchOperatorNodePushable> writers = new ArrayList<>();
    Pair<IIndexDataflowHelperFactory, ISearchOperationCallbackFactory>[] pairs = pairs();
    IRecordDescriptorProvider[] recordDescProviders = mockRecDescProviders();
    int partition = 0;
    IHyracksTaskContext[] ctxs = mockIHyracksTaskContext();
    int[] keys = { 0 };
    boolean lowKeyInclusive = true;
    boolean highKeyInclusive = true;
    for (Pair<IIndexDataflowHelperFactory, ISearchOperationCallbackFactory> pair : pairs) {
        for (IRecordDescriptorProvider recordDescProvider : recordDescProviders) {
            for (IHyracksTaskContext ctx : ctxs) {
                BTreeSearchOperatorNodePushable writer = new BTreeSearchOperatorNodePushable(ctx, partition, recordDescProvider.getInputRecordDescriptor(new ActivityId(new OperatorDescriptorId(0), 0), 0), keys, keys, lowKeyInclusive, highKeyInclusive, keys, keys, pair.getLeft(), false, false, null, pair.getRight(), false);
                writers.add(writer);
            }
        }
    }
    // Create the framewriter using the mocks
    return writers.toArray(new IFrameWriter[writers.size()]);
}
Also used : OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ArrayList(java.util.ArrayList) IRecordDescriptorProvider(org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider) ISearchOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) BTreeSearchOperatorNodePushable(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorNodePushable) Pair(org.apache.commons.lang3.tuple.Pair)

Example 8 with OperatorDescriptorId

use of org.apache.hyracks.api.dataflow.OperatorDescriptorId in project asterixdb by apache.

the class JobBuilder method setAllPartitionConstraints.

private void setAllPartitionConstraints(Map<IConnectorDescriptor, TargetConstraint> tgtConstraints) {
    List<OperatorDescriptorId> roots = jobSpec.getRoots();
    setSpecifiedPartitionConstraints();
    for (OperatorDescriptorId rootId : roots) {
        setPartitionConstraintsBottomup(rootId, tgtConstraints, null, false);
    }
    for (OperatorDescriptorId rootId : roots) {
        setPartitionConstraintsTopdown(rootId, tgtConstraints, null);
    }
    for (OperatorDescriptorId rootId : roots) {
        setPartitionConstraintsBottomup(rootId, tgtConstraints, null, true);
    }
}
Also used : OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId)

Example 9 with OperatorDescriptorId

use of org.apache.hyracks.api.dataflow.OperatorDescriptorId in project asterixdb by apache.

the class TestUtils method create.

public static IHyracksTaskContext create(int frameSize) {
    try {
        IOManager ioManager = createIoManager();
        INCServiceContext serviceCtx = new TestNCServiceContext(ioManager, null);
        TestJobletContext jobletCtx = new TestJobletContext(frameSize, serviceCtx, new JobId(0));
        TaskAttemptId tid = new TaskAttemptId(new TaskId(new ActivityId(new OperatorDescriptorId(0), 0), 0), 0);
        IHyracksTaskContext taskCtx = new TestTaskContext(jobletCtx, tid);
        return taskCtx;
    } catch (HyracksException e) {
        throw new RuntimeException(e);
    }
}
Also used : OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) TaskId(org.apache.hyracks.api.dataflow.TaskId) IOManager(org.apache.hyracks.control.nc.io.IOManager) TaskAttemptId(org.apache.hyracks.api.dataflow.TaskAttemptId) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) INCServiceContext(org.apache.hyracks.api.application.INCServiceContext) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) JobId(org.apache.hyracks.api.job.JobId)

Example 10 with OperatorDescriptorId

use of org.apache.hyracks.api.dataflow.OperatorDescriptorId in project asterixdb by apache.

the class ActivityClusterPlanner method computePartitionCounts.

private Map<ActivityId, ActivityPartitionDetails> computePartitionCounts(ActivityCluster ac) throws HyracksException {
    PartitionConstraintSolver solver = executor.getSolver();
    Set<LValueConstraintExpression> lValues = new HashSet<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        lValues.add(new PartitionCountExpression(anId.getOperatorDescriptorId()));
    }
    solver.solve(lValues);
    Map<OperatorDescriptorId, Integer> nPartMap = new HashMap<>();
    for (LValueConstraintExpression lv : lValues) {
        Object value = solver.getValue(lv);
        if (value == null) {
            throw new HyracksException("No value found for " + lv);
        }
        if (!(value instanceof Number)) {
            throw new HyracksException("Unexpected type of value bound to " + lv + ": " + value.getClass() + "(" + value + ")");
        }
        int nParts = ((Number) value).intValue();
        if (nParts <= 0) {
            throw new HyracksException("Unsatisfiable number of partitions for " + lv + ": " + nParts);
        }
        nPartMap.put(((PartitionCountExpression) lv).getOperatorDescriptorId(), nParts);
    }
    Map<ActivityId, ActivityPartitionDetails> activityPartsMap = new HashMap<>();
    for (ActivityId anId : ac.getActivityMap().keySet()) {
        int nParts = nPartMap.get(anId.getOperatorDescriptorId());
        int[] nInputPartitions = null;
        List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(anId);
        if (inputs != null) {
            nInputPartitions = new int[inputs.size()];
            for (int i = 0; i < nInputPartitions.length; ++i) {
                ConnectorDescriptorId cdId = inputs.get(i).getConnectorId();
                ActivityId aid = ac.getProducerActivity(cdId);
                Integer nPartInt = nPartMap.get(aid.getOperatorDescriptorId());
                nInputPartitions[i] = nPartInt;
            }
        }
        int[] nOutputPartitions = null;
        List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(anId);
        if (outputs != null) {
            nOutputPartitions = new int[outputs.size()];
            for (int i = 0; i < nOutputPartitions.length; ++i) {
                ConnectorDescriptorId cdId = outputs.get(i).getConnectorId();
                ActivityId aid = ac.getConsumerActivity(cdId);
                Integer nPartInt = nPartMap.get(aid.getOperatorDescriptorId());
                nOutputPartitions[i] = nPartInt;
            }
        }
        ActivityPartitionDetails apd = new ActivityPartitionDetails(nParts, nInputPartitions, nOutputPartitions);
        activityPartsMap.put(anId, apd);
    }
    return activityPartsMap;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) HashMap(java.util.HashMap) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) HyracksException(org.apache.hyracks.api.exceptions.HyracksException) LValueConstraintExpression(org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression) PartitionCountExpression(org.apache.hyracks.api.constraints.expressions.PartitionCountExpression) HashSet(java.util.HashSet)

Aggregations

OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId)14 ActivityId (org.apache.hyracks.api.dataflow.ActivityId)7 HashMap (java.util.HashMap)5 Map (java.util.Map)5 Constraint (org.apache.hyracks.api.constraints.Constraint)4 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)4 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)4 TaskId (org.apache.hyracks.api.dataflow.TaskId)4 ArrayList (java.util.ArrayList)3 Pair (org.apache.commons.lang3.tuple.Pair)3 LValueConstraintExpression (org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression)3 PartitionCountExpression (org.apache.hyracks.api.constraints.expressions.PartitionCountExpression)3 ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId)3 List (java.util.List)2 AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor)2 PartitionLocationExpression (org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression)2 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)2 TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId)2 IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider)2 HyracksException (org.apache.hyracks.api.exceptions.HyracksException)2