Search in sources:

Example 1 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

From the class FeedOperations, method combineIntakeCollectJobs, which merges a feed's intake job and its per-connection collect jobs into a single combined JobSpecification:

private static JobSpecification combineIntakeCollectJobs(MetadataProvider metadataProvider, Feed feed, JobSpecification intakeJob, List<JobSpecification> jobsList, List<FeedConnection> feedConnections, String[] intakeLocations) throws AlgebricksException, HyracksDataException {
    JobSpecification jobSpec = new JobSpecification(intakeJob.getFrameSize());
    // copy ingestor
    FeedIntakeOperatorDescriptor firstOp = (FeedIntakeOperatorDescriptor) intakeJob.getOperatorMap().get(new OperatorDescriptorId(0));
    FeedIntakeOperatorDescriptor ingestionOp;
    if (firstOp.getAdaptorFactory() == null) {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorLibraryName(), firstOp.getAdaptorFactoryClassName(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
    } else {
        ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorFactory(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
    }
    // create replicator
    ReplicateOperatorDescriptor replicateOp = new ReplicateOperatorDescriptor(jobSpec, ingestionOp.getOutputRecordDescriptors()[0], jobsList.size());
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), ingestionOp, 0, replicateOp, 0);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, ingestionOp, intakeLocations);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, replicateOp, intakeLocations);
    // Loop over the jobs to copy operators and connections
    Map<OperatorDescriptorId, OperatorDescriptorId> operatorIdMapping = new HashMap<>();
    Map<ConnectorDescriptorId, ConnectorDescriptorId> connectorIdMapping = new HashMap<>();
    Map<OperatorDescriptorId, List<LocationConstraint>> operatorLocations = new HashMap<>();
    Map<OperatorDescriptorId, Integer> operatorCounts = new HashMap<>();
    List<JobId> jobIds = new ArrayList<>();
    FeedMetaOperatorDescriptor metaOp;
    for (int iter1 = 0; iter1 < jobsList.size(); iter1++) {
        FeedConnection curFeedConnection = feedConnections.get(iter1);
        JobSpecification subJob = jobsList.get(iter1);
        operatorIdMapping.clear();
        Map<OperatorDescriptorId, IOperatorDescriptor> operatorsMap = subJob.getOperatorMap();
        String datasetName = feedConnections.get(iter1).getDatasetName();
        FeedConnectionId feedConnectionId = new FeedConnectionId(ingestionOp.getEntityId(), datasetName);
        FeedPolicyEntity feedPolicyEntity = FeedMetadataUtil.validateIfPolicyExists(curFeedConnection.getDataverseName(), curFeedConnection.getPolicyName(), metadataProvider.getMetadataTxnContext());
        for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> entry : operatorsMap.entrySet()) {
            IOperatorDescriptor opDesc = entry.getValue();
            OperatorDescriptorId oldId = opDesc.getOperatorId();
            OperatorDescriptorId opId = null;
            if (opDesc instanceof LSMTreeInsertDeleteOperatorDescriptor && ((LSMTreeInsertDeleteOperatorDescriptor) opDesc).isPrimary()) {
                metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc, feedPolicyEntity.getProperties(), FeedRuntimeType.STORE);
                opId = metaOp.getOperatorId();
                opDesc.setOperatorId(opId);
            } else {
                if (opDesc instanceof AlgebricksMetaOperatorDescriptor) {
                    AlgebricksMetaOperatorDescriptor algOp = (AlgebricksMetaOperatorDescriptor) opDesc;
                    IPushRuntimeFactory[] runtimeFactories = algOp.getPipeline().getRuntimeFactories();
                    // Tweak AssignOp to work with messages
                    if (runtimeFactories[0] instanceof AssignRuntimeFactory && runtimeFactories.length > 1) {
                        IConnectorDescriptor connectorDesc = subJob.getOperatorInputMap().get(opDesc.getOperatorId()).get(0);
                        // anything on the network interface needs to be message compatible
                        if (connectorDesc instanceof MToNPartitioningConnectorDescriptor) {
                            metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc, feedPolicyEntity.getProperties(), FeedRuntimeType.COMPUTE);
                            opId = metaOp.getOperatorId();
                            opDesc.setOperatorId(opId);
                        }
                    }
                }
                if (opId == null) {
                    opId = jobSpec.createOperatorDescriptorId(opDesc);
                }
            }
            operatorIdMapping.put(oldId, opId);
        }
        // copy connectors
        connectorIdMapping.clear();
        for (Entry<ConnectorDescriptorId, IConnectorDescriptor> entry : subJob.getConnectorMap().entrySet()) {
            IConnectorDescriptor connDesc = entry.getValue();
            ConnectorDescriptorId newConnId;
            if (connDesc instanceof MToNPartitioningConnectorDescriptor) {
                MToNPartitioningConnectorDescriptor m2nConn = (MToNPartitioningConnectorDescriptor) connDesc;
                connDesc = new MToNPartitioningWithMessageConnectorDescriptor(jobSpec, m2nConn.getTuplePartitionComputerFactory());
                newConnId = connDesc.getConnectorId();
            } else {
                newConnId = jobSpec.createConnectorDescriptor(connDesc);
            }
            connectorIdMapping.put(entry.getKey(), newConnId);
        }
        // make connections between operators
        for (Entry<ConnectorDescriptorId, Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>>> entry : subJob.getConnectorOperatorMap().entrySet()) {
            ConnectorDescriptorId newId = connectorIdMapping.get(entry.getKey());
            IConnectorDescriptor connDesc = jobSpec.getConnectorMap().get(newId);
            Pair<IOperatorDescriptor, Integer> leftOp = entry.getValue().getLeft();
            Pair<IOperatorDescriptor, Integer> rightOp = entry.getValue().getRight();
            IOperatorDescriptor leftOpDesc = jobSpec.getOperatorMap().get(leftOp.getLeft().getOperatorId());
            IOperatorDescriptor rightOpDesc = jobSpec.getOperatorMap().get(rightOp.getLeft().getOperatorId());
            if (leftOp.getLeft() instanceof FeedCollectOperatorDescriptor) {
                jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), replicateOp, iter1, leftOpDesc, leftOp.getRight());
            }
            jobSpec.connect(connDesc, leftOpDesc, leftOp.getRight(), rightOpDesc, rightOp.getRight());
        }
        // prepare for setting partition constraints
        operatorLocations.clear();
        operatorCounts.clear();
        for (Constraint constraint : subJob.getUserConstraints()) {
            LValueConstraintExpression lexpr = constraint.getLValue();
            ConstraintExpression cexpr = constraint.getRValue();
            OperatorDescriptorId opId;
            switch(lexpr.getTag()) {
                case PARTITION_COUNT:
                    opId = ((PartitionCountExpression) lexpr).getOperatorDescriptorId();
                    operatorCounts.put(operatorIdMapping.get(opId), (int) ((ConstantExpression) cexpr).getValue());
                    break;
                case PARTITION_LOCATION:
                    opId = ((PartitionLocationExpression) lexpr).getOperatorDescriptorId();
                    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(operatorIdMapping.get(opId));
                    List<LocationConstraint> locations = operatorLocations.get(opDesc.getOperatorId());
                    if (locations == null) {
                        locations = new ArrayList<>();
                        operatorLocations.put(opDesc.getOperatorId(), locations);
                    }
                    String location = (String) ((ConstantExpression) cexpr).getValue();
                    LocationConstraint lc = new LocationConstraint(location, ((PartitionLocationExpression) lexpr).getPartition());
                    locations.add(lc);
                    break;
                default:
                    break;
            }
        }
        // set absolute location constraints
        for (Entry<OperatorDescriptorId, List<LocationConstraint>> entry : operatorLocations.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            // sort by partition so that locations[j] below holds the node location of partition j
            Collections.sort(entry.getValue(), (LocationConstraint o1, LocationConstraint o2) -> {
                return o1.partition - o2.partition;
            });
            String[] locations = new String[entry.getValue().size()];
            for (int j = 0; j < locations.length; ++j) {
                locations[j] = entry.getValue().get(j).location;
            }
            PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, opDesc, locations);
        }
        // set count constraints
        for (Entry<OperatorDescriptorId, Integer> entry : operatorCounts.entrySet()) {
            IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
            if (!operatorLocations.containsKey(entry.getKey())) {
                PartitionConstraintHelper.addPartitionCountConstraint(jobSpec, opDesc, entry.getValue());
            }
        }
        // roots
        for (OperatorDescriptorId root : subJob.getRoots()) {
            jobSpec.addRoot(jobSpec.getOperatorMap().get(operatorIdMapping.get(root)));
        }
        jobIds.add(((JobEventListenerFactory) subJob.getJobletEventListenerFactory()).getJobId());
    }
    // jobEventListenerFactory
    jobSpec.setJobletEventListenerFactory(new MultiTransactionJobletEventListenerFactory(jobIds, true));
    // useConnectorSchedulingPolicy
    jobSpec.setUseConnectorPolicyForScheduling(jobsList.get(0).isUseConnectorPolicyForScheduling());
    // connectorAssignmentPolicy
    jobSpec.setConnectorPolicyAssignmentPolicy(jobsList.get(0).getConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
Also used: HashMap (java.util.HashMap), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint), Constraint (org.apache.hyracks.api.constraints.Constraint), LocationConstraint (org.apache.asterix.metadata.feeds.LocationConstraint), ConstantExpression (org.apache.hyracks.api.constraints.expressions.ConstantExpression), ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId), ArrayList (java.util.ArrayList), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), List (java.util.List), AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor), LValueConstraintExpression (org.apache.hyracks.api.constraints.expressions.LValueConstraintExpression), ReplicateOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.ReplicateOperatorDescriptor), Map (java.util.Map), FeedCollectOperatorDescriptor (org.apache.asterix.external.operators.FeedCollectOperatorDescriptor), FeedPolicyEntity (org.apache.asterix.metadata.entities.FeedPolicyEntity), JobSpecification (org.apache.hyracks.api.job.JobSpecification), FeedIntakeOperatorDescriptor (org.apache.asterix.external.operators.FeedIntakeOperatorDescriptor), JobId (org.apache.asterix.common.transactions.JobId), Pair (org.apache.commons.lang3.tuple.Pair), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), MToNPartitioningWithMessageConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningWithMessageConnectorDescriptor), OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId), ConstraintExpression (org.apache.hyracks.api.constraints.expressions.ConstraintExpression), FeedConnection (org.apache.asterix.metadata.entities.FeedConnection), MultiTransactionJobletEventListenerFactory (org.apache.asterix.runtime.job.listener.MultiTransactionJobletEventListenerFactory), MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor), AssignRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.AssignRuntimeFactory), IPushRuntimeFactory (org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory), FeedMetaOperatorDescriptor (org.apache.asterix.external.operators.FeedMetaOperatorDescriptor), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), FeedConnectionId (org.apache.asterix.external.feed.management.FeedConnectionId), LSMTreeInsertDeleteOperatorDescriptor (org.apache.asterix.common.dataflow.LSMTreeInsertDeleteOperatorDescriptor)
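
The IConnectorDescriptor work in this example is the lookup of an operator's input connector through getOperatorInputMap and the instanceof test against MToNPartitioningConnectorDescriptor. A minimal sketch of that test on its own, assuming a hypothetical helper name that does not appear in the asterixdb sources:

private static boolean firstInputIsNetworkEdge(JobSpecification job, OperatorDescriptorId opId) {
    // Sketch only: the same check combineIntakeCollectJobs applies before wrapping a
    // compute operator; an M:N input edge means tuples arrive over the network.
    List<IConnectorDescriptor> inputs = job.getOperatorInputMap().get(opId);
    if (inputs == null || inputs.isEmpty()) {
        // source operators (e.g. the intake operator) have no input connectors
        return false;
    }
    return inputs.get(0) instanceof MToNPartitioningConnectorDescriptor;
}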

Example 2 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

From the class JobBuilder, method setPartitionConstraintsTopdown, which walks an operator's input connectors and, for SAME_COUNT target constraints, propagates the operator's partition constraint to the upstream source operator:

private void setPartitionConstraintsTopdown(OperatorDescriptorId opId, Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp) {
    List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint opConstraint;
    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
    if (opInputs != null) {
        for (IConnectorDescriptor conn : opInputs) {
            ConnectorDescriptorId cid = conn.getConnectorId();
            org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>, org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p = jobSpec.getConnectorOperatorMap().get(cid);
            IOperatorDescriptor src = p.getLeft().getLeft();
            TargetConstraint constraint = tgtConstraints.get(conn);
            if (constraint != null) {
                if (constraint == TargetConstraint.SAME_COUNT) {
                    opConstraint = partitionConstraintMap.get(opDesc);
                    if (partitionConstraintMap.get(src) == null) {
                        if (opConstraint != null) {
                            partitionConstraintMap.put(src, opConstraint);
                            AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, src, opConstraint);
                        }
                    }
                }
            }
            // Post Order DFS
            setPartitionConstraintsTopdown(src.getOperatorId(), tgtConstraints, opDesc);
        }
    }
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), Pair (org.apache.hyracks.algebricks.common.utils.Pair)

Example 3 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

From the class JobBuilder, method buildSpec, which builds the runtime components, sets up the connectors, registers the root operators, and applies the connector-derived partition constraints:

@Override
public void buildSpec(List<ILogicalOperator> roots) throws AlgebricksException {
    buildAsterixComponents();
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = setupConnectors();
    for (ILogicalOperator r : roots) {
        IOperatorDescriptor opDesc = findOpDescForAlgebraicOp(r);
        jobSpec.addRoot(opDesc);
    }
    setAllPartitionConstraints(tgtConstraints);
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)

Example 4 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

From the class InsertPipelineExample, method createJob, which builds a pipeline that hash-partitions generated records and inserts them into a primary and then a secondary B-Tree index:

private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    // schema of tuples to be generated: 5 fields with string, string, int, int, string;
    // we will use field 2 as primary key to fill a clustered index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), // this field will not go into B-Tree
            new UTF8StringSerializerDeserializer(), // we will use this as payload
            IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
            IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
            new UTF8StringSerializerDeserializer() }); // we will use this as payload
    // generate numRecords records with field 2 being unique, integer values
    // in [0, 100000], and strings with max length of 10 characters, and
    // random seed 100
    DataGenOperatorDescriptor dataGen = new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 100);
    // run data generator on first nodecontroller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // prepare insertion into primary index
    // tuples to be put into B-Tree shall have 4 fields
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparator factories for primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // the B-Tree expects its key fields to be at the front of its input tuple;
    // map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
    int[] primaryFieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    // create operator descriptor
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, primaryFieldPermutation, IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, primaryInsert, splitNCs);
    // prepare insertion into secondary index
    // tuples to be put into B-Tree shall have 2 fields
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // comparator factories for secondary index
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // the B-Tree expects its keyfields to be at the front of its input
    // tuple
    int[] secondaryFieldPermutation = { 1, 2 };
    IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    // create operator descriptor
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, secondaryFieldPermutation, IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, secondaryInsert, splitNCs);
    // end the insert pipeline at this sink operator
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nullSink, splitNCs);
    // distribute the records from the datagen via hashing to the insert ops
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
    hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
    // connect the ops
    spec.connect(hashConn, dataGen, 0, primaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsert, 0, secondaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsert, 0, nullSink, 0);
    spec.addRoot(nullSink);
    return spec;
}
Also used: NullSinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), IStorageManager (org.apache.hyracks.storage.common.IStorageManager), TreeIndexInsertUpdateDeleteOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), DataGenOperatorDescriptor (org.apache.hyracks.examples.btree.helper.DataGenOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 5 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

From the class PrimaryIndexBulkLoadExample, method createJob, which builds a pipeline that hash-partitions generated records, sorts them, and bulk loads them into a primary B-Tree index:

private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    // schema of tuples to be generated: 5 fields with string, string, int,
    // int, string
    // we will use field-index 2 as primary key to fill a clustered index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), // this field will not go into B-Tree
            new UTF8StringSerializerDeserializer(), // we will use this as payload
            IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
            IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
            new UTF8StringSerializerDeserializer() }); // we will use this as payload
    // generate numRecords records with field 2 being unique, integer values
    // in [0, 100000], and strings with max length of 10 characters, and
    // random seed 50
    DataGenOperatorDescriptor dataGen = new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 50);
    // run data generator on first nodecontroller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
    // sort the tuples as preparation for bulk load
    // fields to sort on
    int[] sortFields = { 2 };
    // comparators for sort fields
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
    // tuples to be put into B-Tree shall have 4 fields
    int fieldCount = 4;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // create providers for B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // the B-Tree expects its key fields to be at the front of its input tuple;
    // map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
    int[] fieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, recDesc, fieldPermutation, 0.7f, false, 1000L, true, dataflowHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
    // distribute the records from the datagen via hashing to the bulk load
    // ops
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
    hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
    spec.connect(hashConn, dataGen, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
Also used: RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory), DataGenOperatorDescriptor (org.apache.hyracks.examples.btree.helper.DataGenOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), NullSinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor), ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), IStorageManager (org.apache.hyracks.storage.common.IStorageManager), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 72 usages
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 45 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 40 usages
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 40 usages
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 39 usages
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 37 usages
Test (org.junit.Test): 35 usages
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 34 usages
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 33 usages
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 32 usages
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 31 usages
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 27 usages
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 25 usages
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 24 usages
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 22 usages
FileSplit (org.apache.hyracks.api.io.FileSplit): 21 usages
MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory): 20 usages
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 19 usages
IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory): 19 usages
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 18 usages
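
Across all five examples the IConnectorDescriptor usage follows the same pattern: construct the connector against the JobSpecification, then register the edge with spec.connect. The sketch below restates that pattern; it is not taken from the asterixdb sources, and spec, producer, consumer, and repartition are assumed to already exist (both operator descriptors created against spec).

// Hedged sketch of the common wiring pattern seen above; exactly one connector
// is created for the producer-to-consumer edge.
final IConnectorDescriptor conn;
if (repartition) {
    // Network edge: hash tuples on field 0 across node controllers, as done in
    // front of the B-Tree operators in Examples 4 and 5.
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[] {
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
    conn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
} else {
    // Local pipeline: partition i of the producer feeds partition i of the consumer.
    conn = new OneToOneConnectorDescriptor(spec);
}
spec.connect(conn, producer, 0, consumer, 0);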