Example 51 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by Apache.

The class RebalanceUtil, method createPrimaryIndexUpsertOp.

// Creates the primary index upsert operator for populating the target dataset.
private static IOperatorDescriptor createPrimaryIndexUpsertOp(JobSpecification spec,
        MetadataProvider metadataProvider, Dataset source, Dataset target) throws AlgebricksException {
    int numKeys = source.getPrimaryKeys().size();
    // One value field for the record itself, plus one more if the dataset has a meta part.
    int numValues = source.hasMetaPart() ? 2 : 1;
    // Identity permutation: input field i feeds slot i of the upsert operator.
    int[] fieldPermutation = IntStream.range(0, numKeys + numValues).toArray();
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertOpAndConstraints =
            DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, target,
                    source.getPrimaryRecordDescriptor(metadataProvider), fieldPermutation,
                    MissingWriterFactory.INSTANCE);
    IOperatorDescriptor upsertOp = upsertOpAndConstraints.first;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, upsertOp,
            upsertOpAndConstraints.second);
    return upsertOp;
}
Also used : IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)
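
The fieldPermutation above is just the identity mapping over key and value fields. A minimal, self-contained sketch (the key and value counts here are hypothetical, chosen for illustration) of what the IntStream call produces:

import java.util.Arrays;
import java.util.stream.IntStream;

public class FieldPermutationSketch {
    public static void main(String[] args) {
        int numKeys = 2;   // hypothetical primary-key field count
        int numValues = 1; // the record itself; would be 2 with a meta part
        // Identity permutation: input field i feeds slot i of the upsert operator.
        int[] fieldPermutation = IntStream.range(0, numKeys + numValues).toArray();
        System.out.println(Arrays.toString(fieldPermutation)); // prints [0, 1, 2]
    }
}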

Example 52 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by Apache.

The class TokenizePOperator, method contributeRuntimeOperator.

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context,
        ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
        IOperatorSchema outerPlanSchema) throws AlgebricksException {
    TokenizeOperator tokenizeOp = (TokenizeOperator) op;
    // Tokenize is only legal as part of a bulk-load insert pipeline.
    if (tokenizeOp.getOperation() != Kind.INSERT || !tokenizeOp.isBulkload()) {
        throw new AlgebricksException("Tokenize Operator only works when bulk-loading data.");
    }
    IMetadataProvider mp = context.getMetadataProvider();
    IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(op);
    JobSpecification spec = builder.getJobSpec();
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(
            context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    // Ask the metadata provider for the tokenizer runtime and its partition constraint.
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints =
            mp.getTokenizerRuntime(dataSourceIndex, propagatedSchema, inputSchemas, typeEnv,
                    primaryKeys, secondaryKeys, null, inputDesc, context, spec, true);
    builder.contributeHyracksOperator(tokenizeOp, runtimeAndConstraints.first);
    builder.contributeAlgebricksPartitionConstraint(runtimeAndConstraints.first,
            runtimeAndConstraints.second);
    ILogicalOperator src = tokenizeOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, tokenizeOp, 0);
}
Also used : TokenizeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.TokenizeOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IMetadataProvider(org.apache.hyracks.algebricks.core.algebra.metadata.IMetadataProvider) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)
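
The last four lines of the method follow the standard Algebricks contribution pattern. A minimal sketch that isolates it (the helper method and its name are hypothetical; the builder calls are exactly the ones used above, and a unary operator is assumed):

// Hypothetical helper: not part of Algebricks, just the pattern factored out.
private void contributeRuntime(IHyracksJobBuilder builder, ILogicalOperator logicalOp,
        Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints) {
    // Register the Hyracks operator that implements the logical operator.
    builder.contributeHyracksOperator(logicalOp, runtimeAndConstraints.first);
    // Attach its partition constraint.
    builder.contributeAlgebricksPartitionConstraint(runtimeAndConstraints.first,
            runtimeAndConstraints.second);
    // Wire the operator to its single input in the job graph.
    ILogicalOperator src = logicalOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, logicalOp, 0);
}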

Example 53 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by Apache.

The class JobBuilder, method setPartitionConstraintsBottomup.

private void setPartitionConstraintsBottomup(OperatorDescriptorId opId,
        Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp,
        boolean finalPass) {
    List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint opConstraint = null;
    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
    if (opInputs != null) {
        for (IConnectorDescriptor conn : opInputs) {
            ConnectorDescriptorId cid = conn.getConnectorId();
            // Fully qualified to avoid a clash with the Algebricks Pair import.
            org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>,
                    org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p =
                    jobSpec.getConnectorOperatorMap().get(cid);
            IOperatorDescriptor src = p.getLeft().getLeft();
            // Depth-first recursion: constrain the source operator before this one,
            // so constraints flow bottom-up, as the method name says.
            setPartitionConstraintsBottomup(src.getOperatorId(), tgtConstraints, opDesc, finalPass);
            TargetConstraint constraint = tgtConstraints.get(conn);
            if (constraint != null) {
                switch (constraint) {
                    case ONE:
                        opConstraint = countOneLocation;
                        break;
                    case SAME_COUNT:
                        // Inherit the constraint already computed for the source.
                        opConstraint = partitionConstraintMap.get(src);
                        break;
                }
            }
        }
    }
    if (partitionConstraintMap.get(opDesc) == null) {
        // On the final pass, unconstrained sources default to a single location;
        // every other unconstrained operator falls back to the full cluster.
        if (finalPass && opConstraint == null && (opInputs == null || opInputs.isEmpty())) {
            opConstraint = countOneLocation;
        }
        if (finalPass && opConstraint == null) {
            opConstraint = clusterLocations;
        }
        // Sets up the location constraint.
        if (opConstraint != null) {
            partitionConstraintMap.put(opDesc, opConstraint);
            AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, opDesc, opConstraint);
        }
    }
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Pair(org.apache.hyracks.algebricks.common.utils.Pair)
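
The control flow is easier to see on a toy graph. The following self-contained sketch (operator names and the String stand-ins for partition constraints are made up; this is not the Hyracks API) mimics the pass: inputs are constrained first, a SAME_COUNT-style constraint is inherited from the source, and only the final pass applies defaults:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BottomUpConstraintSketch {
    // Toy operator graph: printer <- search <- keyProvider.
    private static final Map<String, List<String>> INPUTS = Map.of(
            "printer", List.of("search"),
            "search", List.of("keyProvider"),
            "keyProvider", List.of());
    private static final Map<String, String> CONSTRAINTS = new HashMap<>();

    private static void setBottomUp(String op, boolean finalPass) {
        String inherited = null;
        for (String src : INPUTS.get(op)) {
            setBottomUp(src, finalPass);      // constrain inputs first (bottom-up)
            inherited = CONSTRAINTS.get(src); // SAME_COUNT analogue: copy the source's
        }
        if (!CONSTRAINTS.containsKey(op)) {
            if (finalPass && inherited == null) {
                inherited = "cluster-wide";   // default applied on the final pass only
            }
            if (inherited != null) {
                CONSTRAINTS.put(op, inherited);
            }
        }
    }

    public static void main(String[] args) {
        CONSTRAINTS.put("keyProvider", "count(1)"); // a source pinned to one partition
        setBottomUp("printer", true);
        System.out.println(CONSTRAINTS); // all three operators end up with count(1)
    }
}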

Example 54 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by Apache.

The class JobBuilder, method setupConnectors.

private Map<IConnectorDescriptor, TargetConstraint> setupConnectors() throws AlgebricksException {
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = new HashMap<>();
    for (ILogicalOperator exchg : connectors.keySet()) {
        ILogicalOperator inOp = inEdges.get(exchg).get(0);
        ILogicalOperator outOp = outEdges.get(exchg).get(0);
        IOperatorDescriptor inOpDesc = findOpDescForAlgebraicOp(inOp);
        IOperatorDescriptor outOpDesc = findOpDescForAlgebraicOp(outOp);
        Pair<IConnectorDescriptor, TargetConstraint> connPair = connectors.get(exchg);
        IConnectorDescriptor conn = connPair.first;
        // A port is the exchange's position in its endpoint's ordered edge list.
        int producerPort = outEdges.get(inOp).indexOf(exchg);
        int consumerPort = inEdges.get(outOp).indexOf(exchg);
        jobSpec.connect(conn, inOpDesc, producerPort, outOpDesc, consumerPort);
        // Remember the connector's target constraint for the bottom-up pass.
        if (connPair.second != null) {
            tgtConstraints.put(conn, connPair.second);
        }
    }
    return tgtConstraints;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) HashMap(java.util.HashMap) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) AlgebricksCountPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint)
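
The producer and consumer ports passed to jobSpec.connect are derived purely positionally: a port is the exchange's index in its endpoint's ordered edge list. A tiny standalone sketch (operator and exchange names are invented) of the indexOf lookup:

import java.util.List;
import java.util.Map;

public class PortLookupSketch {
    public static void main(String[] args) {
        // outEdges: ordered outputs per operator; inEdges: ordered inputs.
        Map<String, List<String>> outEdges = Map.of("scan", List.of("exchg0", "exchg1"));
        Map<String, List<String>> inEdges = Map.of("join", List.of("exchg1", "exchg2"));
        int producerPort = outEdges.get("scan").indexOf("exchg1"); // 1: scan's second output
        int consumerPort = inEdges.get("join").indexOf("exchg1");  // 0: join's first input
        System.out.println(producerPort + " -> " + consumerPort);  // prints 1 -> 0
    }
}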

Example 55 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by Apache.

The class BTreePrimaryIndexScanOperatorTest, method scanPrimaryIndexTest.

@Test
public void scanPrimaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // Build a dummy input tuple; with unbounded keys below, its content is never used.
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(
            spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    // Null key fields make the range unbounded: scan from -infinity to +infinity.
    int[] lowKeyFields = null;
    int[] highKeyFields = null;
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec,
            DataSetConstants.primaryRecDesc, lowKeyFields, highKeyFields, true, true,
            primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null,
            null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    // Wire the pipeline: constant tuple source -> B-tree search -> file writer.
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : DataOutput(java.io.DataOutput) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)
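
The tuple construction at the top of the test is a reusable idiom for feeding a constant into a job. A minimal sketch of building a one-field UTF-8 tuple, using only the calls that appear in the test above:

import java.io.DataOutput;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

public class TupleBuilderSketch {
    public static void main(String[] args) throws Exception {
        ArrayTupleBuilder tb = new ArrayTupleBuilder(1); // capacity: one field
        DataOutput dos = tb.getDataOutput();
        tb.reset();
        // Serialize the field value, then mark where the field ends.
        new UTF8StringSerializerDeserializer().serialize("0", dos);
        tb.addFieldEndOffset();
        // getFieldEndOffsets(), getByteArray(), and getSize() now describe a
        // finished tuple, ready for a ConstantTupleSourceOperatorDescriptor.
        System.out.println("tuple size in bytes: " + tb.getSize());
    }
}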

Aggregations

IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 89 usages
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 61 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 52 usages
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 51 usages
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 48 usages
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 48 usages
Test (org.junit.Test): 41 usages
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 37 usages
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 34 usages
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 34 usages
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 33 usages
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 30 usages
FileSplit (org.apache.hyracks.api.io.FileSplit): 28 usages
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 26 usages
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 26 usages
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 24 usages
ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor): 23 usages
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 19 usages
PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor): 19 usages
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 18 usages