Example 11 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

The class JobBuilder, method contributeMicroOperator.

@Override
public void contributeMicroOperator(ILogicalOperator op, IPushRuntimeFactory runtime, RecordDescriptor recDesc, AlgebricksPartitionConstraint pc) {
    microOps.put(op, new Pair<IPushRuntimeFactory, RecordDescriptor>(runtime, recDesc));
    revMicroOpMap.put(runtime, op);
    if (pc != null) {
        pcForMicroOps.put(op, pc);
    }
    AbstractLogicalOperator logicalOp = (AbstractLogicalOperator) op;
    if (logicalOp.getExecutionMode() == ExecutionMode.UNPARTITIONED && pc == null) {
        // unpartitioned operator with no explicit constraint: pin it to a single location
        AlgebricksPartitionConstraint apc = countOneLocation;
        pcForMicroOps.put(logicalOp, apc);
    }
}
Also used : AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory)
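The field countOneLocation referenced above is not shown in this excerpt. A minimal sketch of a plausible definition (an assumption based on the fallback's intent, not confirmed by the snippet): a count-based constraint that pins an unpartitioned operator to exactly one partition.

import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;

// Assumed definition of the fallback constraint: run on exactly one partition
// somewhere in the cluster, with the location left to the scheduler.
private final AlgebricksPartitionConstraint countOneLocation = new AlgebricksCountPartitionConstraint(1);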

Example 12 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

The class JobGenHelper, method mkRecordDescriptor.

@SuppressWarnings("rawtypes")
public static RecordDescriptor mkRecordDescriptor(IVariableTypeEnvironment env, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
    ITypeTraits[] typeTraits = new ITypeTraits[opSchema.getSize()];
    ISerializerDeserializerProvider sdp = context.getSerializerDeserializerProvider();
    ITypeTraitProvider ttp = context.getTypeTraitProvider();
    int i = 0;
    for (LogicalVariable var : opSchema) {
        Object t = env.getVarType(var);
        if (t == null) {
            // type inference produced no type for this variable; log and let the
            // serializer/type-trait providers decide how to handle the null
            LOGGER.warning("No type for variable " + var);
        }
        fields[i] = sdp.getSerializerDeserializer(t);
        typeTraits[i] = ttp.getTypeTrait(t);
        i++;
    }
    return new RecordDescriptor(fields, typeTraits);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) ITypeTraitProvider(org.apache.hyracks.algebricks.data.ITypeTraitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ISerializerDeserializerProvider(org.apache.hyracks.algebricks.data.ISerializerDeserializerProvider) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)
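For comparison, a hand-rolled sketch of what mkRecordDescriptor assembles from the type environment, written out for a fixed two-field (string, int) schema. The classes are the same ones used elsewhere on this page.

import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

// One serializer/deserializer and one set of type traits per field, in schema order.
ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
        new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE };
ITypeTraits[] typeTraits = new ITypeTraits[] {
        UTF8StringPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS };
RecordDescriptor recDesc = new RecordDescriptor(fields, typeTraits);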

Example 13 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

The class SecondaryIndexSearchExample, method createJob.

private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples coming out of secondary index
    RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // comparators for sort fields and BTree fields
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // comparators for primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // schema of tuples coming out of primary index
    RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for btree, note that we only need a comparator for the
    // non-unique key
    // i.e. we will have a range condition on the first field only (implying
    // [-infinity, +infinity] for the second field)
    IBinaryComparatorFactory[] searchComparatorFactories = new IBinaryComparatorFactory[1];
    searchComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // build tuple containing low and high search keys
    ArrayTupleBuilder tb = new ArrayTupleBuilder(searchComparatorFactories.length * 2); // low and high key
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("f", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
    // low key is in field 0 of tuples going into secondary index search op
    int[] secondaryLowKeyFields = { 0 };
    // high key is in field 1 of tuples going into secondary index search op
    int[] secondaryHighKeyFields = { 1 };
    IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc, secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);
    // secondary index will output tuples with [UTF8String, Integer]
    // the Integer field refers to the key in the primary index of the
    // source data records
    // low key is in field 1 of tuples going into primary index search op
    int[] primaryLowKeyFields = { 1 };
    // high key is in field 1 of tuples going into primary index search op
    int[] primaryHighKeyFields = { 1 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);
    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
Also used : DataOutput(java.io.DataOutput) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) PrinterOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.PrinterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)
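The key tuple built in createJob encodes a range scan over ["0", "f"]. A minimal sketch of the same pattern specialized to a point lookup, where the low and high keys are equal (the key value "smith" is illustrative):

import java.io.DataOutput;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

// Two fields: field 0 holds the low key, field 1 the high key.
ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
DataOutput dos = tb.getDataOutput();
tb.reset();
new UTF8StringSerializerDeserializer().serialize("smith", dos); // low key
tb.addFieldEndOffset();
new UTF8StringSerializerDeserializer().serialize("smith", dos); // high key, equal to the low key
tb.addFieldEndOffset();
// tb.getFieldEndOffsets(), tb.getByteArray(), and tb.getSize() then feed a
// ConstantTupleSourceOperatorDescriptor exactly as in createJob above.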

Example 14 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

The class DataGenOperatorDescriptor, method createPushRuntime.

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    final RecordDescriptor recDesc = outRecDescs[0];
    final ArrayTupleBuilder tb = new ArrayTupleBuilder(recDesc.getFields().length);
    final Random rnd = new Random(randomSeed);
    final int maxUniqueAttempts = 20;
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        // for quick & dirty exclusion of duplicates
        // WARNING: could contain numRecord entries and use a lot of memory
        HashSet<String> stringHs = new HashSet<String>();

        HashSet<Integer> intHs = new HashSet<Integer>();

        @Override
        public void initialize() throws HyracksDataException {
            try {
                writer.open();
                for (int i = 0; i < numRecords; i++) {
                    tb.reset();
                    for (int j = 0; j < recDesc.getFieldCount(); j++) {
                        genField(tb, j);
                    }
                    if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                        appender.write(writer, true);
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + appender.getBuffer().capacity() + ")");
                        }
                    }
                }
                appender.write(writer, true);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
            }
        }

        private void genField(ArrayTupleBuilder tb, int fieldIndex) throws HyracksDataException {
            DataOutput dos = tb.getDataOutput();
            if (recDesc.getFields()[fieldIndex] instanceof IntegerSerializerDeserializer) {
                int val = -1;
                if (fieldIndex == uniqueField) {
                    int attempt = 0;
                    while (attempt < maxUniqueAttempts) {
                        int tmp = Math.abs(rnd.nextInt()) % (intMaxVal - intMinVal) + intMinVal;
                        if (intHs.contains(tmp))
                            attempt++;
                        else {
                            val = tmp;
                            intHs.add(val);
                            break;
                        }
                    }
                    if (attempt == maxUniqueAttempts)
                        throw new HyracksDataException("MaxUnique attempts reached in datagen");
                } else {
                    val = Math.abs(rnd.nextInt()) % (intMaxVal - intMinVal) + intMinVal;
                }
                recDesc.getFields()[fieldIndex].serialize(val, dos);
                tb.addFieldEndOffset();
            } else if (recDesc.getFields()[fieldIndex] instanceof UTF8StringSerializerDeserializer) {
                String val = null;
                if (fieldIndex == uniqueField) {
                    int attempt = 0;
                    while (attempt < maxUniqueAttempts) {
                        String tmp = randomString(maxStrLen, rnd);
                        if (stringHs.contains(tmp))
                            attempt++;
                        else {
                            val = tmp;
                            stringHs.add(val);
                            break;
                        }
                    }
                    if (attempt == maxUniqueAttempts)
                        throw new HyracksDataException("MaxUnique attempts reached in datagen");
                } else {
                    val = randomString(maxStrLen, rnd);
                }
                recDesc.getFields()[fieldIndex].serialize(val, dos);
                tb.addFieldEndOffset();
            } else {
                throw new HyracksDataException("Type unsupported in data generator. Only integers and strings allowed");
            }
        }

        private String randomString(int length, Random random) {
            String s = Long.toHexString(Double.doubleToLongBits(random.nextDouble()));
            StringBuilder strBuilder = new StringBuilder();
            for (int i = 0; i < s.length() && i < length; i++) {
                strBuilder.append(s.charAt(Math.abs(random.nextInt()) % s.length()));
            }
            return strBuilder.toString();
        }
    };
}
Also used : DataOutput(java.io.DataOutput) AbstractUnaryOutputSourceOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IntegerSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer) Random(java.util.Random) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) HashSet(java.util.HashSet)
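One caveat in genField above: Math.abs(rnd.nextInt()) % range can still be negative, because Math.abs(Integer.MIN_VALUE) overflows and remains negative. A sketch of the conventional fix using Random.nextInt(bound); the variable names and bounds here are illustrative:

import java.util.Random;

Random rnd = new Random(42); // arbitrary seed for the sketch
int intMinVal = 0;
int intMaxVal = 100;
// nextInt(bound) is uniform over [0, bound) and never negative,
// so this yields a value in [intMinVal, intMaxVal).
int val = rnd.nextInt(intMaxVal - intMinVal) + intMinVal;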

Example 15 with RecordDescriptor

Use of org.apache.hyracks.api.dataflow.value.RecordDescriptor in project asterixdb by apache.

The class AbstractBTreeOperatorTest, method loadPrimaryIndex.

protected void loadPrimaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = inputRecordDesc;
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(inputParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 1000, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);
    // map input fields 0, 1, 2, 4, 5, 7 (in that order) onto the primary index fields
    int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
    TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, primaryRecDesc, fieldPermutation, 0.7f, true, 1000L, true, primaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, primaryBtreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}
Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)
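The inputParserFactories array consumed by the scan is defined elsewhere in the test class. A minimal sketch of what such a delimited-scan setup typically looks like; the parser choices below are illustrative, not the test's actual configuration:

import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
import org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory;

// One value parser per field: parse each '|'-delimited line into (string, int) tuples.
IValueParserFactory[] parsers = new IValueParserFactory[] {
        UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE };
DelimitedDataTupleParserFactory tupleParser = new DelimitedDataTupleParserFactory(parsers, '|');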

Aggregations

Types most frequently used together with RecordDescriptor across the indexed sources (usage counts):

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 169
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 90
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 74
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 72
Test (org.junit.Test): 69
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 64
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 55
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 53
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 52
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 41
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 40
FileSplit (org.apache.hyracks.api.io.FileSplit): 38
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 37
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 36
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 35
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 35
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 33
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 31
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 27
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 25