
Example 31 with UTF8StringSerializerDeserializer

Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class Join, method createJob:

private static JobSpecification createJob(FileSplit[] customerSplits, FileSplit[] orderSplits, FileSplit[] resultSplits, int numJoinPartitions, String algo, int graceInputSize, int graceRecordsPerFrame, double graceFactor, int memSize, int tableSize, boolean hasGroupBy, int frameSize) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(customerSplits);
    long custFileSize = 0;
    for (int i = 0; i < customerSplits.length; i++) {
        custFileSize += customerSplits[i].getFile(null).length();
    }
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(orderSplits);
    long orderFileSize = 0;
    for (int i = 0; i < orderSplits.length; i++) {
        orderFileSize += orderSplits[i].getFile(null).length();
    }
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(orderParserFactories, '|'), Common.ordersDesc);
    createPartitionConstraint(spec, ordScanner, orderSplits);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(custParserFactories, '|'), Common.custDesc);
    createPartitionConstraint(spec, custScanner, customerSplits);
    IOperatorDescriptor join;
    if ("nestedloop".equalsIgnoreCase(algo)) {
        join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), Common.custOrderJoinDesc, memSize, false, null);
    } else if ("inmem".equalsIgnoreCase(algo)) {
        join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, Common.custOrderJoinDesc, tableSize, null, memSize * frameSize);
    } else if ("hybrid".equalsIgnoreCase(algo)) {
        join = new OptimizedHybridHashJoinOperatorDescriptor(spec, memSize, graceInputSize, graceFactor, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, Common.custOrderJoinDesc, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null);
    } else {
        System.err.println("unknown algorithm:" + algo);
        return null;
    }
    PartitionConstraintHelper.addPartitionCountConstraint(spec, join, numJoinPartitions);
    IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor custJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IOperatorDescriptor endingOp = join;
    if (hasGroupBy) {
        RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
        ExternalGroupOperatorDescriptor gby = new ExternalGroupOperatorDescriptor(spec, tableSize, custFileSize + orderFileSize, new int[] { 6 }, memSize, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), groupResultDesc, groupResultDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, gby, resultSplits);
        IConnectorDescriptor joinGroupConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 6 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(joinGroupConn, join, 0, gby, 0);
        endingOp = gby;
    }
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(resultSplits);
    //FrameFileWriterOperatorDescriptor writer = new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    IOperatorDescriptor writer = new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|");
    createPartitionConstraint(spec, writer, resultSplits);
    IConnectorDescriptor endingPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(endingPrinterConn, endingOp, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
Also used:
OptimizedHybridHashJoinOperatorDescriptor (org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor)
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
HashSpillableTableFactory (org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory)
JoinComparatorFactory (org.apache.hyracks.dataflow.std.join.JoinComparatorFactory)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)
NestedLoopJoinOperatorDescriptor (org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor)
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
IBinaryHashFunctionFamily (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily)
IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory)
MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)
ExternalGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor)
InMemoryHashJoinOperatorDescriptor (org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor)
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)
Common.createPartitionConstraint (org.apache.hyracks.examples.tpch.client.Common.createPartitionConstraint)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor)
FloatSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory)
IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)
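
createJob only builds the dataflow graph; nothing runs until the spec is submitted to a cluster controller. Below is a minimal sketch of submitting the returned spec through the Hyracks client API (the same startJob/waitForCompletion calls appear in Example 32 below); the host, port, and tuning values are hypothetical placeholders:

    // Hypothetical driver code: host, port, and all argument values are placeholders.
    IHyracksClientConnection client = new HyracksConnection("localhost", 1098);
    JobSpecification spec = createJob(customerSplits, orderSplits, resultSplits,
            4, "hybrid", 32, 200, 1.2, 256, 8192, true, 32768);
    if (spec != null) { // createJob returns null for an unknown algorithm
        JobId jobId = client.startJob(spec);
        client.waitForCompletion(jobId);
    }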

Example 32 with UTF8StringSerializerDeserializer

Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class DataflowTest, method testHDFSReadWriteOperators:

/**
     * Test a job with only HDFS reads and writes.
     *
     * @throws Exception
     */
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testHDFSReadWriteOperators() throws Exception {
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
    conf.setInputFormatClass(TextInputFormat.class);
    Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    InputFormat inputFormat = ReflectionUtils.newInstance(conf.getInputFormatClass(), getConfiguration());
    List<InputSplit> splits = inputFormat.getSplits(conf);
    String[] readSchedule = scheduler.getLocationConstraints(splits);
    JobSpecification jobSpec = new JobSpecification();
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
    ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
    HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
    jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }), new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null), sortOperator, 0, writeOperator, 0);
    jobSpec.addRoot(writeOperator);
    IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
    JobId jobId = client.startJob(jobSpec);
    client.waitForCompletion(jobId);
    Assert.assertTrue(checkResults());
}
Also used:
Path (org.apache.hadoop.fs.Path)
IHyracksClientConnection (org.apache.hyracks.api.client.IHyracksClientConnection)
Scheduler (org.apache.hyracks.hdfs2.scheduler.Scheduler)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor)
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)
TextTupleWriterFactory (org.apache.hyracks.hdfs.lib.TextTupleWriterFactory)
TextInputFormat (org.apache.hadoop.mapreduce.lib.input.TextInputFormat)
FileInputFormat (org.apache.hadoop.mapreduce.lib.input.FileInputFormat)
InputFormat (org.apache.hadoop.mapreduce.InputFormat)
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
HyracksConnection (org.apache.hyracks.api.client.HyracksConnection)
InputSplit (org.apache.hadoop.mapreduce.InputSplit)
TextKeyValueParserFactory (org.apache.hyracks.hdfs.lib.TextKeyValueParserFactory)
JobId (org.apache.hyracks.api.job.JobId)

Example 33 with UTF8StringSerializerDeserializer

Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class AbstractRTreeExamplesTest, method rStarTreePageSplitTestExample:

/**
     * This tests the R*-tree page split. Originally this test didn't pass,
     * since the R*-tree always assumes there will be enough space for the new
     * tuple after a split. Now it passes: if there is not enough space in the
     * designated page, the tuple is simply inserted into the other split page.
     */
@Test
public void rStarTreePageSplitTestExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("R*Tree page split test.");
    }
    // Declare fields.
    int fieldCount = 5;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = IntegerPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = IntegerPointable.TYPE_TRAITS;
    typeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
    // Declare RTree keys.
    int rtreeKeyFieldCount = 4;
    IBinaryComparatorFactory[] rtreeCmpFactories = new IBinaryComparatorFactory[rtreeKeyFieldCount];
    rtreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // Declare BTree keys; these are only used for the LSM RTree variants.
    int btreeKeyFieldCount;
    IBinaryComparatorFactory[] btreeCmpFactories;
    int[] btreeFields = null;
    if (rTreeType == RTreeType.LSMRTREE) {
        // Parameters differ between LSM RTree and LSM RTree with antimatter tuples.
        btreeKeyFieldCount = 1;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
        btreeFields = new int[btreeKeyFieldCount];
        for (int i = 0; i < btreeKeyFieldCount; i++) {
            btreeFields[i] = rtreeKeyFieldCount + i;
        }
    } else {
        btreeKeyFieldCount = 5;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    }
    // create value providers
    IPrimitiveValueProviderFactory[] valueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(rtreeCmpFactories.length, IntegerPointable.FACTORY);
    ITreeIndex treeIndex = createTreeIndex(typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, RTreePolicyType.RSTARTREE, null, btreeFields, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    int p1x = rnd.nextInt();
    int p1y = rnd.nextInt();
    int p2x = rnd.nextInt();
    int p2y = rnd.nextInt();
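    // Build a 210-character payload so each tuple is large enough to force page splits.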
    String data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    treeIndex.deactivate();
    treeIndex.destroy();
}
Also used:
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)
IPrimitiveValueProviderFactory (org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory)
ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)
IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor)
ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex)
Test (org.junit.Test)
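
The build-payload-and-insert sequence above repeats six times, and recurs in the next example. Here is a sketch of a helper that would factor it out, assuming the test fixture's rnd field and the local variables from the example; the method name is hypothetical:

    // Hypothetical helper: builds one random-rectangle tuple carrying a payload
    // of the given length, then inserts it. Assumes the enclosing test class
    // provides the rnd field used throughout these examples.
    private void insertRandomRectangle(ArrayTupleBuilder tb, ArrayTupleReference tuple,
            ISerializerDeserializer[] fieldSerdes, IIndexAccessor indexAccessor,
            int payloadLength) throws Exception {
        int p1x = rnd.nextInt();
        int p1y = rnd.nextInt();
        int p2x = rnd.nextInt();
        int p2y = rnd.nextInt();
        // Build the payload in one step instead of appending characters in a loop.
        String data = new String(new char[payloadLength]).replace('\0', 'X');
        TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y),
                Math.max(p1x, p2x), Math.max(p1y, p2y), data);
        indexAccessor.insert(tuple);
    }

Each insert block in the two page-split tests would then collapse to insertRandomRectangle(tb, tuple, fieldSerdes, indexAccessor, 210) for the long payloads, or 3 for the short "XXX" ones.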

Example 34 with UTF8StringSerializerDeserializer

Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class AbstractRTreeExamplesTest, method rTreePageSplitTestExample:

/**
     * This tests the R-tree page split. Originally this test didn't pass,
     * since the R-tree always assumes there will be enough space for the new
     * tuple after a split. Now it passes: if there is not enough space in the
     * designated page, the tuple is simply inserted into the other split page.
     */
@Test
public void rTreePageSplitTestExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("RTree page split test.");
    }
    // Declare fields.
    int fieldCount = 5;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = IntegerPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = IntegerPointable.TYPE_TRAITS;
    typeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
    // Declare RTree keys.
    int rtreeKeyFieldCount = 4;
    IBinaryComparatorFactory[] rtreeCmpFactories = new IBinaryComparatorFactory[rtreeKeyFieldCount];
    rtreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    rtreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // Declare BTree keys; these are only used for the LSM RTree variants.
    int btreeKeyFieldCount;
    IBinaryComparatorFactory[] btreeCmpFactories;
    int[] btreeFields = null;
    if (rTreeType == RTreeType.LSMRTREE) {
        // Parameters differ between LSM RTree and LSM RTree with antimatter tuples.
        btreeKeyFieldCount = 1;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
        btreeFields = new int[btreeKeyFieldCount];
        for (int i = 0; i < btreeKeyFieldCount; i++) {
            btreeFields[i] = rtreeKeyFieldCount + i;
        }
    } else {
        btreeKeyFieldCount = 5;
        btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeCmpFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    }
    // create value providers
    IPrimitiveValueProviderFactory[] valueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(rtreeCmpFactories.length, IntegerPointable.FACTORY);
    ITreeIndex treeIndex = createTreeIndex(typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, RTreePolicyType.RTREE, null, btreeFields, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    int p1x = rnd.nextInt();
    int p1y = rnd.nextInt();
    int p2x = rnd.nextInt();
    int p2y = rnd.nextInt();
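    // As in the R*-tree test, a long payload makes the tuples large enough to trigger splits.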
    String data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "XXX";
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    p1x = rnd.nextInt();
    p1y = rnd.nextInt();
    p2x = rnd.nextInt();
    p2y = rnd.nextInt();
    data = "";
    for (int i = 0; i < 210; i++) {
        data += "X";
    }
    TupleUtils.createTuple(tb, tuple, fieldSerdes, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), data);
    indexAccessor.insert(tuple);
    treeIndex.deactivate();
    treeIndex.destroy();
}
Also used:
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)
IPrimitiveValueProviderFactory (org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory)
ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)
IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor)
ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex)
Test (org.junit.Test)

Example 35 with UTF8StringSerializerDeserializer

Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class OrderedIndexTestDriver, method oneStringKeyAndValue:

@Test
public void oneStringKeyAndValue() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("BTree " + getTestOpName() + " Test With One String Key And Value.");
    }
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    // Range search in ["cbf", "cc7"].
    ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf");
    ITupleReference highKey = TupleUtils.createTuple(fieldSerdes, "cc7");
    for (BTreeLeafFrameType leafFrameType : leafFrameTypesToTest) {
        runTest(fieldSerdes, 1, leafFrameType, lowKey, highKey, null, null);
    }
}
Also used:
ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)
BTreeLeafFrameType (org.apache.hyracks.storage.am.btree.frames.BTreeLeafFrameType)
Test (org.junit.Test)
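
Every example on this page wires UTF8StringSerializerDeserializer into a larger dataflow without showing it in isolation. Here is a minimal round-trip sketch, assuming the standard Hyracks ISerializerDeserializer contract of serialize(value, DataOutput) and deserialize(DataInput):

    // Sketch: serialize one string to a byte buffer and read it back.
    ISerializerDeserializer<String> serde = new UTF8StringSerializerDeserializer();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serde.serialize("hello", new DataOutputStream(baos));
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    String copy = serde.deserialize(in);
    // copy now equals "hello".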

Aggregations

UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 94 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 74 usages
Test (org.junit.Test): 69 usages
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 67 usages
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 62 usages
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 51 usages
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 48 usages
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 45 usages
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 37 usages
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 37 usages
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 36 usages
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 34 usages
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 34 usages
FileSplit (org.apache.hyracks.api.io.FileSplit): 33 usages
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 33 usages
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 31 usages
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 28 usages
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 26 usages
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 24 usages
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 24 usages