Examples with UTF8StringSerializerDeserializer - org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer

Example 56 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class BTreePrimaryIndexSearchOperatorTest method searchPrimaryIndexTest.

@Test
public void searchPrimaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build tuple containing low and high search key
    // high key and low key
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("100", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("200", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] lowKeyFields = { 0 };
    int[] highKeyFields = { 1 };
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.primaryRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Also used : DataOutput(java.io.DataOutput) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 57 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class PrimaryIndexSearchExample method createJob.

private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    int fieldCount = 4;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for btree
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // create roviders for B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples coming out of primary index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    // build tuple containing low and high search keys
    // high
    ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2);
    // key
    // and
    // low
    // key
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    IntegerSerializerDeserializer.INSTANCE.serialize(100, dos);
    tb.addFieldEndOffset();
    // build
    IntegerSerializerDeserializer.INSTANCE.serialize(200, dos);
    // high key
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
    // low key is in field 0 of tuples going
    int[] lowKeyFields = { 0 };
    // into search op
    // low key is in field 1 of tuples going
    int[] highKeyFields = { 1 };
    // into search op
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(spec, recDesc, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, btreeSearchOp, splitNCs);
    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, btreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}

Also used : DataOutput(java.io.DataOutput) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) PrinterOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.PrinterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)

Example 58 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class BTreeSecondaryIndexInsertOperatorTest method searchUpdatedSecondaryIndexTest.

@Test
public void searchUpdatedSecondaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build tuple containing search keys (only use the first key as search
    // key)
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.secondaryKeyFieldCount);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] secondaryLowKeyFields = { 0 };
    int[] secondaryHighKeyFields = { 1 };
    // search secondary index
    BTreeSearchOperatorDescriptor secondaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.secondaryRecDesc, secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeSearchOp, NC1_ID);
    // second field from the tuples coming from secondary index
    int[] primaryLowKeyFields = { 1 };
    // second field from the tuples coming from secondary index
    int[] primaryHighKeyFields = { 1 };
    // search primary index
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBtreeSearchOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Example 59 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class LSMBTreePrimaryIndexSearchOperatorTest method shouldWriteNothingIfGivenFilterValueIsOutOfRange.

@Test
public void shouldWriteNothingIfGivenFilterValueIsOutOfRange() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build tuple containing low and high search key
    // high key and low key
    ArrayTupleBuilder tb = new ArrayTupleBuilder((DataSetConstants.primaryKeyFieldCount + DataSetConstants.primaryFilterFields.length) * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("100", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("200", dos);
    tb.addFieldEndOffset();
    // min filter
    new UTF8StringSerializerDeserializer().serialize("9999", dos);
    tb.addFieldEndOffset();
    // max filter
    new UTF8StringSerializerDeserializer().serialize("9999", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] lowKeyFields = { 0 };
    int[] highKeyFields = { 1 };
    int[] minFilterFields = { 2 };
    int[] maxFilterFields = { 3 };
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.primaryAndFilterRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, NoopMissingWriterFactory.INSTANCE, NoOpOperationCallbackFactory.INSTANCE, minFilterFields, maxFilterFields, true);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Example 60 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

the class OrderedIndexExamplesTest method updateExample.

/**
     * Update example. Create a BTree with one variable-length key field and one
     * variable-length value field. Fill B-tree with random values using
     * insertions, then update entries one-by-one. Repeat procedure a few times
     * on same BTree.
     */
@Test
public void updateExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Update example");
    }
    // Declare fields.
    int fieldCount = 2;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    // Declare keys.
    int keyFieldCount = 1;
    IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
    cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // This is only used for the LSM-BTree.
    int[] bloomFilterKeyFields = new int[keyFieldCount];
    bloomFilterKeyFields[0] = 0;
    ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Inserting into tree...");
    }
    IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    int maxLength = 10;
    int ins = 10000;
    String[] keys = new String[10000];
    for (int i = 0; i < ins; i++) {
        String f0 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
        String f1 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
        TupleUtils.createTuple(tb, tuple, fieldSerdes, f0, f1);
        keys[i] = f0;
        if (LOGGER.isLoggable(Level.INFO)) {
            if (i % 1000 == 0) {
                LOGGER.info("Inserting " + i);
            }
        }
        try {
            indexAccessor.insert(tuple);
        } catch (HyracksDataException e) {
            if (e.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
                throw e;
            }
        }
    }
    // Print before doing any updates.
    orderedScan(indexAccessor, fieldSerdes);
    int runs = 3;
    for (int run = 0; run < runs; run++) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Update test run: " + (run + 1) + "/" + runs);
            LOGGER.info("Updating BTree");
        }
        for (int i = 0; i < ins; i++) {
            // Generate a new random value for f1.
            String f1 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
            TupleUtils.createTuple(tb, tuple, fieldSerdes, keys[i], f1);
            if (LOGGER.isLoggable(Level.INFO)) {
                if (i % 1000 == 0) {
                    LOGGER.info("Updating " + i);
                }
            }
            indexAccessor.update(tuple);
        }
        // Do another scan after a round of updates.
        orderedScan(indexAccessor, fieldSerdes);
    }
    treeIndex.validate();
    treeIndex.deactivate();
    treeIndex.destroy();
}

Also used : ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IIndexAccessor(org.apache.hyracks.storage.common.IIndexAccessor) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ITreeIndex(org.apache.hyracks.storage.am.common.api.ITreeIndex) Test(org.junit.Test)

Aggregations

UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)94 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)74 Test (org.junit.Test)69 JobSpecification (org.apache.hyracks.api.job.JobSpecification)67 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)62 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)51 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)48 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)45 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)37 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)37 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)36 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)34 FileSplit (org.apache.hyracks.api.io.FileSplit)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)33 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)31 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)28 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)26 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)24