Search in sources:

Example 1 with ISerializerDeserializer

Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.

From the class SecondaryIndexOperationsHelper, method createExternalIndexingOp:

// This method creates a source indexing operator for external data
protected ExternalScanOperatorDescriptor createExternalIndexingOp(JobSpecification spec) throws AlgebricksException {
    // A record + primary keys
    ISerializerDeserializer[] serdes = new ISerializerDeserializer[1 + numPrimaryKeys];
    ITypeTraits[] typeTraits = new ITypeTraits[1 + numPrimaryKeys];
    // payload serde and type traits for the record slot
    serdes[0] = payloadSerde;
    typeTraits[0] = TypeTraitProvider.INSTANCE.getTypeTrait(itemType);
    // serdes and type traits for RID fields
    for (int i = 1; i < serdes.length; i++) {
        serdes[i] = IndexingConstants.getSerializerDeserializer(i - 1);
        typeTraits[i] = IndexingConstants.getTypeTraits(i - 1);
    }
    // output record desc
    RecordDescriptor indexerDesc = new RecordDescriptor(serdes, typeTraits);
    // Create the operator and its partition constraints
    Pair<ExternalScanOperatorDescriptor, AlgebricksPartitionConstraint> indexingOpAndConstraints;
    try {
        indexingOpAndConstraints = ExternalIndexingOperations.createExternalIndexingOp(spec, metadataProvider, dataset, itemType, indexerDesc, externalFiles);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexingOpAndConstraints.first, indexingOpAndConstraints.second);
    // Record these partition constraints as the primary partition constraints
    primaryPartitionConstraint = indexingOpAndConstraints.second;
    return indexingOpAndConstraints.first;
}
Also used: ExternalScanOperatorDescriptor(org.apache.asterix.external.operators.ExternalScanOperatorDescriptor), ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException), AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), CompilationException(org.apache.asterix.common.exceptions.CompilationException)
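
As background for all of the examples on this page: ISerializerDeserializer is Hyracks' per-field serialization contract, with serialize(value, DataOutput) and deserialize(DataInput) as its two operations. A minimal standalone roundtrip sketch (the class name SerdeRoundTrip is ours, not part of the project):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hyracks.dataflow.common.data.marshalling.DoubleSerializerDeserializer;

public class SerdeRoundTrip {
    public static void main(String[] args) throws Exception {
        // serialize a double field the same way the operators above do
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DoubleSerializerDeserializer.INSTANCE.serialize(61.2894, new DataOutputStream(baos));
        // read the value back from the raw bytes
        Double restored = DoubleSerializerDeserializer.INSTANCE.deserialize(
                new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
        System.out.println(restored); // 61.2894
    }
}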

Example 2 with ISerializerDeserializer

Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.

From the class LSMRTreeWithAntiMatterTuplesSecondaryIndexSearchOperatorTest, method shouldWriteFilterValueIfAppendFilterIsTrue:

@Test
public void shouldWriteFilterValueIfAppendFilterIsTrue() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build tuple
    ArrayTupleBuilder tb = new ArrayTupleBuilder(secondaryKeyFieldCount);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    DoubleSerializerDeserializer.INSTANCE.serialize(61.2894, dos);
    tb.addFieldEndOffset();
    DoubleSerializerDeserializer.INSTANCE.serialize(-149.624, dos);
    tb.addFieldEndOffset();
    DoubleSerializerDeserializer.INSTANCE.serialize(61.8894, dos);
    tb.addFieldEndOffset();
    DoubleSerializerDeserializer.INSTANCE.serialize(-149.024, dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] keyFields = { 0, 1, 2, 3 };
    RTreeSearchOperatorDescriptor secondarySearchOp = new RTreeSearchOperatorDescriptor(spec, secondaryWithFilterRecDesc, keyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondarySearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used: DataOutput(java.io.DataOutput), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider), ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor), IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor), PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor), RTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorDescriptor), JobSpecification(org.apache.hyracks.api.job.JobSpecification), RTreeSecondaryIndexSearchOperatorTest(org.apache.hyracks.tests.am.rtree.RTreeSecondaryIndexSearchOperatorTest), Test(org.junit.Test)
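
The four double fields above encode what appears to be the query rectangle for the R-tree search: the bottom-left corner's coordinates followed by the top-right corner's. A small helper of our own (hypothetical, not a Hyracks API) that packs such an MBR into an ArrayTupleBuilder using the same serialize-then-mark-offset pattern:

import java.io.DataOutput;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.marshalling.DoubleSerializerDeserializer;

final class MbrTuples {
    // Hypothetical helper (not part of Hyracks): serialize an MBR's four
    // coordinates as one four-field tuple, matching the key layout above.
    static ArrayTupleBuilder buildMbrTuple(double minX, double minY, double maxX, double maxY)
            throws HyracksDataException {
        ArrayTupleBuilder tb = new ArrayTupleBuilder(4);
        DataOutput dos = tb.getDataOutput();
        tb.reset();
        for (double coord : new double[] { minX, minY, maxX, maxY }) {
            DoubleSerializerDeserializer.INSTANCE.serialize(coord, dos);
            tb.addFieldEndOffset(); // close the current field, as in the test above
        }
        return tb;
    }
}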

Example 3 with ISerializerDeserializer

Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.

From the class AbstractRTreeOperatorTest, method loadSecondaryIndex:

protected void loadSecondaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build a dummy one-field tuple; its contents are ignored by the full scan below
    ArrayTupleBuilder tb = new ArrayTupleBuilder(primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    // null low key fields: scan from -infinity
    int[] lowKeyFields = null;
    // null high key fields: scan to +infinity
    int[] highKeyFields = null;
    // scan primary index
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
    // load secondary index
    int[] fieldPermutation = { 6, 7, 8, 9, 0 };
    TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, secondaryRecDesc, fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBulkLoad, NC1_ID);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, secondaryBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}
Also used: NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor), DataOutput(java.io.DataOutput), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor), ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor), JobSpecification(org.apache.hyracks.api.job.JobSpecification), TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)
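
The fieldPermutation array { 6, 7, 8, 9, 0 } tells the bulk-load operator which fields of each incoming tuple to write, and in what order: in this test, the four spatial key fields followed by the primary key. A toy sketch of the reordering it implies (our own illustration, not a Hyracks API):

final class FieldPermutationDemo {
    // Illustrative only: select and reorder a row's fields the way a
    // field permutation such as { 6, 7, 8, 9, 0 } does.
    static Object[] permute(Object[] inputRow, int[] fieldPermutation) {
        Object[] out = new Object[fieldPermutation.length];
        for (int i = 0; i < fieldPermutation.length; i++) {
            out[i] = inputRow[fieldPermutation[i]];
        }
        return out;
    }
}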

Example 4 with ISerializerDeserializer

Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.

From the class RTreeSecondaryIndexSearchOperatorTest, method searchSecondaryIndexTest:

@Test
public void searchSecondaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build tuple
    ArrayTupleBuilder tb = new ArrayTupleBuilder(secondaryKeyFieldCount);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    DoubleSerializerDeserializer.INSTANCE.serialize(61.2894, dos);
    tb.addFieldEndOffset();
    DoubleSerializerDeserializer.INSTANCE.serialize(-149.624, dos);
    tb.addFieldEndOffset();
    DoubleSerializerDeserializer.INSTANCE.serialize(61.8894, dos);
    tb.addFieldEndOffset();
    DoubleSerializerDeserializer.INSTANCE.serialize(-149.024, dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] keyFields = { 0, 1, 2, 3 };
    RTreeSearchOperatorDescriptor secondarySearchOp = new RTreeSearchOperatorDescriptor(spec, secondaryRecDesc, keyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondarySearchOp, NC1_ID);
    // fifth field from the tuples coming from secondary index
    int[] primaryLowKeyFields = { 4 };
    // fifth field from the tuples coming from secondary index
    int[] primaryHighKeyFields = { 4 };
    // search primary index
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used: DataOutput(java.io.DataOutput), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider), ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor), ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor), IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor), PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor), RTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorDescriptor), JobSpecification(org.apache.hyracks.api.job.JobSpecification), Test(org.junit.Test)

Example 5 with ISerializerDeserializer

Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by apache.

From the class BTreeSecondaryIndexSearchOperatorTest, method searchSecondaryIndexTest:

@Test
public void searchSecondaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build a tuple containing the low and high search keys
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.secondaryKeyFieldCount);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] secondaryLowKeyFields = { 0 };
    int[] secondaryHighKeyFields = { 1 };
    // search secondary index
    BTreeSearchOperatorDescriptor secondaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.secondaryRecDesc, secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeSearchOp, NC1_ID);
    // second field from the tuples coming from the secondary index
    int[] primaryLowKeyFields = { 1 };
    // second field from the tuples coming from the secondary index
    int[] primaryHighKeyFields = { 1 };
    // search primary index
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBtreeSearchOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used: DataOutput(java.io.DataOutput), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider), ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor), ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor), IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor), PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor), JobSpecification(org.apache.hyracks.api.job.JobSpecification), Test(org.junit.Test)
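
A detail worth noting: the search keys are ISO-formatted date strings, and a plain UTF-8 string BTree range scan returns a correct date range only because yyyy-MM-dd strings compare lexicographically in the same order as chronologically. A quick standalone check (class name ours):

public class IsoDateOrdering {
    public static void main(String[] args) {
        // yyyy-MM-dd strings sort the same way the dates do, so the
        // string range [low, high] above is also a date range.
        String low = "1998-07-21", mid = "1999-01-01", high = "2000-10-18";
        System.out.println(low.compareTo(mid) < 0 && mid.compareTo(high) < 0); // true
    }
}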

Aggregations

ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 211 uses
DataOutput (java.io.DataOutput): 124 uses
IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext): 116 uses
IPointable (org.apache.hyracks.data.std.api.IPointable): 112 uses
IFrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference): 112 uses
ArrayBackedValueStorage (org.apache.hyracks.data.std.util.ArrayBackedValueStorage): 110 uses
IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory): 107 uses
IScalarEvaluator (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator): 106 uses
VoidPointable (org.apache.hyracks.data.std.primitive.VoidPointable): 98 uses
TypeMismatchException (org.apache.asterix.runtime.exceptions.TypeMismatchException): 93 uses
IOException (java.io.IOException): 61 uses
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 58 uses
Test (org.junit.Test): 58 uses
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 42 uses
InvalidDataFormatException (org.apache.asterix.runtime.exceptions.InvalidDataFormatException): 41 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 36 uses
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits): 34 uses
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 31 uses
AMutableInt64 (org.apache.asterix.om.base.AMutableInt64): 27 uses
UTF8StringPointable (org.apache.hyracks.data.std.primitive.UTF8StringPointable): 26 uses