Examples with OneToOneConnectorDescriptor - org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor

Example 46 with OneToOneConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by apache.

the class FlushDatasetUtil method flushDataset.

public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, String dataverseName, String datasetName, String indexName) throws Exception {
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    JobSpecification spec = new JobSpecification(frameSize);
    RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
    AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1, new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
    org.apache.asterix.common.transactions.JobId jobId = JobIdFactory.generateJobId();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    FlushDatasetOperatorDescriptor flushOperator = new FlushDatasetOperatorDescriptor(spec, jobId, dataset.getDatasetId());
    spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource, primaryPartitionConstraint);
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    JobUtils.runJob(hcc, spec, true);
}

Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) CompilerProperties(org.apache.asterix.common.config.CompilerProperties) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) FlushDatasetOperatorDescriptor(org.apache.asterix.runtime.operators.std.FlushDatasetOperatorDescriptor) EmptyTupleSourceRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification)

Example 47 with OneToOneConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by apache.

the class SecondaryIndexBulkLoadExample method createJob.

private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples that we are retrieving from the primary index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { // we will use this as payload in secondary index
    IntegerSerializerDeserializer.INSTANCE, // we will use this ask key in secondary index
    new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for sort fields and BTree fields
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[2];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    comparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // use a disk-order scan to read primary index
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    TreeIndexDiskOrderScanOperatorDescriptor btreeScanOp = new TreeIndexDiskOrderScanOperatorDescriptor(spec, recDesc, primaryHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, btreeScanOp, splitNCs);
    // sort the tuples as preparation for bulk load into secondary index
    // fields to sort on
    int[] sortFields = { 1, 0 };
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
    // tuples to be put into B-Tree shall have 2 fields
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // the B-Tree expects its keyfields to be at the front of its input
    // tuple
    int[] fieldPermutation = { 1, 0 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
    // connect the ops
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}

Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TreeIndexDiskOrderScanOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexDiskOrderScanOperatorDescriptor) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)

Example 48 with OneToOneConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by apache.

the class AbstractBTreeOperatorTest method loadSecondaryIndex.

protected void loadSecondaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build dummy tuple containing nothing
    ArrayTupleBuilder tb = new ArrayTupleBuilder(primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    RecordDescriptor keyRecDesc = secondaryRecDesc;
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    // - infinity
    int[] lowKeyFields = null;
    // + infinity
    int[] highKeyFields = null;
    // scan primary index
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    // sort based on secondary keys
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 1000, secondaryFieldPermutationA, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, primaryRecDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);
    // load secondary index
    int[] fieldPermutation = { 3, 0 };
    TreeIndexBulkLoadOperatorDescriptor secondaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, secondaryRecDesc, fieldPermutation, 0.7f, true, 1000L, true, secondaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeBulkLoad, NC1_ID);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, secondaryBtreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBtreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}

Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) DataOutput(java.io.DataOutput) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)

Example 49 with OneToOneConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by apache.

the class BTreePrimaryIndexScanOperatorTest method scanPrimaryIndexTest.

@Test
public void scanPrimaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build dummy tuple containing nothing
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    // - infinity
    int[] lowKeyFields = null;
    // + infinity
    int[] highKeyFields = null;
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.primaryRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Also used : DataOutput(java.io.DataOutput) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 50 with OneToOneConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor in project asterixdb by apache.

the class BTreePrimaryIndexSearchOperatorTest method searchPrimaryIndexTest.

@Test
public void searchPrimaryIndexTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build tuple containing low and high search key
    // high key and low key
    ArrayTupleBuilder tb = new ArrayTupleBuilder(DataSetConstants.primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("100", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("200", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    int[] lowKeyFields = { 0 };
    int[] highKeyFields = { 1 };
    BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, DataSetConstants.primaryRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
    IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { createFile(nc1) });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Aggregations

JobSpecification (org.apache.hyracks.api.job.JobSpecification)88 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)88 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)72 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)62 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)58 Test (org.junit.Test)58 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)54 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)54 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)48 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)40 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)39 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)36 FileSplit (org.apache.hyracks.api.io.FileSplit)33 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)29 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)24 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)24 PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor)20 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)20 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)19