Search in sources :

Example 46 with DelimitedDataTupleParserFactory

use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.

the class AbstractRTreeOperatorTest method loadPrimaryIndex.

protected void loadPrimaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "orders-with-locations-part1.txt") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 1000, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);
    int[] fieldPermutation = { 0, 1, 2, 4, 5, 7, 9, 10, 11, 12 };
    TreeIndexBulkLoadOperatorDescriptor primaryBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, primaryRecDesc, fieldPermutation, 0.7f, false, 1000L, true, primaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBulkLoad, NC1_ID);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, primaryBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}
Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)

Aggregations

DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)46 JobSpecification (org.apache.hyracks.api.job.JobSpecification)45 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)45 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)45 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)43 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)41 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)41 FileSplit (org.apache.hyracks.api.io.FileSplit)39 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)39 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)36 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)36 Test (org.junit.Test)34 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)33 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)24 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)23 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)18 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)18 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)12 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)12