Search in sources :

Example 6 with InMemorySortOperatorDescriptor

use of org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor in project asterixdb by apache.

the class SortMergeTest method sortMergeTest01.

@Test
public void sortMergeTest01() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    InMemorySortOperatorDescriptor sorter = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, printer, 0);
    runTest(spec);
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Aggregations

RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)6 JobSpecification (org.apache.hyracks.api.job.JobSpecification)6 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)6 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)6 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)6 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)6 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)6 InMemorySortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor)6 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)5 FileSplit (org.apache.hyracks.api.io.FileSplit)5 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)5 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)5 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)5 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)5 PreclusteredGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor)5 Test (org.junit.Test)5 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)4 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)4 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)4 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)4