Search in sources :

Example 11 with MToNPartitioningMergingConnectorDescriptor

Use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor in the asterixdb project by apache.

The createJob method of the class Sort.

/**
 * Assembles the job specification for the sort example: a file scan over the
 * orders splits feeds a sorter through a one-to-one connector, and the sorter
 * feeds a plain file writer through an M-to-N partitioning merging connector
 * keyed on {@code SortFields}.
 *
 * @param ordersSplits    splits handed to the file scan; also used for the
 *                        partition constraints of the scan and the sorter
 * @param outputSplit     splits handed to the file writer and used for its
 *                        partition constraint
 * @param memBufferAlg    selects the free-slot policy of the external sort,
 *                        compared case-insensitively: {@code "bestfit"} maps to
 *                        {@code SMALLEST_FIT}, {@code "biggestfit"} to
 *                        {@code BIGGEST_FIT}, anything else to {@code LAST_FIT};
 *                        not consulted when the top-K sorter is chosen
 * @param frameLimit      frame limit passed to whichever sorter is created
 * @param frameSize       frame size set on the job specification
 * @param limit           limit passed to the sorter; the top-K sorter is only
 *                        eligible when this is below {@link Integer#MAX_VALUE}
 * @param usingHeapSorter requests the top-K sorter instead of the external sort
 * @return the job specification with the writer registered as its root
 */
private static JobSpecification createJob(FileSplit[] ordersSplits, FileSplit[] outputSplit, String memBufferAlg, int frameLimit, int frameSize, int limit, boolean usingHeapSorter) {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(frameSize);

    // Source: scan the orders splits.
    IFileSplitProvider inputProvider = new ConstantFileSplitProvider(ordersSplits);
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, inputProvider,
            new DelimitedDataTupleParserFactory(orderParserFactories, '|'), ordersDesc);
    createPartitionConstraint(spec, scanOp, ordersSplits);

    // Sorter: top-K when requested and a real limit is given, external merge sort otherwise.
    final boolean useTopK = usingHeapSorter && limit < Integer.MAX_VALUE;
    final AbstractSorterOperatorDescriptor sortOp;
    if (useTopK) {
        sortOp = new TopKSorterOperatorDescriptor(spec, frameLimit, limit, SortFields, null,
                SortFieldsComparatorFactories, ordersDesc);
    } else {
        // The external-sort variants differ only in the free-slot policy.
        final EnumFreeSlotPolicy slotPolicy;
        if (memBufferAlg.equalsIgnoreCase("bestfit")) {
            slotPolicy = EnumFreeSlotPolicy.SMALLEST_FIT;
        } else if (memBufferAlg.equalsIgnoreCase("biggestfit")) {
            slotPolicy = EnumFreeSlotPolicy.BIGGEST_FIT;
        } else {
            slotPolicy = EnumFreeSlotPolicy.LAST_FIT;
        }
        sortOp = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null,
                SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, slotPolicy, limit);
    }
    createPartitionConstraint(spec, sortOp, ordersSplits);

    // Sink: write the result to the output splits.
    IFileSplitProvider outputProvider = new ConstantFileSplitProvider(outputSplit);
    IOperatorDescriptor writerOp = new PlainFileWriterOperatorDescriptor(spec, outputProvider, "|");
    createPartitionConstraint(spec, writerOp, outputSplit);

    // Wiring: scan -> sort is one-to-one; sort -> writer is hash-partitioned on the sort fields.
    OneToOneConnectorDescriptor scanToSort = new OneToOneConnectorDescriptor(spec);
    spec.connect(scanToSort, scanOp, 0, sortOp, 0);

    MToNPartitioningMergingConnectorDescriptor sortToWriter = new MToNPartitioningMergingConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(SortFields, orderBinaryHashFunctionFactories), SortFields,
            SortFieldsComparatorFactories, new UTF8StringNormalizedKeyComputerFactory());
    spec.connect(sortToWriter, sortOp, 0, writerOp, 0);

    spec.addRoot(writerOp);
    return spec;
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) AbstractSorterOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.AbstractSorterOperatorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) TopKSorterOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.TopKSorterOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification)

Aggregations

MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor)11 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)10 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)10 JobSpecification (org.apache.hyracks.api.job.JobSpecification)8 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)8 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)6 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)6 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)6 UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory)6 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)6 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)6 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)6 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)6 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)5 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)5 ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)5 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)4 FileSplit (org.apache.hyracks.api.io.FileSplit)4 Pair (org.apache.hyracks.algebricks.common.utils.Pair)3 TargetConstraint (org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint)3