Search in sources :

Example 16 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class TPCHCustomerOrderHashJoinTest method customerOrderCIDJoinAutoExpand.

@Test
public void customerOrderCIDJoinAutoExpand() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part2.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
    InMemoryHashJoinOperatorDescriptor join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 1 }, new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, 128, null, 128);
    PartitionConstraintHelper.addPartitionCountConstraint(spec, join, 2);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(ordJoinConn, ordScanner, 0, join, 0);
    IConnectorDescriptor custJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(custJoinConn, custScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) InMemoryHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 17 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class TPCHCustomerOrderHashJoinTest method customerOrderCIDJoinMulti.

@Test
public void customerOrderCIDJoinMulti() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part2.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
    InMemoryHashJoinOperatorDescriptor join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 1 }, new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, 128, null, 128);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(ordJoinConn, ordScanner, 0, join, 0);
    IConnectorDescriptor custJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(custJoinConn, custScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) InMemoryHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 18 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class ScanPrintTest method scanPrint03.

@Test
public void scanPrint03() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
    spec.connect(conn1, ordScanner, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 19 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class WordCountMain method createJob.

private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, String algo, int htSize, int frameLimit, String format, int frameSize) {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
    RecordDescriptor wordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor wordScanner = new FileScanOperatorDescriptor(spec, splitsProvider, new WordTupleParserFactory(), wordDesc);
    createPartitionConstraint(spec, wordScanner, inSplits);
    RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    IOperatorDescriptor gBy;
    int[] keys = new int[] { 0 };
    if ("hash".equalsIgnoreCase(algo)) {
        gBy = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), groupResultDesc, groupResultDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, gBy, outSplits);
        IConnectorDescriptor scanGroupConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(scanGroupConn, wordScanner, 0, gBy, 0);
    } else {
        IBinaryComparatorFactory[] cfs = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
        IOperatorDescriptor sorter = "memsort".equalsIgnoreCase(algo) ? new InMemorySortOperatorDescriptor(spec, keys, new UTF8StringNormalizedKeyComputerFactory(), cfs, wordDesc) : new ExternalSortOperatorDescriptor(spec, frameLimit, keys, new UTF8StringNormalizedKeyComputerFactory(), cfs, wordDesc);
        createPartitionConstraint(spec, sorter, outSplits);
        IConnectorDescriptor scanSortConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(scanSortConn, wordScanner, 0, sorter, 0);
        gBy = new PreclusteredGroupOperatorDescriptor(spec, keys, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), groupResultDesc);
        createPartitionConstraint(spec, gBy, outSplits);
        OneToOneConnectorDescriptor sortGroupConn = new OneToOneConnectorDescriptor(spec);
        spec.connect(sortGroupConn, sorter, 0, gBy, 0);
    }
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
    IOperatorDescriptor writer = "text".equalsIgnoreCase(format) ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, ",") : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    createPartitionConstraint(spec, writer, outSplits);
    IConnectorDescriptor gbyPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(gbyPrinterConn, gBy, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
Also used : WordTupleParserFactory(org.apache.hyracks.examples.text.WordTupleParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) FloatSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) FrameFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FrameFileWriterOperatorDescriptor)

Example 20 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class Groupby method createJob.

private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, int htSize, long fileSize, int frameLimit, int frameSize, String alg, boolean outPlain) {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
    FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider, new DelimitedDataTupleParserFactory(lineitemParserFactories, '|'), lineitemDesc);
    createPartitionConstraint(spec, fileScanner, inSplits);
    // Output: each unique string with an integer count
    RecordDescriptor outDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, // IntegerSerializerDeserializer.INSTANCE,
    IntegerSerializerDeserializer.INSTANCE });
    // Specify the grouping key, which will be the string extracted during
    // the scan.
    int[] keys = new int[] { 0 };
    AbstractOperatorDescriptor grouper;
    if (alg.equalsIgnoreCase("hash")) {
        // external hash graph
        grouper = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit, new IBinaryComparatorFactory[] { // PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY),
        PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new IntegerNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, false) }), outDesc, outDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { MurmurHash3BinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, grouper, outSplits);
    } else if (alg.equalsIgnoreCase("sort")) {
        grouper = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keys, new IntegerNormalizedKeyComputerFactory(), new IBinaryComparatorFactory[] { // PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY),
        PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, true) }), outDesc, outDesc, false);
        createPartitionConstraint(spec, grouper, outSplits);
    } else {
        System.err.println("unknow groupby alg:" + alg);
        return null;
    }
    // Connect scanner with the grouper
    IConnectorDescriptor scanGroupConnDef2 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { // PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY),
    PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
    spec.connect(scanGroupConnDef2, fileScanner, 0, grouper, 0);
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
    AbstractSingleActivityOperatorDescriptor writer = outPlain ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|") : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    createPartitionConstraint(spec, writer, outSplits);
    IConnectorDescriptor groupOutConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(groupOutConn, grouper, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) IntegerNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.IntegerNormalizedKeyComputerFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) AbstractOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) FrameFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FrameFileWriterOperatorDescriptor) SortGroupByOperatorDescriptor(org.apache.hyracks.dataflow.std.group.sort.SortGroupByOperatorDescriptor)

Aggregations

MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)32 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)31 JobSpecification (org.apache.hyracks.api.job.JobSpecification)29 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)29 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)28 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)27 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)26 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)25 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)24 Test (org.junit.Test)21 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)18 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)17 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)15 IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)15 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)13 AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)13 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)13 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)12 FileSplit (org.apache.hyracks.api.io.FileSplit)10 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)10