Search in sources:

Example 1 with ResultSetId

use of org.apache.hyracks.api.dataset.ResultSetId in project asterixdb by apache.

From the class QueryTranslator, method deliverResult.

/**
 * Creates and runs the job for the current statement and reports its result in the way
 * selected by {@code resultDelivery}.
 *
 * <ul>
 * <li>{@code ASYNC}: submits {@code asyncCreateAndRunJob} to {@code executorService} and blocks
 * the calling thread until the shared flag passed to that task becomes true.</li>
 * <li>{@code IMMEDIATE}: runs the job and prints the results read through a {@link ResultReader}.</li>
 * <li>{@code DEFERRED}: runs the job, prints a {@link ResultHandle}, and, when {@code outMetadata}
 * is non-null, records the (job id, result set id, output record type) triple in it.</li>
 * </ul>
 * Any other delivery mode is a no-op.
 *
 * @throws Exception whatever the job creation/execution or the wait on the flag throws
 */
private void deliverResult(IHyracksClientConnection hcc, IHyracksDataset hdc, IStatementCompiler compiler,
        MetadataProvider metadataProvider, IMetadataLocker locker, ResultDelivery resultDelivery,
        ResultMetadata outMetadata, Stats stats, String clientContextId, IStatementExecutorContext ctx)
        throws Exception {
    final ResultSetId resultSetId = metadataProvider.getResultSetId();
    switch (resultDelivery) {
        case ASYNC: {
            // Flag shared with the background task; presumably set (and notified on) by
            // asyncCreateAndRunJob once the handle has been printed -- verify against that method.
            final MutableBoolean handlePrinted = new MutableBoolean(false);
            executorService.submit(() -> asyncCreateAndRunJob(hcc, compiler, locker, resultDelivery,
                    clientContextId, ctx, resultSetId, handlePrinted));
            synchronized (handlePrinted) {
                // Re-check the flag after every wake-up so a spurious wake-up does not release us early.
                while (!handlePrinted.booleanValue()) {
                    handlePrinted.wait();
                }
            }
            break;
        }
        case IMMEDIATE: {
            createAndRunJob(hcc, null, compiler, locker, resultDelivery, jobId -> {
                final ResultReader reader = new ResultReader(hdc, jobId, resultSetId);
                ResultUtil.printResults(appCtx, reader, sessionOutput, stats,
                        metadataProvider.findOutputRecordType());
            }, clientContextId, ctx);
            break;
        }
        case DEFERRED: {
            createAndRunJob(hcc, null, compiler, locker, resultDelivery, jobId -> {
                ResultUtil.printResultHandle(sessionOutput, new ResultHandle(jobId, resultSetId));
                if (outMetadata == null) {
                    return;
                }
                outMetadata.getResultSets()
                        .add(Triple.of(jobId, resultSetId, metadataProvider.findOutputRecordType()));
            }, clientContextId, ctx);
            break;
        }
        default:
            break;
    }
}
Also used : ResultReader(org.apache.asterix.app.result.ResultReader) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) MutableBoolean(org.apache.commons.lang3.mutable.MutableBoolean) ResultHandle(org.apache.asterix.app.result.ResultHandle)

Example 2 with ResultSetId

use of org.apache.hyracks.api.dataset.ResultSetId in project asterixdb by apache.

From the class HeapSortMergeTest, method optimizedSortMergeTest01.

/**
 * Builds and runs a job that scans the two {@code orders-part*.tbl} splits (nine '|'-delimited
 * string fields each), feeds them through a {@link TopKSorterOperatorDescriptor} keyed on
 * fields 1 and 0, merges the sorted partitions, and writes the output to a file on NC1.
 */
@Test
public void optimizedSortMergeTest01() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Input: one file split per node controller.
    String tpchDir = "data" + File.separator + "tpch0.001" + File.separator;
    FileSplit[] ordersSplits = new FileSplit[2];
    ordersSplits[0] = new ManagedFileSplit(NC1_ID, tpchDir + "orders-part1.tbl");
    ordersSplits[1] = new ManagedFileSplit(NC2_ID, tpchDir + "orders-part2.tbl");
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);

    // Every one of the nine fields is parsed and serialized as a UTF-8 string.
    final int fieldCount = 9;
    ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[fieldCount];
    IValueParserFactory[] fieldParsers = new IValueParserFactory[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        fieldSerdes[i] = new UTF8StringSerializerDeserializer();
        fieldParsers[i] = UTF8StringParserFactory.INSTANCE;
    }
    RecordDescriptor ordersDesc = new RecordDescriptor(fieldSerdes);

    DelimitedDataTupleParserFactory tupleParser = new DelimitedDataTupleParserFactory(fieldParsers, '|');
    FileScanOperatorDescriptor ordScanner =
            new FileScanOperatorDescriptor(spec, ordersSplitProvider, tupleParser, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);

    // NOTE(review): the original comment here read "larger than the total record numbers", which
    // does not obviously describe a limit of 5 -- confirm the intended value.
    int outputLimit = 5;
    IBinaryComparatorFactory[] sortComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit,
            new int[] { 1, 0 }, null, sortComparators, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);

    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);

    // Output goes to a plain '|'-separated file created on nc1.
    FileSplit fs = createFile(nc1);
    IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider(new FileSplit[] { fs });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);

    // Wiring: scanner -> sorter (1:1), then sorter -> printer through a partitioning merge on (1, 0).
    OneToOneConnectorDescriptor scanToSort = new OneToOneConnectorDescriptor(spec);
    spec.connect(scanToSort, ordScanner, 0, sorter, 0);

    IBinaryHashFunctionFactory[] hashFunctions = new IBinaryHashFunctionFactory[] {
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
    FieldHashPartitionComputerFactory partitioner =
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 }, hashFunctions);
    IBinaryComparatorFactory[] mergeComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    MToNPartitioningMergingConnectorDescriptor sortToPrint = new MToNPartitioningMergingConnectorDescriptor(
            spec, partitioner, new int[] { 1, 0 }, mergeComparators, new UTF8StringNormalizedKeyComputerFactory());
    spec.connect(sortToPrint, sorter, 0, printer, 0);

    // NOTE(review): unlike the CountOfCounts tests, no spec.addRoot(printer) is issued here --
    // confirm that this is intentional.
    runTest(spec);
    System.out.println("Result write into :" + fs.getPath() + " in node: " + fs.getNodeName());
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) TopKSorterOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.TopKSorterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 3 with ResultSetId

use of org.apache.hyracks.api.dataset.ResultSetId in project asterixdb by apache.

From the class HeapSortMergeTest, method createSortMergeJobSpec.

/**
 * Builds (without running) a job specification that scans the two {@code orders-part*.tbl}
 * splits, sorts them with a {@link TopKSorterOperatorDescriptor} on fields 1 and 0, merges the
 * sorted partitions into a {@link LimitOperatorDescriptor} on NC1, and hands the limited output
 * to a {@link ResultWriterOperatorDescriptor} bound to result set id 1.
 *
 * @return the assembled job specification
 */
public static JobSpecification createSortMergeJobSpec() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Input: one file split per node controller.
    String tpchDir = "data" + File.separator + "tpch0.001" + File.separator;
    FileSplit[] ordersSplits = new FileSplit[2];
    ordersSplits[0] = new ManagedFileSplit(NC1_ID, tpchDir + "orders-part1.tbl");
    ordersSplits[1] = new ManagedFileSplit(NC2_ID, tpchDir + "orders-part2.tbl");
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);

    // Nine fields, all parsed and serialized as UTF-8 strings.
    final int fieldCount = 9;
    ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[fieldCount];
    IValueParserFactory[] fieldParsers = new IValueParserFactory[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        fieldSerdes[i] = new UTF8StringSerializerDeserializer();
        fieldParsers[i] = UTF8StringParserFactory.INSTANCE;
    }
    RecordDescriptor ordersDesc = new RecordDescriptor(fieldSerdes);

    DelimitedDataTupleParserFactory tupleParser = new DelimitedDataTupleParserFactory(fieldParsers, '|');
    FileScanOperatorDescriptor ordScanner =
            new FileScanOperatorDescriptor(spec, ordersSplitProvider, tupleParser, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);

    // The same limit is applied by the top-K sorter on each partition and by the final limit operator.
    int outputLimit = 20;
    IBinaryComparatorFactory[] sortComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit,
            new int[] { 1, 0 }, null, sortComparators, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);

    LimitOperatorDescriptor filter = new LimitOperatorDescriptor(spec, ordersDesc, outputLimit);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, filter, NC1_ID);

    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);

    // Wiring: scanner -> sorter (1:1), sorter -> limit (partitioning merge on (1, 0)), limit -> printer (1:1).
    OneToOneConnectorDescriptor scanToSort = new OneToOneConnectorDescriptor(spec);
    spec.connect(scanToSort, ordScanner, 0, sorter, 0);

    IBinaryHashFunctionFactory[] hashFunctions = new IBinaryHashFunctionFactory[] {
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
    FieldHashPartitionComputerFactory partitioner =
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 }, hashFunctions);
    IBinaryComparatorFactory[] mergeComparators = new IBinaryComparatorFactory[] {
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
            PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    MToNPartitioningMergingConnectorDescriptor sortToLimit = new MToNPartitioningMergingConnectorDescriptor(
            spec, partitioner, new int[] { 1, 0 }, mergeComparators, new UTF8StringNormalizedKeyComputerFactory());
    spec.connect(sortToLimit, sorter, 0, filter, 0);

    OneToOneConnectorDescriptor limitToPrint = new OneToOneConnectorDescriptor(spec);
    spec.connect(limitToPrint, filter, 0, printer, 0);

    // NOTE(review): unlike the CountOfCounts tests, no spec.addRoot(printer) is issued here --
    // confirm that this is intentional.
    return spec;
}
Also used : UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) MToNPartitioningMergingConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) TopKSorterOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.TopKSorterOperatorDescriptor) LimitOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.LimitOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification)

Example 4 with ResultSetId

use of org.apache.hyracks.api.dataset.ResultSetId in project asterixdb by apache.

From the class CountOfCountsTest, method countOfCountsMultiNC.

/**
 * Runs a "count of counts" job across two node controllers: words are scanned from
 * {@code data/words.txt}, hash-partitioned and sorted on the word, grouped to produce a
 * (word, count) pair, then re-partitioned and sorted on the count and grouped again to produce a
 * (count, count-of-that-count) pair, which is broadcast to a result writer on NC1.
 */
@Test
public void countOfCountsMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Source: a single-column, comma-delimited file scanned on NC2.
    FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);

    // Stage 1: sort on the word (field 0). Four partition locations, two per node.
    InMemorySortOperatorDescriptor sorter = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);

    // Stage 1 output: (word : string, count : int).
    RecordDescriptor desc2 = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);

    // Stage 2: sort on the count (field 1 of desc2), compared as an integer.
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);

    // Stage 2 output: (int, int).
    RecordDescriptor desc3 = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);

    // Sink: result writer on NC1 bound to result set id 1.
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);

    // scanner -> sorter: hash-partition on the word.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 },
                    new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);

    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);

    // group -> sorter2: hash-partition on the count. Field 1 of desc2 is serialized as an integer
    // and compared with IntegerPointable downstream, so it must be hashed with the integer
    // pointable as well; hashing those bytes as a UTF-8 string misreads the field.
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 },
                    new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);

    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);

    // group2 -> printer: broadcast connector into the single printer partition.
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);

    spec.addRoot(printer);
    runTest(spec);
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) 
FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) Test(org.junit.Test)

Example 5 with ResultSetId

use of org.apache.hyracks.api.dataset.ResultSetId in project asterixdb by apache.

From the class CountOfCountsTest, method countOfCountsExternalSortMultiNC.

/**
 * Runs a "count of counts" job across two node controllers, with an
 * {@link ExternalSortOperatorDescriptor} for the first sort: words are scanned from
 * {@code data/words.txt}, hash-partitioned and sorted on the word, grouped to produce a
 * (word, count) pair, then re-partitioned and sorted on the count and grouped again to produce a
 * (count, count-of-that-count) pair, which is broadcast to a result writer on NC1.
 */
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
    JobSpecification spec = new JobSpecification();

    // Source: a single-column, comma-delimited file scanned on NC2.
    FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
    RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);

    // Stage 1: external sort on the word (field 0). Four partition locations, two per node.
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);

    // Stage 1 output: (word : string, count : int).
    RecordDescriptor desc2 = new RecordDescriptor(
            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);

    // Stage 2: in-memory sort on the count (field 1 of desc2), compared as an integer.
    InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);

    // Stage 2 output: (int, int).
    RecordDescriptor desc3 = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
            desc3);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);

    // Sink: result writer on NC1 bound to result set id 1.
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);

    // scanner -> sorter: hash-partition on the word.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 },
                    new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, sorter, 0);

    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sorter, 0, group, 0);

    // group -> sorter2: hash-partition on the count. Field 1 of desc2 is serialized as an integer
    // and compared with IntegerPointable downstream, so it must be hashed with the integer
    // pointable as well; hashing those bytes as a UTF-8 string misreads the field.
    IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1 },
                    new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
    spec.connect(conn3, group, 0, sorter2, 0);

    IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn4, sorter2, 0, group2, 0);

    // group2 -> printer: broadcast connector into the single printer partition.
    IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(conn5, group2, 0, printer, 0);

    spec.addRoot(printer);
    runTest(spec);
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) InMemorySortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) 
FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) PreclusteredGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor) Test(org.junit.Test)

Aggregations

ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)32 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)26 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)24 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)24 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)24 JobSpecification (org.apache.hyracks.api.job.JobSpecification)24 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)24 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)24 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)24 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)24 FileSplit (org.apache.hyracks.api.io.FileSplit)23 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)23 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)23 Test (org.junit.Test)22 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)19 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)18 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)15 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)13 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)9 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)9