Search in sources :

Example 31 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class ScanPrintTest method scanPrint02.

@Test
public void scanPrint02() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID);
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, ordScanner, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 32 with MToNPartitioningConnectorDescriptor

use of org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor in project asterixdb by apache.

the class TPCHCustomerOrderHashJoinTest method customerOrderCIDJoinMultiMaterialized.

@Test
public void customerOrderCIDJoinMultiMaterialized() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part2.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
    MaterializingOperatorDescriptor ordMat = new MaterializingOperatorDescriptor(spec, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordMat, NC1_ID, NC2_ID);
    MaterializingOperatorDescriptor custMat = new MaterializingOperatorDescriptor(spec, custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custMat, NC1_ID, NC2_ID);
    InMemoryHashJoinOperatorDescriptor join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 1 }, new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, 128, null, 128);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordPartConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(ordPartConn, ordScanner, 0, ordMat, 0);
    IConnectorDescriptor custPartConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(custPartConn, custScanner, 0, custMat, 0);
    IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordMat, 0, join, 0);
    IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(custJoinConn, custMat, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) InMemoryHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) MaterializingOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.MaterializingOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Aggregations

MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)32 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)31 JobSpecification (org.apache.hyracks.api.job.JobSpecification)29 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)29 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)28 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)27 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)26 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)25 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)24 Test (org.junit.Test)21 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)18 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)17 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)15 IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)15 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)13 AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)13 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)13 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)12 FileSplit (org.apache.hyracks.api.io.FileSplit)10 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)10