Search in sources :

Example 6 with NestedLoopJoinOperatorDescriptor

use of org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor in project asterixdb by apache.

the class TPCHCustomerOrderNestedLoopJoinTest method customerOrderCIDJoin.

/*
     * TPCH Customer table: CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
     * C_NAME VARCHAR(25) NOT NULL, C_ADDRESS VARCHAR(40) NOT NULL, C_NATIONKEY
     * INTEGER NOT NULL, C_PHONE CHAR(15) NOT NULL, C_ACCTBAL DECIMAL(15,2) NOT
     * NULL, C_MKTSEGMENT CHAR(10) NOT NULL, C_COMMENT VARCHAR(117) NOT NULL );
     * TPCH Orders table: CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL,
     * O_CUSTKEY INTEGER NOT NULL, O_ORDERSTATUS CHAR(1) NOT NULL, O_TOTALPRICE
     * DECIMAL(15,2) NOT NULL, O_ORDERDATE DATE NOT NULL, O_ORDERPRIORITY
     * CHAR(15) NOT NULL, O_CLERK CHAR(15) NOT NULL, O_SHIPPRIORITY INTEGER NOT
     * NULL, O_COMMENT VARCHAR(79) NOT NULL );
     */
@Test
public void customerOrderCIDJoin() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
    NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 4, false, null);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 0);
    IConnectorDescriptor custJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) NestedLoopJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Aggregations

IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)6 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)6 NestedLoopJoinOperatorDescriptor (org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor)6 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)5 JobSpecification (org.apache.hyracks.api.job.JobSpecification)5 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)5 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)5 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)5 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)5 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)5 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)5 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)4 FileSplit (org.apache.hyracks.api.io.FileSplit)4 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)4 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)4 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)4 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)3 Test (org.junit.Test)3 NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException)1 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)1