Search in sources :

Example 1 with OptimizedHybridHashJoinOperatorDescriptor

use of org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor in project asterixdb by apache.

the class Join method createJob.

private static JobSpecification createJob(FileSplit[] customerSplits, FileSplit[] orderSplits, FileSplit[] resultSplits, int numJoinPartitions, String algo, int graceInputSize, int graceRecordsPerFrame, double graceFactor, int memSize, int tableSize, boolean hasGroupBy, int frameSize) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(customerSplits);
    long custFileSize = 0;
    for (int i = 0; i < customerSplits.length; i++) {
        custFileSize += customerSplits[i].getFile(null).length();
    }
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(orderSplits);
    long orderFileSize = 0;
    for (int i = 0; i < orderSplits.length; i++) {
        orderFileSize += orderSplits[i].getFile(null).length();
    }
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(orderParserFactories, '|'), Common.ordersDesc);
    createPartitionConstraint(spec, ordScanner, orderSplits);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(custParserFactories, '|'), Common.custDesc);
    createPartitionConstraint(spec, custScanner, customerSplits);
    IOperatorDescriptor join;
    if ("nestedloop".equalsIgnoreCase(algo)) {
        join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), Common.custOrderJoinDesc, memSize, false, null);
    } else if ("inmem".equalsIgnoreCase(algo)) {
        join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, Common.custOrderJoinDesc, tableSize, null, memSize * frameSize);
    } else if ("hybrid".equalsIgnoreCase(algo)) {
        join = new OptimizedHybridHashJoinOperatorDescriptor(spec, memSize, graceInputSize, graceFactor, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, Common.custOrderJoinDesc, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null);
    } else {
        System.err.println("unknown algorithm:" + algo);
        return null;
    }
    PartitionConstraintHelper.addPartitionCountConstraint(spec, join, numJoinPartitions);
    IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor custJoinConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IOperatorDescriptor endingOp = join;
    if (hasGroupBy) {
        RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
        ExternalGroupOperatorDescriptor gby = new ExternalGroupOperatorDescriptor(spec, tableSize, custFileSize + orderFileSize, new int[] { 6 }, memSize, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), groupResultDesc, groupResultDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, gby, resultSplits);
        IConnectorDescriptor joinGroupConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 6 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(joinGroupConn, join, 0, gby, 0);
        endingOp = gby;
    }
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(resultSplits);
    //FrameFileWriterOperatorDescriptor writer = new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    IOperatorDescriptor writer = new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|");
    createPartitionConstraint(spec, writer, resultSplits);
    IConnectorDescriptor endingPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(endingPrinterConn, endingOp, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
Also used : OptimizedHybridHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) JoinComparatorFactory(org.apache.hyracks.dataflow.std.join.JoinComparatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) NestedLoopJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) UTF8StringNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) InMemoryHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) Common.createPartitionConstraint(org.apache.hyracks.examples.tpch.client.Common.createPartitionConstraint) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) FloatSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)

Example 2 with OptimizedHybridHashJoinOperatorDescriptor

use of org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor in project asterixdb by apache.

the class TPCHCustomerOptimizedHybridHashJoinTest method customerOrderCIDHybridHashJoin_Case1.

@Test
public void customerOrderCIDHybridHashJoin_Case1() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer4.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders4.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
    OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 243, 1.2, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
    String path = getClass().getName() + File.separator + "case1";
    IOperatorDescriptor printer = getPrinter(spec, path);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IConnectorDescriptor ordJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
    System.out.println("output to " + path);
}
Also used : OptimizedHybridHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) JoinComparatorFactory(org.apache.hyracks.dataflow.std.join.JoinComparatorFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 3 with OptimizedHybridHashJoinOperatorDescriptor

use of org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor in project asterixdb by apache.

the class TPCHCustomerOptimizedHybridHashJoinTest method customerOrderCIDHybridHashJoin_Case3.

@Test
public void customerOrderCIDHybridHashJoin_Case3() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer3.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders1.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
    OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 6, 122, 1.2, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
    String path = getClass().getName() + File.separator + "case3";
    IOperatorDescriptor printer = getPrinter(spec, path);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IConnectorDescriptor ordJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
    System.out.println("output to " + path);
}
Also used : OptimizedHybridHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) JoinComparatorFactory(org.apache.hyracks.dataflow.std.join.JoinComparatorFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 4 with OptimizedHybridHashJoinOperatorDescriptor

use of org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor in project asterixdb by apache.

the class TPCHCustomerOptimizedHybridHashJoinTest method customerOrderCIDHybridHashJoin_Case2.

@Test
public void customerOrderCIDHybridHashJoin_Case2() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer3.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders4.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(orderValueParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(custValueParserFactories, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
    OptimizedHybridHashJoinOperatorDescriptor join = new OptimizedHybridHashJoinOperatorDescriptor(spec, 15, 122, 1.2, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, custOrderJoinDesc, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), null);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
    String path = getClass().getName() + File.separator + "case2";
    IOperatorDescriptor printer = getPrinter(spec, path);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IConnectorDescriptor ordJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
    System.out.println("output to " + path);
}
Also used : OptimizedHybridHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) JoinComparatorFactory(org.apache.hyracks.dataflow.std.join.JoinComparatorFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 5 with OptimizedHybridHashJoinOperatorDescriptor

use of org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor in project asterixdb by apache.

the class JoinMultiComparator method generateOptimizedHashJoinRuntime.

private IOperatorDescriptor generateOptimizedHashJoinRuntime(JobGenContext context, IOperatorSchema[] inputSchemas, int[] keysLeft, int[] keysRight, IBinaryHashFunctionFamily[] hashFunFamilies, IBinaryComparatorFactory[] comparatorFactories, IPredicateEvaluatorFactory predEvaluatorFactory, RecordDescriptor recDescriptor, IOperatorDescriptorRegistry spec) throws AlgebricksException {
    IOperatorDescriptor opDesc;
    try {
        switch(kind) {
            case INNER:
                opDesc = new OptimizedHybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(), maxInputBuildSizeInFrames, getFudgeFactor(), keysLeft, keysRight, hashFunFamilies, comparatorFactories, recDescriptor, new JoinMultiComparatorFactory(comparatorFactories, keysLeft, keysRight), new JoinMultiComparatorFactory(comparatorFactories, keysRight, keysLeft), predEvaluatorFactory);
                break;
            case LEFT_OUTER:
                IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[inputSchemas[1].getSize()];
                for (int j = 0; j < nonMatchWriterFactories.length; j++) {
                    nonMatchWriterFactories[j] = context.getMissingWriterFactory();
                }
                opDesc = new OptimizedHybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(), maxInputBuildSizeInFrames, getFudgeFactor(), keysLeft, keysRight, hashFunFamilies, comparatorFactories, recDescriptor, new JoinMultiComparatorFactory(comparatorFactories, keysLeft, keysRight), new JoinMultiComparatorFactory(comparatorFactories, keysRight, keysLeft), predEvaluatorFactory, true, nonMatchWriterFactories);
                break;
            default:
                throw new NotImplementedException();
        }
    } catch (HyracksDataException e) {
        throw new AlgebricksException(e);
    }
    return opDesc;
}
Also used : OptimizedHybridHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Aggregations

IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)5 OptimizedHybridHashJoinOperatorDescriptor (org.apache.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor)5 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)4 JobSpecification (org.apache.hyracks.api.job.JobSpecification)4 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)4 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)4 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)4 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)4 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)4 JoinComparatorFactory (org.apache.hyracks.dataflow.std.join.JoinComparatorFactory)4 FileSplit (org.apache.hyracks.api.io.FileSplit)3 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)3 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)3 Test (org.junit.Test)3 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)1 NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException)1 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)1 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)1 IBinaryHashFunctionFamily (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily)1 IMissingWriterFactory (org.apache.hyracks.api.dataflow.value.IMissingWriterFactory)1