Search in sources :

Example 6 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by axbaretto.

the class TestNullInputMiniPlan method testFullHashJoinEmptyBoth.

/**
 * Feeds a FULL hash join with the single condition {@code a EQUALS b} into
 * {@code testTwoInputNullBatchHandling}.
 *
 * <p>The first two constructor arguments of the join config are {@code null}
 * (presumably the two child operators - confirm against {@code HashJoinPOP}).
 */
@Test
public void testFullHashJoinEmptyBoth() throws Exception {
    final PhysicalOperator fullJoin =
        new HashJoinPOP(
            null,
            null,
            Lists.newArrayList(joinCond("a", "EQUALS", "b")),
            JoinRelType.FULL);

    testTwoInputNullBatchHandling(fullJoin);
}
Also used : PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 7 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by axbaretto.

the class BasicPhysicalOpUnitTest method testSimpleHashJoin.

/**
 * Runs a LEFT hash join on {@code x EQUALS x1} over two JSON input streams and
 * compares the output against an explicit baseline.
 *
 * <p>All three left rows carry {@code x = 5} and two of the three right rows carry
 * {@code x1 = 5}, so the baseline lists the six resulting combinations.
 */
@SuppressWarnings("unchecked")
@Test
public void testSimpleHashJoin() {
    HashJoinPOP joinConf = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("x", "EQUALS", "x1")), JoinRelType.LEFT);
    // TODO - figure out where to add validation, column names must be unique, even between the two batches,
    // for all columns, not just the one in the join condition
    // TODO - if any are common between the two, it is failing in the generated setup method in HashJoinProbeGen
    List<String> leftJsonBatches = Lists.newArrayList(
        "[{\"x\": 5, \"a\" : \"a string\"}]",
        "[{\"x\": 5, \"a\" : \"a different string\"},{\"x\": 5, \"a\" : \"meh\"}]");
    List<String> rightJsonBatches = Lists.newArrayList(
        "[{\"x1\": 5, \"a2\" : \"asdf\"}]",
        "[{\"x1\": 6, \"a2\" : \"qwerty\"},{\"x1\": 5, \"a2\" : \"12345\"}]");

    // Long literals use the upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
    opTestBuilder()
        .physicalOperator(joinConf)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches))
        .baselineColumns("x", "a", "a2", "x1")
        .baselineValues(5L, "a string", "asdf", 5L)
        .baselineValues(5L, "a string", "12345", 5L)
        .baselineValues(5L, "a different string", "asdf", 5L)
        .baselineValues(5L, "a different string", "12345", 5L)
        .baselineValues(5L, "meh", "asdf", 5L)
        .baselineValues(5L, "meh", "12345", 5L)
        .go();
}
Also used : HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 8 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestHashJoinJPPD method testBroadcastHashJoin1Cond.

/**
 * Runs an INNER hash join on {@code lft EQUALS rgt} configured with a
 * {@code RuntimeFilterDef} that holds one bloom filter definition for that column pair.
 *
 * <p>The left stream holds three rows with {@code lft = 0} followed by 2500 single-row
 * batches with {@code lft} running from 1 to 2500; the right stream holds three rows
 * with {@code rgt = 0}. The test expects nine output rows in total.
 */
@SuppressWarnings("unchecked")
@Test
public void testBroadcastHashJoin1Cond() {
    final int filterBytes = BloomFilter.optimalNumOfBytes(2600, 0.01);
    List<BloomFilterDef> filterDefs = new ArrayList<>();
    filterDefs.add(new BloomFilterDef(filterBytes, true, "lft", "rgt"));
    RuntimeFilterDef runtimeFilter = new RuntimeFilterDef(true, false, filterDefs, false, -1);

    HashJoinPOP innerJoin = new HashJoinPOP(
        null,
        null,
        Lists.newArrayList(joinCond("lft", "EQUALS", "rgt")),
        JoinRelType.INNER,
        runtimeFilter);

    // Hash-join options applied locally on the fixture's option manager.
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_partitions", 4);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_rows_in_batch", 64);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.max_batches_in_memory", 8);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.enable.runtime_filter", true);

    // Both sides start with three rows sharing the key 0.
    List<String> leftBatches = Lists.newArrayList(
        "[{\"lft\": 0, \"a\" : \"a string\"}]",
        "[{\"lft\": 0, \"a\" : \"a different string\"},{\"lft\": 0, \"a\" : \"yet another\"}]");
    List<String> rightBatches = Lists.newArrayList(
        "[{\"rgt\": 0, \"b\" : \"a string\"}]",
        "[{\"rgt\": 0, \"b\" : \"a different string\"},{\"rgt\": 0, \"b\" : \"yet another\"}]");

    // Append one single-row batch per key 1..2500 to the left side only.
    final int extraLeftRows = 2500;
    int key = 0;
    while (key < extraLeftRows) {
        key++;
        leftBatches.add("[{\"lft\": " + key + ", \"a\" : \"a string\"}]");
    }

    legacyOpTestBuilder()
        .physicalOperator(innerJoin)
        .inputDataStreamsJson(Lists.newArrayList(leftBatches, rightBatches))
        .baselineColumns("lft", "a", "b", "rgt")
        .expectedTotalRows(9)
        .go();
}
Also used : ArrayList(java.util.ArrayList) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test) SlowTest(org.apache.drill.categories.SlowTest)

Example 9 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestHashJoinJPPD method testBroadcastHashJoin2Cond.

/**
 * Runs an INNER hash join on two conditions, {@code lft EQUALS rgt} and {@code a EQUALS b},
 * configured with a {@code RuntimeFilterDef} that holds one bloom filter definition per
 * column pair.
 *
 * <p>The left stream holds three rows with {@code lft = 0} followed by 2500 single-row
 * batches with {@code lft} running from 1 to 2500; the right stream holds three rows
 * with {@code rgt = 0}. The test expects three output rows in total.
 */
@SuppressWarnings("unchecked")
@Test
public void testBroadcastHashJoin2Cond() {
    final int filterBytes = BloomFilter.optimalNumOfBytes(2600, 0.01);
    BloomFilterDef keyFilter = new BloomFilterDef(filterBytes, true, "lft", "rgt");
    BloomFilterDef textFilter = new BloomFilterDef(filterBytes, true, "a", "b");
    List<BloomFilterDef> filterDefs = new ArrayList<>();
    filterDefs.add(keyFilter);
    filterDefs.add(textFilter);
    RuntimeFilterDef runtimeFilter = new RuntimeFilterDef(true, false, filterDefs, false, -1);

    HashJoinPOP innerJoin = new HashJoinPOP(
        null,
        null,
        Lists.newArrayList(joinCond("lft", "EQUALS", "rgt"), joinCond("a", "EQUALS", "b")),
        JoinRelType.INNER,
        runtimeFilter);

    // Hash-join options applied locally on the fixture's option manager.
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_partitions", 4);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_rows_in_batch", 128);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.max_batches_in_memory", 8);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.enable.runtime_filter", true);

    // Both sides start with three rows sharing the key 0.
    List<String> leftBatches = Lists.newArrayList(
        "[{\"lft\": 0, \"a\" : \"a string\"}]",
        "[{\"lft\": 0, \"a\" : \"a different string\"},{\"lft\": 0, \"a\" : \"yet another\"}]");
    List<String> rightBatches = Lists.newArrayList(
        "[{\"rgt\": 0, \"b\" : \"a string\"}]",
        "[{\"rgt\": 0, \"b\" : \"a different string\"},{\"rgt\": 0, \"b\" : \"yet another\"}]");

    // Append one single-row batch per key 1..2500 to the left side only.
    final int extraLeftRows = 2500;
    int key = 0;
    while (key < extraLeftRows) {
        key++;
        leftBatches.add("[{\"lft\": " + key + ", \"a\" : \"a string\"}]");
    }

    legacyOpTestBuilder()
        .physicalOperator(innerJoin)
        .inputDataStreamsJson(Lists.newArrayList(leftBatches, rightBatches))
        .baselineColumns("lft", "a", "b", "rgt")
        .expectedTotalRows(3)
        .go();
}
Also used : ArrayList(java.util.ArrayList) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test) SlowTest(org.apache.drill.categories.SlowTest)

Example 10 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestHashJoinOutcome method testHashJoinWhenProbeIsNONE.

/**
 * Testing for DRILL-6755: No Hash Table is built when the first probe batch is NONE.
 *
 * <p>The left (probe) input reports {@code NONE} immediately, while the right (build) input
 * reports {@code OK_NEW_SCHEMA}, {@code OK} and then {@code NONE}. The join is expected to
 * return {@code OK_NEW_SCHEMA} on the first call to {@code next()} and {@code NONE} on the second.
 */
@Test
public void testHashJoinWhenProbeIsNONE() {
    inputOutcomesLeft.add(RecordBatch.IterOutcome.NONE);
    inputOutcomesRight.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    inputOutcomesRight.add(RecordBatch.IterOutcome.OK);
    inputOutcomesRight.add(RecordBatch.IterOutcome.NONE);
    // for the build side input - use multiple batches (to check that they are all cleared/drained)
    final List<VectorContainer> buildSideinputContainer = new ArrayList<>(5);
    buildSideinputContainer.add(emptyInputRowSetRight.container());
    buildSideinputContainer.add(nonEmptyInputRowSetRight.container());
    RowSet.SingleRowSet secondInputRowSetRight = operatorFixture.rowSetBuilder(inputSchemaRight).addRow(456).build();
    RowSet.SingleRowSet thirdInputRowSetRight = operatorFixture.rowSetBuilder(inputSchemaRight).addRow(789).build();
    try {
        buildSideinputContainer.add(secondInputRowSetRight.container());
        buildSideinputContainer.add(thirdInputRowSetRight.container());
        final MockRecordBatch mockInputBatchRight = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext, buildSideinputContainer, inputOutcomesRight, batchSchemaRight);
        final MockRecordBatch mockInputBatchLeft = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext, inputContainerLeft, inputOutcomesLeft, batchSchemaLeft);
        List<JoinCondition> conditions = Lists.newArrayList();
        conditions.add(new JoinCondition(SqlKind.EQUALS.toString(), FieldReference.getWithQuotedRef("leftcol"), FieldReference.getWithQuotedRef("rightcol")));
        HashJoinPOP hjConf = new HashJoinPOP(null, null, conditions, JoinRelType.INNER);
        HashJoinBatch hjBatch = new HashJoinBatch(hjConf, operatorFixture.getFragmentContext(), mockInputBatchLeft, mockInputBatchRight);
        // assertSame takes (expected, actual); keeping that order makes failure messages read correctly.
        RecordBatch.IterOutcome gotOutcome = hjBatch.next();
        assertSame(RecordBatch.IterOutcome.OK_NEW_SCHEMA, gotOutcome);
        gotOutcome = hjBatch.next();
        assertSame(RecordBatch.IterOutcome.NONE, gotOutcome);
    } finally {
        // Release the locally built row sets even when an assertion above fails.
        secondInputRowSetRight.clear();
        thirdInputRowSetRight.clear();
        buildSideinputContainer.clear();
    }
}
Also used : MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) RecordBatch(org.apache.drill.exec.record.RecordBatch) ArrayList(java.util.ArrayList) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) VectorContainer(org.apache.drill.exec.record.VectorContainer) JoinCondition(org.apache.drill.common.logical.data.JoinCondition) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Aggregations

HashJoinPOP (org.apache.drill.exec.physical.config.HashJoinPOP): 34; Test (org.junit.Test): 30; PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator): 11; RecordBatch (org.apache.drill.exec.record.RecordBatch): 10; BatchSchema (org.apache.drill.exec.record.BatchSchema): 8; OperatorTest (org.apache.drill.categories.OperatorTest): 6; LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder): 6; SlowTest (org.apache.drill.categories.SlowTest): 5; JoinCondition (org.apache.drill.common.logical.data.JoinCondition): 5; BatchSchemaBuilder (org.apache.drill.exec.record.BatchSchemaBuilder): 4; SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 4; SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder): 4; ArrayList (java.util.ArrayList): 3; JoinRelType (org.apache.calcite.rel.core.JoinRelType): 3; RuntimeFilterDef (org.apache.drill.exec.work.filter.RuntimeFilterDef): 3; MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch): 2; BloomFilterDef (org.apache.drill.exec.work.filter.BloomFilterDef): 2; RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 1; VectorContainer (org.apache.drill.exec.record.VectorContainer): 1