Use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by axbaretto.
Class TestNullInputMiniPlan, method testFullHashJoinEmptyBoth: builds a FULL hash join over two empty inputs and checks the operator's null-batch handling.
@Test
public void testFullHashJoinEmptyBoth() throws Exception {
  final PhysicalOperator join = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("a", "EQUALS", "b")), JoinRelType.FULL);
  testTwoInputNullBatchHandling(join);
}
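These tests call a joinCond(left, relationship, right) helper inherited from their shared test base class rather than constructing JoinCondition objects directly. A minimal sketch of such a helper, modeled on the explicit construction used in TestHashJoinOutcome further down (the actual helper in Drill may parse full expressions instead of bare field references):

// Sketch only: build a JoinCondition from two field names and a relationship such as "EQUALS",
// mirroring the explicit construction in testHashJoinWhenProbeIsNONE below.
protected JoinCondition joinCond(String leftField, String relationship, String rightField) {
  return new JoinCondition(relationship,
      FieldReference.getWithQuotedRef(leftField),
      FieldReference.getWithQuotedRef(rightField));
}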
Use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by axbaretto.
Class BasicPhysicalOpUnitTest, method testSimpleHashJoin: runs a LEFT hash join over small JSON input batches and verifies the joined rows against a baseline.
@SuppressWarnings("unchecked")
@Test
public void testSimpleHashJoin() {
  HashJoinPOP joinConf = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("x", "EQUALS", "x1")), JoinRelType.LEFT);
  // TODO - figure out where to add validation: column names must be unique across the two batches,
  // for all columns, not just the one in the join condition.
  // TODO - if any names are common between the two inputs, the generated setup method in HashJoinProbeGen fails.
  List<String> leftJsonBatches = Lists.newArrayList("[{\"x\": 5, \"a\" : \"a string\"}]", "[{\"x\": 5, \"a\" : \"a different string\"},{\"x\": 5, \"a\" : \"meh\"}]");
  List<String> rightJsonBatches = Lists.newArrayList("[{\"x1\": 5, \"a2\" : \"asdf\"}]", "[{\"x1\": 6, \"a2\" : \"qwerty\"},{\"x1\": 5, \"a2\" : \"12345\"}]");
  opTestBuilder()
      .physicalOperator(joinConf)
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches))
      .baselineColumns("x", "a", "a2", "x1")
      .baselineValues(5L, "a string", "asdf", 5L)
      .baselineValues(5L, "a string", "12345", 5L)
      .baselineValues(5L, "a different string", "asdf", 5L)
      .baselineValues(5L, "a different string", "12345", 5L)
      .baselineValues(5L, "meh", "asdf", 5L)
      .baselineValues(5L, "meh", "12345", 5L)
      .go();
}
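The JSON-string batches above are one way to feed the operator under test; TestHashJoinOutcome further down builds its inputs with the RowSet API instead. A hedged sketch of the three left-side rows expressed that way (the schema names, types, and the availability of an operatorFixture are assumptions for illustration, not part of this test):

// Sketch only: the same three left-side rows built with a RowSet instead of JSON strings.
TupleMetadata leftSchema = new SchemaBuilder()
    .add("x", TypeProtos.MinorType.BIGINT)
    .add("a", TypeProtos.MinorType.VARCHAR)
    .buildSchema();
RowSet.SingleRowSet leftRows = operatorFixture.rowSetBuilder(leftSchema)
    .addRow(5L, "a string")
    .addRow(5L, "a different string")
    .addRow(5L, "meh")
    .build();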
Use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
Class TestHashJoinJPPD, method testBroadcastHashJoin1Cond: a broadcast hash join with a single join condition and a runtime (join predicate push-down) bloom filter.
@SuppressWarnings("unchecked")
@Test
public void testBroadcastHashJoin1Cond() {
  List<BloomFilterDef> bloomFilterDefs = new ArrayList<>();
  int numBytes = BloomFilter.optimalNumOfBytes(2600, 0.01);
  BloomFilterDef bloomFilterDef = new BloomFilterDef(numBytes, true, "lft", "rgt");
  bloomFilterDefs.add(bloomFilterDef);
  RuntimeFilterDef runtimeFilterDef = new RuntimeFilterDef(true, false, bloomFilterDefs, false, -1);
  HashJoinPOP joinConf = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("lft", "EQUALS", "rgt")), JoinRelType.INNER, runtimeFilterDef);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_partitions", 4);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_rows_in_batch", 64);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.max_batches_in_memory", 8);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.enable.runtime_filter", true);
  // Put some duplicate values
  List<String> leftTable = Lists.newArrayList("[{\"lft\": 0, \"a\" : \"a string\"}]", "[{\"lft\": 0, \"a\" : \"a different string\"},{\"lft\": 0, \"a\" : \"yet another\"}]");
  List<String> rightTable = Lists.newArrayList("[{\"rgt\": 0, \"b\" : \"a string\"}]", "[{\"rgt\": 0, \"b\" : \"a different string\"},{\"rgt\": 0, \"b\" : \"yet another\"}]");
  int numRows = 2500;
  for (int cnt = 1; cnt <= numRows; cnt++) {
    leftTable.add("[{\"lft\": " + cnt + ", \"a\" : \"a string\"}]");
  }
  legacyOpTestBuilder()
      .physicalOperator(joinConf)
      .inputDataStreamsJson(Lists.newArrayList(leftTable, rightTable))
      .baselineColumns("lft", "a", "b", "rgt")
      .expectedTotalRows(9)
      .go();
}
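BloomFilter.optimalNumOfBytes(2600, 0.01) sizes the runtime filter for roughly 2600 distinct build-side keys at a 1% false-positive rate. A hedged sketch of the standard Bloom-filter sizing formula this kind of helper is based on (Drill's implementation may additionally round the result, for example up to a power of two):

// Sketch only: standard Bloom-filter sizing, m = -n * ln(p) / (ln 2)^2 bits,
// for n expected entries and false-positive probability p.
long n = 2600;
double p = 0.01;
long bits = (long) Math.ceil(-n * Math.log(p) / (Math.log(2) * Math.log(2)));
long bytes = (bits + 7) / 8;  // on the order of 3 KB before any rounding Drill may apply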
Use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
Class TestHashJoinJPPD, method testBroadcastHashJoin2Cond: the same broadcast hash join with two join conditions and two runtime bloom filters; the additional a = b condition shrinks the expected result from 9 rows to 3.
@SuppressWarnings("unchecked")
@Test
public void testBroadcastHashJoin2Cond() {
  List<BloomFilterDef> bloomFilterDefs = new ArrayList<>();
  int numBytes = BloomFilter.optimalNumOfBytes(2600, 0.01);
  BloomFilterDef bloomFilterDef = new BloomFilterDef(numBytes, true, "lft", "rgt");
  BloomFilterDef bloomFilterDef1 = new BloomFilterDef(numBytes, true, "a", "b");
  bloomFilterDefs.add(bloomFilterDef);
  bloomFilterDefs.add(bloomFilterDef1);
  RuntimeFilterDef runtimeFilterDef = new RuntimeFilterDef(true, false, bloomFilterDefs, false, -1);
  HashJoinPOP joinConf = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("lft", "EQUALS", "rgt"), joinCond("a", "EQUALS", "b")),
      JoinRelType.INNER, runtimeFilterDef);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_partitions", 4);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_rows_in_batch", 128);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.max_batches_in_memory", 8);
  operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.enable.runtime_filter", true);
  // Put some duplicate values
  List<String> leftTable = Lists.newArrayList("[{\"lft\": 0, \"a\" : \"a string\"}]", "[{\"lft\": 0, \"a\" : \"a different string\"},{\"lft\": 0, \"a\" : \"yet another\"}]");
  List<String> rightTable = Lists.newArrayList("[{\"rgt\": 0, \"b\" : \"a string\"}]", "[{\"rgt\": 0, \"b\" : \"a different string\"},{\"rgt\": 0, \"b\" : \"yet another\"}]");
  int numRows = 2500;
  for (int cnt = 1; cnt <= numRows; cnt++) {
    leftTable.add("[{\"lft\": " + cnt + ", \"a\" : \"a string\"}]");
  }
  legacyOpTestBuilder()
      .physicalOperator(joinConf)
      .inputDataStreamsJson(Lists.newArrayList(leftTable, rightTable))
      .baselineColumns("lft", "a", "b", "rgt")
      .expectedTotalRows(3)
      .go();
}
Use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
Class TestHashJoinOutcome, method testHashJoinWhenProbeIsNONE:
/**
* Testing for DRILL-6755: No Hash Table is built when the first probe batch is NONE
*/
@Test
public void testHashJoinWhenProbeIsNONE() {
  inputOutcomesLeft.add(RecordBatch.IterOutcome.NONE);
  inputOutcomesRight.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomesRight.add(RecordBatch.IterOutcome.OK);
  inputOutcomesRight.add(RecordBatch.IterOutcome.NONE);
  // for the build side input - use multiple batches (to check that they are all cleared/drained)
  final List<VectorContainer> buildSideinputContainer = new ArrayList<>(5);
  buildSideinputContainer.add(emptyInputRowSetRight.container());
  buildSideinputContainer.add(nonEmptyInputRowSetRight.container());
  RowSet.SingleRowSet secondInputRowSetRight = operatorFixture.rowSetBuilder(inputSchemaRight).addRow(456).build();
  RowSet.SingleRowSet thirdInputRowSetRight = operatorFixture.rowSetBuilder(inputSchemaRight).addRow(789).build();
  buildSideinputContainer.add(secondInputRowSetRight.container());
  buildSideinputContainer.add(thirdInputRowSetRight.container());
  final MockRecordBatch mockInputBatchRight = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      buildSideinputContainer, inputOutcomesRight, batchSchemaRight);
  final MockRecordBatch mockInputBatchLeft = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      inputContainerLeft, inputOutcomesLeft, batchSchemaLeft);
  List<JoinCondition> conditions = Lists.newArrayList();
  conditions.add(new JoinCondition(SqlKind.EQUALS.toString(),
      FieldReference.getWithQuotedRef("leftcol"), FieldReference.getWithQuotedRef("rightcol")));
  HashJoinPOP hjConf = new HashJoinPOP(null, null, conditions, JoinRelType.INNER);
  HashJoinBatch hjBatch = new HashJoinBatch(hjConf, operatorFixture.getFragmentContext(),
      mockInputBatchLeft, mockInputBatchRight);
  RecordBatch.IterOutcome gotOutcome = hjBatch.next();
  assertSame(RecordBatch.IterOutcome.OK_NEW_SCHEMA, gotOutcome);
  gotOutcome = hjBatch.next();
  assertSame(RecordBatch.IterOutcome.NONE, gotOutcome);
  secondInputRowSetRight.clear();
  thirdInputRowSetRight.clear();
  buildSideinputContainer.clear();
}
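The test clears its RowSets at the end; in a standalone version of this pattern one would typically also release the operator and the mock inputs. A minimal sketch, assuming the surrounding fixture does not already do this in its teardown and that these close() methods throw no checked exceptions here:

// Sketch only: release operator and mock-input resources once the assertions have run.
hjBatch.close();
mockInputBatchLeft.close();
mockInputBatchRight.close();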