use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
the class TestOutputBatchSize method testLeftOuterHashJoin.
/**
 * Runs a LEFT hash join on {@code c1 = c2} over two single-batch JSON inputs whose
 * keys match one-to-one, with the operator's output batch size capped at half of
 * the expected result size, and verifies the batch count, batch size and rows.
 */
@Test
public void testLeftOuterHashJoin() throws Exception {
  HashJoinPOP hashJoin = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.LEFT);
  mockOpContext(hashJoin, initReservation, maxAllocation);
  numRows = 4000 * 2;

  // One pass builds all three JSON documents; ids run from 0 to numRows inclusive.
  //   left row     : "a1" : 5, "b1" : wideString, "c1" : <id>
  //   right row    : "a2" : 6, "b2" : wideString, "c2" : <id>
  //   expected row : the left fields followed by the right fields for the same <id>
  StringBuilder leftJson = new StringBuilder("[");
  StringBuilder rightJson = new StringBuilder("[");
  StringBuilder expectedJson = new StringBuilder("[");
  for (int id = 0; id <= numRows; id++) {
    // Every row except the last one is followed by a comma.
    String separator = id < numRows ? "," : "";
    String leftFields = "\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : " + id;
    String rightFields = "\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : " + id;
    leftJson.append("{").append(leftFields).append("}").append(separator);
    rightJson.append("{").append(rightFields).append("}").append(separator);
    expectedJson.append("{").append(leftFields).append(", ").append(rightFields).append("}").append(separator);
  }
  leftJson.append("]");
  rightJson.append("]");
  expectedJson.append("]");

  List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());
  List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());
  List<String> expectedJsonBatches = Lists.newArrayList(expectedJson.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // Cap the output batch size at half of the expected total size.
  // NOTE(review): 4 batches are expected; the original note attributes this to a
  // fragmentation factor of 2 (it mentions "merge join", although this is a hash
  // join test) -- confirm against the operator's batch sizing logic.
  long batchSizeLimit = totalSize / 2;
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", batchSizeLimit);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(4) // verify number of batches
      .expectedBatchSize(batchSizeLimit) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  // One baseline row per id, matching the expected JSON built above.
  for (long id = 0; id <= numRows; id++) {
    opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
  }
  opTestBuilder.go();
}
use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
the class TestOutputBatchSize method testHashJoinSingleOutputBatch.
/**
 * Runs an INNER hash join on {@code c1 = c2} with the output batch size set to twice
 * the expected result size, and verifies that the whole result arrives in a single
 * batch of the expected size.
 */
@Test
public void testHashJoinSingleOutputBatch() throws Exception {
  HashJoinPOP hashJoin = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
  mockOpContext(hashJoin, initReservation, maxAllocation);

  // create multiple batches from both sides.
  numRows = 4096 * 2;

  // One pass builds all three JSON documents; ids run from 0 to numRows inclusive.
  //   left row     : "a1" : 5, "b1" : wideString, "c1" : <id>
  //   right row    : "a2" : 6, "b2" : wideString, "c2" : <id>
  //   expected row : the left fields followed by the right fields for the same <id>
  StringBuilder leftDoc = new StringBuilder("[");
  StringBuilder rightDoc = new StringBuilder("[");
  StringBuilder expectedDoc = new StringBuilder("[");
  for (int id = 0; id <= numRows; id++) {
    // Every row except the last one is followed by a comma.
    String separator = id < numRows ? "," : "";
    String leftFields = "\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : " + id;
    String rightFields = "\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : " + id;
    leftDoc.append("{").append(leftFields).append("}").append(separator);
    rightDoc.append("{").append(rightFields).append("}").append(separator);
    expectedDoc.append("{").append(leftFields).append(", ").append(rightFields).append("}").append(separator);
  }
  leftDoc.append("]");
  rightDoc.append("]");
  expectedDoc.append("]");

  List<String> leftJsonBatches = Lists.newArrayList(leftDoc.toString());
  List<String> rightJsonBatches = Lists.newArrayList(rightDoc.toString());
  List<String> expectedJsonBatches = Lists.newArrayList(expectedDoc.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // Allow twice the expected total size per output batch: a single batch is expected.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize * 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(1) // verify number of batches
      .expectedBatchSize(totalSize) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  // One baseline row per id, matching the expected JSON built above.
  for (long id = 0; id <= numRows; id++) {
    opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
  }
  opTestBuilder.go();
}
use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
the class TestOutputBatchSize method testRightOuterHashJoin.
/**
 * Runs a RIGHT hash join on {@code c1 = c2} over two single-batch JSON inputs whose
 * keys match one-to-one, with the operator's output batch size capped at half of
 * the expected result size, and verifies the batch count, batch size and rows.
 */
@Test
public void testRightOuterHashJoin() throws Exception {
  HashJoinPOP hashJoin = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.RIGHT);
  mockOpContext(hashJoin, initReservation, maxAllocation);
  numRows = 4000 * 2;

  // One pass builds all three JSON documents; ids run from 0 to numRows inclusive.
  //   left row     : "a1" : 5, "b1" : wideString, "c1" : <id>
  //   right row    : "a2" : 6, "b2" : wideString, "c2" : <id>
  //   expected row : the left fields followed by the right fields for the same <id>
  StringBuilder probeSideJson = new StringBuilder("[");
  StringBuilder buildSideJson = new StringBuilder("[");
  StringBuilder joinedJson = new StringBuilder("[");
  for (int id = 0; id <= numRows; id++) {
    // Every row except the last one is followed by a comma.
    String separator = id < numRows ? "," : "";
    String leftFields = "\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : " + id;
    String rightFields = "\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : " + id;
    probeSideJson.append("{").append(leftFields).append("}").append(separator);
    buildSideJson.append("{").append(rightFields).append("}").append(separator);
    joinedJson.append("{").append(leftFields).append(", ").append(rightFields).append("}").append(separator);
  }
  probeSideJson.append("]");
  buildSideJson.append("]");
  joinedJson.append("]");

  List<String> leftJsonBatches = Lists.newArrayList(probeSideJson.toString());
  List<String> rightJsonBatches = Lists.newArrayList(buildSideJson.toString());
  List<String> expectedJsonBatches = Lists.newArrayList(joinedJson.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // Cap the output batch size at half of the expected total size.
  // NOTE(review): 4 batches are expected; the original note attributes this to a
  // fragmentation factor of 2 (it mentions "merge join", although this is a hash
  // join test) -- confirm against the operator's batch sizing logic.
  long batchSizeLimit = totalSize / 2;
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", batchSizeLimit);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(4) // verify number of batches
      .expectedBatchSize(batchSizeLimit) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  // One baseline row per id, matching the expected JSON built above.
  for (long id = 0; id <= numRows; id++) {
    opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
  }
  opTestBuilder.go();
}
use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
the class TestNullInputMiniPlan method testFullHashJoinEmptyBoth.
/**
 * Builds a FULL hash join on {@code a = b} and hands it to the shared
 * two-input null-batch handling check.
 */
@Test
public void testFullHashJoinEmptyBoth() throws Exception {
  // Keep the declared type as PhysicalOperator so the helper is invoked exactly as before.
  final PhysicalOperator fullOuterJoin = new HashJoinPOP(
      null,
      null,
      Lists.newArrayList(joinCond("a", "EQUALS", "b")),
      JoinRelType.FULL,
      null);
  testTwoInputNullBatchHandling(fullOuterJoin);
}
use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.
the class BasicPhysicalOpUnitTest method testSimpleHashJoin.
/**
 * Runs a LEFT hash join on {@code x = x1} over two small multi-batch JSON inputs
 * and checks the six joined rows produced for the key value 5.
 */
@SuppressWarnings("unchecked")
@Test
public void testSimpleHashJoin() {
  HashJoinPOP hashJoinConfig = new HashJoinPOP(null, null,
      Lists.newArrayList(joinCond("x", "EQUALS", "x1")), JoinRelType.LEFT, null);

  // TODO - decide where validation belongs: column names must be unique across both
  // input batches, for every column and not only those used in the join condition.
  // TODO - when a name is shared between the two sides, the generated setup method in
  // HashJoinProbeGen fails.
  List<String> leftInput = Lists.newArrayList(
      "[{\"x\": 5, \"a\" : \"a string\"}]",
      "[{\"x\": 5, \"a\" : \"a different string\"},{\"x\": 5, \"a\" : \"meh\"}]");
  List<String> rightInput = Lists.newArrayList(
      "[{\"x1\": 5, \"a2\" : \"asdf\"}]",
      "[{\"x1\": 6, \"a2\" : \"qwerty\"},{\"x1\": 5, \"a2\" : \"12345\"}]");

  // Each of the three left rows pairs with the two right rows whose x1 is 5.
  legacyOpTestBuilder()
      .physicalOperator(hashJoinConfig)
      .inputDataStreamsJson(Lists.newArrayList(leftInput, rightInput))
      .baselineColumns("x", "a", "a2", "x1")
      .baselineValues(5L, "a string", "asdf", 5L)
      .baselineValues(5L, "a string", "12345", 5L)
      .baselineValues(5L, "a different string", "asdf", 5L)
      .baselineValues(5L, "a different string", "12345", 5L)
      .baselineValues(5L, "meh", "asdf", 5L)
      .baselineValues(5L, "meh", "12345", 5L)
      .go();
}
Aggregations