Search in sources :

Example 1 with MergeJoinPOP

use of org.apache.drill.exec.physical.config.MergeJoinPOP in project drill by axbaretto.

the class MergeJoinPrel method getPhysicalOperator.

/**
 * Creates the physical merge-join operator corresponding to this rel node.
 *
 * <p>The output field names of this node are split at the left input's field count:
 * the leading names are handed to {@code buildJoinConditions} as the left fields and
 * the remaining names as the right fields, together with {@code leftKeys} and
 * {@code rightKeys}. Both inputs are converted to physical operators first.
 *
 * @param creator plan creator passed to both inputs and used to attach metadata to the result
 * @return the value returned by {@code creator.addMetadata} for the new {@link MergeJoinPOP}
 * @throws IOException declared by the signature; may originate from the child conversions
 */
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
    final List<String> outputNames = getRowType().getFieldNames();
    // Join output columns are expected to be uniquely named (checked only when assertions are on).
    assert isUnique(outputNames);

    // The first splitAt names belong to the left input, the rest to the right input.
    final int splitAt = left.getRowType().getFieldCount();
    final List<String> leftNames = outputNames.subList(0, splitAt);
    final List<String> rightNames = outputNames.subList(splitAt, outputNames.size());

    final PhysicalOperator leftInput = ((Prel) left).getPhysicalOperator(creator);
    final PhysicalOperator rightInput = ((Prel) right).getPhysicalOperator(creator);
    final JoinRelType joinType = this.getJoinType();

    // buildJoinConditions fills the list it is given.
    final List<JoinCondition> joinConditions = Lists.newArrayList();
    buildJoinConditions(joinConditions, leftNames, rightNames, leftKeys, rightKeys);

    return creator.addMetadata(this, new MergeJoinPOP(leftInput, rightInput, joinConditions, joinType));
}
Also used : JoinRelType(org.apache.calcite.rel.core.JoinRelType) MergeJoinPOP(org.apache.drill.exec.physical.config.MergeJoinPOP) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JoinCondition(org.apache.drill.common.logical.data.JoinCondition)

Example 2 with MergeJoinPOP

use of org.apache.drill.exec.physical.config.MergeJoinPOP in project drill by axbaretto.

the class TestNullInputMiniPlan method testLeftMergeJoinEmptyBoth.

/**
 * Runs the two-input null-batch handling check against a LEFT merge join
 * whose single join condition is {@code a EQUALS b}.
 */
@Test
public void testLeftMergeJoinEmptyBoth() throws Exception {
    // Both child operators are passed as null; the helper is responsible for the inputs.
    final PhysicalOperator leftMergeJoin = new MergeJoinPOP(
        null,
        null,
        Lists.newArrayList(joinCond("a", "EQUALS", "b")),
        JoinRelType.LEFT);
    testTwoInputNullBatchHandling(leftMergeJoin);
}
Also used : MergeJoinPOP(org.apache.drill.exec.physical.config.MergeJoinPOP) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) Test(org.junit.Test)

Example 3 with MergeJoinPOP

use of org.apache.drill.exec.physical.config.MergeJoinPOP in project drill by apache.

the class TestOutputBatchSize method testMergeJoinMultipleOutputBatches.

/**
 * Runs an INNER merge join on {@code c1 = c2} with the output batch size option set to
 * half of the expected total output size, and verifies the number and size of the
 * resulting batches along with the joined rows.
 */
@Test
public void testMergeJoinMultipleOutputBatches() throws Exception {
    MergeJoinPOP joinOp = new MergeJoinPOP(null, null,
        Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
    mockOpContext(joinOp, initReservation, maxAllocation);

    // Left input: ids 0..numRows inclusive, each row shaped like
    // "a1" : 5, "b1" : wideString, "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString)
            .append("\",\"c1\" : ").append(id).append("},");
    }
    // The final row carries no trailing comma.
    leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString)
        .append("\",\"c1\" : ").append(numRows).append("}");
    leftJson.append("]");
    List<String> leftJsonBatches = Lists.newArrayList();
    leftJsonBatches.add(leftJson.toString());

    // Right input: ids 0..numRows inclusive, each row shaped like
    // "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString)
            .append("\",\"c2\" : ").append(id).append("},");
    }
    rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString)
        .append("\",\"c2\" : ").append(numRows).append("}");
    rightJson.append("]");
    List<String> rightJsonBatches = Lists.newArrayList();
    rightJsonBatches.add(rightJson.toString());

    // Expected output: the left and right row with the same id, side by side, e.g.
    // "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
    // "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
    // "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
    StringBuilder expectedJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        expectedJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString)
            .append("\",\"c1\" : ").append(id);
        expectedJson.append(", \"a2\": 6, \"b2\" : \"").append(wideString)
            .append("\",\"c2\" : ").append(id).append("},");
    }
    expectedJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString)
        .append("\",\"c1\" : ").append(numRows);
    expectedJson.append(", \"a2\": 6, \"b2\" : \"").append(wideString)
        .append("\",\"c2\" : ").append(numRows).append("}");
    expectedJson.append("]");
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(expectedJson.toString());

    long totalSize = getExpectedSize(expectedJsonBatches);
    // Set the output batch size to 1/2 of the total expected size.
    // We will get approximately 4 batches because of the fragmentation factor of 2
    // accounted for in merge join.
    final long targetBatchSize = totalSize / 2;
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", targetBatchSize);

    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(joinOp)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        .expectedNumBatches(4) // verify number of batches
        .expectedBatchSize(targetBatchSize) // verify batch size
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id, 0..numRows inclusive.
    for (long id = 0; id < numRows + 1; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : MergeJoinPOP(org.apache.drill.exec.physical.config.MergeJoinPOP) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) Test(org.junit.Test)

Example 4 with MergeJoinPOP

use of org.apache.drill.exec.physical.config.MergeJoinPOP in project drill by apache.

the class TestOutputBatchSize method testMergeJoinLowerLimit.

/**
 * Tests the lower limit of at least one batch: runs a RIGHT merge join on
 * {@code c1 = c2} with a very small output batch size (128) and verifies the
 * expected number of output batches and the joined rows.
 */
@Test
public void testMergeJoinLowerLimit() throws Exception {
    MergeJoinPOP joinOp = new MergeJoinPOP(null, null,
        Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.RIGHT);
    mockOpContext(joinOp, initReservation, maxAllocation);

    // Overrides the row count used by the loops below.
    numRows = 10;

    // Left input: ids 0..numRows inclusive, each row shaped like
    // "a1" : 5, "b1" : wideString, "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString)
            .append("\",\"c1\" : ").append(id).append("},");
    }
    // The final row carries no trailing comma.
    leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString)
        .append("\",\"c1\" : ").append(numRows).append("}");
    leftJson.append("]");
    List<String> leftJsonBatches = Lists.newArrayList();
    leftJsonBatches.add(leftJson.toString());

    // Right input: ids 0..numRows inclusive, each row shaped like
    // "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString)
            .append("\",\"c2\" : ").append(id).append("},");
    }
    rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString)
        .append("\",\"c2\" : ").append(numRows).append("}");
    rightJson.append("]");
    List<String> rightJsonBatches = Lists.newArrayList();
    rightJsonBatches.add(rightJson.toString());

    // Output rows pair the left and right row with the same id, e.g.
    // "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
    // "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
    // "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3

    // Set a very low output batch size so we can do only one row per batch.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(joinOp)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        .expectedNumBatches(10) // verify number of batches
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id, 0..numRows inclusive.
    for (long id = 0; id < numRows + 1; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : MergeJoinPOP(org.apache.drill.exec.physical.config.MergeJoinPOP) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) Test(org.junit.Test)

Example 5 with MergeJoinPOP

use of org.apache.drill.exec.physical.config.MergeJoinPOP in project drill by apache.

the class BasicPhysicalOpUnitTest method testSimpleMergeJoin.

/**
 * Runs a LEFT merge join on {@code x = x1} over two JSON batches per side and checks
 * the six joined rows produced for key 5.
 */
@SuppressWarnings("unchecked")
@Test
public void testSimpleMergeJoin() {
    MergeJoinPOP mergeJoin = new MergeJoinPOP(null, null,
        Lists.newArrayList(joinCond("x", "EQUALS", "x1")), JoinRelType.LEFT);

    // TODO - figure out where to add validation, column names must be unique, even between the two batches,
    // for all columns, not just the one in the join condition
    List<String> leftBatches = Lists.newArrayList(
        "[{\"x\": 5, \"a\" : \"a string\"}]",
        "[{\"x\": 5, \"a\" : \"a different string\"},{\"x\": 5, \"a\" : \"meh\"}]");
    List<String> rightBatches = Lists.newArrayList(
        "[{\"x1\": 5, \"a2\" : \"asdf\"}]",
        "[{\"x1\": 5, \"a2\" : \"12345\"}, {\"x1\": 6, \"a2\" : \"qwerty\"}]");

    // Each of the three left rows with x = 5 pairs with both right rows with x1 = 5.
    legacyOpTestBuilder()
        .physicalOperator(mergeJoin)
        .inputDataStreamsJson(Lists.newArrayList(leftBatches, rightBatches))
        .baselineColumns("x", "a", "a2", "x1")
        .baselineValues(5L, "a string", "asdf", 5L)
        .baselineValues(5L, "a string", "12345", 5L)
        .baselineValues(5L, "a different string", "asdf", 5L)
        .baselineValues(5L, "a different string", "12345", 5L)
        .baselineValues(5L, "meh", "asdf", 5L)
        .baselineValues(5L, "meh", "12345", 5L)
        .go();
}
Also used : MergeJoinPOP(org.apache.drill.exec.physical.config.MergeJoinPOP) Test(org.junit.Test)

Aggregations

MergeJoinPOP (org.apache.drill.exec.physical.config.MergeJoinPOP): 20, Test (org.junit.Test): 18, PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator): 10, LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder): 4, JoinRelType (org.apache.calcite.rel.core.JoinRelType): 2, JoinCondition (org.apache.drill.common.logical.data.JoinCondition): 2, Ignore (org.junit.Ignore): 2