Search in sources :

Example 1 with NestedLoopJoinPOP

use of org.apache.drill.exec.physical.config.NestedLoopJoinPOP in project drill by apache.

the class TestOutputBatchSize method testLeftNestedLoopJoin.

/**
 * Runs a LEFT nested loop join on the condition {@code c1 = c2} over two JSON inputs
 * that each hold ids 0..numRows (inclusive), with the operator output batch size
 * option set to half of the expected output size. The test expects 4 output batches
 * and one joined row per id.
 */
@Test
public void testLeftNestedLoopJoin() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression joinCondition = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP joinOp = new NestedLoopJoinPOP(null, null, JoinRelType.LEFT, joinCondition);
    numRows = 2 * 4000;

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>, for ids 0..numRows.
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            leftJson.append(",");
        }
        leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString).append("\",\"c1\" : ").append(id).append("}");
    }
    leftJson.append("]");
    List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>, for ids 0..numRows.
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            rightJson.append(",");
        }
        rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString).append("\",\"c2\" : ").append(id).append("}");
    }
    rightJson.append("]");
    List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());

    // Expected output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder expectedJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            expectedJson.append(",");
        }
        expectedJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString).append("\",\"c1\" : ").append(id);
        expectedJson.append(", \"a2\": 6, \"b2\" : \"").append(wideString).append("\",\"c2\" : ").append(id).append("}");
    }
    expectedJson.append("]");
    List<String> expectedJsonBatches = Lists.newArrayList(expectedJson.toString());

    // Cap the operator's output batch size at half of the total expected output size;
    // the builder below is told to expect 4 batches of that size.
    long totalSize = getExpectedSize(expectedJsonBatches);
    long halfOfTotal = totalSize / 2;
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", halfOfTotal);

    LegacyOperatorTestBuilder testBuilder = legacyOpTestBuilder()
        .physicalOperator(joinOp)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        // verify number of batches
        .expectedNumBatches(4)
        // verify batch size
        .expectedBatchSize(halfOfTotal)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id; the id stays a long so it is boxed as Long like the constants.
    for (long id = 0; id <= numRows; id++) {
        testBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    testBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 2 with NestedLoopJoinPOP

use of org.apache.drill.exec.physical.config.NestedLoopJoinPOP in project drill by apache.

the class TestOutputBatchSize method testNestedLoopJoinMultipleOutputBatches.

/**
 * Runs an INNER nested loop join on the condition {@code c1 = c2} over two JSON inputs
 * that each hold ids 0..numRows (inclusive), with the operator output batch size
 * option set to half of the expected output size. The test expects 4 output batches
 * and one joined row per id.
 */
@Test
public void testNestedLoopJoinMultipleOutputBatches() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression joinCondition = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP joinOp = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, joinCondition);
    mockOpContext(joinOp, initReservation, maxAllocation);
    numRows = 2 * 4000;

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>, for ids 0..numRows.
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            leftJson.append(",");
        }
        leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString).append("\",\"c1\" : ").append(id).append("}");
    }
    leftJson.append("]");
    List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>, for ids 0..numRows.
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            rightJson.append(",");
        }
        rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString).append("\",\"c2\" : ").append(id).append("}");
    }
    rightJson.append("]");
    List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());

    // Expected output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder expectedJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            expectedJson.append(",");
        }
        expectedJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString).append("\",\"c1\" : ").append(id);
        expectedJson.append(", \"a2\": 6, \"b2\" : \"").append(wideString).append("\",\"c2\" : ").append(id).append("}");
    }
    expectedJson.append("]");
    List<String> expectedJsonBatches = Lists.newArrayList(expectedJson.toString());

    // Cap the operator's output batch size at half of the total expected output size;
    // the builder below is told to expect 4 batches of that size.
    long totalSize = getExpectedSize(expectedJsonBatches);
    long halfOfTotal = totalSize / 2;
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", halfOfTotal);

    LegacyOperatorTestBuilder testBuilder = legacyOpTestBuilder()
        .physicalOperator(joinOp)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        // verify number of batches
        .expectedNumBatches(4)
        // verify batch size
        .expectedBatchSize(halfOfTotal)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id; the id stays a long so it is boxed as Long like the constants.
    for (long id = 0; id <= numRows; id++) {
        testBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    testBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 3 with NestedLoopJoinPOP

use of org.apache.drill.exec.physical.config.NestedLoopJoinPOP in project drill by apache.

the class TestOutputBatchSize method testNestedLoopJoinLowerLimit.

/**
 * Exercises the lower limit of the output batch size for an INNER nested loop join on
 * {@code c1 = c2}: the batch size option is set to a very small value (128) and the
 * test expects 10 output batches with one joined row per id.
 */
@Test
public void testNestedLoopJoinLowerLimit() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression joinCondition = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP joinOp = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, joinCondition);
    numRows = 10;

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>, for ids 0..numRows.
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            leftJson.append(",");
        }
        leftJson.append("{\"a1\": 5, \"b1\" : \"").append(wideString).append("\",\"c1\" : ").append(id).append("}");
    }
    leftJson.append("]");
    List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>, for ids 0..numRows.
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id <= numRows; id++) {
        if (id > 0) {
            rightJson.append(",");
        }
        rightJson.append("{\"a2\": 6, \"b2\" : \"").append(wideString).append("\",\"c2\" : ").append(id).append("}");
    }
    rightJson.append("]");
    List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());

    // Output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>

    // Use a very low output batch size so that only one row fits per batch.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

    // NOTE(review): each input holds numRows + 1 = 11 rows, yet 10 batches are expected;
    // confirm how expectedNumBatches is checked by the test builder.
    LegacyOperatorTestBuilder testBuilder = legacyOpTestBuilder()
        .physicalOperator(joinOp)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        // verify number of batches
        .expectedNumBatches(10)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id; the id stays a long so it is boxed as Long like the constants.
    for (long id = 0; id <= numRows; id++) {
        testBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    testBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 4 with NestedLoopJoinPOP

use of org.apache.drill.exec.physical.config.NestedLoopJoinPOP in project drill by apache.

the class NestedLoopJoinPrel method getPhysicalOperator.

/**
 * Builds the physical nested loop join operator for this rel node.
 *
 * <p>Both inputs are converted to physical operators first, then the join condition is
 * converted to a Drill {@link LogicalExpression}, and finally the resulting
 * {@link NestedLoopJoinPOP} is passed through {@code creator.addMetadata}.
 *
 * @param creator plan creator used to convert the inputs and to attach metadata
 * @return the value returned by {@code creator.addMetadata} for the new join operator
 * @throws IOException declared by the signature; may come from converting the inputs
 */
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
    final PhysicalOperator leftOperator = ((Prel) left).getPhysicalOperator(creator);
    final PhysicalOperator rightOperator = ((Prel) right).getPhysicalOperator(creator);

    /*
       The raw join condition is transformed into its logical representation. For example:
       Query:
         select t1.c1, t2.c1, t2.c2 from t1 inner join t2 on t1.c1 between t2.c1 and t2.c2
       Raw expression:
         AND(>=($0, $1), <=($0, $2))
       Logical expression:
         FunctionCall [func=booleanAnd,
         args=[FunctionCall [func=greater_than_or_equal_to, args=[`i1`, `i10`]],
               FunctionCall [func=less_than_or_equal_to, args=[`i1`, `i2`]]]

       Both tables have the same column name, so the duplicated column name in the
       second table is renamed: i1 -> i10.
    */
    final DrillParseContext parseContext = new DrillParseContext(PrelUtil.getSettings(getCluster()));
    final LogicalExpression joinCondition = DrillOptiq.toDrill(parseContext, getInputs(), getCondition());

    final NestedLoopJoinPOP joinOperator =
        new NestedLoopJoinPOP(leftOperator, rightOperator, getJoinType(), joinCondition);
    return creator.addMetadata(this, joinOperator);
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP)

Example 5 with NestedLoopJoinPOP

use of org.apache.drill.exec.physical.config.NestedLoopJoinPOP in project drill by axbaretto.

the class NestedLoopJoinPrel method getPhysicalOperator.

/**
 * Builds the physical nested loop join operator for this rel node.
 *
 * <p>Both inputs are converted to physical operators first, then the join condition is
 * converted to a Drill {@link LogicalExpression}, and finally the resulting
 * {@link NestedLoopJoinPOP} is passed through {@code creator.addMetadata}.
 *
 * @param creator plan creator used to convert the inputs and to attach metadata
 * @return the value returned by {@code creator.addMetadata} for the new join operator
 * @throws IOException declared by the signature; may come from converting the inputs
 */
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
    final PhysicalOperator leftOperator = ((Prel) left).getPhysicalOperator(creator);
    final PhysicalOperator rightOperator = ((Prel) right).getPhysicalOperator(creator);

    /*
       The raw join condition is transformed into its logical representation. For example:
       Query:
         select t1.c1, t2.c1, t2.c2 from t1 inner join t2 on t1.c1 between t2.c1 and t2.c2
       Raw expression:
         AND(>=($0, $1), <=($0, $2))
       Logical expression:
         FunctionCall [func=booleanAnd,
         args=[FunctionCall [func=greater_than_or_equal_to, args=[`i1`, `i10`]],
               FunctionCall [func=less_than_or_equal_to, args=[`i1`, `i2`]]]

       Both tables have the same column name, so the duplicated column name in the
       second table is renamed: i1 -> i10.
    */
    final DrillParseContext parseContext = new DrillParseContext(PrelUtil.getSettings(getCluster()));
    final LogicalExpression joinCondition = DrillOptiq.toDrill(parseContext, getInputs(), getCondition());

    final NestedLoopJoinPOP joinOperator =
        new NestedLoopJoinPOP(leftOperator, rightOperator, getJoinType(), joinCondition);
    return creator.addMetadata(this, joinOperator);
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP)

Aggregations

LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 7, NestedLoopJoinPOP (org.apache.drill.exec.physical.config.NestedLoopJoinPOP): 7, FieldReference (org.apache.drill.common.expression.FieldReference): 5, FunctionCall (org.apache.drill.common.expression.FunctionCall): 5, LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder): 5, Test (org.junit.Test): 5, PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator): 2, DrillParseContext (org.apache.drill.exec.planner.logical.DrillParseContext): 2