Search in sources :

Example 1 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

the class TestOutputBatchSize method testFlattenLargeRecords.

@Test
public void testFlattenLargeRecords() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    // create input rows like this.
    // "a" : <id1>, "b" : wideString, "c" : [ 10 wideStrings ]
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    int arrayLength = 10;
    StringBuilder test = new StringBuilder();
    test.append("[ \"");
    for (int i = 0; i < arrayLength; i++) {
        test.append(wideString);
        test.append("\",\"");
    }
    test.append(wideString);
    test.append("\"]");
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{" + "\"a\" :" + (new StringBuilder().append(i)) + ",\"b\": \"" + wideString + "\"," + "\"c\": " + test + "},");
    }
    batchString.append("{" + "\"a\" :" + (new StringBuilder().append(numRows)) + ",\"b\": \"" + wideString + "\"," + "\"c\": " + test + "}");
    batchString.append("]");
    inputJsonBatches.add(batchString.toString());
    // output rows will be like this.
    // "a" : <id1>, "b" : wideString, "c" : wideString
    // Figure out what will be approximate total output size out of flatten for input above
    // We will use this sizing information to set output batch size so we can produce desired
    // number of batches that can be verified.
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int k = 0; k < (numRows) * 11; k++) {
        expectedBatchString.append("{" + "\"a\" :" + (new StringBuilder().append(k)) + ",\"b\": \"" + wideString + "\",");
        expectedBatchString.append("\"c\": \"" + wideString + "\"},");
    }
    expectedBatchString.append("{" + "\"a\" :" + (new StringBuilder().append(numRows)) + ",\"b\": \"" + wideString + "\",");
    expectedBatchString.append("\"c\": \"" + wideString + "\"}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // set the output batch size to 1/2 of total size expected.
    // We will get approximately get 2 batches and max of 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(flatten).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "b", "c").expectedNumBatches(// verify number of batches
    2).expectedBatchSize(// verify batch size.
    totalSize / 2);
    for (long k = 0; k < ((numRows + 1)); k++) {
        for (int j = 0; j < arrayLength + 1; j++) {
            opTestBuilder.baselineValues(k, wideString, wideString);
        }
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) Test(org.junit.Test)

Example 2 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

the class TestOutputBatchSize method testLeftOuterHashJoin.

@Test
public void testLeftOuterHashJoin() throws Exception {
    HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.LEFT);
    mockOpContext(hashJoin, initReservation, maxAllocation);
    numRows = 4000 * 2;
    // create left input rows like this.
    // "a1" : 5, "b1" : wideString, "c1" : <id>
    List<String> leftJsonBatches = Lists.newArrayList();
    StringBuilder leftBatchString = new StringBuilder();
    leftBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
    }
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
    leftBatchString.append("]");
    leftJsonBatches.add(leftBatchString.toString());
    // create right input rows like this.
    // "a2" : 6, "b2" : wideString, "c2" : <id>
    List<String> rightJsonBatches = Lists.newArrayList();
    StringBuilder rightBatchString = new StringBuilder();
    rightBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
    }
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
    rightBatchString.append("]");
    rightJsonBatches.add(rightBatchString.toString());
    // output rows will be like this.
    // "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
    // "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
    // "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
        expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
    }
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // set the output batch size to 1/2 of total size expected.
    // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in merge join.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(hashJoin).baselineColumns("a1", "b1", "c1", "a2", "b2", "c2").expectedNumBatches(// verify number of batches
    4).expectedBatchSize(// verify batch size
    totalSize / 2).inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
    for (long i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5l, wideString, i, 6l, wideString, i);
    }
    opTestBuilder.go();
}
Also used : LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 3 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

the class TestOutputBatchSize method testHashJoinSingleOutputBatch.

@Test
public void testHashJoinSingleOutputBatch() throws Exception {
    HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
    mockOpContext(hashJoin, initReservation, maxAllocation);
    // create multiple batches from both sides.
    numRows = 4096 * 2;
    // create left input rows like this.
    // "a1" : 5, "b1" : wideString, "c1" : <id>
    List<String> leftJsonBatches = Lists.newArrayList();
    StringBuilder leftBatchString = new StringBuilder();
    leftBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
    }
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
    leftBatchString.append("]");
    leftJsonBatches.add(leftBatchString.toString());
    // create right input rows like this.
    // "a2" : 6, "b2" : wideString, "c2" : <id>
    List<String> rightJsonBatches = Lists.newArrayList();
    StringBuilder rightBatchString = new StringBuilder();
    rightBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
    }
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
    rightBatchString.append("]");
    rightJsonBatches.add(rightBatchString.toString());
    // output rows will be like this.
    // "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
    // "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
    // "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
        expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
    }
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // set the output batch size to twice of total size expected.
    // We should get 1 batch.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize * 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(hashJoin).baselineColumns("a1", "b1", "c1", "a2", "b2", "c2").expectedNumBatches(// verify number of batches
    1).expectedBatchSize(// verify batch size
    totalSize).inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
    for (long i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5l, wideString, i, 6l, wideString, i);
    }
    opTestBuilder.go();
}
Also used : LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 4 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

the class TestOutputBatchSize method testFlattenVariableWidth.

@Test
public void testFlattenVariableWidth() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    // create input rows like this.
    // "a" : 5, "b" : wideString, "c" : ["parrot", "hummingbird", "owl", "woodpecker", "peacock"]
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\",\"c\" : [\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\", \"peacock\"]},");
    }
    batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\",\"c\" : [\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\", \"peacock\"]}");
    batchString.append("]");
    inputJsonBatches.add(batchString.toString());
    // Figure out what will be approximate total output size out of flatten for input above
    // We will use this sizing information to set output batch size so we can produce desired
    // number of batches that can be verified.
    // output rows will be like this.
    // "a" : 5, "b" : wideString, "c" : parrot
    // "a" : 5, "b" : wideString, "c" : hummingbird
    // "a" : 5, "b" : wideString, "c" : owl
    // "a" : 5, "b" : wideString, "c" : woodpecker
    // "a" : 5, "b" : wideString, "c" : peacock
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"parrot\"},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"hummingbird\"},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"owl\"},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"woodpecker\"},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"peacock\"},");
    }
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"parrot\"},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"hummingbird\"},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"owl\"},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"woodpecker\"},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"peacock\"}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // set the output batch size to 1/2 of total size expected.
    // We will get approximately get 2 batches and max of 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(flatten).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "b", "c").expectedNumBatches(// verify number of batches
    2).expectedBatchSize(// verify batch size.
    totalSize / 2);
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5l, wideString, "parrot");
        opTestBuilder.baselineValues(5l, wideString, "hummingbird");
        opTestBuilder.baselineValues(5l, wideString, "owl");
        opTestBuilder.baselineValues(5l, wideString, "woodpecker");
        opTestBuilder.baselineValues(5l, wideString, "peacock");
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) Test(org.junit.Test)

Example 5 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

the class TestOutputBatchSize method testRightOuterHashJoin.

@Test
public void testRightOuterHashJoin() throws Exception {
    HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.RIGHT);
    mockOpContext(hashJoin, initReservation, maxAllocation);
    numRows = 4000 * 2;
    // create left input rows like this.
    // "a1" : 5, "b1" : wideString, "c1" : <id>
    List<String> leftJsonBatches = Lists.newArrayList();
    StringBuilder leftBatchString = new StringBuilder();
    leftBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
    }
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
    leftBatchString.append("]");
    leftJsonBatches.add(leftBatchString.toString());
    // create right input rows like this.
    // "a2" : 6, "b2" : wideString, "c2" : <id>
    List<String> rightJsonBatches = Lists.newArrayList();
    StringBuilder rightBatchString = new StringBuilder();
    rightBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
    }
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
    rightBatchString.append("]");
    rightJsonBatches.add(rightBatchString.toString());
    // output rows will be like this.
    // "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
    // "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
    // "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
        expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
    }
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // set the output batch size to 1/2 of total size expected.
    // We will get approximately 4 batches because of fragmentation factor of 2 accounted for in merge join.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(hashJoin).baselineColumns("a1", "b1", "c1", "a2", "b2", "c2").expectedNumBatches(// verify number of batches
    4).expectedBatchSize(// verify batch size
    totalSize / 2).inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
    for (long i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5l, wideString, i, 6l, wideString, i);
    }
    opTestBuilder.go();
}
Also used : LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Aggregations

LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder)39 Test (org.junit.Test)36 PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator)12 FlattenPOP (org.apache.drill.exec.physical.config.FlattenPOP)12 HashJoinPOP (org.apache.drill.exec.physical.config.HashJoinPOP)6 FieldReference (org.apache.drill.common.expression.FieldReference)5 FunctionCall (org.apache.drill.common.expression.FunctionCall)5 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)5 NestedLoopJoinPOP (org.apache.drill.exec.physical.config.NestedLoopJoinPOP)5 Project (org.apache.drill.exec.physical.config.Project)5 Text (org.apache.drill.exec.util.Text)5 MergeJoinPOP (org.apache.drill.exec.physical.config.MergeJoinPOP)4 HashAggregate (org.apache.drill.exec.physical.config.HashAggregate)3 UnionAll (org.apache.drill.exec.physical.config.UnionAll)3 JsonStringArrayList (org.apache.drill.exec.util.JsonStringArrayList)3 JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap)3 MinorFragmentEndpoint (org.apache.drill.exec.physical.MinorFragmentEndpoint)2 ExternalSort (org.apache.drill.exec.physical.config.ExternalSort)1