Search in sources :

Example 31 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

From the class TestOutputBatchSize, method testProjectMap.

@Test
public void testProjectMap() throws Exception {
    // Input rows each look like:
    // "a" : 5, "b" : "abc",
    // "c" : {"trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"},
    // "d" : {"trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"}
    // (Note: "c" and "d" are two separate map columns, not an array.)
    StringBuilder batchString = new StringBuilder("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + "abc" + "\"," + " \"c\" : { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " \"d\": { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
        batchString.append(i != numRows - 1 ? "}," : "}]");
    }
    List<String> inputJsonBatches = Lists.newArrayList();
    inputJsonBatches.add(batchString.toString());
    // Expected output: a single column "aplusamount" = a + c.amount = 105 per row.
    // This expected JSON is used only to size the output batches below.
    StringBuilder expectedString = new StringBuilder("[");
    for (int i = 0; i < numRows; i++) {
        expectedString.append("{\"aplusamount\": 105");
        expectedString.append(i != numRows - 1 ? "}," : "}]");
    }
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(expectedString.toString());
    String[] baselineColumns = new String[1];
    baselineColumns[0] = "aplusamount";
    String[] expr = { "a + c.amount ", baselineColumns[0] };
    Project projectConf = new Project(parseExprs(expr), null);
    mockOpContext(projectConf, initReservation, maxAllocation);
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Cap the operator output batch size at half the expected total so the
    // project operator must split its output into (approximately) two batches.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(projectConf)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns(baselineColumns)
        .expectedNumBatches(2)          // verify number of batches
        .expectedBatchSize(totalSize / 2);  // verify batch size
    // a + c.amount = 5 + 100
    Long[] baseLineValues = { 5L + 100L };
    for (int i = 0; i < numRows; i++) {
        opTestBuilder.baselineValues(baseLineValues);
    }
    opTestBuilder.go();
}
Also used : Project(org.apache.drill.exec.physical.config.Project) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) Test(org.junit.Test)

Example 32 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

From the class TestOutputBatchSize, method testProjectFixedWidthImpl.

/**
 * Exercises Project batch sizing for fixed-width columns, either as pure
 * transfers (evaluating 'select *') or as newly created columns
 * (evaluating 'select C0 + 5 as C0 ... C[columnCount-1] + 5 as C[columnCount-1]').
 *
 * @param transfer true to evaluate a 'select *' transfer, false to create new columns
 * @param columnCount number of columns C0..C[columnCount-1] in each generated row
 * @throws Exception if operator execution or result verification fails
 */
public void testProjectFixedWidthImpl(boolean transfer, int columnCount) throws Exception {
    // Build one row with columns C0..C[columnCount-1]; each column's value equals its index.
    StringBuilder row = new StringBuilder("{");
    String[] baselineColumns = new String[columnCount];
    Object[] baselineValues = new Long[columnCount];
    int exprCount = transfer ? 2 : 2 * columnCount;
    String[] expr = new String[exprCount];
    if (transfer) {
        // parseExprs() encodes 'select *' as a `**` -> `**` pair.
        expr[0] = "`**`";
        expr[1] = "`**`";
    }
    for (int col = 0; col < columnCount; col++) {
        row.append("\"" + "C" + col + "\": " + col + ((col == columnCount - 1) ? "" : ","));
        baselineColumns[col] = "C" + col;
        if (!transfer) {
            expr[2 * col] = baselineColumns[col] + " + 5";
            expr[2 * col + 1] = baselineColumns[col];
        }
        baselineValues[col] = (long) (transfer ? col : col + 5);
    }
    row.append("}");
    StringBuilder batchJson = new StringBuilder("[");
    for (int r = 0; r < numRows; r++) {
        batchJson.append(row + ((r == numRows - 1) ? "" : ","));
    }
    batchJson.append("]");
    List<String> inputJsonBatches = Lists.newArrayList();
    inputJsonBatches.add(batchJson.toString());
    // For sizing purposes the expected output JSON is identical to the input.
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(batchJson.toString());
    Project projectConf = new Project(parseExprs(expr), null);
    mockOpContext(projectConf, initReservation, maxAllocation);
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Limit output batches to half the expected total size so roughly two batches are produced.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(projectConf)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns(baselineColumns)
        .expectedNumBatches(2)          // verify number of batches
        .expectedBatchSize(totalSize / 2);  // verify batch size
    for (int r = 0; r < numRows; r++) {
        opTestBuilder.baselineValues(baselineValues);
    }
    opTestBuilder.go();
}
Also used : Project(org.apache.drill.exec.physical.config.Project) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder)

Example 33 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

From the class TestOutputBatchSize, method testHashAggMax.

@Test
public void testHashAggMax() throws ExecutionSetupException {
    HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1, parseExprs("a", "a"), parseExprs("max(b)", "b_max"), 1.0f);
    // Input: for each key value of "a", three rows whose "b" values are "a", "aa", "aaa".
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchJson = new StringBuilder();
    batchJson.append("[");
    for (int key = 0; key < numRows; key++) {
        batchJson.append("{\"a\": " + key + ", \"b\": " + "\"a\"" + "},");
        batchJson.append("{\"a\": " + key + ", \"b\": " + "\"aa\"" + "},");
        batchJson.append("{\"a\": " + key + ", \"b\": " + "\"aaa\"" + "},");
    }
    // One extra key (numRows) closes the JSON array without a trailing comma.
    batchJson.append("{\"a\": " + numRows + ", \"b\": " + "\"a\"" + "},");
    batchJson.append("{\"a\": " + numRows + ", \"b\": " + "\"aa\"" + "},");
    batchJson.append("{\"a\": " + numRows + ", \"b\": " + "\"aaa\"" + "}");
    batchJson.append("]");
    inputJsonBatches.add(batchJson.toString());
    // Expected output: one row per key with the max of "b", which is always "aaa".
    // The size of this expected output drives the batch-size option below.
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedJson = new StringBuilder();
    expectedJson.append("[");
    for (int key = 0; key < numRows; key++) {
        expectedJson.append("{\"a\": " + key + ", \"b\": " + "\"aaa\"" + "},");
    }
    expectedJson.append("{\"a\": " + numRows + ", \"b\": " + "\"aaa\"" + "}");
    expectedJson.append("]");
    expectedJsonBatches.add(expectedJson.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Cap the output batch size at half the expected total: approximately
    // two batches should be produced (four at most).
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(hashAgg)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b_max")
        .expectedNumBatches(2)        // verify number of batches
        .expectedBatchSize(totalSize);  // verify batch size
    for (int key = 0; key <= numRows; key++) {
        opTestBuilder.baselineValues((long) key, "aaa");
    }
    opTestBuilder.go();
}
Also used : LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) HashAggregate(org.apache.drill.exec.physical.config.HashAggregate) Test(org.junit.Test)

Example 34 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

From the class TestOutputBatchSize, method testFlattenNestedMap.

@Test
public void testFlattenNestedMap() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    // Input rows: "a" : 5, "b" : wideString, and "c" is an array of two maps.
    // Each element of "c" contains a nested "innerMap" plus scalar transaction fields:
    // {innerMap: {...}, "trans_id":"t1", amount:100,  trans_time:7777777, type:sports}
    // {innerMap: {...}, "trans_id":"t2", amount:1000, trans_time:8888888, type:groceries}
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    StringBuilder innerMap = new StringBuilder();
    innerMap.append("{ \"trans_id\":\"inner_trans_t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}");
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]");
        batchString.append("},");
    }
    // Final row closes the JSON array without a trailing comma.
    batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ",  \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
    batchString.append("]}]");
    inputJsonBatches.add(batchString.toString());
    // Approximate the total output size of flatten for the input above; this
    // sizing information sets the output batch size so we produce a verifiable
    // number of batches. Each input row flattens into two output rows:
    // "a" : 5, "b" : wideString, "c" : {innerMap: {...}, "trans_id":"t1", amount:100, ...}
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
    }
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Set the output batch size to 1/2 of the total expected size: we should
    // get approximately two batches (four at most).
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2)          // verify number of batches
        .expectedBatchSize(totalSize / 2);  // verify batch size
    // Build the expected flattened map values. Boxed literals replace the
    // deprecated Long(long) constructor.
    JsonStringHashMap<String, Object> innerMapResult = new JsonStringHashMap<>();
    innerMapResult.put("trans_id", new Text("inner_trans_t1"));
    innerMapResult.put("amount", 100L);
    innerMapResult.put("trans_time", 7777777L);
    innerMapResult.put("type", new Text("sports"));
    JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
    resultExpected1.put("trans_id", new Text("t1"));
    resultExpected1.put("amount", 100L);
    resultExpected1.put("trans_time", 7777777L);
    resultExpected1.put("type", new Text("sports"));
    resultExpected1.put("innerMap", innerMapResult);
    JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
    resultExpected2.put("trans_id", new Text("t2"));
    resultExpected2.put("amount", 1000L);
    resultExpected2.put("trans_time", 8888888L);
    resultExpected2.put("type", new Text("groceries"));
    resultExpected2.put("innerMap", innerMapResult);
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, resultExpected1);
        opTestBuilder.baselineValues(5L, wideString, resultExpected2);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) Text(org.apache.drill.exec.util.Text) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) Test(org.junit.Test)

Example 35 with LegacyOperatorTestBuilder

use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.

From the class TestOutputBatchSize, method testProjectVariableWidthImpl.

/**
 * Exercises Project batch sizing for variable-width (string) columns, either as
 * pure transfers (evaluating 'select *') or as newly created columns
 * (evaluating 'select lower(C0) as C0 ... lower(C[columnCount-1]) as C[columnCount-1]').
 *
 * @param transfer true to evaluate a 'select *' transfer, false to create new columns
 * @param columnCount number of columns C0..C[columnCount-1] in each generated row
 * @param testString the string value stored in every column of every row
 * @throws Exception if operator execution or result verification fails
 */
public void testProjectVariableWidthImpl(boolean transfer, int columnCount, String testString) throws Exception {
    // Build one row with columns C0..C[columnCount-1], each holding testString.
    StringBuilder row = new StringBuilder("{");
    String[] baselineColumns = new String[columnCount];
    Object[] baselineValues = new String[columnCount];
    int exprCount = transfer ? 2 : 2 * columnCount;
    String[] expr = new String[exprCount];
    if (transfer) {
        // parseExprs() encodes 'select *' as a `**` -> `**` pair.
        expr[0] = "`**`";
        expr[1] = "`**`";
    }
    for (int col = 0; col < columnCount; col++) {
        row.append("\"" + "C" + col + "\": " + "\"" + testString + "\"" + ((col == columnCount - 1) ? "" : ","));
        baselineColumns[col] = "C" + col;
        if (!transfer) {
            expr[2 * col] = "lower(" + baselineColumns[col] + ")";
            expr[2 * col + 1] = baselineColumns[col];
        }
        baselineValues[col] = transfer ? testString : StringUtils.lowerCase(testString);
    }
    row.append("}");
    StringBuilder batchJson = new StringBuilder("[");
    for (int r = 0; r < numRows; r++) {
        batchJson.append(row + ((r == numRows - 1) ? "" : ","));
    }
    batchJson.append("]");
    List<String> inputJsonBatches = Lists.newArrayList();
    inputJsonBatches.add(batchJson.toString());
    // For sizing purposes the expected output JSON is identical to the input.
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(batchJson.toString());
    Project projectConf = new Project(parseExprs(expr), null);
    mockOpContext(projectConf, initReservation, maxAllocation);
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Limit output batches to half the expected total size so roughly two batches are produced.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(projectConf)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns(baselineColumns)
        .expectedNumBatches(2)          // verify number of batches
        .expectedBatchSize(totalSize / 2);  // verify batch size
    for (int r = 0; r < numRows; r++) {
        opTestBuilder.baselineValues(baselineValues);
    }
    opTestBuilder.go();
}
Also used : Project(org.apache.drill.exec.physical.config.Project) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder)

Aggregations

LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder)39 Test (org.junit.Test)36 PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator)12 FlattenPOP (org.apache.drill.exec.physical.config.FlattenPOP)12 HashJoinPOP (org.apache.drill.exec.physical.config.HashJoinPOP)6 FieldReference (org.apache.drill.common.expression.FieldReference)5 FunctionCall (org.apache.drill.common.expression.FunctionCall)5 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)5 NestedLoopJoinPOP (org.apache.drill.exec.physical.config.NestedLoopJoinPOP)5 Project (org.apache.drill.exec.physical.config.Project)5 Text (org.apache.drill.exec.util.Text)5 MergeJoinPOP (org.apache.drill.exec.physical.config.MergeJoinPOP)4 HashAggregate (org.apache.drill.exec.physical.config.HashAggregate)3 UnionAll (org.apache.drill.exec.physical.config.UnionAll)3 JsonStringArrayList (org.apache.drill.exec.util.JsonStringArrayList)3 JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap)3 MinorFragmentEndpoint (org.apache.drill.exec.physical.MinorFragmentEndpoint)2 ExternalSort (org.apache.drill.exec.physical.config.ExternalSort)1