Search in sources :

Example 16 with Text

use of org.apache.drill.exec.util.Text in project drill by axbaretto.

the class TestOutputBatchSize method testFlattenVariableWidthList.

/**
 * Flattens column "c", a list of two variable-width string lists, and verifies the
 * resulting batches against a baseline of {@code 2 * (numRows + 1)} rows.
 *
 * <p>The input holds {@code numRows + 1} rows. The operator's output batch size option is
 * set to half of the size estimated for the expected output, and the test expects the
 * output to arrive in 2 batches.
 */
@Test
public void testFlattenVariableWidthList() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);

    // Columns "a" and "b" are the same in every input row and every expected row.
    final String rowPrefix = "{\"a\": 5, \"b\" : \"" + wideString + "\",";

    // create input rows like this.
    // "a" : 5, "b" : wideString, "c" : [["parrot", "hummingbird", "owl", "woodpecker"], ["hawk", "nightingale", "swallow", "peacock"]]
    // The row text stops before its closing brace; the brace and the separator are appended below.
    final String inputRow = rowPrefix
        + "\"c\" : ["
        + "[\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\"],"
        + "[\"hawk\",\"nightingale\",\"swallow\",\"peacock\"]"
        + "]";
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append(inputRow).append("},");
    }
    // The last row closes the JSON array instead of adding a separator.
    batchString.append(inputRow).append("}]");
    inputJsonBatches.add(batchString.toString());

    // Figure out what will be approximate total output size out of flatten for input above
    // We will use this sizing information to set output batch size so we can produce desired
    // number of batches that can be verified.
    // output rows will be like this.
    // "a" : 5, "b" : wideString, "c" : ["parrot", "hummingbird", "owl", "woodpecker"]
    // "a" : 5, "b" : wideString, "c" : ["hawk", "nightingale", "swallow", "peacock"]
    final String expectedParrotRow = rowPrefix + "\"c\" : [\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\"]}";
    final String expectedHawkRow = rowPrefix + "\"c\" : [\"hawk\", \"nightingale\", \"swallow\", \"peacock\"]}";
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append(expectedParrotRow).append(",");
        expectedBatchString.append(expectedHawkRow).append(",");
    }
    expectedBatchString.append(expectedParrotRow).append(",");
    expectedBatchString.append(expectedHawkRow);
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());

    long totalSize = getExpectedSize(expectedJsonBatches);

    // set the output batch size to 1/2 of total size expected.
    // We will get approximately 2 batches and max of 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

    // NOTE(review): the sibling flatten tests in this class pass totalSize / 2 to
    // expectedBatchSize; confirm that the full totalSize is intended here.
    OperatorTestBuilder opTestBuilder = opTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2) // verify number of batches
        .expectedBatchSize(totalSize); // verify batch size.

    // Plain construction instead of double-brace initialisation, which would create
    // anonymous subclasses that capture a reference to this test instance.
    final JsonStringArrayList<Text> birds1 = new JsonStringArrayList<>();
    birds1.add(new Text("parrot"));
    birds1.add(new Text("hummingbird"));
    birds1.add(new Text("owl"));
    birds1.add(new Text("woodpecker"));

    final JsonStringArrayList<Text> birds2 = new JsonStringArrayList<>();
    birds2.add(new Text("hawk"));
    birds2.add(new Text("nightingale"));
    birds2.add(new Text("swallow"));
    birds2.add(new Text("peacock"));

    // Two baseline rows (one per inner list) for each of the numRows + 1 input rows.
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, birds1);
        opTestBuilder.baselineValues(5L, wideString, birds2);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringArrayList(org.apache.drill.exec.util.JsonStringArrayList) Text(org.apache.drill.exec.util.Text) Test(org.junit.Test)

Example 17 with Text

use of org.apache.drill.exec.util.Text in project drill by axbaretto.

the class TestOutputBatchSize method testFlattenNestedMap.

/**
 * Flattens column "c", a list of two maps that each carry a nested "innerMap", and
 * verifies the resulting batches against a baseline of {@code 2 * (numRows + 1)} rows.
 *
 * <p>The input holds {@code numRows + 1} rows. The operator's output batch size option is
 * set to half of the size estimated for the expected output, and the test expects the
 * output to arrive in 2 batches.
 */
@Test
public void testFlattenNestedMap() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);

    // create input rows like this.
    // "a" : 5, "b" : wideString,
    // "c" : [ {innerMap: {trans_id:"inner_trans_t1", amount:100, trans_time:7777777, type:sports}, trans_id:"t1", amount:100, trans_time:7777777, type:sports},
    //         {innerMap: {trans_id:"inner_trans_t1", amount:100, trans_time:7777777, type:sports}, trans_id:"t2", amount:1000, trans_time:8888888, type:groceries} ]
    final String innerMap = "{ \"trans_id\":\"inner_trans_t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}";

    // Columns "a" and "b" are the same in every input row and every expected row.
    final String rowPrefix = "{\"a\": 5, \"b\" : \"" + wideString + "\",";

    // First element of "c"; reused for the input rows and for the size-estimate rows below.
    final String sportsElement = " { \"innerMap\": " + innerMap
        + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, "
        + "\"type\":\"sports\"}";

    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append(rowPrefix + "\"c\" : [" + sportsElement + ","
            + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]");
        batchString.append("},");
    }
    // Last row: the closing brackets are appended separately so the JSON array is terminated.
    batchString.append(rowPrefix + "\"c\" : [" + sportsElement + ","
        + " { \"innerMap\": " + innerMap + ",  \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
    batchString.append("]}]");
    inputJsonBatches.add(batchString.toString());

    // Figure out what will be approximate total output size out of flatten for input above
    // We will use this sizing information to set output batch size so we can produce desired
    // number of batches that can be verified.
    // The rows used for the estimate look like this.
    // "a" : 5, "b" : wideString, "c" : {innerMap: {trans_id:"inner_trans_t1", amount:100, trans_time:7777777, type:sports}, trans_id:"t1", amount:100, trans_time:7777777, type:sports}
    // NOTE(review): the estimate uses the t1/sports element for both flattened rows, while
    // the baseline below expects t2/groceries in every second row; presumably close enough
    // for an approximate size - confirm.
    final String expectedRow = rowPrefix + "\"c\" : " + sportsElement + " }";
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append(expectedRow).append(", ");
        expectedBatchString.append(expectedRow).append(", ");
    }
    expectedBatchString.append(expectedRow).append(", ");
    expectedBatchString.append(expectedRow);
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());

    long totalSize = getExpectedSize(expectedJsonBatches);

    // set the output batch size to 1/2 of total size expected.
    // We will get approximately 2 batches and max of 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

    OperatorTestBuilder opTestBuilder = opTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2) // verify number of batches
        .expectedBatchSize(totalSize / 2); // verify batch size.

    // Long.valueOf replaces the deprecated Long(long) boxing constructor.
    JsonStringHashMap<String, Object> innerMapResult = new JsonStringHashMap<>();
    innerMapResult.put("trans_id", new Text("inner_trans_t1"));
    innerMapResult.put("amount", Long.valueOf(100L));
    innerMapResult.put("trans_time", Long.valueOf(7777777L));
    innerMapResult.put("type", new Text("sports"));

    JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
    resultExpected1.put("trans_id", new Text("t1"));
    resultExpected1.put("amount", Long.valueOf(100L));
    resultExpected1.put("trans_time", Long.valueOf(7777777L));
    resultExpected1.put("type", new Text("sports"));
    resultExpected1.put("innerMap", innerMapResult);

    JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
    resultExpected2.put("trans_id", new Text("t2"));
    resultExpected2.put("amount", Long.valueOf(1000L));
    resultExpected2.put("trans_time", Long.valueOf(8888888L));
    resultExpected2.put("type", new Text("groceries"));
    resultExpected2.put("innerMap", innerMapResult);

    // Two baseline rows (one per element of "c") for each of the numRows + 1 input rows.
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, resultExpected1);
        opTestBuilder.baselineValues(5L, wideString, resultExpected2);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) Text(org.apache.drill.exec.util.Text) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) Test(org.junit.Test)

Example 18 with Text

use of org.apache.drill.exec.util.Text in project drill by axbaretto.

the class TestOutputBatchSize method testFlattenMap.

/**
 * Flattens column "c", a list of two maps, and verifies the resulting batches against a
 * baseline of {@code 2 * (numRows + 1)} rows.
 *
 * <p>The input holds {@code numRows + 1} rows. The operator's output batch size option is
 * set to half of the size estimated for the expected output, and the test expects the
 * output to arrive in 2 batches.
 */
@Test
public void testFlattenMap() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);

    // Columns "a" and "b" are the same in every input row and every expected row.
    final String rowPrefix = "{\"a\": 5, \"b\" : \"" + wideString + "\",";

    // create input rows like this.
    // "a" : 5, "b" : wideString, "c" : [{trans_id:"t1", amount:100, trans_time:7777777, type:sports}, {trans_id:"t2", amount:1000, trans_time:8888888, type:groceries}]
    // The row text stops before the list is closed; the closing brackets are appended below.
    final String inputRow = rowPrefix
        + "\"c\" : ["
        + " { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"},"
        + " { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}";
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append(inputRow).append("]},");
    }
    // The last row closes the JSON array instead of adding a separator.
    batchString.append(inputRow).append("]}]");
    inputJsonBatches.add(batchString.toString());

    // Figure out what will be approximate total output size out of flatten for input above
    // We will use this sizing information to set output batch size so we can produce desired
    // number of batches that can be verified.
    // output rows will be like this.
    // "a" : 5, "b" : wideString, "c" : {trans_id:"t1", amount:100, trans_time:7777777, type:sports}
    // "a" : 5, "b" : wideString, "c" : {trans_id:"t2", amount:1000, trans_time:8888888, type:groceries}
    final String expectedSportsRow = rowPrefix
        + "\"c\" : "
        + "{\"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}}";
    final String expectedGroceriesRow = rowPrefix
        + "\"c\" : "
        + "{\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}}";
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append(expectedSportsRow).append(",");
        expectedBatchString.append(expectedGroceriesRow).append(",");
    }
    expectedBatchString.append(expectedSportsRow).append(",");
    expectedBatchString.append(expectedGroceriesRow);
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());

    long totalSize = getExpectedSize(expectedJsonBatches);

    // set the output batch size to 1/2 of total size expected.
    // We will get approximately 2 batches and max of 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

    OperatorTestBuilder opTestBuilder = opTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2) // verify number of batches
        .expectedBatchSize(totalSize / 2); // verify batch size.

    // Long.valueOf replaces the deprecated Long(long) boxing constructor.
    JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
    resultExpected1.put("trans_id", new Text("t1"));
    resultExpected1.put("amount", Long.valueOf(100L));
    resultExpected1.put("trans_time", Long.valueOf(7777777L));
    resultExpected1.put("type", new Text("sports"));

    JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
    resultExpected2.put("trans_id", new Text("t2"));
    resultExpected2.put("amount", Long.valueOf(1000L));
    resultExpected2.put("trans_time", Long.valueOf(8888888L));
    resultExpected2.put("type", new Text("groceries"));

    // Two baseline rows (one per element of "c") for each of the numRows + 1 input rows.
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, resultExpected1);
        opTestBuilder.baselineValues(5L, wideString, resultExpected2);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) Text(org.apache.drill.exec.util.Text) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) Test(org.junit.Test)

Example 19 with Text

use of org.apache.drill.exec.util.Text in project drill by axbaretto.

the class TestDirectoryExplorerUDFs method testConstExprFolding_maxDir0.

/**
 * For every configured constant-folding case, checks that the plan for a dir0 filter
 * mentions the expected folder and none of the other known folders, then runs the
 * query for the first case and compares its single row against a baseline.
 */
@Test
public void testConstExprFolding_maxDir0() throws Exception {
    test("use dfs.root");

    final List<String> allFiles = ImmutableList.of("smallfile", "SMALLFILE_2", "bigfile", "BIGFILE_2");
    final String query = "select * from dfs.`%s/*/*.csv` where dir0 = %s('dfs.root','%s')";

    for (ConstantFoldingTestConfig config : tests) {
        // Every known folder except the one expected for this case must be absent from the plan.
        final List<String> unexpectedFolders = Lists.newArrayList(allFiles);
        unexpectedFolders.remove(config.expectedFolderName);

        final String sql = String.format(query, path, config.funcName, path);
        final String[] expectedPatterns = new String[] { config.expectedFolderName };
        // The plan-matching helper takes arrays, so convert the programmatically built list.
        final String[] excludedPatterns = unexpectedFolders.toArray(new String[unexpectedFolders.size()]);
        testPlanMatchingPatterns(sql, expectedPatterns, excludedPatterns);
    }

    // Baseline value of the "columns" column for the row returned by the first case.
    final JsonStringArrayList<Text> list = new JsonStringArrayList<>();
    for (String value : new String[] { "1", "2", "3" }) {
        list.add(new Text(value));
    }

    final ConstantFoldingTestConfig firstConfig = tests.get(0);
    testBuilder()
        .sqlQuery(query, path, firstConfig.funcName, path)
        .unOrdered()
        .baselineColumns("columns", "dir0")
        .baselineValues(list, firstConfig.expectedFolderName)
        .go();
}
Also used : JsonStringArrayList(org.apache.drill.exec.util.JsonStringArrayList) Text(org.apache.drill.exec.util.Text) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Test(org.junit.Test) PlannerTest(org.apache.drill.categories.PlannerTest) SqlTest(org.apache.drill.categories.SqlTest)

Example 20 with Text

use of org.apache.drill.exec.util.Text in project drill by axbaretto.

the class DrillTestWrapper method addToMaterializedResults.

/**
 * Materializes every batch in {@code records} into row maps appended to
 * {@code materializedRecords}.
 *
 * <p>Each batch is loaded through {@code loader}; every row becomes a {@link TreeMap}
 * keyed by the column's simple-path expression. {@code Text} values are stored as their
 * {@code toString()} form, every other value (including {@code null}) is stored as
 * returned by the vector accessor. After a batch has been processed it is removed from
 * {@code records}, released, and the loader is cleared, so {@code records} is empty when
 * this method returns normally.
 *
 * @param materializedRecords destination list that receives one map per row
 * @param records batches to consume; processed batches are removed from this list
 * @param loader loader used to read each batch; cleared after every batch
 */
public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords, List<QueryDataBatch> records, RecordBatchLoader loader) throws SchemaChangeException, UnsupportedEncodingException {
    long totalRecords = 0;
    QueryDataBatch batch;
    int size = records.size();
    for (int i = 0; i < size; i++) {
        // Always read the head of the list: the batch is removed once it has been processed,
        // so batches that were not reached stay in the list.
        batch = records.get(0);
        loader.load(batch.getHeader().getDef(), batch.getData());
        // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
        // SchemaChangeException, so check/clean throws clause above.
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (int j = 0; j < loader.getRecordCount(); j++) {
            Map<String, Object> record = new TreeMap<>();
            for (VectorWrapper<?> w : loader) {
                Object obj = w.getValueVector().getAccessor().getObject(j);
                // instanceof is false for null, so no separate null check is needed.
                if (obj instanceof Text) {
                    obj = obj.toString();
                }
                // Single put per column; null values are recorded as well.
                record.put(SchemaPath.getSimplePath(w.getField().getName()).toExpr(), obj);
            }
            materializedRecords.add(record);
        }
        records.remove(0);
        batch.release();
        loader.clear();
    }
}
Also used : QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap)

Aggregations

Text (org.apache.drill.exec.util.Text)36 Test (org.junit.Test)22 JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap)14 PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator)10 FlattenPOP (org.apache.drill.exec.physical.config.FlattenPOP)10 JsonStringArrayList (org.apache.drill.exec.util.JsonStringArrayList)8 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)6 TreeMap (java.util.TreeMap)5 UnlikelyTest (org.apache.drill.categories.UnlikelyTest)5 LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder)5 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)5 HashMap (java.util.HashMap)4 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)4 BatchSchema (org.apache.drill.exec.record.BatchSchema)4 MaterializedField (org.apache.drill.exec.record.MaterializedField)4 VectorAccessible (org.apache.drill.exec.record.VectorAccessible)4 SelectionVector2 (org.apache.drill.exec.record.selection.SelectionVector2)4 SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)4 ValueVector (org.apache.drill.exec.vector.ValueVector)4 BufferedWriter (java.io.BufferedWriter)3