Example 6 with JsonStringHashMap

Use of org.apache.drill.exec.util.JsonStringHashMap in project drill by apache.

The class TestJsonReader, method testUntypedPathWithUnion.

// DRILL-6020
@Test
public void testUntypedPathWithUnion() throws Exception {
    String fileName = "table.json";
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(dirTestWatcher.getRootDir(), fileName)))) {
        writer.write("{\"rk\": {\"a\": {\"b\": \"1\"}}}");
        writer.write("{\"rk\": {\"a\": \"2\"}}");
    }
    JsonStringHashMap<String, Text> map = new JsonStringHashMap<>();
    map.put("b", new Text("1"));
    try {
        testBuilder()
            .sqlQuery("select t.rk.a as a from dfs.`%s` t", fileName)
            .ordered()
            .optionSettingQueriesForTestQuery("alter session set `exec.enable_union_type`=true")
            .baselineColumns("a")
            .baselineValues(map)
            .baselineValues("2")
            .go();
    } finally {
        resetSessionOption(ExecConstants.ENABLE_UNION_TYPE_KEY);
    }
}
Also used : FileWriter(java.io.FileWriter) Text(org.apache.drill.exec.util.Text) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) File(java.io.File) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)
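
Column a resolves to a map in the first record and a varchar in the second, which is why the test turns on exec.enable_union_type before querying. As a hedged aside, the baseline map could likely also be built in one call with the mapOf helper from org.apache.drill.test.TestBuilder (used in Example 10 below), assuming it wraps String values in Text the same way as the manual construction above:

// Sketch, assuming TestBuilder.mapOf Text-wraps String values,
// matching map.put("b", new Text("1")) in the test above.
JsonStringHashMap<String, Object> map = TestBuilder.mapOf("b", "1");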

Example 7 with JsonStringHashMap

Use of org.apache.drill.exec.util.JsonStringHashMap in project drill by apache.

The class TestBuilder, method mapOfObject.

/**
 * Convenience method to create an instance of {@link JsonStringHashMap}{@code <Object, Object>} with the given key-value sequence.
 *
 * By default, any {@link String} key or value is wrapped in a {@link Text} instance. To disable the
 * wrapping of keys, pass {@code false} as the first element of the key-value sequence; note that the
 * implementation below wraps String values unconditionally, regardless of the flag.
 *
 * @param keyValueSequence sequence of key-value pairs with an optional leading boolean
 *                         flag which disables wrapping String keys in {@link Text}.
 * @return map consisting of the entries given in the key-value sequence.
 */
public static JsonStringHashMap<Object, Object> mapOfObject(Object... keyValueSequence) {
    boolean convertStringToText = true;
    final int startIndex;
    if (keyValueSequence.length % 2 == 1) {
        convertStringToText = (boolean) keyValueSequence[0];
        startIndex = 1;
    } else {
        startIndex = 0;
    }
    final JsonStringHashMap<Object, Object> map = new JsonStringHashMap<>();
    for (int i = startIndex; i < keyValueSequence.length; i += 2) {
        Object key = keyValueSequence[i];
        if (convertStringToText && key instanceof CharSequence) {
            key = new Text(key.toString());
        }
        Object value = keyValueSequence[i + 1];
        if (value instanceof CharSequence) {
            value = new Text(value.toString());
        }
        map.put(key, value);
    }
    return map;
}
Also used : Text(org.apache.drill.exec.util.Text) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap)
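
For illustration, two hedged usage sketches based on the contract above. Note that in the implementation shown, the leading boolean flag only affects keys; String values are wrapped in Text unconditionally by the loop:

// Default: String keys and String values are both wrapped in Text.
JsonStringHashMap<Object, Object> wrapped = mapOfObject("name", "drill", "count", 2L);
// => { Text("name") = Text("drill"), Text("count") = 2 }

// Leading false: keys stay plain Strings; values are still wrapped.
JsonStringHashMap<Object, Object> plain = mapOfObject(false, "name", "drill");
// => { "name" = Text("drill") }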

Example 8 with JsonStringHashMap

Use of org.apache.drill.exec.util.JsonStringHashMap in project drill by axbaretto.

The class TestOutputBatchSize, method testFlattenNestedMap.

@Test
public void testFlattenNestedMap() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    // create input rows like this:
    // "a" : 5, "b" : wideString,
    // "c" : [ { "innerMap": {"trans_id":"inner_trans_t1", "amount":100, "trans_time":7777777, "type":"sports"}, "trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports" },
    //         { "innerMap": {"trans_id":"inner_trans_t1", "amount":100, "trans_time":7777777, "type":"sports"}, "trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries" } ]
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    StringBuilder innerMap = new StringBuilder();
    innerMap.append("{ \"trans_id\":\"inner_trans_t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}");
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]");
        batchString.append("},");
    }
    batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ",  \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
    batchString.append("]}]");
    inputJsonBatches.add(batchString.toString());
    // Figure out the approximate total output size of flatten for the input above.
    // We will use this sizing information to set the output batch size so we can
    // produce the desired number of batches for verification.
    // output rows will look like this:
    // "a" : 5, "b" : wideString, "c" : { "innerMap": {...}, "trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports" }
    // "a" : 5, "b" : wideString, "c" : { "innerMap": {...}, "trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries" }
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
    }
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Set the output batch size to 1/2 of the expected total size.
    // We should get approximately 2 batches, and at most 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    OperatorTestBuilder opTestBuilder = opTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2)             // verify number of batches
        .expectedBatchSize(totalSize / 2); // verify batch size
    JsonStringHashMap<String, Object> innerMapResult = new JsonStringHashMap<>();
    innerMapResult.put("trans_id", new Text("inner_trans_t1"));
    innerMapResult.put("amount", new Long(100));
    innerMapResult.put("trans_time", new Long(7777777));
    innerMapResult.put("type", new Text("sports"));
    JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
    resultExpected1.put("trans_id", new Text("t1"));
    resultExpected1.put("amount", new Long(100));
    resultExpected1.put("trans_time", new Long(7777777));
    resultExpected1.put("type", new Text("sports"));
    resultExpected1.put("innerMap", innerMapResult);
    JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
    resultExpected2.put("trans_id", new Text("t2"));
    resultExpected2.put("amount", new Long(1000));
    resultExpected2.put("trans_time", new Long(8888888));
    resultExpected2.put("type", new Text("groceries"));
    resultExpected2.put("innerMap", innerMapResult);
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, resultExpected1);
        opTestBuilder.baselineValues(5L, wideString, resultExpected2);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) Text(org.apache.drill.exec.util.Text) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) Test(org.junit.Test)
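
As a hedged aside, the nested baselines above could likely be written more compactly with the mapOf helper from org.apache.drill.test.TestBuilder (used in Example 10 below), assuming it Text-wraps String values and passes other values, including nested maps, through unchanged:

// Sketch under the assumption stated above; not the test's actual code.
JsonStringHashMap<String, Object> innerMapResult = TestBuilder.mapOf(
    "trans_id", "inner_trans_t1", "amount", 100L, "trans_time", 7777777L, "type", "sports");
JsonStringHashMap<String, Object> resultExpected1 = TestBuilder.mapOf(
    "trans_id", "t1", "amount", 100L, "trans_time", 7777777L, "type", "sports",
    "innerMap", innerMapResult);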

Example 9 with JsonStringHashMap

Use of org.apache.drill.exec.util.JsonStringHashMap in project drill by axbaretto.

The class TestOutputBatchSize, method testFlattenMap.

@Test
public void testFlattenMap() throws Exception {
    PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
    mockOpContext(flatten, initReservation, maxAllocation);
    // create input rows like this:
    // "a" : 5, "b" : wideString, "c" : [ {"trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"}, {"trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"} ]
    List<String> inputJsonBatches = Lists.newArrayList();
    StringBuilder batchString = new StringBuilder();
    batchString.append("[");
    for (int i = 0; i < numRows; i++) {
        batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
        batchString.append("]},");
    }
    batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777," + " \"type\":\"sports\"}," + " { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
    batchString.append("]}]");
    inputJsonBatches.add(batchString.toString());
    // Figure out the approximate total output size of flatten for the input above.
    // We will use this sizing information to set the output batch size so we can
    // produce the desired number of batches for verification.
    // output rows will look like this:
    // "a" : 5, "b" : wideString, "c" : {"trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"}
    // "a" : 5, "b" : wideString, "c" : {"trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"}
    List<String> expectedJsonBatches = Lists.newArrayList();
    StringBuilder expectedBatchString = new StringBuilder();
    expectedBatchString.append("[");
    for (int i = 0; i < numRows; i++) {
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}},");
        expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}},");
    }
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}}");
    expectedBatchString.append("]");
    expectedJsonBatches.add(expectedBatchString.toString());
    long totalSize = getExpectedSize(expectedJsonBatches);
    // Set the output batch size to 1/2 of the expected total size.
    // We should get approximately 2 batches, and at most 4.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
    OperatorTestBuilder opTestBuilder = opTestBuilder()
        .physicalOperator(flatten)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns("a", "b", "c")
        .expectedNumBatches(2)             // verify number of batches
        .expectedBatchSize(totalSize / 2); // verify batch size
    JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
    resultExpected1.put("trans_id", new Text("t1"));
    resultExpected1.put("amount", new Long(100));
    resultExpected1.put("trans_time", new Long(7777777));
    resultExpected1.put("type", new Text("sports"));
    JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
    resultExpected2.put("trans_id", new Text("t2"));
    resultExpected2.put("amount", new Long(1000));
    resultExpected2.put("trans_time", new Long(8888888));
    resultExpected2.put("type", new Text("groceries"));
    for (int i = 0; i < numRows + 1; i++) {
        opTestBuilder.baselineValues(5L, wideString, resultExpected1);
        opTestBuilder.baselineValues(5L, wideString, resultExpected2);
    }
    opTestBuilder.go();
}
Also used : FlattenPOP(org.apache.drill.exec.physical.config.FlattenPOP) Text(org.apache.drill.exec.util.Text) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) Test(org.junit.Test)
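
To make the sizing logic concrete: with the output batch size set to half the expected total output, the flatten operator needs roughly two batches to emit all rows; rounding and per-batch overhead can push the count higher, hence the stated upper bound of four. A small worked illustration with an assumed total (the real figure comes from getExpectedSize above):

// The 4 MB figure is assumed purely for illustration; the test computes the real value.
long totalSize = 4_000_000L;                                  // pretend flatten emits ~4 MB in total
long batchSize = totalSize / 2;                               // configured output batch size (~2 MB)
long idealBatches = (totalSize + batchSize - 1) / batchSize;  // => 2 in the ideal case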

Example 10 with JsonStringHashMap

Use of org.apache.drill.exec.util.JsonStringHashMap in project drill by axbaretto.

The class TestFlatten, method testFlatten_Drill2162_simple.

@Test
public void testFlatten_Drill2162_simple() throws Exception {
    List<Long> inputList = Lists.newArrayList();
    String jsonRecord = "{ \"int_list\" : [";
    final int listSize = 30;
    for (int i = 1; i < listSize; i++) {
        jsonRecord += i + ", ";
        inputList.add((long) i);
    }
    jsonRecord += listSize + "] }";
    inputList.add((long) listSize);
    int numRecords = 3000;
    new TestConstantFolding.SmallFileCreator(pathDir).setRecord(jsonRecord).createFiles(1, numRecords, "json");
    @SuppressWarnings("unchecked") List<JsonStringHashMap<String, Object>> data = Lists.newArrayList(mapOf("int_list", inputList));
    List<JsonStringHashMap<String, Object>> result = flatten(data, "int_list");
    TestBuilder builder = testBuilder().sqlQuery("select flatten(int_list) as int_list from dfs.`%s/bigfile/bigfile.json`", TEST_DIR).unOrdered().baselineColumns("int_list");
    for (int i = 0; i < numRecords; i++) {
        for (JsonStringHashMap<String, Object> record : result) {
            builder.baselineValues(record.get("int_list"));
        }
    }
    builder.go();
}
Also used : JsonStringHashMap(org.apache.drill.exec.util.JsonStringHashMap) TestBuilder(org.apache.drill.test.TestBuilder) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test) UnlikelyTest(org.apache.drill.categories.UnlikelyTest)
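
The flatten(data, "int_list") call above is a helper on the test class whose body is not shown here; conceptually it mirrors SQL's FLATTEN by expanding one list-valued field into one record per element. A hypothetical sketch of such a helper (the name flattenSketch and the generics are assumptions, not the actual test code):

import java.util.ArrayList;
import java.util.List;
import org.apache.drill.exec.util.JsonStringHashMap;

// Hypothetical stand-in for the test class's flatten helper.
static List<JsonStringHashMap<String, Object>> flattenSketch(
        List<JsonStringHashMap<String, Object>> records, String listField) {
    List<JsonStringHashMap<String, Object>> out = new ArrayList<>();
    for (JsonStringHashMap<String, Object> record : records) {
        for (Object element : (List<?>) record.get(listField)) {
            JsonStringHashMap<String, Object> copy = new JsonStringHashMap<>();
            copy.putAll(record);           // keep all other fields as-is
            copy.put(listField, element);  // replace the list with a single element
            out.add(copy);
        }
    }
    return out;
}

Applied to mapOf("int_list", inputList) with a 30-element list, this yields 30 single-entry records, matching the per-element baseline rows the test expects.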

Aggregations

JsonStringHashMap (org.apache.drill.exec.util.JsonStringHashMap) 19
Text (org.apache.drill.exec.util.Text) 14
Test (org.junit.Test) 14
PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator) 6
FlattenPOP (org.apache.drill.exec.physical.config.FlattenPOP) 6
TestBuilder (org.apache.drill.test.TestBuilder) 5
OperatorTest (org.apache.drill.categories.OperatorTest) 4
UnlikelyTest (org.apache.drill.categories.UnlikelyTest) 4
LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder) 3
BufferedWriter (java.io.BufferedWriter) 2
File (java.io.File) 2
FileWriter (java.io.FileWriter) 2
JsonStringArrayList (org.apache.drill.exec.util.JsonStringArrayList) 2
ClusterTest (org.apache.drill.test.ClusterTest) 2
Category (org.junit.experimental.categories.Category) 2
ArrayList (java.util.ArrayList) 1
TestBuilder.mapOfObject (org.apache.drill.test.TestBuilder.mapOfObject) 1
Snapshot (org.apache.iceberg.Snapshot) 1
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString) 1