use of org.apache.drill.exec.util.JsonStringHashMap in project drill by apache.
the class TestOutputBatchSize method testFlattenListOfMaps.
@Test
public void testFlattenListOfMaps() throws Exception {
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
// create input rows like this.
// "a" : 5, "b" : wideString,
// "c" : [ [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}],
// [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}],
// [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}] ]
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]");
batchString.append("]},");
}
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ], " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]");
batchString.append("]}]");
inputJsonBatches.add(batchString.toString());
// Figure out what will be approximate total output size out of flatten for input above
// We will use this sizing information to set output batch size so we can produce desired
// number of batches that can be verified.
// output rows will be like this.
// "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}]
// "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}]
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]},");
}
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"} ]},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "[ { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { " + "\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// set the output batch size to 1/2 of total size expected.
// We will get approximately get 2 batches and max of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(flatten).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "b", "c").expectedNumBatches(// verify number of batches
2).expectedBatchSize(// verify batch size.
totalSize / 2);
final JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
resultExpected1.put("trans_id", new Text("t1"));
resultExpected1.put("amount", new Long(100));
resultExpected1.put("trans_time", new Long(7777777));
resultExpected1.put("type", new Text("sports"));
final JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
resultExpected2.put("trans_id", new Text("t2"));
resultExpected2.put("amount", new Long(1000));
resultExpected2.put("trans_time", new Long(8888888));
resultExpected2.put("type", new Text("groceries"));
final JsonStringArrayList<JsonStringHashMap<String, Object>> results = new JsonStringArrayList<JsonStringHashMap<String, Object>>() {
{
add(resultExpected1);
add(resultExpected2);
}
};
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5l, wideString, results);
opTestBuilder.baselineValues(5l, wideString, results);
opTestBuilder.baselineValues(5l, wideString, results);
}
opTestBuilder.go();
}
use of org.apache.drill.exec.util.JsonStringHashMap in project drill by apache.
the class TestOutputBatchSize method testFlattenMap.
@Test
public void testFlattenMap() throws Exception {
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
// create input rows like this.
// "a" : 5, "b" : wideString, "c" : [{"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}]
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}," + " { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
batchString.append("]},");
}
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777," + " \"type\":\"sports\"}," + " { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
batchString.append("]}]");
inputJsonBatches.add(batchString.toString());
// Figure out what will be approximate total output size out of flatten for input above
// We will use this sizing information to set output batch size so we can produce desired
// number of batches that can be verified.
// output rows will be like this.
// "a" : 5, "b" : wideString, "c" : {"trans_id":"t1", amount:100, trans_time:7777777, type:sports}
// "a" : 5, "b" : wideString, "c" : {"trans_id":"t1", amount:100, trans_time:8888888, type:groceries}
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}},");
}
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t1\", \"amount\":100, " + "\"trans_time\":7777777, \"type\":\"sports\"}},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + "{\"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// set the output batch size to 1/2 of total size expected.
// We will get approximately get 2 batches and max of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(flatten).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "b", "c").expectedNumBatches(// verify number of batches
2).expectedBatchSize(// verify batch size.
totalSize / 2);
JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
resultExpected1.put("trans_id", new Text("t1"));
resultExpected1.put("amount", new Long(100));
resultExpected1.put("trans_time", new Long(7777777));
resultExpected1.put("type", new Text("sports"));
JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
resultExpected2.put("trans_id", new Text("t2"));
resultExpected2.put("amount", new Long(1000));
resultExpected2.put("trans_time", new Long(8888888));
resultExpected2.put("type", new Text("groceries"));
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5l, wideString, resultExpected1);
opTestBuilder.baselineValues(5l, wideString, resultExpected2);
}
opTestBuilder.go();
}
use of org.apache.drill.exec.util.JsonStringHashMap in project drill by apache.
the class TestOutputBatchSize method testFlattenNestedMap.
@Test
public void testFlattenNestedMap() throws Exception {
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
// create input rows like this.
// "a" : 5, "b" : wideString,
// "c" : [ {innerMap: {"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, "trans_id":"t1", amount:100, trans_time:8888888, type:groceries},
// {innerMap: {"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, "trans_id":"t1", amount:100, trans_time:8888888, type:groceries} ]
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
StringBuilder innerMap = new StringBuilder();
innerMap.append("{ \"trans_id\":\"inner_trans_t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}");
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]");
batchString.append("},");
}
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
batchString.append("]}]");
inputJsonBatches.add(batchString.toString());
// Figure out what will be approximate total output size out of flatten for input above
// We will use this sizing information to set output batch size so we can produce desired
// number of batches that can be verified.
// output rows will be like this.
// "a" : 5, "b" : wideString, "c" : {innerMap: {"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, "trans_id":"t1", amount:100, trans_time:8888888, type:groceries}
// "a" : 5, "b" : wideString, "c" : {innerMap: {"trans_id":"t1", amount:100, trans_time:7777777, type:sports}, "trans_id":"t1", amount:100, trans_time:8888888, type:groceries}
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
}
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// set the output batch size to 1/2 of total size expected.
// We will get approximately get 2 batches and max of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder().physicalOperator(flatten).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "b", "c").expectedNumBatches(// verify number of batches
2).expectedBatchSize(// verify batch size.
totalSize / 2);
JsonStringHashMap<String, Object> innerMapResult = new JsonStringHashMap<>();
innerMapResult.put("trans_id", new Text("inner_trans_t1"));
innerMapResult.put("amount", new Long(100));
innerMapResult.put("trans_time", new Long(7777777));
innerMapResult.put("type", new Text("sports"));
JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
resultExpected1.put("trans_id", new Text("t1"));
resultExpected1.put("amount", new Long(100));
resultExpected1.put("trans_time", new Long(7777777));
resultExpected1.put("type", new Text("sports"));
resultExpected1.put("innerMap", innerMapResult);
JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
resultExpected2.put("trans_id", new Text("t2"));
resultExpected2.put("amount", new Long(1000));
resultExpected2.put("trans_time", new Long(8888888));
resultExpected2.put("type", new Text("groceries"));
resultExpected2.put("innerMap", innerMapResult);
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5l, wideString, resultExpected1);
opTestBuilder.baselineValues(5l, wideString, resultExpected2);
}
opTestBuilder.go();
}
use of org.apache.drill.exec.util.JsonStringHashMap in project drill by apache.
the class IcebergQueriesTest method testSelectSnapshotsMetadata.
@Test
public void testSelectSnapshotsMetadata() throws Exception {
String query = "select * from dfs.tmp.`testAllTypes#snapshots`";
List<Snapshot> snapshots = new ArrayList<>();
table.snapshots().forEach(snapshots::add);
JsonStringHashMap<Object, Object> summaryMap = new JsonStringHashMap<>();
snapshots.get(0).summary().forEach((k, v) -> summaryMap.put(new Text(k.getBytes(StandardCharsets.UTF_8)), new Text(v.getBytes(StandardCharsets.UTF_8))));
JsonStringHashMap<Object, Object> secondSummaryMap = new JsonStringHashMap<>();
snapshots.get(1).summary().forEach((k, v) -> secondSummaryMap.put(new Text(k.getBytes(StandardCharsets.UTF_8)), new Text(v.getBytes(StandardCharsets.UTF_8))));
testBuilder().sqlQuery(query).unOrdered().baselineColumns("committed_at", "snapshot_id", "parent_id", "operation", "manifest_list", "summary").baselineValues(LocalDateTime.ofInstant(Instant.ofEpochMilli(snapshots.get(0).timestampMillis()), ZoneId.of("UTC")), snapshots.get(0).snapshotId(), snapshots.get(0).parentId(), snapshots.get(0).operation(), snapshots.get(0).manifestListLocation(), summaryMap).baselineValues(LocalDateTime.ofInstant(Instant.ofEpochMilli(snapshots.get(1).timestampMillis()), ZoneId.of("UTC")), snapshots.get(1).snapshotId(), snapshots.get(1).parentId(), snapshots.get(1).operation(), snapshots.get(1).manifestListLocation(), secondSummaryMap).go();
}
Aggregations