Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
The class TestOutputBatchSize, method testProjectMap.
@Test
public void testProjectMap() throws Exception {
// create input rows like this.
// "a" : 5, "b" : "abc",
// "c" : {"trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"},
// "d" : {"trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"}
StringBuilder batchString = new StringBuilder("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + "abc" + "\"," + " \"c\" : { \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}," + " \"d\": { \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
batchString.append(i != numRows - 1 ? "}," : "}]");
}
List<String> inputJsonBatches = Lists.newArrayList();
inputJsonBatches.add(batchString.toString());
StringBuilder expectedString = new StringBuilder("[");
for (int i = 0; i < numRows; i++) {
expectedString.append("{\"aplusamount\": 105");
expectedString.append(i != numRows - 1 ? "}," : "}]");
}
List<String> expectedJsonBatches = Lists.newArrayList();
expectedJsonBatches.add(expectedString.toString());
String[] baselineColumns = new String[1];
baselineColumns[0] = "aplusamount";
String[] expr = { "a + c.amount ", baselineColumns[0] };
Project projectConf = new Project(parseExprs(expr), null);
mockOpContext(projectConf, initReservation, maxAllocation);
long totalSize = getExpectedSize(expectedJsonBatches);
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(projectConf)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns(baselineColumns)
    .expectedNumBatches(2) // verify number of batches
    .expectedBatchSize(totalSize / 2); // verify batch size
// a + c.amount
Long[] baseLineValues = { 5L + 100L };
for (int i = 0; i < numRows; i++) {
opTestBuilder.baselineValues(baseLineValues);
}
opTestBuilder.go();
}
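Throughout these tests, parseExprs() takes alternating (expression, output name) pairs, so the array above projects a + c.amount as the column aplusamount. Below is a minimal sketch of that pairing convention; the LogicalExpressionParser.parse() call is an assumption for illustration, since the real parsing happens inside the shared test base class.
// Hedged sketch of the (expression, name) pairing that parseExprs() consumes.
// Assumed imports: java.util.ArrayList, java.util.List,
//   org.apache.drill.common.expression.LogicalExpression,
//   org.apache.drill.common.expression.FieldReference,
//   org.apache.drill.common.logical.data.NamedExpression,
//   org.apache.drill.common.parser.LogicalExpressionParser (assumed helper).
List<NamedExpression> parseExprsSketch(String... exprsAndNames) {
  List<NamedExpression> result = new ArrayList<>();
  for (int i = 0; i < exprsAndNames.length; i += 2) {
    // even index: expression text, e.g. "a + c.amount"
    LogicalExpression parsed = LogicalExpressionParser.parse(exprsAndNames[i]);
    // odd index: output column name, e.g. "aplusamount"
    result.add(new NamedExpression(parsed, new FieldReference(exprsAndNames[i + 1])));
  }
  return result;
}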
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
The class TestOutputBatchSize, method testProjectFixedWidthImpl.
/**
 * Tests batch sizing of fixed-width transfers and new column creation in Project.
 * Transfer: evaluates 'select *'.
 * New columns: evaluates 'select C0 + 5 as C0 ... C[columnCount - 1] + 5 as C[columnCount - 1]'.
 * @param transfer true to exercise the transfer ('select *') path, false to create new columns
 * @param columnCount number of generated columns C0..C[columnCount - 1]
 * @throws Exception
 */
public void testProjectFixedWidthImpl(boolean transfer, int columnCount) throws Exception {
// generate a row with columnCount columns C0..C[columnCount - 1]; the value in each column equals its column id
StringBuilder jsonRow = new StringBuilder("{");
String[] baselineColumns = new String[columnCount];
Object[] baselineValues = new Long[columnCount];
int exprSize = (transfer ? 2 : 2 * columnCount);
String[] expr = new String[exprSize];
// Expr for a 'select *' as expected by parseExprs()
if (transfer) {
expr[0] = "`**`";
expr[1] = "`**`";
}
for (int i = 0; i < columnCount; i++) {
jsonRow.append("\"" + "C" + i + "\": " + i + ((i == columnCount - 1) ? "" : ","));
baselineColumns[i] = "C" + i;
if (!transfer) {
expr[i * 2] = baselineColumns[i] + " + 5";
expr[i * 2 + 1] = baselineColumns[i];
}
baselineValues[i] = (long) (transfer ? i : i + 5);
}
jsonRow.append("}");
StringBuilder batchString = new StringBuilder("[");
for (int i = 0; i < numRows; i++) {
batchString.append(jsonRow + ((i == numRows - 1) ? "" : ","));
}
batchString.append("]");
List<String> inputJsonBatches = Lists.newArrayList();
inputJsonBatches.add(batchString.toString());
List<String> expectedJsonBatches = Lists.newArrayList();
expectedJsonBatches.add(batchString.toString());
Project projectConf = new Project(parseExprs(expr), null);
mockOpContext(projectConf, initReservation, maxAllocation);
long totalSize = getExpectedSize(expectedJsonBatches);
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(projectConf)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns(baselineColumns)
    .expectedNumBatches(2) // verify number of batches
    .expectedBatchSize(totalSize / 2); // verify batch size
for (int i = 0; i < numRows; i++) {
opTestBuilder.baselineValues(baselineValues);
}
opTestBuilder.go();
}
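This parameterized impl is driven by @Test entry points that choose the transfer mode and column count; those callers are not shown on this page. A hedged example of what such callers might look like (the column count of 100 is illustrative, not the project's actual value):
@Test
public void testProjectFixedWidthTransfer() throws Exception {
  // 'select *' path: columns are transferred, not re-created
  testProjectFixedWidthImpl(true, 100);
}

@Test
public void testProjectFixedWidthNewColumns() throws Exception {
  // 'select Ci + 5 as Ci' path: each output column is newly created
  testProjectFixedWidthImpl(false, 100);
}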
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
The class TestOutputBatchSize, method testHashAggMax.
@Test
public void testHashAggMax() throws ExecutionSetupException {
HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1, parseExprs("a", "a"), parseExprs("max(b)", "b_max"), 1.0f);
// create input rows like this.
// "a" : 1, "b" : "a"
// "a" : 2, "b" : "aa"
// "a" : 3, "b" : "aaa"
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": " + i + ", \"b\": " + "\"a\"" + "},");
batchString.append("{\"a\": " + i + ", \"b\": " + "\"aa\"" + "},");
batchString.append("{\"a\": " + i + ", \"b\": " + "\"aaa\"" + "},");
}
batchString.append("{\"a\": " + numRows + ", \"b\": " + "\"a\"" + "},");
batchString.append("{\"a\": " + numRows + ", \"b\": " + "\"aa\"" + "},");
batchString.append("{\"a\": " + numRows + ", \"b\": " + "\"aaa\"" + "}");
batchString.append("]");
inputJsonBatches.add(batchString.toString());
// Figure out the approximate total output size of the hash agg for the input above.
// We use this sizing information to set the output batch size so we can produce
// the desired number of batches for verification.
// output rows will be like this.
// "a" : 1, "b" : "aaa"
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": " + i + ", \"b\": " + "\"aaa\"" + "},");
}
expectedBatchString.append("{\"a\": " + numRows + ", \"b\": " + "\"aaa\"" + "}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// set the output batch size to 1/2 of the total size expected.
// We should get approximately 2 batches, with a maximum of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(hashAgg)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b_max")
    .expectedNumBatches(2) // verify number of batches
    .expectedBatchSize(totalSize); // verify batch size
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues((long) i, "aaa");
}
opTestBuilder.go();
}
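For orientation, the HashAggregate configured at the top of this test is the one-phase physical form of SELECT a, MAX(b) AS b_max FROM input GROUP BY a. An annotated restatement of that constructor call, with argument roles inferred from its use above:
HashAggregate hashAgg = new HashAggregate(
    null,                                 // child operator; the test framework injects the JSON input
    AggPrelBase.OperatorPhase.PHASE_1of1, // single-phase aggregation (no partial/final split)
    parseExprs("a", "a"),                 // group-by key: column a, kept under the same name
    parseExprs("max(b)", "b_max"),        // aggregate: max(b), emitted as b_max
    1.0f);                                // cardinality estimate passed to the operator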
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
The class TestOutputBatchSize, method testFlattenNestedMap.
@Test
public void testFlattenNestedMap() throws Exception {
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
// create input rows like this.
// "a" : 5, "b" : wideString,
// "c" : [ {"innerMap": {"trans_id":"inner_trans_t1", "amount":100, "trans_time":7777777, "type":"sports"}, "trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"},
//         {"innerMap": <same inner map>, "trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"} ]
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
StringBuilder innerMap = new StringBuilder();
innerMap.append("{ \"trans_id\":\"inner_trans_t1\", \"amount\":100, \"trans_time\":7777777, \"type\":\"sports\"}");
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}]");
batchString.append("},");
}
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"}," + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t2\", \"amount\":1000, \"trans_time\":8888888, \"type\":\"groceries\"}");
batchString.append("]}]");
inputJsonBatches.add(batchString.toString());
// Figure out the approximate total output size of flatten for the input above.
// We use this sizing information to set the output batch size so we can produce
// the desired number of batches for verification.
// output rows will be like this.
// "a" : 5, "b" : wideString, "c" : {"innerMap": <inner map>, "trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"}
// "a" : 5, "b" : wideString, "c" : {"innerMap": <inner map>, "trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"}
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
}
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }, ");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : " + " { \"innerMap\": " + innerMap + ", \"trans_id\":\"t1\", \"amount\":100, \"trans_time\":7777777, " + "\"type\":\"sports\"} }");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// set the output batch size to 1/2 of the total size expected.
// We should get approximately 2 batches, with a maximum of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(flatten)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b", "c")
    .expectedNumBatches(2) // verify number of batches
    .expectedBatchSize(totalSize / 2); // verify batch size
JsonStringHashMap<String, Object> innerMapResult = new JsonStringHashMap<>();
innerMapResult.put("trans_id", new Text("inner_trans_t1"));
innerMapResult.put("amount", 100L);
innerMapResult.put("trans_time", 7777777L);
innerMapResult.put("type", new Text("sports"));
JsonStringHashMap<String, Object> resultExpected1 = new JsonStringHashMap<>();
resultExpected1.put("trans_id", new Text("t1"));
resultExpected1.put("amount", 100L);
resultExpected1.put("trans_time", 7777777L);
resultExpected1.put("type", new Text("sports"));
resultExpected1.put("innerMap", innerMapResult);
JsonStringHashMap<String, Object> resultExpected2 = new JsonStringHashMap<>();
resultExpected2.put("trans_id", new Text("t2"));
resultExpected2.put("amount", 1000L);
resultExpected2.put("trans_time", 8888888L);
resultExpected2.put("type", new Text("groceries"));
resultExpected2.put("innerMap", innerMapResult);
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5L, wideString, resultExpected1);
opTestBuilder.baselineValues(5L, wideString, resultExpected2);
}
opTestBuilder.go();
}
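The hand-built JsonStringHashMap baselines above can usually be written more compactly with TestBuilder.mapOf, which takes a key/value sequence and wraps String values as Text for comparison. A hedged equivalent, assuming mapOf from org.apache.drill.test.TestBuilder behaves as in other Drill tests:
// Compact equivalent of the baseline maps, assuming TestBuilder.mapOf semantics:
JsonStringHashMap<String, Object> innerMapResult = TestBuilder.mapOf(
    "trans_id", "inner_trans_t1", "amount", 100L, "trans_time", 7777777L, "type", "sports");
JsonStringHashMap<String, Object> resultExpected1 = TestBuilder.mapOf(
    "trans_id", "t1", "amount", 100L, "trans_time", 7777777L, "type", "sports",
    "innerMap", innerMapResult);
JsonStringHashMap<String, Object> resultExpected2 = TestBuilder.mapOf(
    "trans_id", "t2", "amount", 1000L, "trans_time", 8888888L, "type", "groceries",
    "innerMap", innerMapResult);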
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
The class TestOutputBatchSize, method testProjectVariableWidthImpl.
public void testProjectVariableWidthImpl(boolean transfer, int columnCount, String testString) throws Exception {
StringBuilder jsonRow = new StringBuilder("{");
String[] baselineColumns = new String[columnCount];
Object[] baselineValues = new String[columnCount];
int exprSize = (transfer ? 2 : 2 * columnCount);
String[] expr = new String[exprSize];
// Expr for a 'select *' as expected by parseExprs()
if (transfer) {
expr[0] = "`**`";
expr[1] = "`**`";
}
for (int i = 0; i < columnCount; i++) {
jsonRow.append("\"" + "C" + i + "\": " + "\"" + testString + "\"" + ((i == columnCount - 1) ? "" : ","));
baselineColumns[i] = "C" + i;
if (!transfer) {
expr[i * 2] = "lower(" + baselineColumns[i] + ")";
expr[i * 2 + 1] = baselineColumns[i];
}
baselineValues[i] = (transfer ? testString : StringUtils.lowerCase(testString));
}
jsonRow.append("}");
StringBuilder batchString = new StringBuilder("[");
for (int i = 0; i < numRows; i++) {
batchString.append(jsonRow + ((i == numRows - 1) ? "" : ","));
}
batchString.append("]");
List<String> inputJsonBatches = Lists.newArrayList();
inputJsonBatches.add(batchString.toString());
List<String> expectedJsonBatches = Lists.newArrayList();
expectedJsonBatches.add(batchString.toString());
Project projectConf = new Project(parseExprs(expr), null);
mockOpContext(projectConf, initReservation, maxAllocation);
long totalSize = getExpectedSize(expectedJsonBatches);
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(projectConf)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns(baselineColumns)
    .expectedNumBatches(2) // verify number of batches
    .expectedBatchSize(totalSize / 2); // verify batch size
for (int i = 0; i < numRows; i++) {
opTestBuilder.baselineValues(baselineValues);
}
opTestBuilder.go();
}
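As with the fixed-width variant, @Test entry points (not shown on this page) drive this impl with a transfer flag, a column count, and a test string. A hedged example of such callers; the column count and test string are illustrative:
@Test
public void testProjectVariableWidthTransfer() throws Exception {
  // 'select *' path: variable-width vectors are transferred as-is
  testProjectVariableWidthImpl(true, 50, "MixedCaseString");
}

@Test
public void testProjectVariableWidthNewColumns() throws Exception {
  // lower() forces creation of new variable-width output vectors
  testProjectVariableWidthImpl(false, 50, "MixedCaseString");
}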