Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache: class TestOutputBatchSize, method testMergeJoinLowerLimit.
@Test
public void testMergeJoinLowerLimit() throws Exception {
  // Test the lower limit of at least one batch.
  MergeJoinPOP mergeJoin = new MergeJoinPOP(null, null,
      Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.RIGHT);
  mockOpContext(mergeJoin, initReservation, maxAllocation);
  numRows = 10;

  // Create left input rows like this:
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());

  // Create right input rows like this:
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());

  // Output rows will look like this:
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3

  // Set a very low output batch size so each output batch holds only one row.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(mergeJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(10)  // verify number of batches
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
  }

  opTestBuilder.go();
}
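Both join inputs above are built with nearly identical StringBuilder loops: a JSON array of numRows + 1 rows, each holding a small constant, the wide string, and a running id. The sketch below shows a hypothetical helper (buildJsonBatch is not part of the Drill test sources) that the repeated pattern could be factored into; it assumes the same column layout the test uses.

// Hypothetical helper, not part of TestOutputBatchSize: builds one JSON batch
// of rows like {"a1": 5, "b1" : "<wide>", "c1" : <id>} for id = 0 .. numRows.
public class JsonBatchSketch {
  public static String buildJsonBatch(String aCol, long aVal,
                                      String bCol, String bVal,
                                      String cCol, int numRows) {
    StringBuilder batch = new StringBuilder("[");
    for (int i = 0; i <= numRows; i++) {
      batch.append("{\"").append(aCol).append("\": ").append(aVal)
           .append(", \"").append(bCol).append("\" : \"").append(bVal).append("\"")
           .append(", \"").append(cCol).append("\" : ").append(i)
           .append(i < numRows ? "}," : "}");
    }
    return batch.append("]").toString();
  }

  public static void main(String[] args) {
    // Mirrors leftBatchString in testMergeJoinLowerLimit (short stand-in for wideString).
    System.out.println(buildJsonBatch("a1", 5, "b1", "wide...", "c1", 2));
  }
}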
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache: class TestOutputBatchSize, method testHashAggAvg.
@Test
public void testHashAggAvg() throws ExecutionSetupException {
  HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1,
      parseExprs("a", "a"), parseExprs("avg(b)", "b_avg"), 1.0f);

  // Create input rows like this:
  // "a" : 1, "b" : 1
  // "a" : 1, "b" : 1
  // "a" : 1, "b" : 1
  List<String> inputJsonBatches = Lists.newArrayList();
  StringBuilder batchString = new StringBuilder();
  batchString.append("[");
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"a\": " + i + ", \"b\": " + i + "},");
    batchString.append("{\"a\": " + i + ", \"b\": " + i + "},");
    batchString.append("{\"a\": " + i + ", \"b\": " + i + "},");
  }
  batchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "},");
  batchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "},");
  batchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "}");
  batchString.append("]");
  inputJsonBatches.add(batchString.toString());

  // Figure out the approximate total output size of the hash aggregate for the input above.
  // We use this sizing information to set the output batch size so we can produce the desired
  // number of batches and verify them.
  // Output rows will look like this:
  // "a" : 1, "b" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a\": " + i + ", \"b\": " + (3 * i) + "},");
  }
  expectedBatchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);

  // Set the output batch size to 1/2 of the expected total size.
  // We should get approximately 2 batches, with a maximum of 4.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashAgg)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns("a", "b_avg")
      .expectedNumBatches(4)              // verify number of batches
      .expectedBatchSize(totalSize / 2);  // verify batch size

  for (int i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues((long) i, (double) i);
  }

  opTestBuilder.go();
}
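The expectedNumBatches(4) above follows from the sizing comment: with the output batch size capped at half the expected total output size, at least two output batches are needed, and the test leaves headroom up to four for per-batch overhead. A minimal, illustrative arithmetic check (the numbers are stand-ins, not values from the test):

// Illustrative only: relates the output_batch_size cap to the minimum batch count.
public class BatchCountSketch {
  public static void main(String[] args) {
    long totalSize = 1_000_000L;      // stand-in for getExpectedSize(expectedJsonBatches)
    long batchLimit = totalSize / 2;  // value passed to output_batch_size
    long minBatches = (totalSize + batchLimit - 1) / batchLimit;  // ceiling division == 2
    System.out.println("at least " + minBatches + " batches; the test accepts up to 4");
  }
}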
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache: class TestOutputBatchSize, method testHashAggSum.
@Test
public void testHashAggSum() throws ExecutionSetupException {
  HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1,
      parseExprs("a", "a"), parseExprs("sum(b)", "b_sum"), 1.0f);

  // Create input rows like this:
  // "a" : 1, "b" : 1
  // "a" : 1, "b" : 1
  // "a" : 1, "b" : 1
  List<String> inputJsonBatches = Lists.newArrayList();
  StringBuilder batchString = new StringBuilder();
  batchString.append("[");
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"a\": " + i + ", \"b\": " + i + "},");
    batchString.append("{\"a\": " + i + ", \"b\": " + i + "},");
    batchString.append("{\"a\": " + i + ", \"b\": " + i + "},");
  }
  batchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "},");
  batchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "},");
  batchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "}");
  batchString.append("]");
  inputJsonBatches.add(batchString.toString());

  // Figure out the approximate total output size of the hash aggregate for the input above.
  // We use this sizing information to set the output batch size so we can produce the desired
  // number of batches and verify them.
  // Output rows will look like this:
  // "a" : 1, "b" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a\": " + i + ", \"b\": " + (3 * i) + "},");
  }
  expectedBatchString.append("{\"a\": " + numRows + ", \"b\": " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);

  // Set the output batch size to 1/2 of the expected total size.
  // We should get approximately 2 batches, with a maximum of 4.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashAgg)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns("a", "b_sum")
      .expectedNumBatches(4)              // verify number of batches
      .expectedBatchSize(totalSize / 2);  // verify batch size

  for (int i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues((long) i, (long) 3 * i);
  }

  opTestBuilder.go();
}
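Comparing the two hash aggregate tests: each group key a = i appears three times with b = i, so SUM(b) is 3 * i while AVG(b) is i, which is why testHashAggSum asserts (long) 3 * i and testHashAggAvg asserts (double) i. A small illustrative check of that arithmetic (not part of the test class):

// Illustrative only: three identical rows (b = i) per group key.
public class AggBaselineSketch {
  public static void main(String[] args) {
    int i = 7;
    long sum = 3L * i;       // baseline for b_sum
    double avg = sum / 3.0;  // baseline for b_avg, equal to i
    System.out.println("sum=" + sum + ", avg=" + avg);
  }
}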
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache: class TestOutputBatchSize, method testFlattenFixedWidthList.
@Test
public void testFlattenFixedWidthList() throws Exception {
  PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
  mockOpContext(flatten, initReservation, maxAllocation);

  // Create input rows like this:
  // "a" : 5, "b" : wideString, "c" : [[1,2,3,4], [5,6,7,8]]
  List<String> inputJsonBatches = Lists.newArrayList();
  StringBuilder batchString = new StringBuilder();
  batchString.append("[");
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + "[1,2,3,4]," + "[5,6,7,8]" + "]");
    batchString.append("},");
  }
  batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : [" + "[1,2,3,4]," + "[5,6,7,8]" + "]");
  batchString.append("}]");
  inputJsonBatches.add(batchString.toString());

  // Figure out the approximate total output size of flatten for the input above.
  // We use this sizing information to set the output batch size so we can produce the desired
  // number of batches and verify them.
  // Output rows will look like this:
  // "a" : 5, "b" : wideString, "c" : [1,2,3,4]
  // "a" : 5, "b" : wideString, "c" : [5,6,7,8]
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"[1,2,3,4]\"},");
    expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"[5,6,7,8]\"},");
  }
  expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"[1,2,3,4]\"},");
  expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"[5,6,7,8]\"}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);

  // Set the output batch size to 1/2 of the expected total size.
  // We should get approximately 2 batches, with a maximum of 4.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(flatten)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns("a", "b", "c")
      .expectedNumBatches(2)          // verify number of batches
      .expectedBatchSize(totalSize);  // verify batch size

  for (int i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, new ArrayList<Long>(Arrays.asList(1L, 2L, 3L, 4L)));
    opTestBuilder.baselineValues(5L, wideString, new ArrayList<Long>(Arrays.asList(5L, 6L, 7L, 8L)));
  }

  opTestBuilder.go();
}
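The baselines above reflect what flatten does to the repeated-list column: each input row with c = [[1,2,3,4],[5,6,7,8]] produces one output row per outer-list element, so every input row yields two output rows whose c values are [1,2,3,4] and [5,6,7,8]. A short illustrative sketch of that per-element expansion (plain Java, not Drill operator code):

// Illustrative only: one output row per element of the outer list in column "c".
import java.util.Arrays;
import java.util.List;

public class FlattenSketch {
  public static void main(String[] args) {
    List<List<Long>> c = Arrays.asList(
        Arrays.asList(1L, 2L, 3L, 4L),
        Arrays.asList(5L, 6L, 7L, 8L));
    for (List<Long> element : c) {
      System.out.println("a=5, b=<wideString>, c=" + element);  // one flattened output row
    }
  }
}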
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache: class TestOutputBatchSize, method testUnionLowerLimit.
@Test
public void testUnionLowerLimit() throws Exception {
  UnionAll unionAll = new UnionAll(Collections.<PhysicalOperator>emptyList());
  mockOpContext(unionAll, initReservation, maxAllocation);

  // Create multiple batches from both sides.
  numRows = 10;

  // Create left input rows like this:
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());

  // Create right input rows like this:
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  rightBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());

  // Output rows will look like this:
  // "a1" : 5, "b1" : wideString, "c1" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows * 2; i++) {
    expectedBatchString.append("{\"a1\": 5, " + "\"c1\" : " + i + "},");
  }
  expectedBatchString.append("{\"a1\": 5, " + "\"c1\" : " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);

  // Set the output batch size very low so we get only one row per batch.
  // We should get 22 batches for 22 rows.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(unionAll)
      .baselineColumns("a1", "b1", "c1")
      .expectedNumBatches(22)        // verify number of batches
      .expectedBatchSize(totalSize)  // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i);
  }
  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5L, wideString, i);
  }

  opTestBuilder.go();
}
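The expectedNumBatches(22) above follows from the row counts: each side contributes numRows + 1 = 11 rows, UNION ALL concatenates them into 22 rows, and the 128-byte output_batch_size cap fits only one wide-string row per batch. An illustrative arithmetic check (not part of the test class):

// Illustrative only: 2 sides x (numRows + 1) rows, one row per batch under the 128-byte cap.
public class UnionBatchCountSketch {
  public static void main(String[] args) {
    int numRows = 10;
    int totalRows = 2 * (numRows + 1);  // 22 rows after UNION ALL
    int rowsPerBatch = 1;               // forced by the tiny output_batch_size
    System.out.println("expected batches = " + totalRows / rowsPerBatch);
  }
}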