Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
From class TestOutputBatchSize, method testProjectVariableWidthFunctions.
@Test
public void testProjectVariableWidthFunctions() throws Exception {
  // size calculators
  StringBuilder batchString = new StringBuilder("[");
  String strValue = "abcde";
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"a\" : " + "\"" + strValue + "\"");
    batchString.append(i != numRows - 1 ? "}," : "}]");
  }
  List<String> inputJsonBatches = Lists.newArrayList();
  inputJsonBatches.add(batchString.toString());
  // inputSize, as calculated below, will be numRows * inputRowSize, where
  // inputRowSize = metadata cols + sizeof("abcde") and numRows = 4000.
  // So inputSize = 4000 * (4 + 1 + 5) = 40000.
  // inputSize is used as the batch memory limit for these tests.
  // Depending on the function being evaluated, a different output batch count is expected.
  long inputSize = getExpectedSize(inputJsonBatches);
  String inputSizeStr = inputSize + "";
  String[][] functions = {
    // concat() o/p size will be 2 x input size, so at least 2 batches expected
    { "concat", strValue + strValue, "concat(a,a)", inputSizeStr, 2 + "" },
    // upper() o/p size will be same as input size, so at least 1 batch is expected
    { "upper", strValue.toUpperCase(), "upper(a)", inputSizeStr, 1 + "" },
    // repeatstr(a, 2): at least 5 batches are expected
    { "repeat", strValue + strValue, "repeatstr(a, 2)", inputSizeStr, 5 + "" },
    // substr(a, 1, 4): at least 1 batch is expected
    { "substr", strValue.substring(0, 4), "substr(a, 1, 4)", inputSizeStr, 1 + "" }
  };
  for (String[] fn : functions) {
    String outputColumnName = fn[0] + "_result";
    String operationResult = fn[1];
    String exprStr = fn[2];
    long memoryLimit = Long.valueOf(fn[3]);
    int expectedNumBatches = Integer.valueOf(fn[4]);
    StringBuilder expectedString = new StringBuilder("[");
    for (int i = 0; i < numRows; i++) {
      expectedString.append("{\"" + outputColumnName + "\":" + operationResult);
      expectedString.append(i != numRows - 1 ? "}," : "}]");
    }
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(expectedString.toString());
    String[] baselineColumns = new String[1];
    baselineColumns[0] = outputColumnName;
    String[] expr = { exprStr, baselineColumns[0] };
    Project projectConf = new Project(parseExprs(expr), null);
    mockOpContext(projectConf, initReservation, maxAllocation);
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", memoryLimit);
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(projectConf)
        .inputDataStreamJson(inputJsonBatches)
        .baselineColumns(baselineColumns)
        .expectedNumBatches(expectedNumBatches)  // verify number of batches
        .expectedBatchSize(memoryLimit);         // verify batch size

    // operation(a, a)
    String[] baseLineValues = { operationResult };
    for (int i = 0; i < numRows; i++) {
      opTestBuilder.baselineValues(baseLineValues);
    }
    opTestBuilder.go();
  }
}
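The batch-count expectations in the comments above can be sanity-checked with back-of-the-envelope arithmetic. A minimal sketch (not Drill code; the 4-byte metadata and 1-byte offset per value are the figures assumed in the comment, while the real sizing is done by Drill's output batch sizer):

public class VariableWidthBatchCountSketch {
  public static void main(String[] args) {
    long numRows = 4000;
    long perValueOverhead = 4 + 1;                                      // metadata cols + offset byte, per the comment
    long inputSize = numRows * (perValueOverhead + "abcde".length());   // 4000 * 10 = 40000, used as the memory limit
    long concatOutputSize = numRows * (perValueOverhead + 10);          // concat(a,a) doubles the value width to 10
    // Minimum batch count is the output size divided by the limit, rounded up:
    long minBatches = (concatOutputSize + inputSize - 1) / inputSize;   // 60000 / 40000 -> at least 2 batches
    System.out.println("concat(a,a): at least " + minBatches + " batches");
  }
}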
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
From class TestOutputBatchSize, method testHashJoinMultipleOutputBatches.
@Test
public void testHashJoinMultipleOutputBatches() throws Exception {
  HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
  mockOpContext(hashJoin, initReservation, maxAllocation);
  numRows = 4000 * 2;

  // create left input rows like this.
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());
  // create right input rows like this.
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());
  // output rows will look like this:
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3
  List<String> expectedJsonBatches = Lists.newArrayList();
  StringBuilder expectedBatchString = new StringBuilder();
  expectedBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
    expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
  expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  expectedBatchString.append("]");
  expectedJsonBatches.add(expectedBatchString.toString());

  long totalSize = getExpectedSize(expectedJsonBatches);
  // Set the output batch size to 1/2 of the total size expected.
  // We will get approximately 4 batches because of the fragmentation factor of 2
  // that the join accounts for.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(4)             // verify number of batches
      .expectedBatchSize(totalSize / 2)  // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5l, wideString, i, 6l, wideString, i);
  }
  opTestBuilder.go();
}
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
From class TestOutputBatchSize, method testHashJoinUpperLimit.
@Test
public void testHashJoinUpperLimit() throws Exception {
  // test the upper limit of 65535 records per batch.
  HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
  mockOpContext(hashJoin, initReservation, maxAllocation);
  numRows = 100000;

  // create left input rows like this.
  // "a1" : 5, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());
  // create right input rows like this.
  // "a2" : 6, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());
  // output rows will look like this:
  // "a1" : 5, "c1" : 1, "a2" : 6, "c2" : 1
  // "a1" : 5, "c1" : 2, "a2" : 6, "c2" : 2
  // "a1" : 5, "c1" : 3, "a2" : 6, "c2" : 3
  // expect two batches, each batch limited to 65535 records
  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashJoin)
      .baselineColumns("a1", "c1", "a2", "c2")
      .expectedNumBatches(2)  // verify number of batches
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5l, i, 6l, i);
  }
  opTestBuilder.go();
}
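The two-batch expectation here is purely a record-count cap: the join returns numRows + 1 = 100001 rows, and each output batch can hold at most 65535 records. A minimal sketch of that arithmetic (not Drill code; 65535 is the per-batch record cap the test comment refers to):

public class UpperLimitSketch {
  public static void main(String[] args) {
    int resultRows = 100_000 + 1;     // numRows + 1 joined rows
    int maxRecordsPerBatch = 65_535;  // upper limit of records per batch
    int minBatches = (resultRows + maxRecordsPerBatch - 1) / maxRecordsPerBatch;  // ceil(100001 / 65535) = 2
    System.out.println("at least " + minBatches + " batches");
  }
}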
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
From class TestOutputBatchSize, method testHashJoinLowerLimit.
@Test
public void testHashJoinLowerLimit() throws Exception {
  // test the lower limit of at least one batch
  HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
  mockOpContext(hashJoin, initReservation, maxAllocation);
  numRows = 10;

  // create left input rows like this.
  // "a1" : 5, "b1" : wideString, "c1" : <id>
  List<String> leftJsonBatches = Lists.newArrayList();
  StringBuilder leftBatchString = new StringBuilder();
  leftBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
  }
  leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
  leftBatchString.append("]");
  leftJsonBatches.add(leftBatchString.toString());
  // create right input rows like this.
  // "a2" : 6, "b2" : wideString, "c2" : <id>
  List<String> rightJsonBatches = Lists.newArrayList();
  StringBuilder rightBatchString = new StringBuilder();
  rightBatchString.append("[");
  for (int i = 0; i < numRows; i++) {
    rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
  }
  rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
  rightBatchString.append("]");
  rightJsonBatches.add(rightBatchString.toString());
  // output rows will look like this:
  // "a1" : 5, "b1" : wideString, "c1" : 1, "a2" : 6, "b2" : wideString, "c2" : 1
  // "a1" : 5, "b1" : wideString, "c1" : 2, "a2" : 6, "b2" : wideString, "c2" : 2
  // "a1" : 5, "b1" : wideString, "c1" : 3, "a2" : 6, "b2" : wideString, "c2" : 3
  // set a very low output batch size so that each batch can hold only one row.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(hashJoin)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(10)  // verify number of batches
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long i = 0; i < numRows + 1; i++) {
    opTestBuilder.baselineValues(5l, wideString, i, 6l, wideString, i);
  }
  opTestBuilder.go();
}
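Here the 128-byte limit is far smaller than one joined row (which carries two copies of wideString), so each output batch ends up holding a single row, and the 11 result rows need roughly that many batches; expectedNumBatches(10) lines up with the at-least-N reading used in the comments of the first test. A minimal sketch of the count (not Drill code; the one-row-per-batch behavior is the lower-limit guarantee the comment describes):

public class LowerLimitSketch {
  public static void main(String[] args) {
    int resultRows = 10 + 1;  // numRows + 1 joined rows
    int rowsPerBatch = 1;     // a 128-byte limit forces one row per output batch
    int batches = resultRows / rowsPerBatch;  // = 11, consistent with expecting at least 10 batches
    System.out.println(batches + " single-row batches");
  }
}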
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
From class TestOutputBatchSize, method testFlattenEmptyList.
@Test
public void testFlattenEmptyList() throws Exception {
  final PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("b"));
  List<String> inputJsonBatches = Lists.newArrayList();
  StringBuilder batchString = new StringBuilder();
  StringBuilder flattenElement = new StringBuilder();
  flattenElement.append("[");
  flattenElement.append("]");
  batchString.append("[");
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"a\": 5, " + "\"b\" : " + flattenElement + "},");
  }
  batchString.append("{\"a\": 5, " + "\"b\" : " + flattenElement + "}");
  batchString.append("]");
  inputJsonBatches.add(batchString.toString());

  LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
      .physicalOperator(flatten)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns("a", "b")
      .expectZeroRows();
  opTestBuilder.go();
}