use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
the class TestOutputBatchSize method testMergeJoinUpperLimit.
/**
 * Checks the merge join against the 65535-records-per-batch upper limit.
 *
 * <p>Both inputs carry the ids {@code 0..numRows} inclusive (100001 rows each)
 * and are joined with a LEFT join on {@code c1 = c2}. The builder is told to
 * expect two output batches.
 *
 * @throws Exception if setting up or running the operator test fails
 */
@Test
public void testMergeJoinUpperLimit() throws Exception {
  final MergeJoinPOP joinOp = new MergeJoinPOP(
      null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.LEFT);
  mockOpContext(joinOp, initReservation, maxAllocation);

  numRows = 100000;

  // Left rows look like  {"a1": 5, "c1" : <id>}
  // Right rows look like {"a2": 6, "c2" : <id>}
  final StringBuilder leftJson = new StringBuilder("[");
  final StringBuilder rightJson = new StringBuilder("[");
  for (int id = 0; id <= numRows; id++) {
    // Separator goes in front of every row but the first, so no trailing comma.
    if (id > 0) {
      leftJson.append(",");
      rightJson.append(",");
    }
    leftJson.append("{\"a1\": 5, \"c1\" : ").append(id).append("}");
    rightJson.append("{\"a2\": 6, \"c2\" : ").append(id).append("}");
  }
  leftJson.append("]");
  rightJson.append("]");

  final List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());
  final List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());

  // Output rows pair each id with itself:
  //   "a1" : 5, "c1" : <id>, "a2" : 6, "c2" : <id>
  // Two batches are expected because a batch is limited to 65535 records.
  final LegacyOperatorTestBuilder builder = legacyOpTestBuilder()
      .physicalOperator(joinOp)
      .baselineColumns("a1", "c1", "a2", "c2")
      .expectedNumBatches(2) // verify number of batches
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long id = 0; id <= numRows; id++) {
    builder.baselineValues(5L, id, 6L, id);
  }

  builder.go();
}
use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
the class TestOutputBatchSize method testProjectVariableWidthMixed.
/**
 * Tests a projection that mixes a transferred column with newly computed ones.
 *
 * <p>The input has fifty string columns {@code C1 .. C50}. The projection
 * passes {@code C1} through as {@code COL1TR} and emits
 * {@code lower(C1) .. lower(C50)} under the original column names. The output
 * batch size option is set to half of the expected total size, and the builder
 * is given two as the expected number of batches and that same half size as
 * the expected batch size.
 *
 * @throws Exception if setting up or running the operator test fails
 */
@Test
public void testProjectVariableWidthMixed() throws Exception {
  final String upperValue = "ABCDEFGHIJ";
  final String lowerValue = StringUtils.lowerCase(upperValue);
  // 50 new columns and 1 transfer
  final int columnTotal = 50 + 1;

  final String[] baselineColumns = new String[columnTotal];
  final Object[] baselineValues = new String[columnTotal];
  // Expressions are stored as (expression, output name) pairs.
  final String[] projections = new String[2 * columnTotal];

  // Slot 0 is the transfer: C1 as COL1TR
  projections[0] = "C1";
  projections[1] = "COL1TR";
  baselineColumns[0] = "COL1TR";
  baselineValues[0] = upperValue;

  // Slots 1..50 are the new columns: lower(C1) as C1, ... lower(C50) as C50
  final StringBuilder rowFields = new StringBuilder("{");
  for (int col = 1; col < columnTotal; col++) {
    final String columnName = "C" + col;
    if (col > 1) {
      rowFields.append(",");
    }
    rowFields.append("\"").append(columnName).append("\": \"").append(upperValue).append("\"");

    baselineColumns[col] = columnName;
    projections[col * 2] = "lower(" + columnName + ")";
    projections[col * 2 + 1] = columnName;
    baselineValues[col] = lowerValue;
  }

  final String sharedFields = rowFields.toString();
  final String inputRow = sharedFields + "}";
  final String expectedRow = sharedFields + ", \"COL1TR\": \"" + upperValue + "\"}";

  final StringBuilder inputJson = new StringBuilder("[");
  final StringBuilder expectedJson = new StringBuilder("[");
  for (int row = 0; row < numRows; row++) {
    if (row > 0) {
      inputJson.append(",");
      expectedJson.append(",");
    }
    inputJson.append(inputRow);
    expectedJson.append(expectedRow);
  }
  inputJson.append("]");
  expectedJson.append("]");

  final List<String> inputJsonBatches = Lists.newArrayList(inputJson.toString());
  final List<String> expectedJsonBatches = Lists.newArrayList(expectedJson.toString());

  final Project projectOp = new Project(parseExprs(projections), null);
  mockOpContext(projectOp, initReservation, maxAllocation);

  // Size the output batches at half of the expected total.
  final long totalSize = getExpectedSize(expectedJsonBatches);
  final long halfSize = totalSize / 2;
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", halfSize);

  final LegacyOperatorTestBuilder builder = legacyOpTestBuilder()
      .physicalOperator(projectOp)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns(baselineColumns)
      .expectedNumBatches(2) // verify number of batches
      .expectedBatchSize(halfSize); // verify batch size.

  // Every output row carries the same values.
  for (int row = 0; row < numRows; row++) {
    builder.baselineValues(baselineValues);
  }

  builder.go();
}
use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
the class TestOutputBatchSize method testFlattenVariableWidthList.
/**
 * Tests flatten over column {@code c} when each input row holds a list of two
 * string lists.
 *
 * <p>{@code numRows + 1} input rows are generated, and each is expected to
 * flatten into two output rows, one per inner list. The output batch size
 * option is set to half of the expected total size.
 *
 * @throws Exception if setting up or running the operator test fails
 */
@Test
public void testFlattenVariableWidthList() throws Exception {
  final PhysicalOperator flattenOp = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
  mockOpContext(flattenOp, initReservation, maxAllocation);

  // Every row starts with: "a" : 5, "b" : wideString, "c" :
  final String rowPrefix = "{\"a\": 5, \"b\" : \"" + wideString + "\",\"c\" : ";

  // Input rows look like this.
  // "a" : 5, "b" : wideString, "c" : [["parrot", "hummingbird", "owl", "woodpecker"], ["hawk", "nightingale", "swallow", "peacock"]]
  final String inputRow = rowPrefix
      + "[[\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\"],"
      + "[\"hawk\",\"nightingale\",\"swallow\",\"peacock\"]]}";

  final StringBuilder inputJson = new StringBuilder("[");
  for (int row = 0; row < numRows; row++) {
    inputJson.append(inputRow).append(",");
  }
  inputJson.append(inputRow).append("]");
  final List<String> inputJsonBatches = Lists.newArrayList(inputJson.toString());

  // Estimate the total flatten output size for the input above; it is used to
  // pick an output batch size that yields a verifiable number of batches.
  // Output rows look like this.
  // "a" : 5, "b" : wideString, "c" : ["parrot", "hummingbird", "owl", "woodpecker"]
  // "a" : 5, "b" : wideString, "c" : ["hawk", "nightingale", "swallow", "peacock"]
  final String firstExpectedRow =
      rowPrefix + "[\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\"]}";
  final String secondExpectedRow =
      rowPrefix + "[\"hawk\", \"nightingale\", \"swallow\", \"peacock\"]}";

  final StringBuilder expectedJson = new StringBuilder("[");
  for (int row = 0; row < numRows; row++) {
    expectedJson.append(firstExpectedRow).append(",").append(secondExpectedRow).append(",");
  }
  expectedJson.append(firstExpectedRow).append(",").append(secondExpectedRow).append("]");
  final List<String> expectedJsonBatches = Lists.newArrayList(expectedJson.toString());

  final long totalSize = getExpectedSize(expectedJsonBatches);

  // Set the output batch size to 1/2 of the total size expected.
  // We will get approximately 2 batches and a max of 4.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);

  // NOTE(review): the configured limit is totalSize / 2 but the expected batch
  // size passed below is totalSize - confirm the looser value is intentional.
  final LegacyOperatorTestBuilder builder = legacyOpTestBuilder()
      .physicalOperator(flattenOp)
      .inputDataStreamJson(inputJsonBatches)
      .baselineColumns("a", "b", "c")
      .expectedNumBatches(2) // verify number of batches
      .expectedBatchSize(totalSize); // verify batch size.

  final JsonStringArrayList<Text> firstBirds = new JsonStringArrayList<Text>() {
    {
      add(new Text("parrot"));
      add(new Text("hummingbird"));
      add(new Text("owl"));
      add(new Text("woodpecker"));
    }
  };
  final JsonStringArrayList<Text> secondBirds = new JsonStringArrayList<Text>() {
    {
      add(new Text("hawk"));
      add(new Text("nightingale"));
      add(new Text("swallow"));
      add(new Text("peacock"));
    }
  };

  // One pair of baseline rows per input row (numRows + 1 input rows in total).
  for (int row = 0; row < numRows + 1; row++) {
    builder.baselineValues(5L, wideString, firstBirds);
    builder.baselineValues(5L, wideString, secondBirds);
  }

  builder.go();
}
use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
the class TestOutputBatchSize method testMergeJoinSingleOutputBatch.
/**
 * Tests that the merge join emits a single output batch when the output batch
 * size is set to twice the expected total size.
 *
 * <p>Both inputs carry the ids {@code 0..numRows} inclusive (with
 * {@code numRows = 4096 * 2}) plus a wide string column, and are joined with
 * an INNER join on {@code c1 = c2}.
 *
 * @throws Exception if setting up or running the operator test fails
 */
@Test
public void testMergeJoinSingleOutputBatch() throws Exception {
  final MergeJoinPOP joinOp = new MergeJoinPOP(
      null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
  mockOpContext(joinOp, initReservation, maxAllocation);

  // create multiple batches from both sides.
  numRows = 4096 * 2;

  // Left rows:  "a1" : 5, "b1" : wideString, "c1" : <id>
  // Right rows: "a2" : 6, "b2" : wideString, "c2" : <id>
  // Output rows are the left fields followed by the right fields for the same id:
  //   "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>
  final String leftFields = "\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : ";
  final String rightFields = "\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";

  final StringBuilder leftJson = new StringBuilder("[");
  final StringBuilder rightJson = new StringBuilder("[");
  final StringBuilder expectedJson = new StringBuilder("[");
  for (int id = 0; id <= numRows; id++) {
    // Separator goes in front of every row but the first, so no trailing comma.
    if (id > 0) {
      leftJson.append(",");
      rightJson.append(",");
      expectedJson.append(",");
    }
    leftJson.append("{").append(leftFields).append(id).append("}");
    rightJson.append("{").append(rightFields).append(id).append("}");
    expectedJson.append("{").append(leftFields).append(id)
        .append(", ").append(rightFields).append(id).append("}");
  }
  leftJson.append("]");
  rightJson.append("]");
  expectedJson.append("]");

  final List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());
  final List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());
  final List<String> expectedJsonBatches = Lists.newArrayList(expectedJson.toString());

  final long totalSize = getExpectedSize(expectedJsonBatches);

  // Set the output batch size to twice the total size expected.
  // We should get 1 batch.
  fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize * 2);

  final LegacyOperatorTestBuilder builder = legacyOpTestBuilder()
      .physicalOperator(joinOp)
      .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
      .expectedNumBatches(1) // verify number of batches
      .expectedBatchSize(totalSize) // verify batch size
      .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

  for (long id = 0; id <= numRows; id++) {
    builder.baselineValues(5L, wideString, id, 6L, wideString, id);
  }

  builder.go();
}
Aggregations