Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
Class TestOutputBatchSize, method testFlattenLargeRecords.
@Test
public void testFlattenLargeRecords() throws Exception {
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
// create input rows like this.
// "a" : <id1>, "b" : wideString, "c" : [ 10 wideStrings ]
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
int arrayLength = 10;
StringBuilder test = new StringBuilder();
test.append("[ \"");
for (int i = 0; i < arrayLength; i++) {
test.append(wideString);
test.append("\",\"");
}
test.append(wideString);
test.append("\"]");
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{" + "\"a\" :" + (new StringBuilder().append(i)) + ",\"b\": \"" + wideString + "\"," + "\"c\": " + test + "},");
}
batchString.append("{" + "\"a\" :" + (new StringBuilder().append(numRows)) + ",\"b\": \"" + wideString + "\"," + "\"c\": " + test + "}");
batchString.append("]");
inputJsonBatches.add(batchString.toString());
// output rows will be like this.
// "a" : <id1>, "b" : wideString, "c" : wideString
// Figure out the approximate total output size out of flatten for the input above.
// We will use this sizing information to set the output batch size so we can produce the desired
// number of batches for verification.
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int k = 0; k < (numRows) * 11; k++) {
expectedBatchString.append("{" + "\"a\" :" + (new StringBuilder().append(k)) + ",\"b\": \"" + wideString + "\",");
expectedBatchString.append("\"c\": \"" + wideString + "\"},");
}
expectedBatchString.append("{" + "\"a\" :" + (new StringBuilder().append(numRows)) + ",\"b\": \"" + wideString + "\",");
expectedBatchString.append("\"c\": \"" + wideString + "\"}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Set the output batch size to 1/2 of the total size expected.
// We should get approximately 2 batches, with a max of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(flatten)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b", "c")
    .expectedNumBatches(2)              // verify number of batches
    .expectedBatchSize(totalSize / 2);  // verify batch size
for (long k = 0; k < ((numRows + 1)); k++) {
for (int j = 0; j < arrayLength + 1; j++) {
opTestBuilder.baselineValues(k, wideString, wideString);
}
}
opTestBuilder.go();
}
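For comparison, the same builder pattern works with a much smaller input. The following is a minimal sketch, not part of the original test class, assuming the TestOutputBatchSize harness shown above (wideString, initReservation, maxAllocation, mockOpContext, legacyOpTestBuilder) is available.
// Minimal flatten sketch: two rows, each carrying a two-element array "c",
// so flatten should emit four output rows that fit comfortably in one batch.
PhysicalOperator smallFlatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(smallFlatten, initReservation, maxAllocation);
List<String> smallInput = Lists.newArrayList(
    "[{\"a\": 1, \"c\": [\"x\", \"y\"]}, {\"a\": 2, \"c\": [\"x\", \"y\"]}]");
LegacyOperatorTestBuilder smallTest = legacyOpTestBuilder()
    .physicalOperator(smallFlatten)
    .inputDataStreamJson(smallInput)
    .baselineColumns("a", "c")
    .expectedNumBatches(1);
smallTest.baselineValues(1L, "x");
smallTest.baselineValues(1L, "y");
smallTest.baselineValues(2L, "x");
smallTest.baselineValues(2L, "y");
smallTest.go();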
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
Class TestOutputBatchSize, method testLeftOuterHashJoin.
@Test
public void testLeftOuterHashJoin() throws Exception {
HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.LEFT);
mockOpContext(hashJoin, initReservation, maxAllocation);
numRows = 4000 * 2;
// create left input rows like this.
// "a1" : 5, "b1" : wideString, "c1" : <id>
List<String> leftJsonBatches = Lists.newArrayList();
StringBuilder leftBatchString = new StringBuilder();
leftBatchString.append("[");
for (int i = 0; i < numRows; i++) {
leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
}
leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
leftBatchString.append("]");
leftJsonBatches.add(leftBatchString.toString());
// create right input rows like this.
// "a2" : 6, "b2" : wideString, "c2" : <id>
List<String> rightJsonBatches = Lists.newArrayList();
StringBuilder rightBatchString = new StringBuilder();
rightBatchString.append("[");
for (int i = 0; i < numRows; i++) {
rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
}
rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
rightBatchString.append("]");
rightJsonBatches.add(rightBatchString.toString());
// output rows will be like this.
// "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
// "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
// "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
}
expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Set the output batch size to 1/2 of the total size expected.
// We should get approximately 4 batches because of the fragmentation factor of 2 accounted for in the hash join.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(hashJoin)
    .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
    .expectedNumBatches(4)              // verify number of batches
    .expectedBatchSize(totalSize / 2)   // verify batch size
    .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
for (long i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
}
opTestBuilder.go();
}
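The expectedNumBatches value above comes from simple sizing arithmetic. A hypothetical helper, not part of the test framework, that captures the estimate used in these join tests might look like this:
// Illustrative estimate only: base batch count is the total output size divided by the
// configured output batch size, multiplied by the fragmentation factor the test accounts for.
static long estimatedBatchCount(long totalOutputBytes, long outputBatchSize, long fragmentationFactor) {
  long baseBatches = Math.max(1, Math.round((double) totalOutputBytes / outputBatchSize));
  return baseBatches * fragmentationFactor;
}
// For this test, estimatedBatchCount(totalSize, totalSize / 2, 2) is about 2 * 2 = 4,
// matching the expectedNumBatches(4) assertion above.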
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
Class TestOutputBatchSize, method testHashJoinSingleOutputBatch.
@Test
public void testHashJoinSingleOutputBatch() throws Exception {
HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
mockOpContext(hashJoin, initReservation, maxAllocation);
// create multiple batches from both sides.
numRows = 4096 * 2;
// create left input rows like this.
// "a1" : 5, "b1" : wideString, "c1" : <id>
List<String> leftJsonBatches = Lists.newArrayList();
StringBuilder leftBatchString = new StringBuilder();
leftBatchString.append("[");
for (int i = 0; i < numRows; i++) {
leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
}
leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
leftBatchString.append("]");
leftJsonBatches.add(leftBatchString.toString());
// create right input rows like this.
// "a2" : 6, "b2" : wideString, "c2" : <id>
List<String> rightJsonBatches = Lists.newArrayList();
StringBuilder rightBatchString = new StringBuilder();
rightBatchString.append("[");
for (int i = 0; i < numRows; i++) {
rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
}
rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
rightBatchString.append("]");
rightJsonBatches.add(rightBatchString.toString());
// output rows will be like this.
// "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
// "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
// "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
}
expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Set the output batch size to twice the total size expected.
// We should get 1 batch.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize * 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(hashJoin)
    .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
    .expectedNumBatches(1)         // verify number of batches
    .expectedBatchSize(totalSize)  // verify batch size
    .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
for (long i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
}
opTestBuilder.go();
}
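The left and right JSON inputs in these join tests are built with nearly identical loops. A hypothetical helper, not present in TestOutputBatchSize, could factor that construction out while producing the same batch string:
// Hypothetical helper: builds one JSON batch of numRows + 1 rows shaped like
// {"a<suffix>": constant, "b<suffix>": "<wideString>", "c<suffix>": <rowId>}.
private static String buildJoinInputBatch(String suffix, int constant, String wideString, int numRows) {
  StringBuilder batch = new StringBuilder("[");
  for (int i = 0; i <= numRows; i++) {
    batch.append("{\"a").append(suffix).append("\": ").append(constant)
         .append(", \"b").append(suffix).append("\": \"").append(wideString)
         .append("\", \"c").append(suffix).append("\": ").append(i)
         .append(i < numRows ? "}," : "}");
  }
  return batch.append("]").toString();
}
// Usage: leftJsonBatches.add(buildJoinInputBatch("1", 5, wideString, numRows));
//        rightJsonBatches.add(buildJoinInputBatch("2", 6, wideString, numRows));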
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
Class TestOutputBatchSize, method testFlattenVariableWidth.
@Test
public void testFlattenVariableWidth() throws Exception {
PhysicalOperator flatten = new FlattenPOP(null, SchemaPath.getSimplePath("c"));
mockOpContext(flatten, initReservation, maxAllocation);
// create input rows like this.
// "a" : 5, "b" : wideString, "c" : ["parrot", "hummingbird", "owl", "woodpecker", "peacock"]
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\",\"c\" : [\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\", \"peacock\"]},");
}
batchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\",\"c\" : [\"parrot\", \"hummingbird\", \"owl\", \"woodpecker\", \"peacock\"]}");
batchString.append("]");
inputJsonBatches.add(batchString.toString());
// Figure out the approximate total output size out of flatten for the input above.
// We will use this sizing information to set the output batch size so we can produce the desired
// number of batches for verification.
// output rows will be like this.
// "a" : 5, "b" : wideString, "c" : parrot
// "a" : 5, "b" : wideString, "c" : hummingbird
// "a" : 5, "b" : wideString, "c" : owl
// "a" : 5, "b" : wideString, "c" : woodpecker
// "a" : 5, "b" : wideString, "c" : peacock
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"parrot\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"hummingbird\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"owl\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"woodpecker\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"peacock\"},");
}
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"parrot\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"hummingbird\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"owl\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"woodpecker\"},");
expectedBatchString.append("{\"a\": 5, " + "\"b\" : " + "\"" + wideString + "\"," + "\"c\" : \"peacock\"}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Set the output batch size to 1/2 of the total size expected.
// We should get approximately 2 batches, with a max of 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(flatten)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b", "c")
    .expectedNumBatches(2)              // verify number of batches
    .expectedBatchSize(totalSize / 2);  // verify batch size
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5L, wideString, "parrot");
opTestBuilder.baselineValues(5L, wideString, "hummingbird");
opTestBuilder.baselineValues(5L, wideString, "owl");
opTestBuilder.baselineValues(5L, wideString, "woodpecker");
opTestBuilder.baselineValues(5L, wideString, "peacock");
}
opTestBuilder.go();
}
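Since flatten emits one output row per array element, the baseline loop above enumerates every expanded row. A short sanity check of the expected row count, assuming the same variables, is:
// Each of the numRows + 1 input rows carries a 5-element array, so flatten should
// produce (numRows + 1) * 5 output rows in total across all output batches,
// matching the five baselineValues calls per loop iteration above.
long expectedFlattenedRows = (numRows + 1) * 5L;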
Use of org.apache.drill.test.LegacyOperatorTestBuilder in project drill by apache.
Class TestOutputBatchSize, method testRightOuterHashJoin.
@Test
public void testRightOuterHashJoin() throws Exception {
HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.RIGHT);
mockOpContext(hashJoin, initReservation, maxAllocation);
numRows = 4000 * 2;
// create left input rows like this.
// "a1" : 5, "b1" : wideString, "c1" : <id>
List<String> leftJsonBatches = Lists.newArrayList();
StringBuilder leftBatchString = new StringBuilder();
leftBatchString.append("[");
for (int i = 0; i < numRows; i++) {
leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i + "},");
}
leftBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows + "}");
leftBatchString.append("]");
leftJsonBatches.add(leftBatchString.toString());
// create right input rows like this.
// "a2" : 6, "b2" : wideString, "c2" : <id>
List<String> rightJsonBatches = Lists.newArrayList();
StringBuilder rightBatchString = new StringBuilder();
rightBatchString.append("[");
for (int i = 0; i < numRows; i++) {
rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
}
rightBatchString.append("{\"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
rightBatchString.append("]");
rightJsonBatches.add(rightBatchString.toString());
// output rows will be like this.
// "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
// "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2
// "a1" : 5, "b1" : wideString, "c1" : 3, "a2":6, "b2" : wideString, "c2": 3
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + i);
expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + i + "},");
}
expectedBatchString.append("{\"a1\": 5, " + "\"b1\" : " + "\"" + wideString + "\"," + "\"c1\" : " + numRows);
expectedBatchString.append(", \"a2\": 6, " + "\"b2\" : " + "\"" + wideString + "\"," + "\"c2\" : " + numRows + "}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Set the output batch size to 1/2 of the total size expected.
// We should get approximately 4 batches because of the fragmentation factor of 2 accounted for in the hash join.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(hashJoin)
    .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
    .expectedNumBatches(4)              // verify number of batches
    .expectedBatchSize(totalSize / 2)   // verify batch size
    .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));
for (long i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues(5L, wideString, i, 6L, wideString, i);
}
opTestBuilder.go();
}
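Each test sets the batch size option by its raw name. In the Drill code base the same key is also exposed as a constant, so an equivalent call (assuming ExecConstants.OUTPUT_BATCH_SIZE in org.apache.drill.exec.ExecConstants maps to "drill.exec.memory.operator.output_batch_size") would be:
// Equivalent to the string-literal form used in the tests above.
fragContext.getOptions().setLocalOption(ExecConstants.OUTPUT_BATCH_SIZE, totalSize / 2);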