use of org.apache.drill.exec.physical.config.HashAggregate in project drill by apache.
the class TestHashAggEmitOutcome method testHashAggrEmit.
// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestHashAggEmitOutcome.class);
/**
* A generic method to execute a Hash-Aggr emit test, based on the given parameters.
* It can take at most two generic input batches and verify against at most two non-empty
* output batches (any number of empty input/output batches may be used).
*
* This interface is a little ugly, because Java does not support simple initializations
* other than for arrays (e.g., no "structs" like in C++).
*
* Input batch 1 is built in (see BaseTestOpBatchEmitOutcome.java). An illustrative call
* is sketched after the method body.
* @param inp2_1 - Input batch 2, first col (use null if not needed)
* @param inp2_2 - Input batch 2, second col
* @param inp2_3 - Input batch 2, third col
* @param inp3_1 - Input batch 3, first col (use null if not needed)
* @param inp3_2 - Input batch 3, second col
* @param inp3_3 - Input batch 3, third col
* @param exp1_1 - First expected batch, col 1
* @param exp1_2 - First expected batch, col 2
* @param exp2_1 - Second expected batch, col 1
* @param exp2_2 - Second expected batch, col 2
* @param inpRowSet - Which input batches to use (the empty one, i.e. 0, may be used multiple times)
* @param inpOutcomes - The IterOutcome to attach to each input batch
* @param outputRowCounts - Expected number of rows in each output batch
* @param outputOutcomes - The expected output IterOutcomes
*/
private void testHashAggrEmit(int[] inp2_1, int[] inp2_2, String[] inp2_3, // first input batch
    int[] inp3_1, int[] inp3_2, String[] inp3_3, // second input batch
    String[] exp1_1, int[] exp1_2, // first expected
    String[] exp2_1, int[] exp2_2, // second expected
    int[] inpRowSet, RecordBatch.IterOutcome[] inpOutcomes, // input batches + outcomes
    List<Integer> outputRowCounts, // output row counts per each out batch
    List<RecordBatch.IterOutcome> outputOutcomes) { // output outcomes
// First input batch
RowSetBuilder builder2 = operatorFixture.rowSetBuilder(inputSchema);
if (inp2_1 != null) {
for (int i = 0; i < inp2_1.length; i++) {
builder2 = builder2.addRow(inp2_1[i], inp2_2[i], inp2_3[i]);
}
}
final RowSet.SingleRowSet nonEmptyInputRowSet2 = builder2.build();
// Second input batch
RowSetBuilder builder3 = operatorFixture.rowSetBuilder(inputSchema);
if (inp3_1 != null) {
for (int i = 0; i < inp3_1.length; i++) {
builder3 = builder3.addRow(inp3_1[i], inp3_2[i], inp3_3[i]);
}
}
final RowSet.SingleRowSet nonEmptyInputRowSet3 = builder3.build();
final TupleMetadata resultSchema = new SchemaBuilder()
    .add("name", TypeProtos.MinorType.VARCHAR)
    .addNullable("total_sum", TypeProtos.MinorType.BIGINT)
    .buildSchema();
// First expected batch
RowSetBuilder expectedBuilder1 = operatorFixture.rowSetBuilder(resultSchema);
if (exp1_1 != null) {
for (int i = 0; i < exp1_1.length; i++) {
expectedBuilder1 = expectedBuilder1.addRow(exp1_1[i], (long) exp1_2[i]);
}
}
final RowSet.SingleRowSet expectedRowSet1 = expectedBuilder1.build();
// Second expected batch
RowSetBuilder expectedBuilder2 = operatorFixture.rowSetBuilder(resultSchema);
if (exp2_1 != null) {
for (int i = 0; i < exp2_1.length; i++) {
expectedBuilder2 = expectedBuilder2.addRow(exp2_1[i], (long) exp2_2[i]);
}
}
final RowSet.SingleRowSet expectedRowSet2 = expectedBuilder2.build();
// Add the input batches, in the order/type given
for (int inp : inpRowSet) {
switch(inp) {
case 0:
inputContainer.add(emptyInputRowSet.container());
break;
case 1:
inputContainer.add(nonEmptyInputRowSet.container());
break;
case 2:
inputContainer.add(nonEmptyInputRowSet2.container());
break;
case 3:
inputContainer.add(nonEmptyInputRowSet3.container());
break;
default:
fail();
}
}
// build the outcomes
inputOutcomes.addAll(Arrays.asList(inpOutcomes));
//
// Build the Hash Agg Batch operator
//
final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(),
    opContext, inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
final HashAggregate hashAggrConfig = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1,
    parseExprs("name_left", "name"), parseExprs("sum(id_left+cost_left)", "total_sum"), 1.0f);
final HashAggBatch haBatch = new HashAggBatch(hashAggrConfig, mockInputBatch,
    operatorFixture.getFragmentContext());
//
// Iterate thru the next batches, and verify expected outcomes
//
assertEquals(outputRowCounts.size(), outputOutcomes.size());
boolean firstOne = true;
for (int ind = 0; ind < outputOutcomes.size(); ind++) {
RecordBatch.IterOutcome expOut = outputOutcomes.get(ind);
assertSame(expOut, haBatch.next());
if (expOut == NONE) {
break; // done
}
RowSet actualRowSet = DirectRowSet.fromContainer(haBatch.getContainer());
int expectedSize = outputRowCounts.get(ind);
// System.out.println(expectedSize);
if (0 == expectedSize) {
assertEquals(expectedSize, haBatch.getRecordCount());
} else if (firstOne) {
firstOne = false;
new RowSetComparison(expectedRowSet1).verify(actualRowSet);
} else {
new RowSetComparison(expectedRowSet2).verify(actualRowSet);
}
}
// Release memory for row sets
nonEmptyInputRowSet2.clear();
nonEmptyInputRowSet3.clear();
expectedRowSet2.clear();
expectedRowSet1.clear();
}
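For orientation, a call to this harness might look like the following. This is a minimal sketch, not a test from the actual Drill suite: the argument values are illustrative, assuming input batch 1 holds the single row (1, 10, "item1") built in BaseTestOpBatchEmitOutcome, and that the operator returns an empty OK_NEW_SCHEMA batch before the EMIT results, as the harness's verification loop expects.
@Test
public void testHashAggrEmitExampleSketch() { // hypothetical test, values illustrative
// Batch 1 arrives with OK_NEW_SCHEMA; batch 2 closes the group with EMIT,
// so sum(id_left+cost_left) grouped by name_left covers both batches:
// item1 -> 1+10 = 11, item2 -> (2+20)*2 = 44, item13 -> 13+130 = 143.
testHashAggrEmit(
    new int[]{2, 2, 13}, // input batch 2, col 1 (id_left)
    new int[]{20, 20, 130}, // input batch 2, col 2 (cost_left)
    new String[]{"item2", "item2", "item13"}, // input batch 2, col 3 (name_left)
    null, null, null, // no input batch 3
    new String[]{"item1", "item2", "item13"}, // first expected batch, col 1
    new int[]{11, 44, 143}, // first expected batch, col 2
    null, null, // no second expected batch
    new int[]{1, 2}, // feed built-in batch 1, then batch 2
    new RecordBatch.IterOutcome[]{OK_NEW_SCHEMA, EMIT},
    Arrays.asList(0, 3), // empty schema batch, then 3 result rows
    Arrays.asList(OK_NEW_SCHEMA, EMIT));
}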
use of org.apache.drill.exec.physical.config.HashAggregate in project drill by apache.
the class TestOutputBatchSize method testHashAggMax.
@Test
public void testHashAggMax() throws ExecutionSetupException {
HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1,
    parseExprs("a", "a"), parseExprs("max(b)", "b_max"), 1.0f);
// Create input rows like this (three rows per key value):
// "a" : 1, "b" : "a"
// "a" : 1, "b" : "aa"
// "a" : 1, "b" : "aaa"
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder batchString = new StringBuilder();
batchString.append("[");
for (int i = 0; i < numRows; i++) {
batchString.append("{\"a\": " + i + ", \"b\": " + "\"a\"" + "},");
batchString.append("{\"a\": " + i + ", \"b\": " + "\"aa\"" + "},");
batchString.append("{\"a\": " + i + ", \"b\": " + "\"aaa\"" + "},");
}
batchString.append("{\"a\": " + numRows + ", \"b\": " + "\"a\"" + "},");
batchString.append("{\"a\": " + numRows + ", \"b\": " + "\"aa\"" + "},");
batchString.append("{\"a\": " + numRows + ", \"b\": " + "\"aaa\"" + "}");
batchString.append("]");
inputJsonBatches.add(batchString.toString());
// Figure out the approximate total output size of the hash agg for the input above.
// We will use this sizing information to set the output batch size so we can produce
// the desired number of batches for verification.
// Output rows will look like this:
// "a" : 1, "b" : "aaa"
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatchString = new StringBuilder();
expectedBatchString.append("[");
for (int i = 0; i < numRows; i++) {
expectedBatchString.append("{\"a\": " + i + ", \"b\": " + "\"aaa\"" + "},");
}
expectedBatchString.append("{\"a\": " + numRows + ", \"b\": " + "\"aaa\"" + "}");
expectedBatchString.append("]");
expectedJsonBatches.add(expectedBatchString.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Set the output batch size to 1/2 of the expected total size.
// We should get approximately 2 batches, and at most 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
    .physicalOperator(hashAgg)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b_max")
    .expectedNumBatches(2) // verify number of batches
    .expectedBatchSize(totalSize); // verify batch size
for (int i = 0; i < numRows + 1; i++) {
opTestBuilder.baselineValues((long) i, "aaa");
}
opTestBuilder.go();
}
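The expectedNumBatches value follows from simple ceiling arithmetic: capping each output batch at half the total output size forces at least two batches. A sketch of the calculation (variable names are illustrative):
long batchSize = totalSize / 2; // the configured cap
long minBatches = (totalSize + batchSize - 1) / batchSize; // ceil(total / cap), approximately 2
// The operator may split output further for internal reasons (hence "at most 4" above),
// but it cannot emit everything in fewer than minBatches batches.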
use of org.apache.drill.exec.physical.config.HashAggregate in project drill by apache.
the class HashAggPrel method getPhysicalOperator.
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
Prel child = (Prel) this.getInput();
HashAggregate g = new HashAggregate(child.getPhysicalOperator(creator), operPhase, keys, aggExprs, 1.0f);
return creator.addMetadata(this, g);
}
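All three snippets build the HashAggregate config with the same five arguments. A sketch of that shape, with the parameter roles as they appear above (parseExprs is the test-base helper that pairs each expression with its output column name; HashAggPrel passes its keys and aggExprs directly instead):
HashAggregate hashAgg = new HashAggregate(
    child, // upstream PhysicalOperator; null in the unit tests, where a mock batch supplies input
    AggPrelBase.OperatorPhase.PHASE_1of1, // single-phase aggregation
    parseExprs("a", "a"), // group-by keys: (expression, output column name)
    parseExprs("max(b)", "b_max"), // aggregate calls: (expression, output column name)
    1.0f); // cardinality estimate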