Usage example of org.apache.drill.exec.physical.config.HashAggregate from the axbaretto fork of Apache Drill: the getPhysicalOperator method of class HashAggPrel.
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
// Convert the child Prel to its physical form first, then wrap it in a
// HashAggregate configured with this node's phase, group-by keys and
// aggregate expressions. Cardinality estimate is fixed at 1.0f.
Prel inputPrel = (Prel) this.getInput();
PhysicalOperator childPop = inputPrel.getPhysicalOperator(creator);
HashAggregate hashAgg = new HashAggregate(childPop, operPhase, keys, aggExprs, 1.0f);
return creator.addMetadata(this, hashAgg);
}
Usage example of org.apache.drill.exec.physical.config.HashAggregate from the axbaretto fork of Apache Drill: the testSimpleHashAgg method of class BasicPhysicalOpUnitTest.
@Test
public void testSimpleHashAgg() {
// Single-phase hash aggregation: group by "a", compute sum(b) as "b_sum".
// Two JSON input batches; group a=5 sums to 6, group a=3 sums to 8.
HashAggregate aggConf = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1, parseExprs("a", "a"), parseExprs("sum(b)", "b_sum"), 1.0f);
List<String> inputJsonBatches = Lists.newArrayList("[{\"a\": 5, \"b\" : 1 }]", "[{\"a\": 5, \"b\" : 5},{\"a\": 3, \"b\" : 8}]");
opTestBuilder()
    .physicalOperator(aggConf)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("b_sum", "a")
    .baselineValues(6L, 5L)
    .baselineValues(8L, 3L)
    .go();
}
Usage example of org.apache.drill.exec.physical.config.HashAggregate from Apache Drill: the testSimpleHashAgg method of class TestOutputBatchSize.
@Test
public void testSimpleHashAgg() {
// Single-phase hash aggregation: group by "a", compute sum(b) as "b_sum".
// Two JSON input batches; group a=5 sums to 6, group a=3 sums to 8.
HashAggregate aggConf = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1, parseExprs("a", "a"), parseExprs("sum(b)", "b_sum"), 1.0f);
List<String> inputJsonBatches = Lists.newArrayList("[{\"a\": 5, \"b\" : 1 }]", "[{\"a\": 5, \"b\" : 5},{\"a\": 3, \"b\" : 8}]");
legacyOpTestBuilder()
    .physicalOperator(aggConf)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("b_sum", "a")
    .baselineValues(6L, 5L)
    .baselineValues(8L, 3L)
    .go();
}
Usage example of org.apache.drill.exec.physical.config.HashAggregate from Apache Drill: the testHashAggAvg method of class TestOutputBatchSize.
@Test
public void testHashAggAvg() throws ExecutionSetupException {
// Single-phase hash aggregation: group by "a", compute avg(b) as "b_avg".
HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1, parseExprs("a", "a"), parseExprs("avg(b)", "b_avg"), 1.0f);
// Build one input batch containing three identical copies of {"a": i, "b": i}
// for every i in [0, numRows] inclusive; only the very last row omits the
// trailing comma so the JSON array stays well-formed.
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder inputBatch = new StringBuilder();
inputBatch.append("[");
for (int i = 0; i <= numRows; i++) {
  for (int copy = 0; copy < 3; copy++) {
    boolean isLastRow = (i == numRows) && (copy == 2);
    if (isLastRow) {
      inputBatch.append("{\"a\": " + i + ", \"b\": " + i + "}");
    } else {
      inputBatch.append("{\"a\": " + i + ", \"b\": " + i + "},");
    }
  }
}
inputBatch.append("]");
inputJsonBatches.add(inputBatch.toString());
// Estimate the total output size of the hash agg for the input above; the
// estimate drives the output batch size option so the test can predict how
// many batches the operator emits. One output row per group: {"a": i, ...}.
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatch = new StringBuilder();
expectedBatch.append("[");
for (int i = 0; i < numRows; i++) {
  expectedBatch.append("{\"a\": " + i + ", \"b\": " + (3 * i) + "},");
}
expectedBatch.append("{\"a\": " + numRows + ", \"b\": " + numRows + "}");
expectedBatch.append("]");
expectedJsonBatches.add(expectedBatch.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Cap each output batch at half the total expected size: roughly 2 batches,
// at most 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder builder = legacyOpTestBuilder()
    .physicalOperator(hashAgg)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b_avg")
    .expectedNumBatches(4) // verify number of batches
    .expectedBatchSize(totalSize / 2); // verify batch size
// Each group holds three copies of the same b value, so avg(b) == i exactly.
for (int i = 0; i <= numRows; i++) {
  builder.baselineValues((long) i, (double) i);
}
builder.go();
}
Usage example of org.apache.drill.exec.physical.config.HashAggregate from Apache Drill: the testHashAggSum method of class TestOutputBatchSize.
@Test
public void testHashAggSum() throws ExecutionSetupException {
// Single-phase hash aggregation: group by "a", compute sum(b) as "b_sum".
HashAggregate hashAgg = new HashAggregate(null, AggPrelBase.OperatorPhase.PHASE_1of1, parseExprs("a", "a"), parseExprs("sum(b)", "b_sum"), 1.0f);
// Build one input batch containing three identical copies of {"a": i, "b": i}
// for every i in [0, numRows] inclusive; only the very last row omits the
// trailing comma so the JSON array stays well-formed.
List<String> inputJsonBatches = Lists.newArrayList();
StringBuilder inputBatch = new StringBuilder();
inputBatch.append("[");
for (int i = 0; i <= numRows; i++) {
  for (int copy = 0; copy < 3; copy++) {
    boolean isLastRow = (i == numRows) && (copy == 2);
    if (isLastRow) {
      inputBatch.append("{\"a\": " + i + ", \"b\": " + i + "}");
    } else {
      inputBatch.append("{\"a\": " + i + ", \"b\": " + i + "},");
    }
  }
}
inputBatch.append("]");
inputJsonBatches.add(inputBatch.toString());
// Estimate the total output size of the hash agg for the input above; the
// estimate drives the output batch size option so the test can predict how
// many batches the operator emits. One output row per group: {"a": i, ...}.
List<String> expectedJsonBatches = Lists.newArrayList();
StringBuilder expectedBatch = new StringBuilder();
expectedBatch.append("[");
for (int i = 0; i < numRows; i++) {
  expectedBatch.append("{\"a\": " + i + ", \"b\": " + (3 * i) + "},");
}
expectedBatch.append("{\"a\": " + numRows + ", \"b\": " + numRows + "}");
expectedBatch.append("]");
expectedJsonBatches.add(expectedBatch.toString());
long totalSize = getExpectedSize(expectedJsonBatches);
// Cap each output batch at half the total expected size: roughly 2 batches,
// at most 4.
fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", totalSize / 2);
LegacyOperatorTestBuilder builder = legacyOpTestBuilder()
    .physicalOperator(hashAgg)
    .inputDataStreamJson(inputJsonBatches)
    .baselineColumns("a", "b_sum")
    .expectedNumBatches(4) // verify number of batches
    .expectedBatchSize(totalSize / 2); // verify batch size
// Each group holds three copies of b == i, so sum(b) == 3 * i as a long.
for (int i = 0; i <= numRows; i++) {
  builder.baselineValues((long) i, 3L * i);
}
builder.go();
}
Aggregations