Use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in the Apache Hive project.
From the class TestVectorGroupByOperator, method testAggregateDouble.
public void testAggregateDouble(String aggregateName, int batchSize, Iterable<Object> values, Object expected) throws HiveException {
// Wrap the value iterable in a fake single-column ("double") batch source,
// then delegate to the iterable-based aggregate test helper.
@SuppressWarnings("unchecked")
FakeVectorRowBatchFromObjectIterables batchSource = new FakeVectorRowBatchFromObjectIterables(batchSize, new String[] { "double" }, values);
testAggregateDoubleIterable(aggregateName, batchSource, expected);
}
Use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in the Apache Hive project.
From the class TestVectorGroupByOperator, method testRollupAggregation.
// Verifies that count(v) grouped by rollup(k1, k2) — i.e. three grouping sets —
// keeps the vectorized group-by in hash mode even with very low check-interval
// settings, and produces exactly 14 output rows (1 grand total + 3 k1 rollup
// rows + 10 (k1, k2) rows).
@Test
public void testRollupAggregation() throws HiveException {
// Column schema seen by the vectorization context: two long keys and one value.
List<String> mapColumnNames = new ArrayList<String>();
mapColumnNames.add("k1");
mapColumnNames.add("k2");
mapColumnNames.add("v");
VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
// select count(v) from name group by rollup (k1,k2);
Pair<GroupByDesc, VectorGroupByDesc> pair = buildKeyGroupByDesc(ctx, "count", "v", TypeInfoFactory.longTypeInfo, new String[] { "k1", "k2" }, new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo });
GroupByDesc desc = pair.left;
VectorGroupByDesc vectorDesc = pair.right;
desc.setGroupingSetsPresent(true);
ArrayList<Long> groupingSets = new ArrayList<>();
// Three grouping-set ids for rollup(k1, k2).
// NOTE(review): the exact bitmask semantics of 0/1/2 are assumed from the
// rollup query above — confirm against the compiler's grouping-set encoding.
groupingSets.add(0L);
groupingSets.add(1L);
groupingSets.add(2L);
desc.setListGroupingSets(groupingSets);
// add grouping sets dummy key (constant 0L appended to the key expressions)
ExprNodeDesc groupingSetDummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
// this only works because we used an arraylist in buildKeyGroupByDesc
// don't do this in actual compiler
desc.getKeys().add(groupingSetDummyKey);
// groupingSet Position: index of the dummy key within the key list
desc.setGroupingSetPosition(2);
CompilationOpContext cCtx = new CompilationOpContext();
// Require at least 50% reduction for hash aggregation to stay on.
desc.setMinReductionHashAggr(0.5f);
// Set really low check interval setting so the hash-mode decision is
// evaluated early during processing.
hconf.set("hive.groupby.mapaggr.checkinterval", "10");
hconf.set("hive.vectorized.groupby.checkinterval", "10");
Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
// Capture operator counts the rows emitted by the group-by.
FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
vgo.initialize(hconf, null);
this.outputRowCount = 0;
out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {
@Override
public void inspectRow(Object row, int tag) throws HiveException {
++outputRowCount;
}
});
// vrb of 1 row each
FakeVectorRowBatchFromObjectIterables data = getDataForRollup();
long countRowsProduced = 0;
for (VectorizedRowBatch unit : data) {
// after 24 rows, we'd have seen all the keys
// find 14 keys in the hashmap
// but 24*0.5 = 12
// won't turn off hash mode because of the 3 grouping sets
// if it turns off the hash mode, we'd get 14 + 3*(100-24) rows
countRowsProduced += unit.size;
vgo.process(unit, 0);
// Stop after feeding ~100 input rows.
if (countRowsProduced >= 100) {
break;
}
}
// close(false) flushes remaining aggregation buffers to the capture child.
vgo.close(false);
// all groupings
// 10 keys generates 14 rows with the rollup
assertEquals(1 + 3 + 10, outputRowCount);
}
Use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in the Apache Hive project.
From the class TestVectorGroupByOperator, method testAggregateStringKeyAggregate.
public void testAggregateStringKeyAggregate(String aggregateName, int batchSize, Iterable<Object> list, Iterable<Object> values, HashMap<Object, Object> expected) throws HiveException {
// Build a two-column (string key, long value) fake batch source and delegate
// to the iterable-based string-key aggregate test helper.
final String[] columnTypes = { "string", "long" };
@SuppressWarnings("unchecked")
FakeVectorRowBatchFromObjectIterables batchSource = new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, list, values);
testAggregateStringKeyIterable(aggregateName, batchSource, TypeInfoFactory.longTypeInfo, expected);
}
Aggregations