Search in sources :

Example 1 with VectorGroupByOperator

Use of org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator in the Apache Hive project.

Class TestVectorizer, method testAggregateOnUDF.

/**
 * Builds a hash-mode group-by on "col2" that aggregates "sum" over a
 * {@link GenericUDFAbs} call on "col1", runs it through
 * {@code Vectorizer.vectorizeGroupByOperator}, and asserts that the first vectorized
 * aggregation descriptor ends up with {@link VectorUDAFSumLong} as its aggregate class.
 *
 * @throws HiveException declared by the Hive APIs used while building and vectorizing the plan
 * @throws VectorizerCannotVectorizeException declared by the vectorizer entry point
 */
@Test
public void testAggregateOnUDF() throws HiveException, VectorizerCannotVectorizeException {
    // Aggregation argument: GenericUDFAbs applied to the "col1" column expression.
    ExprNodeColumnDesc absInputCol = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
    List<ExprNodeDesc> absArgs = new ArrayList<ExprNodeDesc>();
    absArgs.add(absInputCol);
    ExprNodeGenericFuncDesc absExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), absArgs);

    // The "sum" aggregation over that expression, in PARTIAL1 mode.
    ArrayList<ExprNodeDesc> sumParams = new ArrayList<ExprNodeDesc>();
    sumParams.add(absExpr);
    List<ObjectInspector> sumParamOIs = new ArrayList<ObjectInspector>();
    sumParamOIs.add(absExpr.getWritableObjectInspector());
    GenericUDAFEvaluator sumEvaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", sumParamOIs, false, false);
    AggregationDesc aggDesc = new AggregationDesc("sum", sumEvaluator, sumParams, false, GenericUDAFEvaluator.Mode.PARTIAL1);

    // Row-mode group-by descriptor: one output column, one aggregator, keyed on "col2".
    GroupByDesc desc = new GroupByDesc();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    desc.setOutputColumnNames(outputColumnNames);
    ArrayList<AggregationDesc> aggregators = new ArrayList<AggregationDesc>();
    aggregators.add(aggDesc);
    desc.setAggregators(aggregators);
    ExprNodeColumnDesc groupKeyCol = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);
    ArrayList<ExprNodeDesc> groupKeys = new ArrayList<ExprNodeDesc>();
    groupKeys.add(groupKeyCol);
    desc.setKeys(groupKeys);

    // Vector descriptor seeded with VectorUDAFCountStar as the last constructor argument.
    // NOTE(review): presumably that argument is the initial aggregate class, so the final
    // assertion passes only if vectorization replaces it - confirm against VectorAggregationDesc.
    VectorAggregationDesc placeholderAggr = new VectorAggregationDesc(
            aggDesc.getGenericUDAFName(),
            new GenericUDAFSum.GenericUDAFSumLong(),
            aggDesc.getMode(),
            TypeInfoFactory.longTypeInfo,
            ColumnVector.Type.LONG,
            null,
            TypeInfoFactory.longTypeInfo,
            ColumnVector.Type.LONG,
            VectorUDAFCountStar.class);
    VectorGroupByDesc vectorDesc = new VectorGroupByDesc();
    vectorDesc.setProcessingMode(ProcessingMode.HASH);
    vectorDesc.setVecAggrDescs(new VectorAggregationDesc[] { placeholderAggr });

    // Create the operator first, then switch the descriptor to HASH mode (original ordering kept).
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(new CompilationOpContext(), desc);
    desc.setMode(GroupByDesc.Mode.HASH);

    VectorizationContext vContext = new VectorizationContext("name", Arrays.asList("col1", "col2"));
    Vectorizer vectorizer = new Vectorizer();
    vectorizer.testSetCurrentBaseWork(new MapWork());

    // The cast doubles as a runtime check that a VectorGroupByOperator was produced.
    VectorGroupByOperator vectorizedOp = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, vContext, vectorDesc);

    Assert.assertEquals(VectorUDAFSumLong.class, vectorDesc.getVecAggrDescs()[0].getVecAggrClass());
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorUDAFCountStar(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar) GenericUDAFSumLong(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum.GenericUDAFSumLong) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)1 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)1 VectorAggregationDesc (org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc)1 VectorGroupByOperator (org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator)1 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)1 VectorUDAFCountStar (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar)1 GenericUDAFSumLong (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum.GenericUDAFSumLong)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 Test (org.junit.Test)1