Search in sources:

Example 41 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in the Apache Hive project.

From the class TestVectorizer, method testAggregateOnUDF.

/**
 * Runs {@code Vectorizer.vectorizeGroupByOperator} on a hash-mode GROUP BY whose single
 * aggregate is {@code sum} over {@code abs(col1)}, keyed on {@code col2}.
 *
 * <p>The vector descriptor is seeded with {@code VectorUDAFCountStar} as its aggregate class;
 * the test passes when, after vectorization, the first vector aggregation descriptor reports
 * {@code VectorUDAFSumLong}.
 *
 * @throws HiveException declared by the Hive calls used below
 * @throws VectorizerCannotVectorizeException declared by the Hive calls used below
 */
@Test
public void testAggregateOnUDF() throws HiveException, VectorizerCannotVectorizeException {
    // Two Integer columns of table alias "T": col1 feeds the aggregate, col2 is the group key.
    ExprNodeColumnDesc aggInputColumn = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
    ExprNodeColumnDesc groupKeyColumn = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);

    // abs(col1)
    List<ExprNodeDesc> absArguments = new ArrayList<>();
    absArguments.add(aggInputColumn);
    ExprNodeGenericFuncDesc absExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), absArguments);

    // sum(abs(col1)) in PARTIAL1 mode
    ArrayList<ExprNodeDesc> sumParameters = new ArrayList<>();
    sumParameters.add(absExpr);
    List<ObjectInspector> sumParameterInspectors = new ArrayList<>();
    sumParameterInspectors.add(absExpr.getWritableObjectInspector());
    GenericUDAFEvaluator sumEvaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", sumParameterInspectors, false, false);
    AggregationDesc sumDesc = new AggregationDesc("sum", sumEvaluator, sumParameters, false, GenericUDAFEvaluator.Mode.PARTIAL1);

    // Vector-side descriptor, deliberately seeded with VectorUDAFCountStar.
    VectorAggregationDesc seededVectorAggr = new VectorAggregationDesc(sumDesc, new GenericUDAFSum.GenericUDAFSumLong(), TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, null, TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, VectorUDAFCountStar.class);
    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
    vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
    vectorGroupByDesc.setVecAggrDescs(new VectorAggregationDesc[] { seededVectorAggr });

    // Row-mode GROUP BY descriptor: one output column, one aggregate, one key.
    GroupByDesc groupByDesc = new GroupByDesc();
    ArrayList<String> outputNames = new ArrayList<>();
    outputNames.add("_col0");
    groupByDesc.setOutputColumnNames(outputNames);
    ArrayList<AggregationDesc> aggregators = new ArrayList<>();
    aggregators.add(sumDesc);
    groupByDesc.setAggregators(aggregators);
    ArrayList<ExprNodeDesc> groupingKeys = new ArrayList<>();
    groupingKeys.add(groupKeyColumn);
    groupByDesc.setKeys(groupingKeys);

    // The mode is set only after the operator has been created from the descriptor.
    Operator<? extends OperatorDesc> groupByOperator = OperatorFactory.get(new CompilationOpContext(), groupByDesc);
    groupByDesc.setMode(GroupByDesc.Mode.HASH);

    VectorizationContext vectorizationContext = new VectorizationContext("name", Arrays.asList("col1", "col2"));
    Vectorizer vectorizer = new Vectorizer();
    vectorizer.testSetCurrentBaseWork(new MapWork());

    // The result is not inspected further; the cast itself requires a VectorGroupByOperator.
    VectorGroupByOperator vectorizedGroupBy = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOperator, vectorizationContext, vectorGroupByDesc);

    Assert.assertEquals(VectorUDAFSumLong.class, vectorGroupByDesc.getVecAggrDescs()[0].getVecAggrClass());
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) VectorUDAFCountStar(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar) GenericUDAFSumLong(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum.GenericUDAFSumLong) Test(org.junit.Test)

Example 42 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in the Apache Hive project.

From the class TestVectorizer, method testValidateMapJoinOperator.

/**
 * Builds a {@code MapJoinOperator}, configures it via {@code prepareAbstractMapJoin}, and
 * creates a {@code Vectorizer} whose current base work is a new {@code MapWork}.
 *
 * <p>The assertion that the operator can be vectorized is currently disabled (UNDONE), so this
 * test only exercises the setup path.
 */
@Test
public void testValidateMapJoinOperator() {
    CompilationOpContext opContext = new CompilationOpContext();
    MapJoinOperator joinOperator = new MapJoinOperator(opContext);
    MapJoinDesc joinDesc = new MapJoinDesc();
    prepareAbstractMapJoin(joinOperator, joinDesc);
    joinOperator.setConf(joinDesc);

    Vectorizer physicalVectorizer = new Vectorizer();
    physicalVectorizer.testSetCurrentBaseWork(new MapWork());
    // UNDONE: the validation check below is switched off.
    // Assert.assertTrue(physicalVectorizer.validateMapWorkOperator(joinOperator, null, false));
}
Also used : CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Test(org.junit.Test)

Example 43 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in the Apache Hive project.

From the class TestVectorizer, method testValidateSMBJoinOperator.

/**
 * Builds an {@code SMBMapJoinOperator}, configures it via {@code prepareAbstractMapJoin}, and
 * creates a {@code Vectorizer} whose current base work is a new {@code MapWork}.
 *
 * <p>The assertion that the operator can be vectorized is currently disabled (UNDONE), so this
 * test only exercises the setup path.
 */
@Test
public void testValidateSMBJoinOperator() {
    CompilationOpContext opContext = new CompilationOpContext();
    SMBMapJoinOperator smbJoinOperator = new SMBMapJoinOperator(opContext);
    SMBJoinDesc smbJoinDesc = new SMBJoinDesc();
    prepareAbstractMapJoin(smbJoinOperator, smbJoinDesc);
    smbJoinOperator.setConf(smbJoinDesc);

    Vectorizer physicalVectorizer = new Vectorizer();
    physicalVectorizer.testSetCurrentBaseWork(new MapWork());
    // UNDONE: the validation check below is switched off.
    // Assert.assertTrue(physicalVectorizer.validateMapWorkOperator(smbJoinOperator, null, false));
}
Also used : CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Test(org.junit.Test)

Example 44 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in the Apache Hive project.

From the class MapJoinTestConfig, method createMapJoin.

/**
 * Creates a map-join operator for a test, loads its small-table container, and connects it to
 * the given collector operator.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Create the table-container SerDe from {@code mapJoinDesc} and a small-table container:
 *       a {@code HashMapWrapper} when {@code isOriginalMapJoin} is set, otherwise a
 *       {@code MapJoinBytesTableContainer}.</li>
 *   <li>Fill the container through {@code loadTableContainerData}.</li>
 *   <li>Create the operator: a plain {@code MapJoinOperator} when {@code isVectorMapJoin} is
 *       false; otherwise a {@code VectorMapJoinOperator}, or a
 *       {@code VectorMapJoinOuterFilteredOperator} when the join is an outer join with at least
 *       one filter.</li>
 *   <li>Connect the operator to {@code collectorOperator} and hand it the container at
 *       position 1.</li>
 * </ol>
 *
 * @param testDesc test description supplying the Hive conf, big-table column names and
 *     small-table value type infos
 * @param collectorOperator operator passed to {@code connectOperators} together with the join
 * @param testData test data; its small-table key hash map size is passed to the bytes container
 * @param mapJoinDesc join descriptor used for the SerDe, keys, value expressions and filters
 * @param isVectorMapJoin whether to build a vectorized operator
 * @param isOriginalMapJoin whether to back the small table with {@code HashMapWrapper}
 * @return the configured operator
 * @throws SerDeException declared by the calls used below
 * @throws IOException declared by the calls used below
 * @throws HiveException declared by the calls used below
 */
public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) throws SerDeException, IOException, HiveException {
    MapJoinTableContainerSerDe containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
    MapJoinObjectSerDeContext valueContext = containerSerDe.getValueContext();

    // Choose the small-table container implementation.
    MapJoinTableContainer tableContainer;
    if (isOriginalMapJoin) {
        tableContainer = new HashMapWrapper(testDesc.hiveConf, -1);
    } else {
        tableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, valueContext, testData.smallTableKeyHashMap.size(), 0);
    }
    tableContainer.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
    loadTableContainerData(testDesc, testData, tableContainer);

    MapJoinOperator operator;
    if (isVectorMapJoin) {
        VectorizationContext vectorContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList);
        // One scratch column per small-table value, to hold small table results.
        for (int valueIndex = 0; valueIndex < testDesc.smallTableValueTypeInfos.length; valueIndex++) {
            vectorContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[valueIndex]);
        }

        // This mirrors what the Vectorizer class does.
        VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
        byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
        vectorMapJoinDesc.setAllBigTableKeyExpressions(vectorContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable)));
        vectorMapJoinDesc.setAllBigTableValueExpressions(vectorContext.getVectorExpressions(mapJoinDesc.getExprs().get(posBigTable)));

        // NOTE(review): filters are looked up at a fixed position 0, not at posBigTable — confirm
        // that the big table is always at position 0 for callers of this helper.
        final Byte filterPosition = 0;
        List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(filterPosition);
        boolean outerJoinWithFilters = !mapJoinDesc.isNoOuterJoin() && !bigTableFilters.isEmpty();
        if (outerJoinWithFilters) {
            operator = new VectorMapJoinOuterFilteredOperator(new CompilationOpContext(), mapJoinDesc, vectorContext, vectorMapJoinDesc);
        } else {
            operator = new VectorMapJoinOperator(new CompilationOpContext(), mapJoinDesc, vectorContext, vectorMapJoinDesc);
        }
    } else {
        operator = new MapJoinOperator(new CompilationOpContext());
        operator.setConf(mapJoinDesc);
    }

    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
    operator.setTestMapJoinTableContainer(1, tableContainer, containerSerDe);
    return operator;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)44 ArrayList (java.util.ArrayList)27 GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)12 VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc)12 JobConf (org.apache.hadoop.mapred.JobConf)12 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)11 FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator)10 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)9 Operator (org.apache.hadoop.hive.ql.exec.Operator)8 HashMap (java.util.HashMap)7 LinkedHashMap (java.util.LinkedHashMap)7 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)7 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 HashSet (java.util.HashSet)5 Configuration (org.apache.hadoop.conf.Configuration)5 Path (org.apache.hadoop.fs.Path)5 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)5 ByteArrayInputStream (java.io.ByteArrayInputStream)4 Set (java.util.Set)4