Search in sources :

Example 31 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

The following example shows the doVectorArithmeticTest method of the TestVectorNegative class.

/**
 * Evaluates the given arithmetic expression through the vectorized execution path and
 * extracts every result row into {@code resultObjects} for later comparison.
 *
 * <p>In {@link NegativeTestMode#ADAPTOR} mode the row-mode UDF adaptor is forced on so the
 * adaptor code path is exercised; in VECTOR_EXPRESSION mode, falling back to the adaptor is
 * reported as a missing native vector expression.
 *
 * @param exprDesc      the expression to vectorize and evaluate
 * @param batchSource   supplies the input batches; iterated from the beginning
 * @param resultObjects filled with one extracted result object per input row
 * @throws Exception if vectorization or evaluation fails
 */
private void doVectorArithmeticTest(TypeInfo typeInfo, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc, NegativeTestMode negativeTestMode, VectorRandomBatchSource batchSource, ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
    HiveConf hiveConf = new HiveConf();
    if (negativeTestMode == NegativeTestMode.ADAPTOR) {
        // Force use of the row-mode VectorUDFAdaptor even when a native vector expression exists.
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
    }
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
    vectorExpression.transientInit(hiveConf);
    if (negativeTestMode == NegativeTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
        // We expected a native vector expression but got the adaptor fallback; report it.
        System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " negativeTestMode " + negativeTestMode + " vectorExpression " + vectorExpression.toString());
    }
    String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, typeInfos, dataTypePhysicalVariations, /* dataColumnNums */
    null, /* partitionColumnCount */
    0, /* virtualColumnCount */
    0, /* neededVirtualColumns */
    null, outputScratchTypeNames, null);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    // Only the expression's single output column is extracted per row.
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() });
    // Renamed from the original misspelled "scrqtchRow".
    Object[] scratchRow = new Object[1];
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        vectorExpression.evaluate(batch);
        extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
        rowIndex += batch.size;
    }
}
Also used : VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)

Example 32 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

The following example shows the validateAndVectorizeOperatorTree method of the Vectorizer class.

/**
 * Builds a vectorized copy of the operator tree rooted at {@code nonVecRootOperator},
 * walking the original tree breadth-first and vectorizing each level's children.
 *
 * @return a dummy vector operator whose children form the vectorized tree
 * @throws VectorizerCannotVectorizeException if some operator cannot be vectorized
 */
private Operator<? extends OperatorDesc> validateAndVectorizeOperatorTree(Operator<? extends OperatorDesc> nonVecRootOperator, boolean isReduce, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws VectorizerCannotVectorizeException {
    // One vectorization context is shared by the whole task's operator tree.
    VectorizationContext taskVContext = new VectorizationContext("Task", vectorTaskColumnInfo.allColumnNames, vectorTaskColumnInfo.allTypeInfos, vectorTaskColumnInfo.allDataTypePhysicalVariations, hiveConf);
    // The dummy operator anchors the parallel vector tree we are about to build.
    Operator<? extends OperatorDesc> dummyVectorOperator = new DummyVectorOperator(taskVContext);
    List<Operator<? extends OperatorDesc>> parentLevel = newOperatorList();
    parentLevel.add(nonVecRootOperator);
    List<Operator<? extends OperatorDesc>> vectorParentLevel = newOperatorList();
    vectorParentLevel.add(dummyVectorOperator);
    delayedFixups.clear();
    // Breadth-first walk: each iteration processes one level of (parent, vectorParent) pairs.
    while (!parentLevel.isEmpty()) {
        List<Operator<? extends OperatorDesc>> childLevel = newOperatorList();
        List<Operator<? extends OperatorDesc>> vectorChildLevel = newOperatorList();
        for (int pos = 0; pos < parentLevel.size(); pos++) {
            Operator<? extends OperatorDesc> parent = parentLevel.get(pos);
            List<Operator<? extends OperatorDesc>> children = parent.getChildOperators();
            if (children != null && !children.isEmpty()) {
                // Vectorize this parent's children, plug them under the matching vector
                // parent, and queue both lists for the next level of the walk.
                doProcessChildren(parent, vectorParentLevel.get(pos), childLevel, vectorChildLevel, isReduce, isTezOrSpark, vectorTaskColumnInfo);
            }
        }
        parentLevel = childLevel;
        vectorParentLevel = vectorChildLevel;
    }
    runDelayedFixups();
    return dummyVectorOperator;
}
Also used : VectorFileSinkArrowOperator(org.apache.hadoop.hive.ql.exec.vector.filesink.VectorFileSinkArrowOperator) VectorMapJoinAntiJoinMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinAntiJoinMultiKeyOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinFullOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterStringOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorMapJoinFullOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterLongOperator) VectorMapJoinFullOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterMultiKeyOperator) 
VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinAntiJoinStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinAntiJoinStringOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) VectorMapJoinAntiJoinLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinAntiJoinLongOperator) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 33 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

The following example shows the getFakeOperator method of the TestVectorPTFGroupBatches class.

/** Builds a minimal VectorPTFOperator suitable for driving these unit tests. */
private VectorPTFOperator getFakeOperator() throws HiveException {
    VectorPTFDesc ptfVectorDesc = new VectorPTFDesc();
    ptfVectorDesc.setVectorPTFInfo(new VectorPTFInfo());
    // No output columns or evaluator functions are required by the tests.
    ptfVectorDesc.setOutputColumnNames(new String[0]);
    ptfVectorDesc.setEvaluatorFunctionNames(new String[0]);
    VectorizationContext fakeContext = new VectorizationContext("fake");
    return new VectorPTFOperator(new CompilationOpContext(), new PTFDesc(), fakeContext, ptfVectorDesc);
}
Also used : VectorPTFInfo(org.apache.hadoop.hive.ql.plan.VectorPTFInfo) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)

Example 34 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

The following example shows the testAggregateOnUDF method of the TestVectorizer class.

@Test
public void testAggregateOnUDF() throws HiveException, VectorizerCannotVectorizeException {
    // Build the plan fragment for: sum(abs(col1)) ... group by col2.
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);
    List<ExprNodeDesc> absChildren = new ArrayList<ExprNodeDesc>();
    absChildren.add(col1Expr);
    ExprNodeGenericFuncDesc absExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), absChildren);
    // Wire abs(col1) in as the single parameter of sum().
    ArrayList<ExprNodeDesc> sumParams = new ArrayList<ExprNodeDesc>();
    sumParams.add(absExpr);
    List<ObjectInspector> sumParamOIs = new ArrayList<ObjectInspector>();
    sumParamOIs.add(absExpr.getWritableObjectInspector());
    AggregationDesc sumAggDesc = new AggregationDesc("sum", FunctionRegistry.getGenericUDAFEvaluator("sum", sumParamOIs, false, false), sumParams, false, GenericUDAFEvaluator.Mode.PARTIAL1);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    GroupByDesc groupByDesc = new GroupByDesc();
    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
    vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
    vectorGroupByDesc.setVecAggrDescs(new VectorAggregationDesc[] { new VectorAggregationDesc(sumAggDesc.getGenericUDAFName(), new GenericUDAFSum.GenericUDAFSumLong(), sumAggDesc.getMode(), TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, null, TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, VectorUDAFCountStar.class) });
    groupByDesc.setOutputColumnNames(outputColumnNames);
    ArrayList<AggregationDesc> aggregators = new ArrayList<AggregationDesc>();
    aggregators.add(sumAggDesc);
    groupByDesc.setAggregators(aggregators);
    // col2 is the grouping key.
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    groupByKeys.add(col2Expr);
    groupByDesc.setKeys(groupByKeys);
    Operator<? extends OperatorDesc> gbyOp = OperatorFactory.get(new CompilationOpContext(), groupByDesc);
    groupByDesc.setMode(GroupByDesc.Mode.HASH);
    VectorizationContext ctx = new VectorizationContext("name", Arrays.asList(new String[] { "col1", "col2" }));
    Vectorizer vectorizer = new Vectorizer();
    vectorizer.testSetCurrentBaseWork(new MapWork());
    // The cast itself asserts that vectorization produced a VectorGroupByOperator.
    VectorGroupByOperator vectorOp = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(gbyOp, ctx, vectorGroupByDesc);
    // The aggregation class should have been re-resolved to the native sum-long variant.
    Assert.assertEquals(VectorUDAFSumLong.class, vectorGroupByDesc.getVecAggrDescs()[0].getVecAggrClass());
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) VectorUDAFCountStar(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar) GenericUDAFSumLong(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum.GenericUDAFSumLong) Test(org.junit.Test)

Example 35 with VectorizationContext

use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.

The following example shows the setUp method of the TestVectorizer class.

@Before
public void setUp() {
    // Register four input columns and build the shared vectorization context
    // used by the tests in this class.
    List<String> columns = new ArrayList<String>();
    for (String columnName : new String[] { "col0", "col1", "col2", "col3" }) {
        columns.add(columnName);
    }
    vContext = new VectorizationContext("name", columns);
}
Also used : ArrayList(java.util.ArrayList) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) Before(org.junit.Before)

Aggregations

VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)36 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)19 HiveConf (org.apache.hadoop.hive.conf.HiveConf)18 VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)18 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)18 VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)16 VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)12 ArrayList (java.util.ArrayList)8 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)8 VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)7 VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)7 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)6 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)6 MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)6 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)6 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)6 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)6 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)6 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)6 List (java.util.List)5