Search in sources :

Example 1 with VectorPTFInfo

use of org.apache.hadoop.hive.ql.plan.VectorPTFInfo in project hive by apache.

the class Vectorizer method createVectorPTFInfo.

/**
 * Create the additional vectorization PTF information needed by the VectorPTFOperator during
 * execution.
 *
 * <p>The returned {@link VectorPTFInfo} carries:
 * <ul>
 *   <li>the output column projection map: one freshly allocated scratch column per evaluator
 *       result, followed by the input column index of each remaining output column (looked up
 *       by the column's internal name);</li>
 *   <li>the partition key column map, column vector types and vector expressions, all
 *       {@code null} when {@code vectorPTFDesc.getIsPartitionOrderBy()} is {@code false};</li>
 *   <li>the order key column map, column vector types and vector expressions;</li>
 *   <li>the key and non-key input column maps computed by
 *       {@code determineKeyAndNonKeyInputColumnMap};</li>
 *   <li>per evaluator, the input vector expression and its column vector type
 *       ({@code null} / {@code ColumnVector.Type.NONE} when the evaluator has no input
 *       expression list).</li>
 * </ul>
 *
 * @param ptfOp the PTF operator; its schema signature supplies the output columns
 * @param ptfDesc the PTF descriptor; not consulted by this method, retained so the signature
 *        stays compatible with existing callers
 * @param vContext the vectorization context used to allocate scratch columns, resolve input
 *        column indexes and build vector expressions
 * @param vectorPTFDesc the already populated vector PTF descriptor to read partition, order and
 *        evaluator information from
 * @return a newly created and fully populated {@link VectorPTFInfo}
 * @throws HiveException if a call into the vectorization context fails (presumably while
 *         building a vector expression -- confirm against VectorizationContext)
 */
private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
    final int outputSize = outputSignature.size();
    boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
    ExprNodeDesc[] orderExprNodeDescs = vectorPTFDesc.getOrderExprNodeDescs();
    ExprNodeDesc[] partitionExprNodeDescs = vectorPTFDesc.getPartitionExprNodeDescs();
    // Only the number of evaluators is needed here; the function names themselves are not used.
    final int evaluatorCount = vectorPTFDesc.getEvaluatorFunctionNames().length;
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    /*
     * Output columns.
     */
    int[] outputColumnProjectionMap = new int[outputSize];
    // Evaluator results are first; each one gets its own scratch column of the output type.
    for (int i = 0; i < evaluatorCount; i++) {
        TypeInfo typeInfo = outputSignature.get(i).getType();
        outputColumnProjectionMap[i] = vContext.allocateScratchColumn(typeInfo);
    }
    // Followed by key and non-key input columns (some may be missing).
    for (int i = evaluatorCount; i < outputSize; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        outputColumnProjectionMap[i] = vContext.getInputColumnIndex(colInfo.getInternalName());
    }
    /*
     * Partition and order by.
     */
    int[] partitionColumnMap;
    Type[] partitionColumnVectorTypes;
    VectorExpression[] partitionExpressions;
    if (!isPartitionOrderBy) {
        // No partition keys: downstream setters receive null for all three arrays.
        partitionColumnMap = null;
        partitionColumnVectorTypes = null;
        partitionExpressions = null;
    } else {
        final int partitionKeyCount = partitionExprNodeDescs.length;
        partitionColumnMap = new int[partitionKeyCount];
        partitionColumnVectorTypes = new Type[partitionKeyCount];
        partitionExpressions = new VectorExpression[partitionKeyCount];
        for (int i = 0; i < partitionKeyCount; i++) {
            VectorExpression partitionExpression = vContext.getVectorExpression(partitionExprNodeDescs[i]);
            TypeInfo typeInfo = partitionExpression.getOutputTypeInfo();
            partitionColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            partitionColumnMap[i] = partitionExpression.getOutputColumnNum();
            partitionExpressions[i] = partitionExpression;
        }
    }
    final int orderKeyCount = orderExprNodeDescs.length;
    int[] orderColumnMap = new int[orderKeyCount];
    Type[] orderColumnVectorTypes = new Type[orderKeyCount];
    VectorExpression[] orderExpressions = new VectorExpression[orderKeyCount];
    for (int i = 0; i < orderKeyCount; i++) {
        VectorExpression orderExpression = vContext.getVectorExpression(orderExprNodeDescs[i]);
        TypeInfo typeInfo = orderExpression.getOutputTypeInfo();
        orderColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
        orderColumnMap[i] = orderExpression.getOutputColumnNum();
        orderExpressions[i] = orderExpression;
    }
    /*
     * Key and non-key input columns.
     */
    ArrayList<Integer> keyInputColumns = new ArrayList<Integer>();
    ArrayList<Integer> nonKeyInputColumns = new ArrayList<Integer>();
    determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap, partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns);
    int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0]));
    int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0]));
    /*
     * Evaluator inputs.
     */
    VectorExpression[] evaluatorInputExpressions = new VectorExpression[evaluatorCount];
    Type[] evaluatorInputColumnVectorTypes = new Type[evaluatorCount];
    for (int i = 0; i < evaluatorCount; i++) {
        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        VectorExpression inputVectorExpression;
        final Type columnVectorType;
        if (exprNodeDescList != null) {
            // Validation has limited evaluatorInputExprNodeDescLists to size 1.
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            // Determine input vector expression using the VectorizationContext.
            inputVectorExpression = vContext.getVectorExpression(exprNodeDesc);
            // The cast makes a non-primitive evaluator input fail fast with a ClassCastException.
            PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) exprNodeDesc.getTypeInfo();
            columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(primitiveTypeInfo);
        } else {
            // Evaluator takes no input expression.
            inputVectorExpression = null;
            columnVectorType = ColumnVector.Type.NONE;
        }
        evaluatorInputExpressions[i] = inputVectorExpression;
        evaluatorInputColumnVectorTypes[i] = columnVectorType;
    }
    VectorPTFInfo vectorPTFInfo = new VectorPTFInfo();
    vectorPTFInfo.setOutputColumnMap(outputColumnProjectionMap);
    vectorPTFInfo.setPartitionColumnMap(partitionColumnMap);
    vectorPTFInfo.setPartitionColumnVectorTypes(partitionColumnVectorTypes);
    vectorPTFInfo.setPartitionExpressions(partitionExpressions);
    vectorPTFInfo.setOrderColumnMap(orderColumnMap);
    vectorPTFInfo.setOrderColumnVectorTypes(orderColumnVectorTypes);
    vectorPTFInfo.setOrderExpressions(orderExpressions);
    vectorPTFInfo.setEvaluatorInputExpressions(evaluatorInputExpressions);
    vectorPTFInfo.setEvaluatorInputColumnVectorTypes(evaluatorInputColumnVectorTypes);
    vectorPTFInfo.setKeyInputColumnMap(keyInputColumnMap);
    vectorPTFInfo.setNonKeyInputColumnMap(nonKeyInputColumnMap);
    return vectorPTFInfo;
}
Also used : VectorPTFInfo(org.apache.hadoop.hive.ql.plan.VectorPTFInfo) ArrayList(java.util.ArrayList) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorDeserializeType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) OperatorType(org.apache.hadoop.hive.ql.plan.api.OperatorType) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 2 with VectorPTFInfo

use of org.apache.hadoop.hive.ql.plan.VectorPTFInfo in project hive by apache.

the class Vectorizer method vectorizePTFOperator.

/**
 * Builds the vectorized counterpart of a PTF operator.
 *
 * <p>NOTE: The VectorPTFDesc has already been allocated and populated. This method computes
 * the {@link VectorPTFInfo} via {@code createVectorPTFInfo}, stores it on
 * {@code vectorPTFDesc}, and then asks {@code OperatorFactory} for a
 * {@code VectorPTFOperator} built from the original operator's compilation context and
 * configuration.
 *
 * @param ptfOp the PTF operator to vectorize; its configuration is cast to {@code PTFDesc}
 * @param vContext the vectorization context handed to both the info builder and the factory
 * @param vectorPTFDesc the populated vector PTF descriptor; updated with the new info object
 * @return the operator produced by {@code OperatorFactory.getVectorOperator}
 * @throws HiveException declared by this method; propagated from the calls it makes
 */
public static Operator<? extends OperatorDesc> vectorizePTFOperator(Operator<? extends OperatorDesc> ptfOp, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    final PTFDesc ptfDesc = (PTFDesc) ptfOp.getConf();
    // The execution-time info must be attached to the descriptor before the operator is built.
    vectorPTFDesc.setVectorPTFInfo(createVectorPTFInfo(ptfOp, ptfDesc, vContext, vectorPTFDesc));
    return OperatorFactory.getVectorOperator(
        VectorPTFOperator.class,
        ptfOp.getCompilationOpContext(),
        ptfOp.getConf(),
        vContext,
        vectorPTFDesc);
}
Also used : VectorPTFInfo(org.apache.hadoop.hive.ql.plan.VectorPTFInfo) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator)

Aggregations

VectorPTFInfo (org.apache.hadoop.hive.ql.plan.VectorPTFInfo)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)1 InConstantType (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType)1 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)1 VectorPTFOperator (org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 PTFDesc (org.apache.hadoop.hive.ql.plan.PTFDesc)1 HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType)1 HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)1 VectorPTFDesc (org.apache.hadoop.hive.ql.plan.VectorPTFDesc)1 SupportedFunctionType (org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType)1 VectorDeserializeType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType)1 OperatorType (org.apache.hadoop.hive.ql.plan.api.OperatorType)1 PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)1 WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)1 UDFToInteger (org.apache.hadoop.hive.ql.udf.UDFToInteger)1 UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString)1 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)1