Example 6 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class PTFTranslator method translate.

private PartitionedTableFunctionDef translate(PartitionedTableFunctionSpec spec, PTFInputDef inpDef, int inpNum) throws SemanticException {
    TableFunctionResolver tFn = FunctionRegistry.getTableFunctionResolver(spec.getName());
    if (tFn == null) {
        throw new SemanticException(String.format("Unknown Table Function %s", spec.getName()));
    }
    PartitionedTableFunctionDef def = new PartitionedTableFunctionDef();
    def.setInput(inpDef);
    def.setName(spec.getName());
    def.setResolverClassName(tFn.getClass().getName());
    def.setAlias(spec.getAlias() == null ? "ptf_" + inpNum : spec.getAlias());
    def.setExpressionTreeString(spec.getAstNode().toStringTree());
    def.setTransformsRawInput(tFn.transformsRawInput());
    /*
     * translate args
     */
    List<ASTNode> args = spec.getArgs();
    if (args != null) {
        for (ASTNode expr : args) {
            PTFExpressionDef argDef = null;
            try {
                argDef = buildExpressionDef(inpDef.getOutputShape(), expr);
            } catch (HiveException he) {
                throw new SemanticException(he);
            }
            def.addArg(argDef);
        }
    }
    tFn.initialize(hCfg, ptfDesc, def);
    TableFunctionEvaluator tEval = tFn.getEvaluator();
    def.setTFunction(tEval);
    def.setCarryForwardNames(tFn.carryForwardNames());
    tFn.setupRawInputOI();
    if (tFn.transformsRawInput()) {
        StructObjectInspector rawInOutOI = tEval.getRawInputOI();
        List<String> rawInOutColNames = tFn.getRawInputColumnNames();
        RowResolver rawInRR = buildRowResolverForPTF(def.getName(), spec.getAlias(), rawInOutOI, rawInOutColNames, inpDef.getOutputShape().getRr());
        ShapeDetails rawInpShape = setupTableFnShape(def.getName(), inpDef.getOutputShape(), rawInOutOI, rawInOutColNames, rawInRR);
        def.setRawInputShape(rawInpShape);
    } else {
        def.setRawInputShape(inpDef.getOutputShape());
    }
    translatePartitioning(def, spec);
    tFn.setupOutputOI();
    StructObjectInspector outputOI = tEval.getOutputOI();
    List<String> outColNames = tFn.getOutputColumnNames();
    RowResolver outRR = buildRowResolverForPTF(def.getName(), spec.getAlias(), outputOI, outColNames, def.getRawInputShape().getRr());
    ShapeDetails outputShape = setupTableFnShape(def.getName(), inpDef.getOutputShape(), outputOI, outColNames, outRR);
    def.setOutputShape(outputShape);
    def.setReferencedColumns(tFn.getReferencedColumns());
    return def;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) ShapeDetails(org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) WindowingTableFunctionResolver(org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver) TableFunctionResolver(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver) TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
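The flow above is mechanical: resolve a TableFunctionResolver by name, fail fast on an unknown function, default the alias to "ptf_" plus the input position, and turn each AST argument into a PTFExpressionDef. Below is a minimal, Hive-free sketch of that resolve-and-default pattern; TranslateSketch, FnDef, and the registry map are hypothetical stand-ins, not Hive classes.

import java.util.Map;

public class TranslateSketch {

    // Hypothetical stand-in for PartitionedTableFunctionDef.
    static class FnDef {
        String name;
        String alias;
    }

    static FnDef translate(Map<String, String> registry, String fnName, String alias, int inputNum) {
        // Mirrors FunctionRegistry.getTableFunctionResolver: null means unknown function.
        String resolverClass = registry.get(fnName);
        if (resolverClass == null) {
            throw new IllegalArgumentException(String.format("Unknown Table Function %s", fnName));
        }
        FnDef def = new FnDef();
        def.name = fnName;
        // Same defaulting rule as above: "ptf_" + input position when no alias is given.
        def.alias = (alias == null) ? "ptf_" + inputNum : alias;
        return def;
    }

    public static void main(String[] args) {
        Map<String, String> registry = Map.of("noop", "NoopResolver");
        System.out.println(translate(registry, "noop", null, 0).alias); // prints ptf_0
    }
}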

Example 7 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class PTFDeserializer method alterOutputOIForStreaming.

/*
 * If the final PTF in a PTFChain can stream its output, then set the OI of its OutputShape
 * to the OI returned by the TableFunctionEvaluator.
 */
public static void alterOutputOIForStreaming(PTFDesc ptfDesc) {
    PartitionedTableFunctionDef tDef = ptfDesc.getFuncDef();
    TableFunctionEvaluator tEval = tDef.getTFunction();
    if (tEval.canIterateOutput()) {
        tDef.getOutputShape().setOI(tEval.getOutputOI());
    }
}
Also used : TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)
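The method is a pure capability check: only when the evaluator declares it can iterate (stream) its output does the planned output ObjectInspector get replaced. A self-contained sketch of the same guard-then-swap pattern, with hypothetical Evaluator and Shape types standing in for TableFunctionEvaluator and ShapeDetails:

public class StreamingOISketch {

    interface Evaluator {
        boolean canIterateOutput();
        Object getOutputOI();
    }

    // Hypothetical stand-in for ShapeDetails; holds the output ObjectInspector.
    static class Shape {
        Object oi = "plannedOI";
    }

    static void alterForStreaming(Evaluator eval, Shape outputShape) {
        // Replace the planned OI only for streaming-capable evaluators.
        if (eval.canIterateOutput()) {
            outputShape.oi = eval.getOutputOI();
        }
    }

    public static void main(String[] args) {
        Shape shape = new Shape();
        alterForStreaming(new Evaluator() {
            public boolean canIterateOutput() { return true; }
            public Object getOutputOI() { return "streamingOI"; }
        }, shape);
        System.out.println(shape.oi); // prints streamingOI
    }
}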

Example 8 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class SemanticAnalyzer method genPTFPlanForComponentQuery.

private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException {
    /*
     * 1. Create the PTFDesc from the Qspec attached to this QB.
     */
    RowResolver rr = opParseCtx.get(input).getRowResolver();
    PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
    /*
     * 2. build Map-side Op Graph. Graph template is either:
     * Input -> PTF_map -> ReduceSink
     * or
     * Input -> ReduceSink
     *
     * Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
     */
    {
        PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
        /*
         * a. add Map-side PTF Operator if needed
         */
        if (tabDef.isTransformsRawInput()) {
            RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
            ptfDesc.setMapSide(true);
            input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
            rr = opParseCtx.get(input).getRowResolver();
        }
        /*
         * b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
         */
        ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
        StringBuilder orderString = new StringBuilder();
        StringBuilder nullOrderString = new StringBuilder();
        /*
         * Use the input RR of TableScanOperator in case there is no map-side
         * reshape of input.
         * If the parent of ReduceSinkOperator is PTFOperator, use its
         * output RR.
         */
        buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullOrderString);
        input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), nullOrderString.toString(), -1, Operation.NOT_ACID);
    }
    /*
     * 3. build Reduce-side Op Graph
     */
    {
        /*
         * c. Rebuild the QueryDef.
         * Why?
         * - so that the ExprNodeDescriptors in the QueryDef are based on the
         *   Select Operator's RowResolver
         */
        rr = opParseCtx.get(input).getRowResolver();
        ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
        /*
         * d. Construct PTF Operator.
         */
        RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
    }
    return input;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)
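The two blocks above realize the graph template from the comment: the map side is Input -> PTF_map -> ReduceSink when the function transforms its raw input, otherwise just Input -> ReduceSink, and the reduce side rebuilds the PTFDesc against the shuffled rows before appending the PTF operator. A Hive-free sketch of that template; planShape and the string operator names are illustrative only:

import java.util.ArrayList;
import java.util.List;

public class PtfPlanShapeSketch {

    static List<String> planShape(boolean transformsRawInput) {
        List<String> ops = new ArrayList<>();
        ops.add("Input");
        if (transformsRawInput) {
            ops.add("PTF_map");   // map-side PTF operator, added only when needed
        }
        ops.add("ReduceSink");    // shuffles on the PTF's partition/order columns
        ops.add("PTF");           // reduce-side PTF over the shuffled rows
        return ops;
    }

    public static void main(String[] args) {
        System.out.println(planShape(true));  // [Input, PTF_map, ReduceSink, PTF]
        System.out.println(planShape(false)); // [Input, ReduceSink, PTF]
    }
}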

Example 9 with PartitionedTableFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.

the class Vectorizer method validatePTFOperator.

private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    if (!isPtfVectorizationEnabled) {
        setNodeIssue("Vectorization of PTF is not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
        return false;
    }
    PTFDesc ptfDesc = (PTFDesc) op.getConf();
    boolean isMapSide = ptfDesc.isMapSide();
    if (isMapSide) {
        setOperatorIssue("PTF Mapper not supported");
        return false;
    }
    List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
    if (ptfParents != null && ptfParents.size() > 0) {
        Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
        if (!(ptfParent instanceof ReduceSinkOperator)) {
            boolean isReduceShufflePtf = false;
            if (ptfParent instanceof SelectOperator) {
                ptfParents = ptfParent.getParentOperators();
                if (ptfParents == null || ptfParents.size() == 0) {
                    isReduceShufflePtf = true;
                } else {
                    ptfParent = ptfParent.getParentOperators().get(0);
                    isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
                }
            }
            if (!isReduceShufflePtf) {
                setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
                return false;
            }
        }
    }
    boolean forNoop = ptfDesc.forNoop();
    if (forNoop) {
        setOperatorIssue("NOOP not supported");
        return false;
    }
    boolean forWindowing = ptfDesc.forWindowing();
    if (!forWindowing) {
        setOperatorIssue("Windowing required");
        return false;
    }
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
    if (!isWindowTableFunctionDef) {
        setOperatorIssue("Must be a WindowTableFunctionDef");
        return false;
    }
    try {
        createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
    } catch (HiveException e) {
        setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
        return false;
    }
    // Output columns ok?
    String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
    TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
    final int outputCount = outputColumnNames.length;
    for (int i = 0; i < outputCount; i++) {
        String typeName = outputTypeInfos[i].getTypeName();
        boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
        if (!ret) {
            setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
            return false;
        }
    }
    boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
    String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
    final int count = evaluatorFunctionNames.length;
    WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    for (int i = 0; i < count; i++) {
        String functionName = evaluatorFunctionNames[i];
        SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
        if (supportedFunctionType == null) {
            setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
            return false;
        }
        WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
        if (!windowFrameDef.isStartUnbounded()) {
            setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
            return false;
        }
        switch(windowFrameDef.getWindowType()) {
            case RANGE:
                if (!windowFrameDef.getEnd().isCurrentRow()) {
                    setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
                    return false;
                }
                break;
            case ROWS:
                if (!windowFrameDef.isEndUnbounded()) {
                    setOperatorIssue(functionName + " only UNBOUNDED end frame is supported for ROWS window type");
                    return false;
                }
                break;
            default:
                throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
        }
        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
            setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
            return false;
        }
        if (exprNodeDescList != null) {
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            if (containsLeadLag(exprNodeDesc)) {
                setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
                return false;
            }
            if (supportedFunctionType != SupportedFunctionType.COUNT && supportedFunctionType != SupportedFunctionType.DENSE_RANK && supportedFunctionType != SupportedFunctionType.RANK) {
                // COUNT, DENSE_RANK, and RANK do not care about column types.  The rest do.
                TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
                Category category = typeInfo.getCategory();
                boolean isSupportedType;
                if (category != Category.PRIMITIVE) {
                    isSupportedType = false;
                } else {
                    ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
                    switch(colVecType) {
                        case LONG:
                        case DOUBLE:
                        case DECIMAL:
                            isSupportedType = true;
                            break;
                        default:
                            isSupportedType = false;
                            break;
                    }
                }
                if (!isSupportedType) {
                    setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorReduceSinkObjectHashOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorReduceSinkEmptyKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
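The window-frame checks above reduce to a small predicate: the start frame must be UNBOUNDED, a RANGE frame must end at CURRENT ROW, and a ROWS frame must end UNBOUNDED. A standalone sketch of just those rules; the WindowType and Bound enums are hypothetical simplifications, not Hive's WindowFrameDef API:

public class FrameRuleSketch {

    enum WindowType { RANGE, ROWS }
    enum Bound { UNBOUNDED, CURRENT_ROW, FOLLOWING }

    static boolean isVectorizableFrame(WindowType type, Bound start, Bound end) {
        if (start != Bound.UNBOUNDED) {
            return false; // only UNBOUNDED start frames pass validation
        }
        switch (type) {
            case RANGE:
                return end == Bound.CURRENT_ROW; // RANGE must end at CURRENT ROW
            case ROWS:
                return end == Bound.UNBOUNDED;   // ROWS must end UNBOUNDED
            default:
                return false;
        }
    }

    public static void main(String[] args) {
        System.out.println(isVectorizableFrame(WindowType.RANGE, Bound.UNBOUNDED, Bound.CURRENT_ROW)); // true
        System.out.println(isVectorizableFrame(WindowType.ROWS, Bound.UNBOUNDED, Bound.CURRENT_ROW));  // false
    }
}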

Aggregations

PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)9 ArrayList (java.util.ArrayList)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4 List (java.util.List)3 WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)3 WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef)3 Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 PTFDesc (org.apache.hadoop.hive.ql.plan.PTFDesc)2 SupportedFunctionType (org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType)2 PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)2 PTFInputDef (org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef)2 UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString)2 TableFunctionEvaluator (org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator)2 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)2 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)2 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 ArrayDeque (java.util.ArrayDeque)1 Stack (java.util.Stack)1