Search in sources :

Example 11 with WindowTableFunctionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in the Apache Hive project.

From the class WindowingTableFunction, the method finishPartition.

/*
 * (non-Javadoc)
 *
 * @see
 * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#finishPartition()
 *
 * Called at the end of a partition to flush the output that is still owed.
 *
 * Step 1, per window function: start from the relative offset of the frame's
 * end boundary as the number of rows still to be produced.
 *  - Evaluators implementing ISupportStreamingModeForWindowing are terminated
 *    first; when the function's info helper reports that it does not support
 *    windowing, the count is taken from getRowsRemainingAfterTerminate()
 *    instead. Unless the count is BoundarySpec.UNBOUNDED_AMOUNT, getNextResult
 *    is invoked that many times; null results are skipped and NULL_RESULT is
 *    stored as null.
 *  - All other evaluators are evaluated directly against the trailing rows of
 *    the rolling partition (indexes below zero are skipped).
 *
 * Step 2: output rows are drained until the rolling partition reports that all
 * rows were processed or the rank limit is reached. If no output row is
 * available and the rank limit has not been reached, a HiveException is raised.
 *
 * Returns null (not an empty list) when the rank limit was already reached on
 * entry or when no row was produced.
 */
@Override
public List<Object> finishPartition() throws HiveException {
    // Enough rows have already been emitted; there is nothing left to produce.
    if (streamingState.rankLimitReached()) {
        return null;
    }

    WindowTableFunctionDef windowDef = (WindowTableFunctionDef) getTableDef();
    for (int fnIdx = 0; fnIdx < windowDef.getWindowFunctions().size(); fnIdx++) {
        WindowFunctionDef fnDef = windowDef.getWindowFunctions().get(fnIdx);
        GenericUDAFEvaluator evaluator = fnDef.getWFnEval();
        int pending = fnDef.getWindowFrame().getEnd().getRelativeOffset();

        // instanceof is false for null, so no separate null check is required.
        if (!(evaluator instanceof ISupportStreamingModeForWindowing)) {
            // Non-streaming function: evaluate it for each of the trailing rows.
            for (; pending > 0; pending--) {
                int rowIdx = streamingState.rollingPart.size() - pending;
                if (rowIdx < 0) {
                    continue;
                }
                Object value = evaluateWindowFunction(fnDef, rowIdx, streamingState.rollingPart);
                streamingState.fnOutputs[fnIdx].add(value);
            }
            continue;
        }

        ISupportStreamingModeForWindowing streamingEval = (ISupportStreamingModeForWindowing) evaluator;
        evaluator.terminate(streamingState.aggBuffers[fnIdx]);

        WindowingFunctionInfoHelper fnInfo = getWindowingFunctionInfoHelper(fnDef.getName());
        if (!fnInfo.isSupportsWindow()) {
            pending = streamingEval.getRowsRemainingAfterTerminate();
        }
        if (pending == BoundarySpec.UNBOUNDED_AMOUNT) {
            continue;
        }

        for (; pending > 0; pending--) {
            Object value = streamingEval.getNextResult(streamingState.aggBuffers[fnIdx]);
            if (value == null) {
                continue;
            }
            if (value == ISupportStreamingModeForWindowing.NULL_RESULT) {
                // The sentinel stands for an actual null result value.
                streamingState.fnOutputs[fnIdx].add(null);
            } else {
                streamingState.fnOutputs[fnIdx].add(value);
            }
        }
    }

    List<Object> flushedRows = new ArrayList<Object>();
    while (!streamingState.rollingPart.processedAllRows() && !streamingState.rankLimitReached()) {
        if (streamingState.hasOutputRow()) {
            flushedRows.add(streamingState.nextOutputRow());
        } else if (!streamingState.rankLimitReached()) {
            throw new HiveException("Internal Error: cannot generate all output rows for a Partition");
        }
    }

    if (flushedRows.isEmpty()) {
        return null;
    }
    return flushedRows;
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 12 with WindowTableFunctionDef

Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in the Apache Hive project.

From the class Vectorizer, the method validatePTFOperator.

/**
 * Checks whether a PTF operator can be vectorized.
 *
 * <p>Each failed check records a human-readable reason through
 * {@code setNodeIssue}, {@code setOperatorIssue} or {@code setExpressionIssue}
 * and makes the method return {@code false}. The checks, in order:
 * <ol>
 *   <li>PTF vectorization is enabled;</li>
 *   <li>the PTF is not map-side;</li>
 *   <li>the first parent (if any) is a ReduceSinkOperator, or a SelectOperator
 *       that has no parents or whose first parent is a ReduceSinkOperator;</li>
 *   <li>the PTF is not a NOOP, is for windowing, and its function definition is
 *       a WindowTableFunctionDef;</li>
 *   <li>{@code createVectorPTFDesc} succeeds in populating {@code vectorPTFDesc};</li>
 *   <li>every output column has a data type accepted by {@code validateDataType};</li>
 *   <li>every evaluator is a supported function with a supported window frame
 *       and at most one supported argument expression.</li>
 * </ol>
 *
 * @param op the PTF operator to validate
 * @param vContext vectorization context handed to {@code createVectorPTFDesc}
 * @param vectorPTFDesc descriptor that {@code createVectorPTFDesc} fills in and
 *        that the remaining checks read from
 * @return {@code true} when all checks pass, {@code false} otherwise
 * @throws HiveException declared by the signature; a HiveException raised by
 *         {@code createVectorPTFDesc} is caught and reported as an operator issue
 * @throws RuntimeException if a window frame has a type other than RANGE or ROWS
 */
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    if (!isPtfVectorizationEnabled) {
        setNodeIssue("Vectorization of PTF is not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
        return false;
    }
    PTFDesc ptfDesc = (PTFDesc) op.getConf();
    if (ptfDesc.isMapSide()) {
        setOperatorIssue("PTF Mapper not supported");
        return false;
    }

    // The PTF must sit directly under a reduce-shuffle, optionally with one
    // SelectOperator in between.
    List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
    if (ptfParents != null && ptfParents.size() > 0) {
        Operator<? extends OperatorDesc> ptfParent = ptfParents.get(0);
        if (!(ptfParent instanceof ReduceSinkOperator)) {
            boolean isReduceShufflePtf = false;
            if (ptfParent instanceof SelectOperator) {
                ptfParents = ptfParent.getParentOperators();
                if (ptfParents == null || ptfParents.size() == 0) {
                    // A parentless SelectOperator is accepted as well.
                    isReduceShufflePtf = true;
                } else {
                    ptfParent = ptfParents.get(0);
                    isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
                }
            }
            if (!isReduceShufflePtf) {
                setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
                return false;
            }
        }
    }

    if (ptfDesc.forNoop()) {
        setOperatorIssue("NOOP not supported");
        return false;
    }
    if (!ptfDesc.forWindowing()) {
        setOperatorIssue("Windowing required");
        return false;
    }
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    if (!(funcDef instanceof WindowTableFunctionDef)) {
        setOperatorIssue("Must be a WindowTableFunctionDef");
        return false;
    }

    // Populate vectorPTFDesc; everything below validates what was collected.
    try {
        createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
    } catch (HiveException e) {
        setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
        return false;
    }

    // Output columns ok?
    String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
    TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
    final int outputCount = outputColumnNames.length;
    for (int i = 0; i < outputCount; i++) {
        String typeName = outputTypeInfos[i].getTypeName();
        boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
        if (!ret) {
            setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
            return false;
        }
    }

    // Evaluators ok?
    String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
    final int count = evaluatorFunctionNames.length;
    WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    for (int i = 0; i < count; i++) {
        String functionName = evaluatorFunctionNames[i];
        SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
        if (supportedFunctionType == null) {
            setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
            return false;
        }

        WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
        if (!windowFrameDef.isStartUnbounded()) {
            setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
            return false;
        }
        switch(windowFrameDef.getWindowType()) {
            case RANGE:
                if (!windowFrameDef.getEnd().isCurrentRow()) {
                    setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
                    return false;
                }
                break;
            case ROWS:
                if (!windowFrameDef.isEndUnbounded()) {
                    // The frame is rejected because its end is NOT unbounded, so the
                    // message must say that UNBOUNDED is the only supported end.
                    setOperatorIssue(functionName + " only UNBOUNDED end frame is supported for ROWS window type");
                    return false;
                }
                break;
            default:
                throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
        }

        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
            setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
            return false;
        }
        // An empty argument list is handled like a missing one: there is no
        // expression to inspect, and get(0) would throw IndexOutOfBoundsException.
        // NOTE(review): confirm later vectorization stages also accept an empty list.
        if (exprNodeDescList != null && !exprNodeDescList.isEmpty()) {
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            if (containsLeadLag(exprNodeDesc)) {
                setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
                return false;
            }
            if (supportedFunctionType != SupportedFunctionType.COUNT && supportedFunctionType != SupportedFunctionType.DENSE_RANK && supportedFunctionType != SupportedFunctionType.RANK) {
                // COUNT, DENSE_RANK, and RANK do not care about column types.  The rest do.
                TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
                Category category = typeInfo.getCategory();
                boolean isSupportedType;
                if (category != Category.PRIMITIVE) {
                    isSupportedType = false;
                } else {
                    ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
                    switch(colVecType) {
                        case LONG:
                        case DOUBLE:
                        case DECIMAL:
                            isSupportedType = true;
                            break;
                        default:
                            isSupportedType = false;
                            break;
                    }
                }
                if (!isSupportedType) {
                    setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorReduceSinkObjectHashOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorReduceSinkEmptyKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) 
VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef)12 WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)9 ArrayList (java.util.ArrayList)7 GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator)5 List (java.util.List)4 WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)4 ISupportStreamingModeForWindowing (org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing)4 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)3 AbstractList (java.util.AbstractList)2 PTFPartition (org.apache.hadoop.hive.ql.exec.PTFPartition)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 PTFDesc (org.apache.hadoop.hive.ql.plan.PTFDesc)2 PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)2 PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)2 ArrayDeque (java.util.ArrayDeque)1 WindowFunctionInfo (org.apache.hadoop.hive.ql.exec.WindowFunctionInfo)1 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)1 Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)1 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)1