Search in sources :

Example 6 with WindowFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.

the class WindowingTableFunction method iterator.

/**
 * Builds the row iterator for one partition.
 *
 * <p>Each window function of the table definition is classified into one of three groups:
 * <ul>
 *   <li>non-pivot functions for which {@code processWindow} returns false: evaluated right away
 *       with {@code evaluateFunctionOnPartition}, the result stored in the output row template;</li>
 *   <li>pivot functions: either switched to their windowing evaluator (when that evaluator
 *       implements {@link ISupportStreamingModeForWindowing} and reports zero rows remaining
 *       after terminate) and handled like a windowed function, or evaluated over the whole
 *       partition with the resulting list kept in a per-function slot;</li>
 *   <li>all remaining functions: recorded by index so the returned iterator handles them.</li>
 * </ul>
 * Every function that is not evaluated right away contributes a {@code null} placeholder to the
 * row template.
 *
 * @param pItr iterator over the partition; it is reset once per window function
 * @return a {@code WindowingIterator} over the partition
 * @throws HiveException propagated from function evaluation
 */
@SuppressWarnings("rawtypes")
@Override
public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
    WindowTableFunctionDef tableDef = (WindowTableFunctionDef) getTableDef();
    List<WindowFunctionDef> functions = tableDef.getWindowFunctions();
    ArrayList<Object> rowTemplate = new ArrayList<Object>();
    List<?>[] pivotResults = new List<?>[functions.size()];
    ArrayList<Integer> windowedFnIndexes = new ArrayList<Integer>();
    PTFPartition partition = pItr.getPartition();

    int fnIdx = 0;
    for (WindowFunctionDef fn : functions) {
        boolean needsWindow = processWindow(fn.getWindowFrame());
        pItr.reset();
        if (fn.isPivotResult()) {
            GenericUDAFEvaluator candidate = fn.getWFnEval().getWindowingEvaluator(fn.getWindowFrame());
            // instanceof is false for null, so no separate null test is required.
            boolean useWindowingEvaluator = candidate instanceof ISupportStreamingModeForWindowing
                    && ((ISupportStreamingModeForWindowing) candidate).getRowsRemainingAfterTerminate() == 0;
            if (useWindowingEvaluator) {
                fn.setWFnEval(candidate);
                if (fn.getOI() instanceof ListObjectInspector) {
                    // The function now yields one element at a time rather than a list.
                    fn.setOI(((ListObjectInspector) fn.getOI()).getListElementObjectInspector());
                }
                windowedFnIndexes.add(fnIdx);
            } else {
                pivotResults[fnIdx] = (List) evaluateFunctionOnPartition(fn, partition);
            }
            rowTemplate.add(null);
        } else if (needsWindow) {
            rowTemplate.add(null);
            windowedFnIndexes.add(fnIdx);
        } else {
            rowTemplate.add(evaluateFunctionOnPartition(fn, partition));
        }
        fnIdx++;
    }

    // One more placeholder per field of the partition's output object inspector.
    for (int field = 0; field < partition.getOutputOI().getAllStructFieldRefs().size(); field++) {
        rowTemplate.add(null);
    }

    if (tableDef.getRankLimit() != -1) {
        rnkLimitDef = new RankLimit(tableDef.getRankLimit(), tableDef.getRankLimitFunction(), tableDef.getWindowFunctions());
    }

    Integer[] boxedIndexes = windowedFnIndexes.toArray(new Integer[windowedFnIndexes.size()]);
    return new WindowingIterator(partition, rowTemplate, pivotResults, ArrayUtils.toPrimitive(boxedIndexes));
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) PTFPartition(org.apache.hadoop.hive.ql.exec.PTFPartition) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) AbstractList(java.util.AbstractList) ArrayList(java.util.ArrayList) List(java.util.List) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 7 with WindowFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.

the class Vectorizer method createVectorPTFDesc.

/*
 * Fill in the VectorPTFDesc fields that validation needs and that can be produced without
 * using the VectorizationContext to look up column names, etc.
 *
 * Output column names/types are copied from the operator schema, the partition and order
 * expressions are extracted from the function definition, and the per-evaluator arrays are
 * populated by fillInPTFEvaluators.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc, int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
    PartitionedTableFunctionDef tableFnDef = ptfDesc.getFuncDef();
    List<WindowFunctionDef> windowFns = ((WindowTableFunctionDef) tableFnDef).getWindowFunctions();
    final int evaluatorCount = windowFns.size();

    ArrayList<ColumnInfo> signature = ptfOp.getSchema().getSignature();
    final int columnCount = signature.size();
    String[] outputColumnNames = new String[columnCount];
    TypeInfo[] outputTypeInfos = new TypeInfo[columnCount];

    // Evaluator results come first in the output signature...
    int col = 0;
    while (col < evaluatorCount) {
        ColumnInfo evaluatorColumn = signature.get(col);
        outputColumnNames[col] = evaluatorColumn.getInternalName();
        outputTypeInfos[col] = evaluatorColumn.getType();
        col++;
    }
    // ...followed by key and non-key input columns (some may be missing).
    while (col < columnCount) {
        ColumnInfo inputColumn = signature.get(col);
        outputColumnNames[col] = inputColumn.getInternalName();
        outputTypeInfos[col] = inputColumn.getType();
        col++;
    }

    List<PTFExpressionDef> partitionDefs = tableFnDef.getPartition().getExpressions();
    final int partitionKeyCount = partitionDefs.size();
    ExprNodeDesc[] partitionExprs = getPartitionExprNodeDescs(partitionDefs);

    List<OrderExpressionDef> orderDefs = tableFnDef.getOrder().getExpressions();
    final int orderKeyCount = orderDefs.size();
    ExprNodeDesc[] orderExprs = getOrderExprNodeDescs(orderDefs);

    // With both PARTITION BY and ORDER BY the two expression lists differ; otherwise there are
    // only order-by expressions. A differing count settles it, else compare pairwise until the
    // first mismatch.
    boolean isPartitionOrderBy = partitionKeyCount != orderKeyCount;
    for (int k = 0; !isPartitionOrderBy && k < partitionKeyCount; k++) {
        final ExprNodeDescEqualityWrapper partitionSide = new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprs[k]);
        final ExprNodeDescEqualityWrapper orderSide = new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprs[k]);
        isPartitionOrderBy = !partitionSide.equals(orderSide);
    }

    String[] functionNames = new String[evaluatorCount];
    WindowFrameDef[] windowFrames = new WindowFrameDef[evaluatorCount];
    List<ExprNodeDesc>[] inputExprLists = (List<ExprNodeDesc>[]) new List<?>[evaluatorCount];
    fillInPTFEvaluators(windowFns, functionNames, windowFrames, inputExprLists);

    vectorPTFDesc.setReducerBatchTypeInfos(vContext.getAllTypeInfos());
    vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
    vectorPTFDesc.setOrderExprNodeDescs(orderExprs);
    vectorPTFDesc.setPartitionExprNodeDescs(partitionExprs);
    vectorPTFDesc.setEvaluatorFunctionNames(functionNames);
    vectorPTFDesc.setEvaluatorWindowFrameDefs(windowFrames);
    vectorPTFDesc.setEvaluatorInputExprNodeDescLists(inputExprLists);
    vectorPTFDesc.setOutputColumnNames(outputColumnNames);
    vectorPTFDesc.setOutputTypeInfos(outputTypeInfos);
    vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
Also used : WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeDescEqualityWrapper(org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef)

Example 8 with WindowFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.

the class PTFTranslator method translate.

/**
 * Translates a {@link WindowingSpec} into a {@link PTFDesc} whose function definition is a
 * {@link WindowTableFunctionDef} backed by the registered windowing table function.
 *
 * <p>The method stores {@code wdwSpec} and the new descriptor in the {@code windowingSpec} and
 * {@code ptfDesc} fields, so it mutates this translator instance.
 *
 * @param wdwSpec  the windowing specification to translate
 * @param semAly   forwarded to {@code init}
 * @param hCfg     forwarded to {@code init}, set on the descriptor and used to initialize the resolver
 * @param inputRR  forwarded to {@code init}
 * @param unparseT forwarded to {@code init}
 * @return the populated descriptor
 * @throws SemanticException if the windowing table function resolver cannot be found, or if
 *         {@code wdwSpec.getQueryPartitioningSpec()} is null
 */
public PTFDesc translate(WindowingSpec wdwSpec, SemanticAnalyzer semAly, HiveConf hCfg, RowResolver inputRR, UnparseTranslator unparseT) throws SemanticException {
    init(semAly, hCfg, inputRR, unparseT);
    windowingSpec = wdwSpec;
    ptfDesc = new PTFDesc();
    ptfDesc.setCfg(hCfg);
    // NOTE(review): llInfo is a field not assigned in this method; presumably prepared by init() - confirm.
    ptfDesc.setLlInfo(llInfo);
    WindowTableFunctionDef wdwTFnDef = new WindowTableFunctionDef();
    ptfDesc.setFuncDef(wdwTFnDef);
    // The input of the windowing function is a query input of type WINDOWING.
    PTFQueryInputSpec inpSpec = new PTFQueryInputSpec();
    inpSpec.setType(PTFQueryInputType.WINDOWING);
    wdwTFnDef.setInput(translate(inpSpec, 0));
    ShapeDetails inpShape = wdwTFnDef.getInput().getOutputShape();
    WindowingTableFunctionResolver tFn = (WindowingTableFunctionResolver) FunctionRegistry.getTableFunctionResolver(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
    if (tFn == null) {
        throw new SemanticException(String.format("Internal Error: Unknown Table Function %s", FunctionRegistry.WINDOWING_TABLE_FUNCTION));
    }
    wdwTFnDef.setName(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
    wdwTFnDef.setResolverClassName(tFn.getClass().getName());
    // The alias is always the constant string "ptf_1".
    wdwTFnDef.setAlias("ptf_" + 1);
    wdwTFnDef.setExpressionTreeString(null);
    wdwTFnDef.setTransformsRawInput(false);
    tFn.initialize(hCfg, ptfDesc, wdwTFnDef);
    TableFunctionEvaluator tEval = tFn.getEvaluator();
    wdwTFnDef.setTFunction(tEval);
    wdwTFnDef.setCarryForwardNames(tFn.carryForwardNames());
    // The raw input shape is the same as the shape produced by the input definition.
    wdwTFnDef.setRawInputShape(inpShape);
    // NOTE(review): the null check uses getQueryPartitioningSpec(), while the partition and order
    // definitions below are built from getQueryPartitionSpec() / getQueryOrderSpec().
    PartitioningSpec partiSpec = wdwSpec.getQueryPartitioningSpec();
    if (partiSpec == null) {
        throw new SemanticException("Invalid use of Windowing: there is no Partitioning associated with Windowing");
    }
    PartitionDef partDef = translate(inpShape, wdwSpec.getQueryPartitionSpec());
    OrderDef ordDef = translate(inpShape, wdwSpec.getQueryOrderSpec(), partDef);
    wdwTFnDef.setPartition(partDef);
    wdwTFnDef.setOrder(ordDef);
    /*
     * Process the window functions: only WindowFunctionSpec expressions are translated; any other
     * WindowExpressionSpec is skipped. setWindowFunctions is called only when the spec has a
     * non-null expression list.
     */
    ArrayList<WindowFunctionDef> windowFunctions = new ArrayList<WindowFunctionDef>();
    if (wdwSpec.getWindowExpressions() != null) {
        for (WindowExpressionSpec expr : wdwSpec.getWindowExpressions()) {
            if (expr instanceof WindowFunctionSpec) {
                WindowFunctionDef wFnDef = translate(wdwTFnDef, (WindowFunctionSpec) expr);
                windowFunctions.add(wFnDef);
            }
        }
        wdwTFnDef.setWindowFunctions(windowFunctions);
    }
    /*
     * Build the output of window-function processing: one field per window function (alias and
     * object inspector), then the input columns appended by addInputColumnsToList.
     */
    ArrayList<String> aliases = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    for (WindowFunctionDef wFnDef : windowFunctions) {
        aliases.add(wFnDef.getAlias());
        if (wFnDef.isPivotResult()) {
            // A pivot-result function has a list inspector; the output field uses its element inspector.
            fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
        } else {
            fieldOIs.add(wFnDef.getOI());
        }
    }
    PTFTranslator.addInputColumnsToList(inpShape, aliases, fieldOIs);
    StructObjectInspector wdwOutOI = ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
    tFn.setWdwProcessingOutputOI(wdwOutOI);
    RowResolver wdwOutRR = buildRowResolverForWindowing(wdwTFnDef);
    ShapeDetails wdwOutShape = setupShape(wdwOutOI, null, wdwOutRR);
    wdwTFnDef.setOutputShape(wdwOutShape);
    tFn.setupOutputOI();
    // NOTE(review): presumably adjusts output inspectors for functions that will run in streaming mode - confirm in PTFDeserializer.
    PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
    return ptfDesc;
}
Also used : WindowingTableFunctionResolver(org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) WindowFunctionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec) PTFQueryInputSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec) ShapeDetails(org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails) PartitioningSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec) TableFunctionEvaluator(org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) PartitionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionDef) WindowExpressionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec) OrderDef(org.apache.hadoop.hive.ql.plan.ptf.OrderDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 9 with WindowFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.

the class PTFTranslator method translate.

/**
 * Translates a single {@link WindowFunctionSpec} into a {@link WindowFunctionDef}.
 *
 * <p>The definition copies name, alias, distinct/star flags and the expression tree string from
 * the spec, takes the pivot-result flag from the registered {@link WindowFunctionInfo},
 * translates each argument against the raw input shape of {@code wdwTFnDef}, and, when the spec
 * carries a window specification, translates and attaches the window frame. Finally the
 * evaluator is set up through {@code setupWdwFnEvaluator}.
 *
 * @param wdwTFnDef the enclosing windowing table function definition
 * @param spec      the window function specification to translate
 * @return the translated window function definition
 * @throws SemanticException if the function is not a registered window function, if a function
 *         that does not support windows is given a frame whose start or end amount is not
 *         {@code BoundarySpec.UNBOUNDED_AMOUNT}, or if argument translation or evaluator
 *         setup fails with a {@link HiveException} (wrapped as the cause)
 */
private WindowFunctionDef translate(WindowTableFunctionDef wdwTFnDef, WindowFunctionSpec spec) throws SemanticException {
    WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(spec.getName());
    if (wFnInfo == null) {
        throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(spec.getName()));
    }
    WindowFunctionDef def = new WindowFunctionDef();
    def.setName(spec.getName());
    def.setAlias(spec.getAlias());
    def.setDistinct(spec.isDistinct());
    def.setExpressionTreeString(spec.getExpression().toStringTree());
    def.setStar(spec.isStar());
    def.setPivotResult(wFnInfo.isPivotResult());
    ShapeDetails inpShape = wdwTFnDef.getRawInputShape();
    /*
     * translate args
     */
    ArrayList<ASTNode> args = spec.getArgs();
    if (args != null) {
        for (ASTNode expr : args) {
            PTFExpressionDef argDef;
            try {
                argDef = buildExpressionDef(inpShape, expr);
            } catch (HiveException he) {
                throw new SemanticException(he);
            }
            def.addArg(argDef);
        }
    }
    if (FunctionRegistry.isRankingFunction(spec.getName())) {
        setupRankingArgs(wdwTFnDef, def, spec);
    }
    WindowSpec wdwSpec = spec.getWindowSpec();
    if (wdwSpec != null) {
        String desc = spec.toString();
        WindowFrameDef wdwFrame = translate(spec.getName(), inpShape, wdwSpec);
        if (!wFnInfo.isSupportsWindow()) {
            // Functions without window support only accept a frame that is unbounded on both sides.
            BoundarySpec start = wdwSpec.getWindowFrame().getStart();
            if (start.getAmt() != BoundarySpec.UNBOUNDED_AMOUNT) {
                throw new SemanticException(String.format("Expecting left window frame boundary for " + "function %s to be unbounded. Found : %d", desc, start.getAmt()));
            }
            BoundarySpec end = wdwSpec.getWindowFrame().getEnd();
            if (end.getAmt() != BoundarySpec.UNBOUNDED_AMOUNT) {
                // Report the offending right-boundary amount (previously the start amount was printed).
                throw new SemanticException(String.format("Expecting right window frame boundary for " + "function %s to be unbounded. Found : %d", desc, end.getAmt()));
            }
        }
        def.setWindowFrame(wdwFrame);
    }
    try {
        setupWdwFnEvaluator(def);
    } catch (HiveException he) {
        throw new SemanticException(he);
    }
    return def;
}
Also used : WindowFunctionInfo(org.apache.hadoop.hive.ql.exec.WindowFunctionInfo) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef) ShapeDetails(org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails) BoundarySpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec) WindowSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec)

Example 10 with WindowFunctionDef

use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.

the class WindowingTableFunction method initializeStreaming.

/**
 * Prepares this table function for streaming evaluation.
 *
 * <p>Nothing is done unless {@code setCanAcceptInputAsStream} leaves
 * {@code canAcceptInputAsStream} set. Otherwise every window function whose evaluator offers a
 * windowing evaluator for its frame is switched to that evaluator (pivot-result functions also
 * have their object inspector replaced by the list element inspector), the rank limit is
 * recorded when one is configured, and the streaming state is created.
 *
 * @param cfg       configuration passed to {@code setCanAcceptInputAsStream} and the streaming state
 * @param inputOI   object inspector passed to the streaming state
 * @param isMapSide flag passed to the streaming state
 * @throws HiveException declared by the overridden method
 */
@Override
public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide) throws HiveException {
    int[] span = setCanAcceptInputAsStream(cfg);
    if (!canAcceptInputAsStream) {
        return;
    }

    WindowTableFunctionDef windowTableDef = (WindowTableFunctionDef) getTableDef();
    for (WindowFunctionDef fn : windowTableDef.getWindowFunctions()) {
        GenericUDAFEvaluator replacement = fn.getWFnEval().getWindowingEvaluator(fn.getWindowFrame());
        if (replacement == null) {
            // No windowing evaluator available: keep the function as it is.
            continue;
        }
        fn.setWFnEval(replacement);
        if (fn.isPivotResult()) {
            ListObjectInspector pivotOI = (ListObjectInspector) fn.getOI();
            fn.setOI(pivotOI.getListElementObjectInspector());
        }
    }

    if (windowTableDef.getRankLimit() != -1) {
        rnkLimitDef = new RankLimit(windowTableDef.getRankLimit(), windowTableDef.getRankLimitFunction(), windowTableDef.getWindowFunctions());
    }

    // The two values returned by setCanAcceptInputAsStream are handed on to the streaming state.
    streamingState = new StreamingState(cfg, inputOI, isMapSide, windowTableDef, span[0], span[1]);
}
Also used : WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Aggregations

WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)13 WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef)9 ArrayList (java.util.ArrayList)8 PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)5 WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)5 GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator)5 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)5 ISupportStreamingModeForWindowing (org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing)4 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)4 List (java.util.List)3 ShapeDetails (org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 AbstractList (java.util.AbstractList)2 PTFPartition (org.apache.hadoop.hive.ql.exec.PTFPartition)2 WindowFunctionInfo (org.apache.hadoop.hive.ql.exec.WindowFunctionInfo)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 WindowExpressionSpec (org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 TableFunctionEvaluator (org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator)2 WindowingTableFunctionResolver (org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver)2