Search in sources :

Example 1 with ISupportStreamingModeForWindowing

use of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in project hive by apache.

From the class WindowingTableFunction, the method processRow:

/*
   * (non-Javadoc)
   * 
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#processRow(java
   * .lang.Object)
   * 
   * - hand row to each Function, provided there are enough rows for Function's
   * window. - call getNextObject on each Function. - output as many rows as
   * possible, based on minimum sz of Output List
   *
   * Returns the output rows that became complete after appending this input
   * row, or null if none are ready yet (or the rank limit was reached).
   */
@Override
public List<Object> processRow(Object row) throws HiveException {
    /*
     * Once enough rows have been output, there is no need to process input rows.
     */
    if (streamingState.rankLimitReached()) {
        return null;
    }
    streamingState.rollingPart.append(row);
    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) tableDef;
    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
        WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
        GenericUDAFEvaluator fnEval = wFn.getWFnEval();
        // Evaluate the function's argument expressions against the incoming row.
        int a = 0;
        if (wFn.getArgs() != null) {
            for (PTFExpressionDef arg : wFn.getArgs()) {
                streamingState.funcArgs[i][a++] = arg.getExprEvaluator().evaluate(row);
            }
        }
        // instanceof is null-safe, so the former "fnEval != null &&" was redundant.
        if (fnEval instanceof ISupportStreamingModeForWindowing) {
            // Streaming-capable function: feed the row, then pull whatever
            // result (if any) has become available.
            fnEval.aggregate(streamingState.aggBuffers[i], streamingState.funcArgs[i]);
            Object out = ((ISupportStreamingModeForWindowing) fnEval).getNextResult(streamingState.aggBuffers[i]);
            if (out != null) {
                // NULL_RESULT is the sentinel for a genuine null output value.
                streamingState.fnOutputs[i].add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
            }
        } else {
            // Non-streaming function: evaluate its window for the earliest
            // buffered row whose window is now fully available, if any.
            int rowToProcess = streamingState.rollingPart.rowToProcess(wFn.getWindowFrame());
            if (rowToProcess >= 0) {
                Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
                streamingState.fnOutputs[i].add(out);
            }
        }
    }
    // Emit as many complete output rows as every function can currently supply.
    List<Object> oRows = new ArrayList<Object>();
    while (streamingState.hasOutputRow()) {
        oRows.add(streamingState.nextOutputRow());
    }
    return oRows.isEmpty() ? null : oRows;
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 2 with ISupportStreamingModeForWindowing

use of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in project hive by apache.

From the class WindowingTableFunction, the method iterator:

/**
 * Builds the iterator over this partition's windowing output.
 *
 * For each window function, chooses one of three evaluation strategies:
 * whole-partition aggregation (one value for all rows), an eagerly
 * materialized pivoted result, or lazy per-row evaluation inside
 * {@code WindowingIterator} (when the function has a window or a streaming
 * evaluator that holds back no rows after terminate).
 */
@SuppressWarnings("rawtypes")
@Override
public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
    WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
    ArrayList<Object> output = new ArrayList<Object>();
    List<?>[] outputFromPivotFunctions = new List<?>[wTFnDef.getWindowFunctions().size()];
    ArrayList<Integer> wFnsWithWindows = new ArrayList<Integer>();
    PTFPartition iPart = pItr.getPartition();
    int i = 0;
    for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
        boolean processWindow = processWindow(wFn.getWindowFrame());
        pItr.reset();
        if (!processWindow && !wFn.isPivotResult()) {
            // Plain aggregation over the whole partition: a single value.
            Object out = evaluateFunctionOnPartition(wFn, iPart);
            output.add(out);
        } else if (wFn.isPivotResult()) {
            GenericUDAFEvaluator streamingEval = wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
            // instanceof is null-safe, so no separate null check is needed.
            // Stream only when the evaluator holds back no rows after terminate.
            if (streamingEval instanceof ISupportStreamingModeForWindowing
                && ((ISupportStreamingModeForWindowing) streamingEval).getRowsRemainingAfterTerminate() == 0) {
                wFn.setWFnEval(streamingEval);
                if (wFn.getOI() instanceof ListObjectInspector) {
                    // Streaming yields individual elements, not the pivoted list.
                    ListObjectInspector listOI = (ListObjectInspector) wFn.getOI();
                    wFn.setOI(listOI.getListElementObjectInspector());
                }
                output.add(null);
                wFnsWithWindows.add(i);
            } else {
                // No usable streaming evaluator: materialize the pivoted result
                // eagerly. (Single branch replacing two duplicated branches.)
                outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
                output.add(null);
            }
        } else {
            // Windowed function: evaluated lazily, per row, by WindowingIterator.
            output.add(null);
            wFnsWithWindows.add(i);
        }
        i++;
    }
    // Placeholders for the pass-through input columns.
    for (i = 0; i < iPart.getOutputOI().getAllStructFieldRefs().size(); i++) {
        output.add(null);
    }
    if (wTFnDef.getRankLimit() != -1) {
        rnkLimitDef = new RankLimit(wTFnDef.getRankLimit(), wTFnDef.getRankLimitFunction(), wTFnDef.getWindowFunctions());
    }
    return new WindowingIterator(iPart, output, outputFromPivotFunctions, ArrayUtils.toPrimitive(wFnsWithWindows.toArray(new Integer[wFnsWithWindows.size()])));
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) PTFPartition(org.apache.hadoop.hive.ql.exec.PTFPartition) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) AbstractList(java.util.AbstractList) ArrayList(java.util.ArrayList) List(java.util.List) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 3 with ISupportStreamingModeForWindowing

use of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in project hive by apache.

From the class WindowingTableFunction, the method setCanAcceptInputAsStream:

/*
   * (non-Javadoc)
   *
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream
   * ()
   *
   * Streaming is possible only when every window function satisfies one of:
   * 1. it implements ISupportStreamingModeForWindowing, or
   * 2. getWindowingEvaluator returns a non-null evaluator implementing
   *    ISupportStreamingModeForWindowing, or
   * 3. it runs over a 'fixed' ROWS window (no Unbounded Preceding/Following).
   *
   * Returns the {minOffset, maxOffset} span of the fixed windows, or null when
   * streaming is not possible (also gated by the configured cache size).
   */
@SuppressWarnings("resource")
private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {
    canAcceptInputAsStream = false;
    // Lead/Lag expressions rule out streaming entirely.
    if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
        return null;
    }
    WindowTableFunctionDef winTabDef = (WindowTableFunctionDef) getTableDef();
    int minOffset = Integer.MAX_VALUE;
    int maxOffset = Integer.MIN_VALUE;
    for (WindowFunctionDef fnDef : winTabDef.getWindowFunctions()) {
        WindowFrameDef frame = fnDef.getWindowFrame();
        GenericUDAFEvaluator evaluator = fnDef.getWFnEval();
        GenericUDAFEvaluator streamingEval =
            streamingPossible(cfg, fnDef) ? evaluator.getWindowingEvaluator(frame) : null;
        // instanceof is null-safe: covers both conditions 1 and 2 above.
        if (streamingEval instanceof ISupportStreamingModeForWindowing) {
            continue;
        }
        BoundaryDef frameStart = frame.getStart();
        BoundaryDef frameEnd = frame.getEnd();
        // Condition 3: only a bounded ROWS window can be buffered.
        if (frame.getWindowType() != WindowType.ROWS
            || frameEnd.isUnbounded() || frameStart.isUnbounded()) {
            return null;
        }
        minOffset = Math.min(minOffset, frameStart.getRelativeOffset());
        maxOffset = Math.max(maxOffset, frameEnd.getRelativeOffset());
    }
    // The combined window span must fit in the configured cache size.
    int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
    if (windowLimit < (maxOffset - minOffset + 1)) {
        return null;
    }
    canAcceptInputAsStream = true;
    return new int[] { minOffset, maxOffset };
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) BoundaryDef(org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 4 with ISupportStreamingModeForWindowing

use of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in project hive by apache.

From the class WindowingTableFunction, the method finishPartition:

/*
   * (non-Javadoc)
   * 
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#finishPartition()
   * 
   * for fns that are not ISupportStreamingModeForWindowing give them the
   * remaining rows (rows whose span went beyond the end of the partition) for
   * rest of the functions invoke terminate.
   * 
   * while numOutputRows < numInputRows for each Fn that doesn't have enough o/p
   * invoke getNextObj if there is no O/p then flag this as an error.
   */
@Override
public List<Object> finishPartition() throws HiveException {
    /*
     * Once enough rows have been output, there is no need to generate more output.
     */
    if (streamingState.rankLimitReached()) {
        return null;
    }
    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
        WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
        GenericUDAFEvaluator fnEval = wFn.getWFnEval();
        // End-of-frame offset of this function's window; presumably the count of
        // buffered rows whose windows extended past the end of the partition
        // and therefore still lack output -- TODO confirm against BoundaryDef.
        int numRowsRemaining = wFn.getWindowFrame().getEnd().getRelativeOffset();
        if (fnEval != null && fnEval instanceof ISupportStreamingModeForWindowing) {
            // Streaming function: terminate it, then drain any results it held back.
            fnEval.terminate(streamingState.aggBuffers[i]);
            WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFn.getName());
            if (!wFnInfo.isSupportsWindow()) {
                // Function does not support windows: ask the evaluator itself how
                // many results remain instead of using the frame offset.
                numRowsRemaining = ((ISupportStreamingModeForWindowing) fnEval).getRowsRemainingAfterTerminate();
            }
            // UNBOUNDED_AMOUNT means the remaining count is not meaningful here;
            // in that case nothing is drained.
            if (numRowsRemaining != BoundarySpec.UNBOUNDED_AMOUNT) {
                while (numRowsRemaining > 0) {
                    Object out = ((ISupportStreamingModeForWindowing) fnEval).getNextResult(streamingState.aggBuffers[i]);
                    if (out != null) {
                        // NULL_RESULT is the sentinel for a genuine null output value.
                        streamingState.fnOutputs[i].add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
                    }
                    numRowsRemaining--;
                }
            }
        } else {
            // Non-streaming function: evaluate the trailing numRowsRemaining rows
            // of the buffered partition, whose windows could not be completed
            // while input rows were still arriving.
            while (numRowsRemaining > 0) {
                int rowToProcess = streamingState.rollingPart.size() - numRowsRemaining;
                if (rowToProcess >= 0) {
                    Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
                    streamingState.fnOutputs[i].add(out);
                }
                numRowsRemaining--;
            }
        }
    }
    // Flush all remaining output rows. If an input row is still unprocessed and
    // no function can supply an output (and the rank limit is not the reason),
    // that is an internal error.
    List<Object> oRows = new ArrayList<Object>();
    while (!streamingState.rollingPart.processedAllRows() && !streamingState.rankLimitReached()) {
        boolean hasRow = streamingState.hasOutputRow();
        if (!hasRow && !streamingState.rankLimitReached()) {
            throw new HiveException("Internal Error: cannot generate all output rows for a Partition");
        }
        if (hasRow) {
            oRows.add(streamingState.nextOutputRow());
        }
    }
    return oRows.size() == 0 ? null : oRows;
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) ArrayList(java.util.ArrayList) WindowFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)

Example 5 with ISupportStreamingModeForWindowing

use of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in project hive by apache.

From the class TestStreamingSum, the method _agg:

/**
 * Drives a windowing evaluator in streaming mode over the given input values
 * and asserts each produced result against the expected output iterator.
 *
 * Feeds every input value through aggregate()/getNextResult(), then, after
 * terminate(), drains the evaluator until inSz results have been checked.
 */
public static <T, TW> void _agg(GenericUDAFResolver fnR, TypeInfo[] inputTypes, Iterator<T> inVals, TypeHandler<T, TW> typeHandler, TW[] in, ObjectInspector[] inputOIs, int inSz, int numPreceding, int numFollowing, Iterator<T> outVals) throws HiveException {
    GenericUDAFEvaluator fn = fnR.getEvaluator(inputTypes);
    fn.init(Mode.COMPLETE, inputOIs);
    fn = fn.getWindowingEvaluator(wdwFrame(numPreceding, numFollowing));
    AggregationBuffer agg = fn.getNewAggregationBuffer();
    ISupportStreamingModeForWindowing oS = (ISupportStreamingModeForWindowing) fn;
    int outSz = 0;
    while (inVals.hasNext()) {
        typeHandler.set(inVals.next(), in[0]);
        fn.aggregate(agg, in);
        Object out = oS.getNextResult(agg);
        // Results may lag the input while the window is still filling up.
        if (out != null) {
            Assert.assertEquals(unwrapResult(out, typeHandler), outVals.next());
            outSz++;
        }
    }
    fn.terminate(agg);
    // Drain results that only become available once all input has been seen.
    while (outSz < inSz) {
        Assert.assertEquals(unwrapResult(oS.getNextResult(agg), typeHandler), outVals.next());
        outSz++;
    }
}

/**
 * Unwraps a streaming result for comparison: maps the NULL_RESULT sentinel to
 * null and converts the writable to its value form where possible.
 */
@SuppressWarnings("unchecked")
private static <T, TW> Object unwrapResult(Object out, TypeHandler<T, TW> typeHandler) {
    if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
        return null;
    }
    try {
        return typeHandler.get((TW) out);
    } catch (ClassCastException ignored) {
        // Deliberate best-effort: some evaluators already return the raw value,
        // which is then compared as-is.
        return out;
    }
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)

Aggregations

GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator)5 ISupportStreamingModeForWindowing (org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing)5 WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef)4 WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef)4 ArrayList (java.util.ArrayList)3 AbstractList (java.util.AbstractList)1 List (java.util.List)1 PTFPartition (org.apache.hadoop.hive.ql.exec.PTFPartition)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 BoundaryDef (org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef)1 PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)1 WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)1 AggregationBuffer (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)1 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)1