Usage of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in the Apache Hive project.
From class WindowingTableFunction, method processRow.
/*
 * (non-Javadoc)
 *
 * @see
 * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#processRow(java
 * .lang.Object)
 *
 * - hand row to each Function, provided there are enough rows for Function's
 * window. - call getNextObject on each Function. - output as many rows as
 * possible, based on minimum sz of Output List
 */
@Override
public List<Object> processRow(Object row) throws HiveException {
  /*
   * Once enough rows have been output, there is no need to process input rows.
   */
  if (streamingState.rankLimitReached()) {
    return null;
  }
  // Buffer the row: non-streaming functions may need it again for later rows.
  streamingState.rollingPart.append(row);
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) tableDef;
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
    GenericUDAFEvaluator fnEval = wFn.getWFnEval();
    // Evaluate this function's argument expressions against the current row.
    int a = 0;
    if (wFn.getArgs() != null) {
      for (PTFExpressionDef arg : wFn.getArgs()) {
        streamingState.funcArgs[i][a++] = arg.getExprEvaluator().evaluate(row);
      }
    }
    // instanceof is null-safe, so no separate null check is needed.
    if (fnEval instanceof ISupportStreamingModeForWindowing) {
      // Streaming-capable evaluator: aggregate now and collect a result as
      // soon as the evaluator has one available for an earlier row.
      fnEval.aggregate(streamingState.aggBuffers[i], streamingState.funcArgs[i]);
      Object out = ((ISupportStreamingModeForWindowing) fnEval)
          .getNextResult(streamingState.aggBuffers[i]);
      if (out != null) {
        // NULL_RESULT is the evaluator's marker for "result is SQL NULL".
        streamingState.fnOutputs[i]
            .add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
      }
    } else {
      // Non-streaming evaluator: once the rolling partition holds enough rows
      // for this function's frame, evaluate the frame for the eligible row.
      int rowToProcess = streamingState.rollingPart.rowToProcess(wFn.getWindowFrame());
      if (rowToProcess >= 0) {
        Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
        streamingState.fnOutputs[i].add(out);
      }
    }
  }
  // Drain every output row for which all functions have produced a value.
  List<Object> oRows = new ArrayList<Object>();
  while (streamingState.hasOutputRow()) {
    oRows.add(streamingState.nextOutputRow());
  }
  return oRows.isEmpty() ? null : oRows;
}
Usage of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in the Apache Hive project.
From class WindowingTableFunction, method iterator.
/**
 * Builds the output iterator for one partition.
 *
 * For each window function: evaluate it eagerly when it needs no window and
 * is not a pivot result; for pivot-result functions, switch to the streaming
 * windowing evaluator when it reports no rows held back after terminate,
 * otherwise materialize the pivoted result list now; all remaining functions
 * are evaluated lazily per-row by the returned WindowingIterator.
 */
@SuppressWarnings("rawtypes")
@Override
public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
  WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
  ArrayList<Object> output = new ArrayList<Object>();
  List<?>[] outputFromPivotFunctions = new List<?>[wTFnDef.getWindowFunctions().size()];
  ArrayList<Integer> wFnsWithWindows = new ArrayList<Integer>();
  PTFPartition iPart = pItr.getPartition();
  int i = 0;
  for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
    boolean processWindow = processWindow(wFn.getWindowFrame());
    pItr.reset();
    if (!processWindow && !wFn.isPivotResult()) {
      // No window and no pivot: a single value for the whole partition.
      Object out = evaluateFunctionOnPartition(wFn, iPart);
      output.add(out);
    } else if (wFn.isPivotResult()) {
      GenericUDAFEvaluator streamingEval =
          wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
      // Streaming is usable only when the evaluator emits each result as soon
      // as its row is seen (nothing buffered past terminate).
      boolean canStream = false;
      if (streamingEval instanceof ISupportStreamingModeForWindowing) {
        canStream = ((ISupportStreamingModeForWindowing) streamingEval)
            .getRowsRemainingAfterTerminate() == 0;
      }
      if (canStream) {
        wFn.setWFnEval(streamingEval);
        if (wFn.getOI() instanceof ListObjectInspector) {
          // Per-row results are the list's elements, not the list itself.
          ListObjectInspector listOI = (ListObjectInspector) wFn.getOI();
          wFn.setOI(listOI.getListElementObjectInspector());
        }
        output.add(null);
        wFnsWithWindows.add(i);
      } else {
        // Cannot stream: materialize the pivoted result list up front.
        // (Original code duplicated this branch; merged here.)
        outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
        output.add(null);
      }
    } else {
      // Windowed, non-pivot function: evaluated per-row by the iterator.
      output.add(null);
      wFnsWithWindows.add(i);
    }
    i++;
  }
  // Placeholder slots for the pass-through input columns.
  for (i = 0; i < iPart.getOutputOI().getAllStructFieldRefs().size(); i++) {
    output.add(null);
  }
  if (wTFnDef.getRankLimit() != -1) {
    rnkLimitDef = new RankLimit(wTFnDef.getRankLimit(), wTFnDef.getRankLimitFunction(), wTFnDef.getWindowFunctions());
  }
  return new WindowingIterator(iPart, output, outputFromPivotFunctions, ArrayUtils.toPrimitive(wFnsWithWindows.toArray(new Integer[wFnsWithWindows.size()])));
}
Usage of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in the Apache Hive project.
From class WindowingTableFunction, method setCanAcceptInputAsStream.
/*
 * (non-Javadoc)
 *
 * @see
 * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream
 * ()
 *
 * WindowTableFunction supports streaming if all functions meet one of these
 * conditions: 1. The Function implements ISupportStreamingModeForWindowing 2.
 * Or returns a non null Object for the getWindowingEvaluator, that implements
 * ISupportStreamingModeForWindowing. 3. Is an invocation on a 'fixed' window.
 * So no Unbounded Preceding or Following.
 *
 * Returns {minStartOffset, maxEndOffset} over the fixed-window functions, or
 * null when streaming is not possible; also sets canAcceptInputAsStream.
 */
@SuppressWarnings("resource")
private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {
  canAcceptInputAsStream = false;
  // Lead/Lag expressions require random access to the partition.
  if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
    return null;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  int startPos = Integer.MAX_VALUE;
  int endPos = Integer.MIN_VALUE;
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
    boolean streamingPossible = streamingPossible(cfg, wFnDef);
    GenericUDAFEvaluator streamingEval = streamingPossible ? fnEval.getWindowingEvaluator(wdwFrame) : null;
    // instanceof is null-safe, so no separate null check is needed.
    if (streamingEval instanceof ISupportStreamingModeForWindowing) {
      // Condition 1/2: the function itself can stream; no window to track.
      continue;
    }
    BoundaryDef start = wdwFrame.getStart();
    BoundaryDef end = wdwFrame.getEnd();
    if (wdwFrame.getWindowType() == WindowType.ROWS) {
      if (!end.isUnbounded() && !start.isUnbounded()) {
        // Condition 3: fixed ROWS window; widen the combined span.
        startPos = Math.min(startPos, wdwFrame.getStart().getRelativeOffset());
        endPos = Math.max(endPos, wdwFrame.getEnd().getRelativeOffset());
        continue;
      }
    }
    // Some function satisfies none of the three conditions: cannot stream.
    return null;
  }
  // NOTE(review): if every function streamed (loop only hit 'continue'),
  // startPos/endPos keep their sentinel values here — confirm downstream
  // callers handle {MAX_VALUE, MIN_VALUE} as intended.
  int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
  if (windowLimit < (endPos - startPos + 1)) {
    return null;
  }
  canAcceptInputAsStream = true;
  return new int[] { startPos, endPos };
}
Usage of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in the Apache Hive project.
From class WindowingTableFunction, method finishPartition.
/*
 * (non-Javadoc)
 *
 * @see
 * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#finishPartition()
 *
 * for fns that are not ISupportStreamingModeForWindowing give them the
 * remaining rows (rows whose span went beyond the end of the partition) for
 * rest of the functions invoke terminate.
 *
 * while numOutputRows < numInputRows for each Fn that doesn't have enough o/p
 * invoke getNextObj if there is no O/p then flag this as an error.
 */
@Override
public List<Object> finishPartition() throws HiveException {
  /*
   * Once enough rows have been output, there is no need to generate more output.
   */
  if (streamingState.rankLimitReached()) {
    return null;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
    GenericUDAFEvaluator fnEval = wFn.getWFnEval();
    // Rows near the partition end whose frames extended past the last input.
    int numRowsRemaining = wFn.getWindowFrame().getEnd().getRelativeOffset();
    // instanceof is null-safe, so no separate null check is needed.
    if (fnEval instanceof ISupportStreamingModeForWindowing) {
      fnEval.terminate(streamingState.aggBuffers[i]);
      WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFn.getName());
      if (!wFnInfo.isSupportsWindow()) {
        // Window-less functions report how many results terminate released.
        numRowsRemaining = ((ISupportStreamingModeForWindowing) fnEval).getRowsRemainingAfterTerminate();
      }
      if (numRowsRemaining != BoundarySpec.UNBOUNDED_AMOUNT) {
        // Drain the evaluator's remaining buffered results.
        while (numRowsRemaining > 0) {
          Object out = ((ISupportStreamingModeForWindowing) fnEval).getNextResult(streamingState.aggBuffers[i]);
          if (out != null) {
            streamingState.fnOutputs[i].add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
          }
          numRowsRemaining--;
        }
      }
    } else {
      // Non-streaming: evaluate the trailing rows against the (now complete)
      // rolling partition.
      while (numRowsRemaining > 0) {
        int rowToProcess = streamingState.rollingPart.size() - numRowsRemaining;
        if (rowToProcess >= 0) {
          Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
          streamingState.fnOutputs[i].add(out);
        }
        numRowsRemaining--;
      }
    }
  }
  // Every buffered input row must now yield an output row (unless the rank
  // limit cuts output short).
  List<Object> oRows = new ArrayList<Object>();
  while (!streamingState.rollingPart.processedAllRows() && !streamingState.rankLimitReached()) {
    boolean hasRow = streamingState.hasOutputRow();
    if (!hasRow && !streamingState.rankLimitReached()) {
      throw new HiveException("Internal Error: cannot generate all output rows for a Partition");
    }
    if (hasRow) {
      oRows.add(streamingState.nextOutputRow());
    }
  }
  return oRows.isEmpty() ? null : oRows;
}
Usage of org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing in the Apache Hive project.
From class TestStreamingSum, method _agg.
/**
 * Drives a streaming windowing evaluator over {@code inVals} and asserts that
 * the emitted results match {@code outVals} in order.
 *
 * Feeds each input through aggregate/getNextResult, then after terminate()
 * drains the remaining buffered results until {@code inSz} outputs were seen.
 */
public static <T, TW> void _agg(GenericUDAFResolver fnR, TypeInfo[] inputTypes, Iterator<T> inVals, TypeHandler<T, TW> typeHandler, TW[] in, ObjectInspector[] inputOIs, int inSz, int numPreceding, int numFollowing, Iterator<T> outVals) throws HiveException {
  GenericUDAFEvaluator fn = fnR.getEvaluator(inputTypes);
  fn.init(Mode.COMPLETE, inputOIs);
  fn = fn.getWindowingEvaluator(wdwFrame(numPreceding, numFollowing));
  AggregationBuffer agg = fn.getNewAggregationBuffer();
  ISupportStreamingModeForWindowing oS = (ISupportStreamingModeForWindowing) fn;
  int outSz = 0;
  while (inVals.hasNext()) {
    typeHandler.set(inVals.next(), in[0]);
    fn.aggregate(agg, in);
    Object out = oS.getNextResult(agg);
    if (out != null) {
      Assert.assertEquals(unwrapResult(out, typeHandler), outVals.next());
      outSz++;
    }
  }
  fn.terminate(agg);
  // Drain results still buffered by the evaluator after terminate.
  while (outSz < inSz) {
    Assert.assertEquals(unwrapResult(oS.getNextResult(agg), typeHandler), outVals.next());
    outSz++;
  }
}

/**
 * Converts a raw streaming result into the comparable domain value:
 * NULL_RESULT maps to null; otherwise the type handler unwraps the writable.
 * A ClassCastException from the handler leaves the value as-is (best-effort,
 * mirrors the original inline logic).
 */
@SuppressWarnings("unchecked")
private static <T, TW> Object unwrapResult(Object out, TypeHandler<T, TW> typeHandler) {
  if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
    return null;
  }
  try {
    return typeHandler.get((TW) out);
  } catch (ClassCastException ignored) {
    // Deliberately best-effort: some evaluators already return domain values.
    return out;
  }
}
Aggregations