Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
From the class WindowingTableFunction, method finishPartition.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#finishPartition()
 *
 * For functions that are not ISupportStreamingModeForWindowing, give them the
 * remaining rows (rows whose span went beyond the end of the partition); for
 * the rest of the functions, invoke terminate.
 *
 * While numOutputRows < numInputRows, for each function that doesn't have
 * enough output invoke getNextObj; if there is no output, flag this as an error.
 */
@Override
public List<Object> finishPartition() throws HiveException {
  /*
   * Once enough rows have been output, there is no need to generate more output.
   */
  if (streamingState.rankLimitReached()) {
    return null;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
    GenericUDAFEvaluator fnEval = wFn.getWFnEval();
    int numRowsRemaining = wFn.getWindowFrame().getEnd().getRelativeOffset();
    if (fnEval != null && fnEval instanceof ISupportStreamingModeForWindowing) {
      fnEval.terminate(streamingState.aggBuffers[i]);
      WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFn.getName());
      if (!wFnInfo.isSupportsWindow()) {
        numRowsRemaining =
            ((ISupportStreamingModeForWindowing) fnEval).getRowsRemainingAfterTerminate();
      }
      if (numRowsRemaining != BoundarySpec.UNBOUNDED_AMOUNT) {
        while (numRowsRemaining > 0) {
          Object out =
              ((ISupportStreamingModeForWindowing) fnEval).getNextResult(streamingState.aggBuffers[i]);
          if (out != null) {
            streamingState.fnOutputs[i].add(
                out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
          }
          numRowsRemaining--;
        }
      }
    } else {
      while (numRowsRemaining > 0) {
        int rowToProcess = streamingState.rollingPart.size() - numRowsRemaining;
        if (rowToProcess >= 0) {
          Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
          streamingState.fnOutputs[i].add(out);
        }
        numRowsRemaining--;
      }
    }
  }
  List<Object> oRows = new ArrayList<Object>();
  while (!streamingState.rollingPart.processedAllRows() && !streamingState.rankLimitReached()) {
    boolean hasRow = streamingState.hasOutputRow();
    if (!hasRow && !streamingState.rankLimitReached()) {
      throw new HiveException("Internal Error: cannot generate all output rows for a Partition");
    }
    if (hasRow) {
      oRows.add(streamingState.nextOutputRow());
    }
  }
  return oRows.size() == 0 ? null : oRows;
}
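For context on the drain loop above: a streaming evaluator whose window frame extends beyond the current row cannot emit a row's result until the whole frame has been seen, so at the end of a partition it still owes results for the trailing rows, and finishPartition collects them. Below is a minimal standalone sketch of that idea; it is not Hive code, the names (StreamingMovingSum, processRow, drainRemaining) are hypothetical, and it models a moving sum over ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING.

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch (not Hive code): a streaming moving sum over
// "ROWS BETWEEN CURRENT ROW AND lead FOLLOWING". A row's frame is only complete
// after `lead` further rows arrive, so the end of the partition leaves partial
// frames that the caller drains, analogous to the getNextResult() loop above.
final class StreamingMovingSum {
  private final int lead;                                 // rows following the current row in the frame
  private final List<Long> pending = new ArrayList<>();   // partial sums for frames still open
  private final List<Long> ready = new ArrayList<>();     // results whose frames are complete

  StreamingMovingSum(int lead) {
    this.lead = lead;
  }

  void processRow(long value) {
    pending.add(0L);                             // open a frame for the new current row
    for (int j = 0; j < pending.size(); j++) {
      pending.set(j, pending.get(j) + value);    // the new value lies inside every open frame
    }
    if (pending.size() > lead) {
      ready.add(pending.remove(0));              // oldest frame has now seen lead + 1 rows
    }
  }

  // End of partition: frames that ran past the last row are closed with the rows they did see.
  List<Long> drainRemaining() {
    List<Long> remaining = new ArrayList<>(pending);
    pending.clear();
    return remaining;
  }

  List<Long> readyResults() {
    return ready;
  }

  public static void main(String[] args) {
    StreamingMovingSum sum = new StreamingMovingSum(2);  // CURRENT ROW AND 2 FOLLOWING
    for (long v : new long[] { 1, 2, 3, 4, 5 }) {
      sum.processRow(v);
    }
    System.out.println(sum.readyResults());   // [6, 9, 12]  (1+2+3, 2+3+4, 3+4+5)
    System.out.println(sum.drainRemaining()); // [9, 5]      (4+5, 5): the trailing rows
  }
}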
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
From the class Vectorizer, method validatePTFOperator.
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc)
    throws HiveException {
  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled ("
        + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = (PTFDesc) op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }
  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION,
        /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns",
          "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }
  boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    if (!windowFrameDef.isStartUnbounded()) {
      setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
      return false;
    }
    switch (windowFrameDef.getWindowType()) {
    case RANGE:
      if (!windowFrameDef.getEnd().isCurrentRow()) {
        setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
        return false;
      }
      break;
    case ROWS:
      if (!windowFrameDef.isEndUnbounded()) {
setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type");
        return false;
      }
      break;
    default:
      throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
      setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
      return false;
    }
    if (exprNodeDescList != null) {
      ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
      if (containsLeadLag(exprNodeDesc)) {
        setOperatorIssue("lead and lag function not supported in argument expression of aggregation function "
            + functionName);
        return false;
      }
      if (supportedFunctionType != SupportedFunctionType.COUNT
          && supportedFunctionType != SupportedFunctionType.DENSE_RANK
          && supportedFunctionType != SupportedFunctionType.RANK) {
        // COUNT, DENSE_RANK, and RANK do not care about column types. The rest do.
        TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
        Category category = typeInfo.getCategory();
        boolean isSupportedType;
        if (category != Category.PRIMITIVE) {
          isSupportedType = false;
        } else {
          ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
          switch (colVecType) {
          case LONG:
          case DOUBLE:
          case DECIMAL:
            isSupportedType = true;
            break;
          default:
            isSupportedType = false;
            break;
          }
        }
        if (!isSupportedType) {
          setOperatorIssue(typeInfo.getTypeName()
              + " data type not supported in argument expression of aggregation function " + functionName);
          return false;
        }
      }
    }
  }
  return true;
}
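The method above is essentially a chain of guard checks: each unsupported shape records a human-readable issue via setOperatorIssue and returns false. The window-frame rules it enforces are that the start bound must be unbounded, a RANGE frame must end at CURRENT ROW, and a ROWS frame must end unbounded. Below is a minimal standalone sketch of just those frame rules; it uses stand-in enums rather than Hive's WindowFrameDef, and the names (FrameCheck, Bound, validate) are hypothetical.

// Hypothetical sketch (not Hive code) of the frame checks performed above:
// start must be UNBOUNDED PRECEDING; the end must be CURRENT ROW for RANGE
// frames or UNBOUNDED FOLLOWING for ROWS frames. Returns null when the frame
// is acceptable, otherwise a message in the spirit of setOperatorIssue().
final class FrameCheck {
  enum WindowType { RANGE, ROWS }
  enum Bound { UNBOUNDED, CURRENT_ROW, BOUNDED }

  static String validate(WindowType type, Bound start, Bound end) {
    if (start != Bound.UNBOUNDED) {
      return "only UNBOUNDED start frame is supported";
    }
    switch (type) {
    case RANGE:
      return end == Bound.CURRENT_ROW ? null : "only CURRENT ROW end frame is supported for RANGE";
    case ROWS:
      return end == Bound.UNBOUNDED ? null : "only UNBOUNDED end frame is supported for ROWS";
    default:
      throw new IllegalStateException("Unexpected window type " + type);
    }
  }

  public static void main(String[] args) {
    // e.g. OVER (... ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) -> accepted
    System.out.println(validate(WindowType.ROWS, Bound.UNBOUNDED, Bound.UNBOUNDED));   // null
    // e.g. OVER (... RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -> rejected
    System.out.println(validate(WindowType.RANGE, Bound.UNBOUNDED, Bound.BOUNDED));
  }
}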