Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.
The class PTFTranslator, method translate.
private WindowFunctionDef translate(WindowTableFunctionDef wdwTFnDef, WindowFunctionSpec spec)
    throws SemanticException {
  WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(spec.getName());
  if (wFnInfo == null) {
    throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(spec.getName()));
  }
  WindowFunctionDef def = new WindowFunctionDef();
  def.setName(spec.getName());
  def.setAlias(spec.getAlias());
  def.setDistinct(spec.isDistinct());
  def.setExpressionTreeString(spec.getExpression().toStringTree());
  def.setStar(spec.isStar());
  def.setPivotResult(wFnInfo.isPivotResult());
  def.setRespectNulls(spec.isRespectNulls());
  ShapeDetails inpShape = wdwTFnDef.getRawInputShape();
  /*
   * translate args
   */
  ArrayList<ASTNode> args = spec.getArgs();
  if (args != null) {
    for (ASTNode expr : args) {
      PTFExpressionDef argDef = null;
      try {
        argDef = buildExpressionDef(inpShape, expr);
      } catch (HiveException he) {
        throw new SemanticException(he);
      }
      def.addArg(argDef);
    }
  }
  if (FunctionRegistry.isRankingFunction(spec.getName())) {
    setupRankingArgs(wdwTFnDef, def, spec);
  }
  WindowSpec wdwSpec = spec.getWindowSpec();
  if (wdwSpec != null) {
    String desc = spec.toString();
    WindowFrameDef wdwFrame = translate(spec.getName(), inpShape, wdwSpec);
    if (!wFnInfo.isSupportsWindow()) {
      BoundarySpec start = wdwSpec.getWindowFrame().getStart();
      if (start.getAmt() != BoundarySpec.UNBOUNDED_AMOUNT) {
        throw new SemanticException(String.format("Expecting left window frame boundary for "
            + "function %s to be unbounded. Found : %d", desc, start.getAmt()));
      }
      BoundarySpec end = wdwSpec.getWindowFrame().getEnd();
      if (end.getAmt() != BoundarySpec.UNBOUNDED_AMOUNT) {
        throw new SemanticException(String.format("Expecting right window frame boundary for "
            + "function %s to be unbounded. Found : %d", desc, end.getAmt()));
      }
    }
    def.setWindowFrame(wdwFrame);
  }
  try {
    setupWdwFnEvaluator(def);
  } catch (HiveException he) {
    throw new SemanticException(he);
  }
  return def;
}
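For functions whose WindowFunctionInfo reports isSupportsWindow() as false, the snippet above requires both frame boundaries to be unbounded. Below is a minimal sketch of that condition, using only the accessors that appear in the snippet (getWindowFrame(), getStart()/getEnd(), getAmt(), BoundarySpec.UNBOUNDED_AMOUNT); the helper name isFullyUnbounded is illustrative, not part of Hive.

  // Hedged sketch: true only when both boundaries of the window frame are unbounded,
  // which is what translate() demands when wFnInfo.isSupportsWindow() is false.
  private static boolean isFullyUnbounded(WindowSpec wdwSpec) {
    BoundarySpec start = wdwSpec.getWindowFrame().getStart();
    BoundarySpec end = wdwSpec.getWindowFrame().getEnd();
    return start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT
        && end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT;
  }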
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.
The class Vectorizer, method validatePTFOperator.
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc)
    throws HiveException {
  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled ("
        + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }
  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION,
        /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns",
          "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }
  boolean[] distinctEvaluator = vectorPTFDesc.getEvaluatorsAreDistinct();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    if (distinctEvaluator[i] && !supportedFunctionType.isSupportDistinct()) {
      setOperatorIssue(functionName + " distinct is not supported ");
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    final boolean isSingleParameter = (exprNodeDescList != null && exprNodeDescList.size() == 1);
    final ExprNodeDesc singleExprNodeDesc = (isSingleParameter ? exprNodeDescList.get(0) : null);
    final TypeInfo singleTypeInfo = (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null);
    final PrimitiveCategory singlePrimitiveCategory =
        (singleTypeInfo instanceof PrimitiveTypeInfo
            ? ((PrimitiveTypeInfo) singleTypeInfo).getPrimitiveCategory() : null);
    switch (windowFrameDef.getWindowType()) {
      case RANGE:
        if (!windowFrameDef.getEnd().isCurrentRow()) {
          setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
          return false;
        }
        break;
      case ROWS:
        {
          boolean isRowEndCurrent =
              (windowFrameDef.getEnd().isCurrentRow()
                  && (supportedFunctionType == SupportedFunctionType.AVG
                      || supportedFunctionType == SupportedFunctionType.MAX
                      || supportedFunctionType == SupportedFunctionType.MIN
                      || supportedFunctionType == SupportedFunctionType.SUM)
                  && isSingleParameter && singlePrimitiveCategory != null);
          if (!isRowEndCurrent && !windowFrameDef.isEndUnbounded()) {
            setOperatorIssue(functionName + " UNBOUNDED end frame is required for ROWS window type");
            return false;
          }
        }
        break;
      default:
        throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }
    // RANK/DENSE_RANK don't care about columns.
    if (supportedFunctionType != SupportedFunctionType.RANK
        && supportedFunctionType != SupportedFunctionType.DENSE_RANK) {
      if (exprNodeDescList != null) {
        // LEAD and LAG now support multiple arguments in vectorized mode
        if (exprNodeDescList.size() > 1
            && supportedFunctionType != SupportedFunctionType.LAG
            && supportedFunctionType != SupportedFunctionType.LEAD) {
          setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
          return false;
        }
        ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
        if (containsLeadLag(exprNodeDesc)) {
          setOperatorIssue("lead and lag function not supported in argument expression of aggregation function "
              + functionName);
          return false;
        }
        if (supportedFunctionType != SupportedFunctionType.COUNT) {
          // COUNT does not care about column types. The rest do.
          TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
          Category category = typeInfo.getCategory();
          boolean isSupportedType;
          if (category != Category.PRIMITIVE) {
            isSupportedType = false;
          } else {
            ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            switch (colVecType) {
              case LONG:
              case DOUBLE:
              case DECIMAL:
                isSupportedType = true;
                break;
              default:
                isSupportedType = false;
                break;
            }
          }
          if (!isSupportedType) {
            setOperatorIssue(typeInfo.getTypeName()
                + " data type not supported in argument expression of aggregation function " + functionName);
            return false;
          }
        }
      }
    }
    if (vectorPTFDesc.getOrderExprNodeDescs().length > 1) {
      /*
       * Currently, we need to rule out here all cases where a range boundary scanner can run,
       * basically: 1. bounded start, 2. bounded end which is not current row.
       */
      if (windowFrameDef.getWindowType() == WindowType.RANGE
          && (!windowFrameDef.isStartUnbounded()
              || !(windowFrameDef.getEnd().isCurrentRow() || windowFrameDef.isEndUnbounded()))) {
        setOperatorIssue("Multi-column ordered RANGE boundary scanner is not supported in vectorized mode (window: "
            + windowFrameDef + ")");
        return false;
      }
    }
  }
  return true;
}
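Condensed, the per-evaluator frame rules in the main switch above are: a RANGE frame must end at CURRENT ROW, and a ROWS frame must have an UNBOUNDED end unless the single-argument AVG/MAX/MIN/SUM relaxation applies. A hedged sketch of that predicate follows; the method name isFrameVectorizable and the omission of the AVG/MAX/MIN/SUM special case are simplifications for illustration, not Hive API.

  // Hedged sketch of the frame check above, using only WindowFrameDef accessors
  // that appear in the snippet (getWindowType, getEnd().isCurrentRow, isEndUnbounded).
  static boolean isFrameVectorizable(WindowFrameDef frame) {
    switch (frame.getWindowType()) {
      case RANGE:
        // RANGE frames must end at the current row.
        return frame.getEnd().isCurrentRow();
      case ROWS:
        // ROWS frames must, in the general case, have an unbounded end.
        return frame.isEndUnbounded();
      default:
        return false;
    }
  }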
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.
The class WindowingTableFunction, method initializeStreaming.
@Override
public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide)
    throws HiveException {
  int[] span = setCanAcceptInputAsStream(cfg);
  if (!canAcceptInputAsStream) {
    return;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
    GenericUDAFEvaluator streamingEval = fnEval.getWindowingEvaluator(wdwFrame);
    if (streamingEval != null) {
      wFnDef.setWFnEval(streamingEval);
      if (wFnDef.isPivotResult()) {
        ListObjectInspector listOI = (ListObjectInspector) wFnDef.getOI();
        wFnDef.setOI(listOI.getListElementObjectInspector());
      }
    }
  }
  if (tabDef.getRankLimit() != -1) {
    rnkLimitDef = new RankLimit(tabDef.getRankLimit(), tabDef.getRankLimitFunction(), tabDef.getWindowFunctions());
  }
  streamingState = new StreamingState(cfg, inputOI, isMapSide, tabDef, span[0], span[1]);
}
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.
The class WindowingTableFunction, method setCanAcceptInputAsStream.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream()
 *
 * WindowTableFunction supports streaming if all functions meet one of these conditions:
 * 1. The Function implements ISupportStreamingModeForWindowing.
 * 2. Or returns a non null Object for the getWindowingEvaluator, that implements
 *    ISupportStreamingModeForWindowing.
 * 3. Is an invocation on a 'fixed' window. So no Unbounded Preceding or Following.
 */
@SuppressWarnings("resource")
private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {
  canAcceptInputAsStream = false;
  if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
    return null;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  int startPos = Integer.MAX_VALUE;
  int endPos = Integer.MIN_VALUE;
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
    boolean streamingPossible = streamingPossible(cfg, wFnDef);
    GenericUDAFEvaluator streamingEval = streamingPossible ? fnEval.getWindowingEvaluator(wdwFrame) : null;
    if (streamingEval != null && streamingEval instanceof ISupportStreamingModeForWindowing) {
      continue;
    }
    BoundaryDef start = wdwFrame.getStart();
    BoundaryDef end = wdwFrame.getEnd();
    if (wdwFrame.getWindowType() == WindowType.ROWS) {
      if (!end.isUnbounded() && !start.isUnbounded()) {
        startPos = Math.min(startPos, wdwFrame.getStart().getRelativeOffset());
        endPos = Math.max(endPos, wdwFrame.getEnd().getRelativeOffset());
        continue;
      }
    }
    return null;
  }
  int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
  if (windowLimit < (endPos - startPos + 1)) {
    return null;
  }
  canAcceptInputAsStream = true;
  return new int[] { startPos, endPos };
}
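As a worked example of the span bookkeeping above (a sketch, under the assumption that getRelativeOffset() returns a signed row offset: negative for PRECEDING, zero for CURRENT ROW, positive for FOLLOWING):

  // Frame 1: ROWS BETWEEN 2 PRECEDING AND CURRENT ROW -> offsets [-2, 0]
  // Frame 2: ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING -> offsets [-1, 3]
  int startPos = Math.min(-2, -1);           // -2
  int endPos = Math.max(0, 3);               //  3
  int rowsToBuffer = endPos - startPos + 1;  //  6, compared against ConfVars.HIVEJOINCACHESIZE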
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in project hive by apache.
The class TestVectorPTFGroupBatches, method init.
/* Initialized to have the state that is present in case of the following query:
 * select p_mfgr, p_name, rowindex,
 * count(*) over(partition by p_mfgr order by p_date range between 1 preceding and current row) as cs1,
 * count(*) over(partition by p_mfgr order by p_date range between 3 preceding and current row) as cs2
 * from vector_ptf_part_simple_orc;
 */
private void init(VectorPTFGroupBatches groupBatches) throws HiveException {
  int[] outputProjectionColumnMap = new int[] { 3, 4, 0, 2, 1 };
  TypeInfo[] outputTypeInfos = new TypeInfo[] { getTypeInfo("bigint"), getTypeInfo("bigint"),
      getTypeInfo("string"), getTypeInfo("string"), getTypeInfo("int") };
  groupBatches.init(
      /* evaluators */
      new VectorPTFEvaluatorBase[] {
          new VectorPTFEvaluatorCountStar(new WindowFrameDef(WindowType.RANGE,
              new BoundaryDef(Direction.PRECEDING, 1), new BoundaryDef(Direction.CURRENT, 0)), null, 2),
          new VectorPTFEvaluatorCountStar(new WindowFrameDef(WindowType.RANGE,
              new BoundaryDef(Direction.PRECEDING, 3), new BoundaryDef(Direction.CURRENT, 0)), null, 3) },
      /* outputProjectionColumnMap */
      outputProjectionColumnMap,
      /* bufferedColumnMap */
      new int[] { 1, 2 },
      /* bufferedTypeInfos */
      new TypeInfo[] { getTypeInfo("int"), getTypeInfo("string") }, // p_date
      new int[] { 1 }, // p_mfgr
      new int[] { 0 }, /* keyWithoutOrderColumnMap */
      getFakeOperator().setupOverflowBatch(3, new String[] { "bigint", "bigint" },
          outputProjectionColumnMap, outputTypeInfos));
}
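For readability, the first evaluator's frame above is simply the "range between 1 preceding and current row" clause from the query in the comment, restated here with the same constructors used in init():

  // cs1: count(*) over (... range between 1 preceding and current row)
  WindowFrameDef cs1Frame = new WindowFrameDef(
      WindowType.RANGE,
      new BoundaryDef(Direction.PRECEDING, 1), // 1 preceding
      new BoundaryDef(Direction.CURRENT, 0));  // current row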