Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
The class WindowingTableFunction, method startPartition.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#startPartition()
 */
@Override
public void startPartition() throws HiveException {
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  streamingState.reset(tabDef);
}
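For context, the PTF runtime drives an evaluator one partition at a time, and startPartition is the per-partition reset hook; here it simply clears the streaming state before the next partition's rows arrive. Below is a minimal sketch of that calling pattern, using a hypothetical PartitionEvaluator interface rather than Hive's real TableFunctionEvaluator API:

// Hypothetical interface and driver; names are illustrative, not Hive's PTFOperator.
interface PartitionEvaluator {
  void startPartition();
  void processRow(Object row);
  void finishPartition();
}

static void drive(PartitionEvaluator eval, Iterable<Object> partitionRows) {
  eval.startPartition();             // per-partition reset, as in the override above
  for (Object row : partitionRows) {
    eval.processRow(row);            // rows are fed in partition order
  }
  eval.finishPartition();            // flush any output buffered for the partition
}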
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
The class Vectorizer, method createVectorPTFDesc.
/*
 * Update the VectorPTFDesc with data that is used during validation and that doesn't rely on
 * the VectorizationContext to look up column names, etc.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc,
    VectorizationContext vContext, VectorPTFDesc vectorPTFDesc,
    int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {

  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef;
  List<WindowFunctionDef> windowsFunctions = windowTableFunctionDef.getWindowFunctions();
  final int functionCount = windowsFunctions.size();

  ArrayList<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
  final int outputSize = outputSignature.size();

  /*
   * Output columns.
   */

  // Evaluator results come first.
  String[] outputColumnNames = new String[outputSize];
  TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
  for (int i = 0; i < functionCount; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    TypeInfo typeInfo = colInfo.getType();
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = typeInfo;
  }

  // Followed by key and non-key input columns (some may be missing).
  for (int i = functionCount; i < outputSize; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = colInfo.getType();
  }

  List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
  final int partitionKeyCount = partitionExpressions.size();
  ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);

  List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
  final int orderKeyCount = orderExpressions.size();
  ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);

  // When both PARTITION BY and ORDER BY clauses are present, the partition expressions
  // may differ from the order expressions. Otherwise, there are only ORDER BY expressions.
  boolean isPartitionOrderBy = false;
  if (partitionKeyCount != orderKeyCount) {
    // Obviously different expressions.
    isPartitionOrderBy = true;
  } else {
    // Check each ExprNodeDesc.
    for (int i = 0; i < partitionKeyCount; i++) {
      final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]);
      final ExprNodeDescEqualityWrapper orderExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]);
      if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) {
        isPartitionOrderBy = true;
        break;
      }
    }
  }

  String[] evaluatorFunctionNames = new String[functionCount];
  WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists =
      (List<ExprNodeDesc>[]) new List<?>[functionCount];
  fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorWindowFrameDefs,
      evaluatorInputExprNodeDescLists);

  TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();

  vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos);
  vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
  vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
  vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
  vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
  vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
  vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
  vectorPTFDesc.setOutputColumnNames(outputColumnNames);
  vectorPTFDesc.setOutputTypeInfos(outputTypeInfos);
  vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
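The isPartitionOrderBy check above relies on ExprNodeDescEqualityWrapper, which compares expression trees structurally rather than by object identity. A minimal sketch of that comparison, assuming the Hive exec and serde jars are on the classpath; the column and table names are made up:

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Two distinct objects describing the same column compare equal through the wrapper,
// so PARTITION BY x ORDER BY x is not flagged as a distinct partition-order-by.
ExprNodeColumnDesc p = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "_col0", "t", false);
ExprNodeColumnDesc o = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "_col0", "t", false);
boolean same = new ExprNodeDescEqualityWrapper(p).equals(new ExprNodeDescEqualityWrapper(o)); // true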
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
The class PTFTranslator, method translate.
public PTFDesc translate(WindowingSpec wdwSpec, SemanticAnalyzer semAly, HiveConf hCfg,
    RowResolver inputRR, UnparseTranslator unparseT) throws SemanticException {
  init(semAly, hCfg, inputRR, unparseT);
  windowingSpec = wdwSpec;
  ptfDesc = new PTFDesc();
  ptfDesc.setCfg(hCfg);
  ptfDesc.setLlInfo(llInfo);

  WindowTableFunctionDef wdwTFnDef = new WindowTableFunctionDef();
  ptfDesc.setFuncDef(wdwTFnDef);

  PTFQueryInputSpec inpSpec = new PTFQueryInputSpec();
  inpSpec.setType(PTFQueryInputType.WINDOWING);
  wdwTFnDef.setInput(translate(inpSpec, 0));
  ShapeDetails inpShape = wdwTFnDef.getInput().getOutputShape();

  WindowingTableFunctionResolver tFn = (WindowingTableFunctionResolver)
      FunctionRegistry.getTableFunctionResolver(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
  if (tFn == null) {
    throw new SemanticException(String.format("Internal Error: Unknown Table Function %s",
        FunctionRegistry.WINDOWING_TABLE_FUNCTION));
  }
  wdwTFnDef.setName(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
  wdwTFnDef.setResolverClassName(tFn.getClass().getName());
  wdwTFnDef.setAlias("ptf_" + 1);
  wdwTFnDef.setExpressionTreeString(null);
  wdwTFnDef.setTransformsRawInput(false);
  tFn.initialize(hCfg, ptfDesc, wdwTFnDef);
  TableFunctionEvaluator tEval = tFn.getEvaluator();
  wdwTFnDef.setTFunction(tEval);
  wdwTFnDef.setCarryForwardNames(tFn.carryForwardNames());
  wdwTFnDef.setRawInputShape(inpShape);

  PartitioningSpec partiSpec = wdwSpec.getQueryPartitioningSpec();
  if (partiSpec == null) {
    throw new SemanticException(
        "Invalid use of Windowing: there is no Partitioning associated with Windowing");
  }
  PartitionDef partDef = translate(inpShape, wdwSpec.getQueryPartitionSpec());
  OrderDef ordDef = translate(inpShape, wdwSpec.getQueryOrderSpec(), partDef);
  wdwTFnDef.setPartition(partDef);
  wdwTFnDef.setOrder(ordDef);

  /*
   * Process the window functions.
   */
  ArrayList<WindowFunctionDef> windowFunctions = new ArrayList<WindowFunctionDef>();
  if (wdwSpec.getWindowExpressions() != null) {
    for (WindowExpressionSpec expr : wdwSpec.getWindowExpressions()) {
      if (expr instanceof WindowFunctionSpec) {
        WindowFunctionDef wFnDef = translate(wdwTFnDef, (WindowFunctionSpec) expr);
        windowFunctions.add(wFnDef);
      }
    }
    wdwTFnDef.setWindowFunctions(windowFunctions);
  }

  /*
   * Set up the output OI from window-function processing.
   */
  ArrayList<String> aliases = new ArrayList<String>();
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
  for (WindowFunctionDef wFnDef : windowFunctions) {
    aliases.add(wFnDef.getAlias());
    if (wFnDef.isPivotResult()) {
      fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
    } else {
      fieldOIs.add(wFnDef.getOI());
    }
  }
  PTFTranslator.addInputColumnsToList(inpShape, aliases, fieldOIs);
  StructObjectInspector wdwOutOI =
      ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
  tFn.setWdwProcessingOutputOI(wdwOutOI);
  RowResolver wdwOutRR = buildRowResolverForWindowing(wdwTFnDef);
  ShapeDetails wdwOutShape = setupShape(wdwOutOI, null, wdwOutRR);
  wdwTFnDef.setOutputShape(wdwOutShape);
  tFn.setupOutputOI();
  PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
  return ptfDesc;
}
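The output object inspector assembled above is an ordinary standard struct OI: window-function aliases first, then the input columns carried forward by addInputColumnsToList. A minimal sketch of the same factory call with made-up field names, assuming the hive-serde jar is on the classpath:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical two-field output shape: one rank() result, one carried-forward column.
List<String> aliases = Arrays.asList("_wcol0", "_col0");
List<ObjectInspector> fieldOIs = Arrays.asList(
    (ObjectInspector) PrimitiveObjectInspectorFactory.writableIntObjectInspector,
    PrimitiveObjectInspectorFactory.writableStringObjectInspector);
StructObjectInspector wdwOutOI =
    ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);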
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
The class WindowingTableFunction, method initializeStreaming.
@Override
public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI,
    boolean isMapSide) throws HiveException {
  int[] span = setCanAcceptInputAsStream(cfg);
  if (!canAcceptInputAsStream) {
    return;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
    GenericUDAFEvaluator streamingEval = fnEval.getWindowingEvaluator(wdwFrame);
    if (streamingEval != null) {
      wFnDef.setWFnEval(streamingEval);
      if (wFnDef.isPivotResult()) {
        ListObjectInspector listOI = (ListObjectInspector) wFnDef.getOI();
        wFnDef.setOI(listOI.getListElementObjectInspector());
      }
    }
  }
  if (tabDef.getRankLimit() != -1) {
    rnkLimitDef = new RankLimit(tabDef.getRankLimit(), tabDef.getRankLimitFunction(),
        tabDef.getWindowFunctions());
  }
  streamingState = new StreamingState(cfg, inputOI, isMapSide, tabDef, span[0], span[1]);
}
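The interesting call here is fnEval.getWindowingEvaluator(wdwFrame): the base GenericUDAFEvaluator returns null, so streaming is strictly opt-in per UDAF. A hedged sketch of the override shape inside a hypothetical evaluator subclass (the other required UDAF methods are elided and MyStreamingEvaluator is made up):

// Inside a hypothetical GenericUDAFEvaluator subclass; other abstract methods elided.
@Override
public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
  // Return a streaming-capable evaluator for this frame, or null to decline
  // (null is what the default implementation returns).
  return new MyStreamingEvaluator(this, wFrmDef); // MyStreamingEvaluator is hypothetical
}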
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef in project hive by apache.
The class WindowingTableFunction, method setCanAcceptInputAsStream.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream()
 *
 * WindowTableFunction supports streaming if every window function meets one of
 * these conditions:
 * 1. The function implements ISupportStreamingModeForWindowing.
 * 2. getWindowingEvaluator returns a non-null evaluator that implements
 *    ISupportStreamingModeForWindowing.
 * 3. The invocation is on a 'fixed' window, i.e. no Unbounded Preceding or Following.
 */
@SuppressWarnings("resource")
private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {
  canAcceptInputAsStream = false;
  if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
    return null;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  int startPos = Integer.MAX_VALUE;
  int endPos = Integer.MIN_VALUE;
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
    boolean streamingPossible = streamingPossible(cfg, wFnDef);
    GenericUDAFEvaluator streamingEval =
        streamingPossible ? fnEval.getWindowingEvaluator(wdwFrame) : null;
    if (streamingEval instanceof ISupportStreamingModeForWindowing) {
      continue;
    }
    BoundaryDef start = wdwFrame.getStart();
    BoundaryDef end = wdwFrame.getEnd();
    if (wdwFrame.getWindowType() == WindowType.ROWS) {
      if (!end.isUnbounded() && !start.isUnbounded()) {
        startPos = Math.min(startPos, wdwFrame.getStart().getRelativeOffset());
        endPos = Math.max(endPos, wdwFrame.getEnd().getRelativeOffset());
        continue;
      }
    }
    return null;
  }
  int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
  if (windowLimit < (endPos - startPos + 1)) {
    return null;
  }
  canAcceptInputAsStream = true;
  return new int[] { startPos, endPos };
}
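As a worked example of the span arithmetic (the sign convention, negative offsets for PRECEDING and positive for FOLLOWING, is assumed from the min/max logic above): with two bounded ROWS frames, the merged span determines how many rows must stay buffered around the current row.

// f1: ROWS BETWEEN 2 PRECEDING AND CURRENT ROW -> offsets -2 .. 0
// f2: ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING -> offsets -1 .. 3
int startPos = Math.min(-2, -1);         // -2
int endPos = Math.max(0, 3);             //  3
int rowsNeeded = endPos - startPos + 1;  //  6 rows around the current row
// Streaming is accepted only when hive.join.cache.size (HIVEJOINCACHESIZE) >= rowsNeeded.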