Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in the Apache Hive project.
Class VectorPTFDesc, method getEvaluators.
/**
 * Creates one vectorized PTF evaluator for every function name held by the descriptor.
 *
 * <p>All per-evaluator arrays (function names, distinct flags, window frames, input
 * expressions, input column vector types and the output column map) are read at the same
 * index, so entry {@code k} of the result is built from entry {@code k} of each of them.
 *
 * @param vectorPTFDesc supplies the function names, distinct flags and window frame definitions
 * @param vectorPTFInfo supplies the input expressions, input column vector types and output column map
 * @return an array with one evaluator per function name, in the same order
 */
public static VectorPTFEvaluatorBase[] getEvaluators(VectorPTFDesc vectorPTFDesc, VectorPTFInfo vectorPTFInfo) {
  final String[] functionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final boolean[] distinctFlags = vectorPTFDesc.getEvaluatorsAreDistinct();
  final int count = functionNames.length;
  final WindowFrameDef[] frameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  final VectorExpression[][] inputExpressions = vectorPTFInfo.getEvaluatorInputExpressions();
  final Type[][] inputColumnVectorTypes = vectorPTFInfo.getEvaluatorInputColumnVectorTypes();
  // The output column map is indexed from 0 for the evaluator (aggregation) outputs.
  final int[] outputColumns = vectorPTFInfo.getOutputColumnMap();

  final VectorPTFEvaluatorBase[] result = new VectorPTFEvaluatorBase[count];
  for (int idx = 0; idx < count; idx++) {
    final SupportedFunctionType kind = VectorPTFDesc.supportedFunctionsMap.get(functionNames[idx]);
    result[idx] = VectorPTFDesc.getEvaluator(
        kind,
        distinctFlags[idx],
        frameDefs[idx],
        inputColumnVectorTypes[idx],
        inputExpressions[idx],
        outputColumns[idx]);
  }
  return result;
}
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in the Apache Hive project.
Class WindowingTableFunction, method streamingPossible.
/**
 * Decides whether the given window function may be evaluated in streaming mode.
 *
 * <p>The checks are applied in this order:
 * <ol>
 *   <li>functions that do not support a window: streaming is possible;</li>
 *   <li>RANGE based frames: not possible (dynamically sized windows are not handled);</li>
 *   <li>end boundary with {@code BoundarySpec.UNBOUNDED_AMOUNT}: not possible;</li>
 *   <li>start boundary with {@code BoundarySpec.UNBOUNDED_AMOUNT}: possible, the function
 *       decides whether it can handle this case;</li>
 *   <li>otherwise possible only if the frame size (start amount + end amount + 1) does not
 *       exceed the limit read from {@code ConfVars.HIVEJOINCACHESIZE}.</li>
 * </ol>
 *
 * @param cfg configuration the window size limit is read from
 * @param wFnDef the window function definition to examine
 * @return {@code true} if streaming evaluation is possible for this function
 * @throws HiveException declared by the signature; propagated from the helpers called here
 */
private boolean streamingPossible(Configuration cfg, WindowFunctionDef wFnDef) throws HiveException {
  WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
  WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFnDef.getName());
  if (!wFnInfo.isSupportsWindow()) {
    return true;
  }
  BoundaryDef start = wdwFrame.getStart();
  BoundaryDef end = wdwFrame.getEnd();
  /*
   * Currently we are not handling dynamic sized windows implied by range
   * based windows.
   */
  if (wdwFrame.getWindowType() == WindowType.RANGE) {
    return false;
  }
  /*
   * Windows that are unbounded following don't benefit from Streaming.
   */
  if (end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
    return false;
  }
  /*
   * let function decide if it can handle this special case.
   */
  if (start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
    return true;
  }
  int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
  /*
   * Compute the frame size in long arithmetic. With int arithmetic two large finite
   * amounts could wrap around to a negative value, which would compare as "within the
   * limit" and wrongly report streaming as possible.
   */
  long windowSize = (long) start.getAmt() + end.getAmt() + 1;
  return windowLimit >= windowSize;
}
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in the Apache Hive project.
Class PTFDeserializer, method initializeWindowing.
/**
 * Initializes a windowing table function definition: wires up its resolver, initializes every
 * window function (arguments, window frame, evaluator) against the input shape, and then builds
 * and installs the struct object inspector describing the windowing output.
 *
 * @param def the windowing table function definition to initialize
 * @throws HiveException declared by the signature; propagated from the helpers called here
 */
public void initializeWindowing(WindowTableFunctionDef def) throws HiveException {
  final ShapeDetails inputShape = def.getInput().getOutputShape();

  // Step 1: build the resolver and connect it to the definition and its evaluator.
  final TableFunctionEvaluator tableFnEvaluator = def.getTFunction();
  final WindowingTableFunctionResolver resolver =
      (WindowingTableFunctionResolver) constructResolver(def.getResolverClassName());
  resolver.initialize(hConf, ptfDesc, def, tableFnEvaluator);

  // Step 2: initialize each window function: arguments first, then frame, then evaluator.
  for (WindowFunctionDef fn : def.getWindowFunctions()) {
    if (fn.getArgs() != null) {
      for (PTFExpressionDef fnArg : fn.getArgs()) {
        initialize(fnArg, inputShape);
      }
    }
    final WindowFrameDef frame = fn.getWindowFrame();
    if (frame != null) {
      initialize(frame, inputShape);
    }
    setupWdwFnEvaluator(fn);
  }

  // Collect one output column per window function; pivoted results contribute the
  // element inspector of their list inspector rather than the list inspector itself.
  final ArrayList<String> columnAliases = new ArrayList<>();
  final ArrayList<ObjectInspector> columnOIs = new ArrayList<>();
  for (WindowFunctionDef fn : def.getWindowFunctions()) {
    columnAliases.add(fn.getAlias());
    columnOIs.add(fn.isPivotResult()
        ? ((ListObjectInspector) fn.getOI()).getListElementObjectInspector()
        : fn.getOI());
  }

  // The input columns are appended after the window function columns.
  PTFDeserializer.addInputColumnsToList(inputShape, columnAliases, columnOIs);

  final StructObjectInspector outputOI =
      ObjectInspectorFactory.getStandardStructObjectInspector(columnAliases, columnOIs);
  resolver.setWdwProcessingOutputOI(outputOI);
  initialize(def.getOutputShape(), outputOI);
  resolver.initializeOutputOI();
}
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in the Apache Hive project.
Class Vectorizer, method createVectorPTFDesc.
/*
 * Fill in the VectorPTFDesc with the data that is needed during validation and that does not
 * depend on VectorizationContext for looking up column names, etc.
 *
 * The output columns are laid out with the evaluator results first, followed by the key and
 * non-key input columns. The partition and order expressions, the per-evaluator arrays and the
 * reducer batch type information are then stored on the descriptor.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc, int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
  final PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  final List<WindowFunctionDef> windowFunctionList = ((WindowTableFunctionDef) funcDef).getWindowFunctions();
  final int evaluatorCount = windowFunctionList.size();
  final List<ColumnInfo> signature = ptfOp.getSchema().getSignature();
  final int columnCount = signature.size();

  // Type information of the reducer batch, as reported by the vectorization context.
  final TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
  final DataTypePhysicalVariation[] reducerBatchVariations = vContext.getAllDataTypePhysicalVariations();

  final String[] outputColumnNames = new String[columnCount];
  final TypeInfo[] outputTypeInfos = new TypeInfo[columnCount];
  final DataTypePhysicalVariation[] outputVariations = new DataTypePhysicalVariation[columnCount];

  // Evaluator results occupy the leading output columns and carry no physical variation.
  int col = 0;
  for (; col < evaluatorCount; col++) {
    final ColumnInfo info = signature.get(col);
    outputColumnNames[col] = info.getInternalName();
    outputTypeInfos[col] = info.getType();
    outputVariations[col] = DataTypePhysicalVariation.NONE;
  }
  // The remaining columns are key and non-key input columns (some may be missing); their
  // physical variation is taken from the reducer batch at the offset past the evaluators.
  for (; col < columnCount; col++) {
    final ColumnInfo info = signature.get(col);
    outputColumnNames[col] = info.getInternalName();
    outputTypeInfos[col] = info.getType();
    outputVariations[col] = reducerBatchVariations[col - evaluatorCount];
  }

  final List<PTFExpressionDef> partitionExprDefs = funcDef.getPartition().getExpressions();
  final int partitionKeyCount = partitionExprDefs.size();
  final ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExprDefs);
  final List<OrderExpressionDef> orderExprDefs = funcDef.getOrder().getExpressions();
  final int orderKeyCount = orderExprDefs.size();
  final ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExprDefs);

  // With both PARTITION BY and ORDER BY clauses the partition expressions differ from the
  // order expressions; otherwise there are only order by expressions. A differing count is
  // enough to tell them apart; with equal counts compare position by position and stop at
  // the first mismatch.
  boolean isPartitionOrderBy = partitionKeyCount != orderKeyCount;
  for (int k = 0; !isPartitionOrderBy && k < partitionKeyCount; k++) {
    final ExprNodeDescEqualityWrapper partitionSide = new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[k]);
    final ExprNodeDescEqualityWrapper orderSide = new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[k]);
    isPartitionOrderBy = !partitionSide.equals(orderSide);
  }

  // Per-evaluator arrays, populated by fillInPTFEvaluators.
  final String[] evaluatorFunctionNames = new String[evaluatorCount];
  final boolean[] evaluatorsAreDistinct = new boolean[evaluatorCount];
  final WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[evaluatorCount];
  final List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = (List<ExprNodeDesc>[]) new List<?>[evaluatorCount];
  fillInPTFEvaluators(windowFunctionList, evaluatorFunctionNames, evaluatorsAreDistinct, evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);

  vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos, reducerBatchVariations);
  vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
  vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
  vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
  vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
  vectorPTFDesc.setEvaluatorsAreDistinct(evaluatorsAreDistinct);
  vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
  vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
  vectorPTFDesc.setOutputColumnNames(outputColumnNames);
  vectorPTFDesc.setOutputTypeInfos(outputTypeInfos, outputVariations);
  vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef in the Apache Hive project.
Class PTFTranslator, method translate.
/**
 * Translates a window frame specification into a window frame definition.
 *
 * <p>The start and end boundaries of the spec are copied (direction and amount) into boundary
 * definitions. For frames of type {@code WindowType.RANGE} the order definition built from
 * {@code orderExpressions} is attached as well.
 *
 * @param inpShape input shape used when building the order expressions
 * @param spec the frame specification to translate; may be {@code null}
 * @param orderExpressions order expressions used for RANGE frames
 * @return the translated frame definition, or {@code null} when {@code spec} is {@code null}
 * @throws SemanticException if the start boundary compares greater than the end boundary
 */
private WindowFrameDef translate(ShapeDetails inpShape, WindowFrameSpec spec, List<OrderExpression> orderExpressions) throws SemanticException {
  if (spec == null) {
    return null;
  }

  final BoundarySpec startSpec = spec.getStart();
  final BoundarySpec endSpec = spec.getEnd();
  if (startSpec.compareTo(endSpec) > 0) {
    throw new SemanticException(String.format("Window range invalid, start boundary is greater than end boundary: %s", spec));
  }

  final BoundaryDef startDef = new BoundaryDef(startSpec.direction, startSpec.getAmt());
  final BoundaryDef endDef = new BoundaryDef(endSpec.direction, endSpec.getAmt());
  final WindowFrameDef frameDef = new WindowFrameDef(spec.getWindowType(), startDef, endDef);

  // Only RANGE frames carry an order definition.
  if (frameDef.getWindowType() == WindowType.RANGE) {
    frameDef.setOrderDef(buildOrderExpressions(inpShape, orderExpressions));
  }
  return frameDef;
}
Aggregations