Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project Hive by Apache.
The class SemanticAnalyzer, method buildPTFReduceSinkDetails.
/**
 * Construct the data structures containing ExprNodeDesc for partition
 * columns and order columns. Use the input definition to construct the list
 * of output columns for the ReduceSinkOperator.
 *
 * @throws SemanticException
 */
void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, RowResolver inputRR,
    ArrayList<ExprNodeDesc> partCols, ArrayList<ExprNodeDesc> orderCols,
    StringBuilder orderString, StringBuilder nullOrderString) throws SemanticException {
  List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
  for (PTFExpressionDef colDef : partColList) {
    ExprNodeDesc exprNode = colDef.getExprNode();
    if (ExprNodeDescUtils.indexOf(exprNode, partCols) < 0) {
      partCols.add(exprNode);
      orderCols.add(exprNode);
      orderString.append('+');
      nullOrderString.append('a');
    }
  }
  /*
   * Order columns are used as key columns for constructing
   * the ReduceSinkOperator.
   * Since we do not explicitly add these to outputColumnNames,
   * we need to set includeKeyCols = false while creating the
   * ReduceSinkDesc.
   */
  List<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions();
  for (int i = 0; i < orderColList.size(); i++) {
    OrderExpressionDef colDef = orderColList.get(i);
    char orderChar = colDef.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
    char nullOrderChar = colDef.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z';
    int index = ExprNodeDescUtils.indexOf(colDef.getExprNode(), orderCols);
    if (index >= 0) {
      orderString.setCharAt(index, orderChar);
      nullOrderString.setCharAt(index, nullOrderChar);
      continue;
    }
    orderCols.add(colDef.getExprNode());
    orderString.append(orderChar);
    nullOrderString.append(nullOrderChar);
  }
}
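The two StringBuilders encode one character per key column: '+' or '-' for ascending or descending order, and 'a' or 'z' for NULLS FIRST or NULLS LAST. Partition keys are appended first with the defaults '+' and 'a'; when an ORDER BY expression matches an already-added key, its characters overwrite that position instead of appending a new key. Below is a minimal self-contained sketch of this merge, using plain strings as stand-ins for ExprNodeDesc; the KeySpec class and the demo key values are hypothetical, not part of Hive.

import java.util.ArrayList;
import java.util.List;

public class OrderStringDemo {

  // Hypothetical stand-in for an ORDER BY expression: a column name plus its direction flags.
  static final class KeySpec {
    final String column;
    final boolean ascending;
    final boolean nullsFirst;

    KeySpec(String column, boolean ascending, boolean nullsFirst) {
      this.column = column;
      this.ascending = ascending;
      this.nullsFirst = nullsFirst;
    }
  }

  public static void main(String[] args) {
    // PARTITION BY a ORDER BY a DESC NULLS LAST, b
    List<String> partCols = List.of("a");
    List<KeySpec> orderSpecs =
        List.of(new KeySpec("a", false, false), new KeySpec("b", true, true));

    List<String> orderCols = new ArrayList<>();
    StringBuilder orderString = new StringBuilder();
    StringBuilder nullOrderString = new StringBuilder();

    // Partition keys go first, with the default ascending / nulls-first encoding.
    for (String col : partCols) {
      if (!orderCols.contains(col)) {
        orderCols.add(col);
        orderString.append('+');
        nullOrderString.append('a');
      }
    }
    // ORDER BY keys either overwrite the flags of an existing key or are appended.
    for (KeySpec spec : orderSpecs) {
      char orderChar = spec.ascending ? '+' : '-';
      char nullOrderChar = spec.nullsFirst ? 'a' : 'z';
      int index = orderCols.indexOf(spec.column);
      if (index >= 0) {
        orderString.setCharAt(index, orderChar);
        nullOrderString.setCharAt(index, nullOrderChar);
        continue;
      }
      orderCols.add(spec.column);
      orderString.append(orderChar);
      nullOrderString.append(nullOrderChar);
    }
    // Prints: [a, b] -+ za  (key "a" was demoted to descending/nulls-last in place)
    System.out.println(orderCols + " " + orderString + " " + nullOrderString);
  }
}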
Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project Hive by Apache.
The class SemanticAnalyzer, method buildPTFReduceSinkDetails.
/**
 * Construct the data structures containing ExprNodeDesc for partition
 * columns and order columns. Use the input definition to construct the list
 * of output columns for the ReduceSinkOperator.
 */
private void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef,
    List<ExprNodeDesc> partCols, List<ExprNodeDesc> orderCols,
    StringBuilder orderString, StringBuilder nullOrderString) {
  List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
  for (PTFExpressionDef colDef : partColList) {
    ExprNodeDesc exprNode = colDef.getExprNode();
    if (ExprNodeDescUtils.indexOf(exprNode, partCols) < 0) {
      partCols.add(exprNode);
      orderCols.add(exprNode);
      orderString.append('+');
      nullOrderString.append('a');
    }
  }
  /*
   * Order columns are used as key columns for constructing
   * the ReduceSinkOperator.
   * Since we do not explicitly add these to outputColumnNames,
   * we need to set includeKeyCols = false while creating the
   * ReduceSinkDesc.
   */
  List<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions();
  for (OrderExpressionDef colDef : orderColList) {
    char orderChar = colDef.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
    char nullOrderChar = colDef.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z';
    int index = ExprNodeDescUtils.indexOf(colDef.getExprNode(), orderCols);
    if (index >= 0) {
      orderString.setCharAt(index, orderChar);
      nullOrderString.setCharAt(index, nullOrderChar);
      continue;
    }
    orderCols.add(colDef.getExprNode());
    orderString.append(orderChar);
    nullOrderString.append(nullOrderChar);
  }
}
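Both variants rely on ExprNodeDescUtils.indexOf for de-duplication, which matches expressions by semantic equality rather than object identity, so two separately constructed descriptors for the same column are treated as one key. A minimal sketch of that behavior, assuming the standard four-argument ExprNodeColumnDesc constructor; the table alias "t" and column name are made-up demo values.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IndexOfDemo {
  public static void main(String[] args) {
    // Two separately constructed descriptors for column "a" of table alias "t".
    ExprNodeDesc first = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "a", "t", false);
    ExprNodeDesc second = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "a", "t", false);
    List<ExprNodeDesc> keys = Arrays.asList(first);

    // indexOf compares semantically, so the second instance should be found at
    // position 0, and buildPTFReduceSinkDetails overwrites that key's order flags
    // instead of appending a duplicate key column.
    int index = ExprNodeDescUtils.indexOf(second, keys);
    System.out.println(index); // expected: 0
  }
}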
Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project Hive by Apache.
The class Vectorizer, method createVectorPTFDesc.
/*
 * Update the VectorPTFDesc with data that is used during validation and that doesn't rely on
 * VectorizationContext to look up column names, etc.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc,
    VectorizationContext vContext, VectorPTFDesc vectorPTFDesc,
    int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef;
  List<WindowFunctionDef> windowsFunctions = windowTableFunctionDef.getWindowFunctions();
  final int functionCount = windowsFunctions.size();
  List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
  final int outputSize = outputSignature.size();
  /*
   * Output columns.
   */
  TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
  DataTypePhysicalVariation[] reducerBatchDataTypePhysicalVariations =
      vContext.getAllDataTypePhysicalVariations();
  // Evaluator results are first.
  String[] outputColumnNames = new String[outputSize];
  TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
  DataTypePhysicalVariation[] outputDataTypePhysicalVariations =
      new DataTypePhysicalVariation[outputSize];
  for (int i = 0; i < functionCount; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    TypeInfo typeInfo = colInfo.getType();
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = typeInfo;
    outputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
  }
  // Followed by key and non-key input columns (some may be missing).
  for (int i = functionCount; i < outputSize; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = colInfo.getType();
    outputDataTypePhysicalVariations[i] =
        reducerBatchDataTypePhysicalVariations[i - functionCount];
  }
  List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
  final int partitionKeyCount = partitionExpressions.size();
  ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
  List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
  final int orderKeyCount = orderExpressions.size();
  ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);
  // When there are both PARTITION BY and ORDER BY clauses, the partition expressions can
  // differ from the order expressions. Otherwise, there are only order expressions.
  boolean isPartitionOrderBy = false;
  if (partitionKeyCount != orderKeyCount) {
    // Obviously different expressions.
    isPartitionOrderBy = true;
  } else {
    // Check each ExprNodeDesc.
    for (int i = 0; i < partitionKeyCount; i++) {
      final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]);
      final ExprNodeDescEqualityWrapper orderExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]);
      if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) {
        isPartitionOrderBy = true;
        break;
      }
    }
  }
  String[] evaluatorFunctionNames = new String[functionCount];
  boolean[] evaluatorsAreDistinct = new boolean[functionCount];
  WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists =
      (List<ExprNodeDesc>[]) new List<?>[functionCount];
  fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorsAreDistinct,
      evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);
  vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos, reducerBatchDataTypePhysicalVariations);
  vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
  vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
  vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
  vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
  vectorPTFDesc.setEvaluatorsAreDistinct(evaluatorsAreDistinct);
  vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
  vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
  vectorPTFDesc.setOutputColumnNames(outputColumnNames);
  vectorPTFDesc.setOutputTypeInfos(outputTypeInfos, outputDataTypePhysicalVariations);
  vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
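The isPartitionOrderBy decision reduces to: the key lists differ in length, or some position holds semantically different expressions, where ExprNodeDescEqualityWrapper delegates equality to ExprNodeDesc.isSame instead of object identity. A minimal standalone sketch of that comparison; the helper method and the demo columns are made up for illustration, not taken from Vectorizer.

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionOrderByCheckDemo {

  // Mirrors the check above: true when the partition keys are not exactly the order keys.
  static boolean isPartitionOrderBy(ExprNodeDesc[] partition, ExprNodeDesc[] order) {
    if (partition.length != order.length) {
      return true;
    }
    for (int i = 0; i < partition.length; i++) {
      // The wrappers compare via ExprNodeDesc.isSame rather than reference equality.
      if (!new ExprNodeDescEqualityWrapper(partition[i])
          .equals(new ExprNodeDescEqualityWrapper(order[i]))) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    ExprNodeDesc a = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "a", "t", false);
    ExprNodeDesc b = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "b", "t", false);
    // PARTITION BY a ORDER BY a, b: different key lists, so the flag is set.
    System.out.println(
        isPartitionOrderBy(new ExprNodeDesc[] { a }, new ExprNodeDesc[] { a, b })); // true
  }
}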
Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project Hive by Apache.
The class PTFDeserializer, method initializeWindowing.
public void initializeWindowing(WindowTableFunctionDef def) throws HiveException {
  ShapeDetails inpShape = def.getInput().getOutputShape();
  /*
   * 1. set up the resolver and make connections
   */
  TableFunctionEvaluator tEval = def.getTFunction();
  WindowingTableFunctionResolver tResolver =
      (WindowingTableFunctionResolver) constructResolver(def.getResolverClassName());
  tResolver.initialize(hConf, ptfDesc, def, tEval);
  /*
   * 2. initialize the window functions
   */
  for (WindowFunctionDef wFnDef : def.getWindowFunctions()) {
    if (wFnDef.getArgs() != null) {
      for (PTFExpressionDef arg : wFnDef.getArgs()) {
        initialize(arg, inpShape);
      }
    }
    if (wFnDef.getWindowFrame() != null) {
      WindowFrameDef wFrmDef = wFnDef.getWindowFrame();
      initialize(wFrmDef, inpShape);
    }
    setupWdwFnEvaluator(wFnDef);
  }
  ArrayList<String> aliases = new ArrayList<String>();
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
  for (WindowFunctionDef wFnDef : def.getWindowFunctions()) {
    aliases.add(wFnDef.getAlias());
    if (wFnDef.isPivotResult()) {
      fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
    } else {
      fieldOIs.add(wFnDef.getOI());
    }
  }
  PTFDeserializer.addInputColumnsToList(inpShape, aliases, fieldOIs);
  StructObjectInspector wdwOutOI =
      ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
  tResolver.setWdwProcessingOutputOI(wdwOutOI);
  initialize(def.getOutputShape(), wdwOutOI);
  tResolver.initializeOutputOI();
}
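The windowing output shape is a standard struct ObjectInspector whose fields are the window function aliases (in order) followed by the forwarded input columns. A minimal sketch of that last step in isolation, assuming two made-up window function outputs; the "_wcol" aliases and inspector choices are illustrative, not pulled from a real plan.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class WindowOutputOIDemo {
  public static void main(String[] args) {
    // One alias and one inspector per window function output, in the same order.
    List<String> aliases = Arrays.asList("_wcol0", "_wcol1");
    List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaLongObjectInspector);

    // The struct inspector describes the windowing output rows, one field per alias.
    StructObjectInspector wdwOutOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
    System.out.println(wdwOutOI.getTypeName()); // e.g. struct<_wcol0:int,_wcol1:bigint>
  }
}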
Use of org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef in project Hive by Apache.
The class PTFDeserializer, method setupWdwFnEvaluator.
static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException {
  List<PTFExpressionDef> args = def.getArgs();
  List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
  ObjectInspector[] funcArgOIs = null;
  if (args != null) {
    for (PTFExpressionDef arg : args) {
      argOIs.add(arg.getOI());
    }
    funcArgOIs = new ObjectInspector[args.size()];
    funcArgOIs = argOIs.toArray(funcArgOIs);
  }
  GenericUDAFEvaluator wFnEval = def.getWFnEval();
  ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, funcArgOIs);
  def.setWFnEval(wFnEval);
  def.setOI(OI);
}
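The evaluator is initialized in COMPLETE mode: it consumes original rows and produces the final aggregate, since windowing evaluates each function over a fully materialized partition rather than splitting work into map and reduce phases. A minimal standalone sketch of the same init call, assuming FunctionRegistry.getGenericUDAFEvaluator(name, argOIs, isDistinct, isAllColumns) as the lookup; the choice of "sum" over a single bigint argument is a made-up example.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class EvaluatorInitDemo {
  public static void main(String[] args) throws Exception {
    // Argument inspectors for a single bigint argument, e.g. sum(x) OVER (...).
    List<ObjectInspector> argOIs = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    GenericUDAFEvaluator eval =
        FunctionRegistry.getGenericUDAFEvaluator("sum", argOIs, false, false);

    // COMPLETE mode mirrors setupWdwFnEvaluator: original input in, final result out.
    ObjectInspector outOI =
        eval.init(GenericUDAFEvaluator.Mode.COMPLETE, argOIs.toArray(new ObjectInspector[0]));
    System.out.println(outOI.getTypeName());
  }
}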