use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
the class PTFOperator method initializeOp.
/*
 * 1. Find out if the operator is invoked at map-side or reduce-side
 * 2. Get the deserialized QueryDef
 * 3. Reconstruct the transient variables in QueryDef
 * 4. Create input partition to store rows coming from previous operator
 */
@Override
protected void initializeOp(Configuration jobConf) throws HiveException {
  super.initializeOp(jobConf);
  hiveConf = jobConf;
  isMapOperator = conf.isMapSide();
  currentKeys = null;
  reconstructQueryDef(hiveConf);
  if (isMapOperator) {
    PartitionedTableFunctionDef tDef = conf.getStartOfChain();
    outputObjInspector = tDef.getRawInputShape().getOI();
  } else {
    outputObjInspector = conf.getFuncDef().getOutputShape().getOI();
  }
  setupKeysWrapper(inputObjInspectors[0]);
  ptfInvocation = setupChain();
  ptfInvocation.initializeStreaming(jobConf, isMapOperator);
  firstMapRow = true;
}
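On the map side the operator forwards raw rows into the start of the function chain, so the output object inspector comes from the chain's first raw-input shape; on the reduce side it is the final function's output shape. For reference, a minimal sketch of what a getStartOfChain()-style walk over the chain looks like, assuming only the getInput() accessor used elsewhere on this page (an illustration, not the Hive source):

// Sketch: walk from the final function definition back to the first
// PartitionedTableFunctionDef in the input chain.
private static PartitionedTableFunctionDef startOfChain(PartitionedTableFunctionDef fnDef) {
  PartitionedTableFunctionDef curr = fnDef;
  while (curr.getInput() instanceof PartitionedTableFunctionDef) {
    curr = (PartitionedTableFunctionDef) curr.getInput();
  }
  return curr;
}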
use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
the class PTFOperator method setupChain.
private PTFInvocation setupChain() {
  Stack<PartitionedTableFunctionDef> fnDefs = new Stack<PartitionedTableFunctionDef>();
  PTFInputDef iDef = conf.getFuncDef();
  while (iDef instanceof PartitionedTableFunctionDef) {
    fnDefs.push((PartitionedTableFunctionDef) iDef);
    iDef = ((PartitionedTableFunctionDef) iDef).getInput();
  }
  PTFInvocation curr = null, first = null;
  while (!fnDefs.isEmpty()) {
    PartitionedTableFunctionDef currFn = fnDefs.pop();
    curr = new PTFInvocation(curr, currFn.getTFunction());
    if (first == null) {
      first = curr;
    }
  }
  return first;
}
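The definition chain is stored output-first (the final function's getInput() leads toward the query input), so the stack is used to reverse it: invocations are created input-first, each linked to its predecessor, and the head of the resulting execution chain is returned. A self-contained illustration of the same idiom, with plain strings standing in for the PTF classes (all names here are invented for the demo):

import java.util.ArrayDeque;
import java.util.Deque;

public class ChainReversalDemo {
  static final class Node {
    final Node prev;
    final String name;
    Node(Node prev, String name) { this.prev = prev; this.name = name; }
  }

  public static void main(String[] args) {
    // The definition chain is collected output-first: f3 -> f2 -> f1.
    Deque<String> stack = new ArrayDeque<>();
    for (String def : new String[] {"f3", "f2", "f1"}) {
      stack.push(def);
    }
    // Popping yields input-first order; link each node to its predecessor
    // and remember the first (input-side) node, as setupChain() does.
    Node curr = null, first = null;
    while (!stack.isEmpty()) {
      curr = new Node(curr, stack.pop());
      if (first == null) {
        first = curr;
      }
    }
    System.out.println("execution chain starts at: " + first.name); // prints f1
  }
}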
use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
the class Vectorizer method createVectorPTFDesc.
/*
 * Update the VectorPTFDesc with data that is used during validation and that doesn't rely on
 * VectorizationContext to look up column names, etc.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc,
    VectorizationContext vContext, VectorPTFDesc vectorPTFDesc,
    int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef;
  List<WindowFunctionDef> windowsFunctions = windowTableFunctionDef.getWindowFunctions();
  final int functionCount = windowsFunctions.size();
  List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
  final int outputSize = outputSignature.size();

  /*
   * Output columns.
   */
  TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
  DataTypePhysicalVariation[] reducerBatchDataTypePhysicalVariations =
      vContext.getAllDataTypePhysicalVariations();

  // Evaluator results are first.
  String[] outputColumnNames = new String[outputSize];
  TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
  DataTypePhysicalVariation[] outputDataTypePhysicalVariations =
      new DataTypePhysicalVariation[outputSize];
  for (int i = 0; i < functionCount; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    TypeInfo typeInfo = colInfo.getType();
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = typeInfo;
    outputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
  }

  // Followed by key and non-key input columns (some may be missing).
  for (int i = functionCount; i < outputSize; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = colInfo.getType();
    outputDataTypePhysicalVariations[i] =
        reducerBatchDataTypePhysicalVariations[i - functionCount];
  }

  List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
  final int partitionKeyCount = partitionExpressions.size();
  ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
  List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
  final int orderKeyCount = orderExpressions.size();
  ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);

  // When there are both PARTITION BY and ORDER BY clauses, the partition expressions can
  // differ from the order expressions. Otherwise, there are only ORDER BY expressions.
  boolean isPartitionOrderBy = false;
  if (partitionKeyCount != orderKeyCount) {
    // Obviously different expressions.
    isPartitionOrderBy = true;
  } else {
    // Check each ExprNodeDesc.
    for (int i = 0; i < partitionKeyCount; i++) {
      final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]);
      final ExprNodeDescEqualityWrapper orderExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]);
      if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) {
        isPartitionOrderBy = true;
        break;
      }
    }
  }
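  // For example (illustrative): with PARTITION BY a ORDER BY a the two key lists match and
  // isPartitionOrderBy stays false; with PARTITION BY a ORDER BY a, b (different key counts)
  // or PARTITION BY a ORDER BY b (different expressions) it becomes true.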
  String[] evaluatorFunctionNames = new String[functionCount];
  boolean[] evaluatorsAreDistinct = new boolean[functionCount];
  WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists =
      (List<ExprNodeDesc>[]) new List<?>[functionCount];
  fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorsAreDistinct,
      evaluatorWindowFrameDefs, evaluatorInputExprNodeDescLists);

  vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos,
      reducerBatchDataTypePhysicalVariations);
  vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
  vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
  vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
  vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
  vectorPTFDesc.setEvaluatorsAreDistinct(evaluatorsAreDistinct);
  vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
  vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
  vectorPTFDesc.setOutputColumnNames(outputColumnNames);
  vectorPTFDesc.setOutputTypeInfos(outputTypeInfos, outputDataTypePhysicalVariations);
  vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(
      vectorizedPTFMaxMemoryBufferingBatchCount);
}
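To make the output-column layout concrete: the first functionCount slots carry evaluator results, and the remaining slots copy reducer-batch columns shifted down by functionCount. A tiny self-contained illustration of that index arithmetic (the sizes are made up for the demo):

public class OutputLayoutDemo {
  public static void main(String[] args) {
    final int functionCount = 2; // hypothetical: two window evaluators
    final int outputSize = 5;    // hypothetical: five output columns total
    for (int i = 0; i < outputSize; i++) {
      if (i < functionCount) {
        System.out.println("output[" + i + "] <- evaluator result " + i);
      } else {
        // Key and non-key input columns follow, offset by functionCount.
        System.out.println("output[" + i + "] <- reducer batch column " + (i - functionCount));
      }
    }
  }
}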
use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
the class PTFDeserializer method initializePTFChain.
public void initializePTFChain(PartitionedTableFunctionDef tblFnDef) throws HiveException {
  Deque<PTFInputDef> ptfChain = new ArrayDeque<PTFInputDef>();
  PTFInputDef currentDef = tblFnDef;
  while (currentDef != null) {
    ptfChain.push(currentDef);
    currentDef = currentDef.getInput();
  }
  while (!ptfChain.isEmpty()) {
    currentDef = ptfChain.pop();
    if (currentDef instanceof PTFQueryInputDef) {
      initialize((PTFQueryInputDef) currentDef, inputOI);
    } else if (currentDef instanceof WindowTableFunctionDef) {
      initializeWindowing((WindowTableFunctionDef) currentDef);
    } else {
      initialize((PartitionedTableFunctionDef) currentDef);
    }
  }
  PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
}
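As in setupChain() above, the output-first definition chain is pushed onto a stack (here an ArrayDeque) so that pop() returns definitions input-first: the PTFQueryInputDef at the bottom of the chain is initialized before the functions that consume it. A sketch of a typical call site, modeled on PTFOperator.reconstructQueryDef (the exact PTFDeserializer constructor signature is an assumption here):

// Sketch of a call site; the constructor arguments are assumed for illustration.
PTFDeserializer deserializer =
    new PTFDeserializer(conf, (StructObjectInspector) inputObjInspectors[0], hiveConf);
deserializer.initializePTFChain(conf.getFuncDef());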
use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
the class PTFDeserializer method alterOutputOIForStreaming.
/*
 * If the final PTF in a PTFChain can stream its output, then set the OI of its OutputShape
 * to the OI returned by the TableFunctionEvaluator.
 */
public static void alterOutputOIForStreaming(PTFDesc ptfDesc) {
  PartitionedTableFunctionDef tDef = ptfDesc.getFuncDef();
  TableFunctionEvaluator tEval = tDef.getTFunction();
  if (tEval.canIterateOutput()) {
    tDef.getOutputShape().setOI(tEval.getOutputOI());
  }
}
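Note that only the final function in the chain (ptfDesc.getFuncDef()) is touched; its output shape is what downstream operators observe. An illustrative check of the post-condition (not Hive code):

// Illustrative only: after alterOutputOIForStreaming(ptfDesc) runs, a
// streaming-capable evaluator's output OI is the chain's advertised output OI.
TableFunctionEvaluator tEval = ptfDesc.getFuncDef().getTFunction();
if (tEval.canIterateOutput()) {
  assert ptfDesc.getFuncDef().getOutputShape().getOI() == tEval.getOutputOI();
}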