Use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType in project hive by apache.
Class Vectorizer, method createVectorPTFInfo.
/*
 * Create the additional vectorization PTF information needed by the VectorPTFOperator during
 * execution.
 */
private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDesc> ptfOp,
    PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc)
    throws HiveException {

  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();

  ArrayList<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
  final int outputSize = outputSignature.size();

  boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
  ExprNodeDesc[] orderExprNodeDescs = vectorPTFDesc.getOrderExprNodeDescs();
  ExprNodeDesc[] partitionExprNodeDescs = vectorPTFDesc.getPartitionExprNodeDescs();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int evaluatorCount = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists =
      vectorPTFDesc.getEvaluatorInputExprNodeDescLists();

  /*
   * Output columns.
   */
  int[] outputColumnProjectionMap = new int[outputSize];

  // Evaluator results are first.
  for (int i = 0; i < evaluatorCount; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    TypeInfo typeInfo = colInfo.getType();
    final int outputColumnNum = vContext.allocateScratchColumn(typeInfo);
    outputColumnProjectionMap[i] = outputColumnNum;
  }

  // Followed by key and non-key input columns (some may be missing).
  for (int i = evaluatorCount; i < outputSize; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    outputColumnProjectionMap[i] = vContext.getInputColumnIndex(colInfo.getInternalName());
  }

  /*
   * Partition and order by.
   */
  int[] partitionColumnMap;
  Type[] partitionColumnVectorTypes;
  VectorExpression[] partitionExpressions;
  if (!isPartitionOrderBy) {
    partitionColumnMap = null;
    partitionColumnVectorTypes = null;
    partitionExpressions = null;
  } else {
    final int partitionKeyCount = partitionExprNodeDescs.length;
    partitionColumnMap = new int[partitionKeyCount];
    partitionColumnVectorTypes = new Type[partitionKeyCount];
    partitionExpressions = new VectorExpression[partitionKeyCount];
    for (int i = 0; i < partitionKeyCount; i++) {
      VectorExpression partitionExpression =
          vContext.getVectorExpression(partitionExprNodeDescs[i]);
      TypeInfo typeInfo = partitionExpression.getOutputTypeInfo();
      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
      partitionColumnVectorTypes[i] = columnVectorType;
      partitionColumnMap[i] = partitionExpression.getOutputColumnNum();
      partitionExpressions[i] = partitionExpression;
    }
  }

  final int orderKeyCount = orderExprNodeDescs.length;
  int[] orderColumnMap = new int[orderKeyCount];
  Type[] orderColumnVectorTypes = new Type[orderKeyCount];
  VectorExpression[] orderExpressions = new VectorExpression[orderKeyCount];
  for (int i = 0; i < orderKeyCount; i++) {
    VectorExpression orderExpression = vContext.getVectorExpression(orderExprNodeDescs[i]);
    TypeInfo typeInfo = orderExpression.getOutputTypeInfo();
    Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
    orderColumnVectorTypes[i] = columnVectorType;
    orderColumnMap[i] = orderExpression.getOutputColumnNum();
    orderExpressions[i] = orderExpression;
  }

  ArrayList<Integer> keyInputColumns = new ArrayList<Integer>();
  ArrayList<Integer> nonKeyInputColumns = new ArrayList<Integer>();
  determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy,
      orderColumnMap, partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns);
  int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0]));
  int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0]));

  VectorExpression[] evaluatorInputExpressions = new VectorExpression[evaluatorCount];
  Type[] evaluatorInputColumnVectorTypes = new Type[evaluatorCount];
  for (int i = 0; i < evaluatorCount; i++) {
    String functionName = evaluatorFunctionNames[i];
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    SupportedFunctionType functionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    VectorExpression inputVectorExpression;
    final Type columnVectorType;
    if (exprNodeDescList != null) {
      // Validation has limited evaluatorInputExprNodeDescLists to size 1.
      ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);

      // Determine input vector expression using the VectorizationContext.
      inputVectorExpression = vContext.getVectorExpression(exprNodeDesc);

      TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
      PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
      columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
    } else {
      inputVectorExpression = null;
      columnVectorType = ColumnVector.Type.NONE;
    }
    evaluatorInputExpressions[i] = inputVectorExpression;
    evaluatorInputColumnVectorTypes[i] = columnVectorType;
  }

  VectorPTFInfo vectorPTFInfo = new VectorPTFInfo();
  vectorPTFInfo.setOutputColumnMap(outputColumnProjectionMap);
  vectorPTFInfo.setPartitionColumnMap(partitionColumnMap);
  vectorPTFInfo.setPartitionColumnVectorTypes(partitionColumnVectorTypes);
  vectorPTFInfo.setPartitionExpressions(partitionExpressions);
  vectorPTFInfo.setOrderColumnMap(orderColumnMap);
  vectorPTFInfo.setOrderColumnVectorTypes(orderColumnVectorTypes);
  vectorPTFInfo.setOrderExpressions(orderExpressions);
  vectorPTFInfo.setEvaluatorInputExpressions(evaluatorInputExpressions);
  vectorPTFInfo.setEvaluatorInputColumnVectorTypes(evaluatorInputColumnVectorTypes);
  vectorPTFInfo.setKeyInputColumnMap(keyInputColumnMap);
  vectorPTFInfo.setNonKeyInputColumnMap(nonKeyInputColumnMap);

  return vectorPTFInfo;
}
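For reference, the lookup at the heart of the evaluator loop, VectorPTFDesc.supportedFunctionsMap.get(functionName), simply maps a lower-cased window function name to a SupportedFunctionType constant, with null meaning there is no vectorized implementation. Below is a minimal, self-contained sketch of that pattern; the enum and map here are hypothetical stand-ins, not Hive's actual table:

import java.util.HashMap;
import java.util.Map;

public class SupportedFunctionLookupSketch {

  // Hypothetical stand-in for VectorPTFDesc.SupportedFunctionType.
  enum SupportedFunctionType {
    ROW_NUMBER, RANK, DENSE_RANK, MIN, MAX, SUM, AVG, FIRST_VALUE, LAST_VALUE, COUNT
  }

  // Hypothetical stand-in for VectorPTFDesc.supportedFunctionsMap:
  // lower-case function name -> enum constant; absent names are not vectorizable.
  static final Map<String, SupportedFunctionType> SUPPORTED = new HashMap<>();
  static {
    for (SupportedFunctionType t : SupportedFunctionType.values()) {
      SUPPORTED.put(t.name().toLowerCase(), t);
    }
  }

  public static void main(String[] args) {
    // A supported evaluator name resolves to its enum constant...
    System.out.println(SUPPORTED.get("rank"));  // RANK
    // ...and an unsupported one comes back null, which validatePTFOperator
    // turns into a "not in supported functions" issue.
    System.out.println(SUPPORTED.get("ntile")); // null
  }
}

Returning null rather than throwing lets the same lookup serve two callers: here, where earlier validation guarantees a hit, and validatePTFOperator below, where a miss is reported as a vectorization issue.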
Use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType in project hive by apache.
Class Vectorizer, method validatePTFOperator.
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext,
    VectorPTFDesc vectorPTFDesc) throws HiveException {

  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled ("
        + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = (PTFDesc) op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc,
        vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }

  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION,
        /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns",
          "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }

  boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists =
      vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType =
        VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions "
          + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    if (!windowFrameDef.isStartUnbounded()) {
      setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
      return false;
    }
    switch (windowFrameDef.getWindowType()) {
    case RANGE:
      if (!windowFrameDef.getEnd().isCurrentRow()) {
        setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
        return false;
      }
      break;
    case ROWS:
      if (!windowFrameDef.isEndUnbounded()) {
        setOperatorIssue(functionName + " only UNBOUNDED end frame is supported for ROWS window type");
        return false;
      }
      break;
    default:
      throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
      setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
      return false;
    }
    if (exprNodeDescList != null) {
      ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
      if (containsLeadLag(exprNodeDesc)) {
        setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
        return false;
      }
      if (supportedFunctionType != SupportedFunctionType.COUNT
          && supportedFunctionType != SupportedFunctionType.DENSE_RANK
          && supportedFunctionType != SupportedFunctionType.RANK) {
        // COUNT, DENSE_RANK, and RANK do not care about column types. The rest do.
        TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
        Category category = typeInfo.getCategory();
        boolean isSupportedType;
        if (category != Category.PRIMITIVE) {
          isSupportedType = false;
        } else {
          ColumnVector.Type colVecType =
              VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
          switch (colVecType) {
          case LONG:
          case DOUBLE:
          case DECIMAL:
            isSupportedType = true;
            break;
          default:
            isSupportedType = false;
            break;
          }
        }
        if (!isSupportedType) {
          setOperatorIssue(typeInfo.getTypeName()
              + " data type not supported in argument expression of aggregation function "
              + functionName);
          return false;
        }
      }
    }
  }
  return true;
}
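Taken together, the frame checks above accept exactly two frame shapes: RANGE frames running from UNBOUNDED PRECEDING to CURRENT ROW, and ROWS frames running from UNBOUNDED PRECEDING to UNBOUNDED FOLLOWING. Below is a standalone sketch of that rule; the enum and boolean parameters are hypothetical stand-ins for Hive's WindowFrameDef accessors:

public class FrameRuleSketch {

  // Hypothetical stand-in for Hive's WindowType enum.
  enum WindowType { RANGE, ROWS }

  // Mirrors validatePTFOperator's frame checks: the start must be unbounded,
  // a RANGE frame must end at CURRENT ROW, and a ROWS frame must end unbounded.
  static boolean isVectorizableFrame(WindowType type, boolean startUnbounded,
      boolean endUnbounded, boolean endIsCurrentRow) {
    if (!startUnbounded) {
      return false; // only UNBOUNDED start frames are supported
    }
    switch (type) {
    case RANGE:
      return endIsCurrentRow;
    case ROWS:
      return endUnbounded;
    default:
      throw new IllegalStateException("Unexpected window type " + type);
    }
  }

  public static void main(String[] args) {
    // RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -> accepted
    System.out.println(isVectorizableFrame(WindowType.RANGE, true, false, true)); // true
    // ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -> rejected
    System.out.println(isVectorizableFrame(WindowType.ROWS, true, false, true));  // false
  }
}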