Use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc in project hive by apache.
From the class Vectorizer, method validateAndVectorizeOperator.
public Operator<? extends OperatorDesc> validateAndVectorizeOperator(Operator<? extends OperatorDesc> op,
    VectorizationContext vContext, boolean isReduce, boolean isTezOrSpark,
    VectorTaskColumnInfo vectorTaskColumnInfo)
    throws HiveException, VectorizerCannotVectorizeException {
  Operator<? extends OperatorDesc> vectorOp = null;

  // This "global" allows various validation methods to set the "not vectorized" reason.
  currentOperator = op;

  boolean isNative;
  try {
    switch (op.getType()) {
      case MAPJOIN:
        {
          if (op instanceof MapJoinOperator) {
            if (!validateMapJoinOperator((MapJoinOperator) op)) {
              throw new VectorizerCannotVectorizeException();
            }
          } else if (op instanceof SMBMapJoinOperator) {
            if (!validateSMBMapJoinOperator((SMBMapJoinOperator) op)) {
              throw new VectorizerCannotVectorizeException();
            }
          } else {
            setOperatorNotSupported(op);
            throw new VectorizerCannotVectorizeException();
          }

          if (op instanceof MapJoinOperator) {
            MapJoinDesc desc = (MapJoinDesc) op.getConf();
            int joinType = desc.getConds()[0].getType();

            VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
            boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinDesc);

            if (!specialize) {
              Class<? extends Operator<?>> opClass = null;

              // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
              List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
              boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
              if (!isOuterAndFiltered) {
                opClass = VectorMapJoinOperator.class;
              } else {
                if (joinType == JoinDesc.FULL_OUTER_JOIN) {
                  setOperatorIssue("Vectorized & filtered full-outer joins not supported");
                  throw new VectorizerCannotVectorizeException();
                }
                opClass = VectorMapJoinOuterFilteredOperator.class;
              }

              vectorOp = OperatorFactory.getVectorOperator(
                  opClass, op.getCompilationOpContext(), desc, vContext, vectorMapJoinDesc);
              isNative = false;
            } else {

              // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
              // HiveConf.setBoolVar(physicalContext.getConf(),
              //     HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);

              vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinDesc);
              isNative = true;

              if (vectorTaskColumnInfo != null) {
                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableKeyExpressions())) {
                  vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                }
                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableValueExpressions())) {
                  vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                }
              }
            }
          } else {
            Preconditions.checkState(op instanceof SMBMapJoinOperator);

            SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();

            // Check additional constraint.
            if (smbJoinSinkDesc.getFilterMap() != null) {
              setOperatorIssue("FilterMaps not supported for Vector Pass-Thru SMB MapJoin");
              throw new VectorizerCannotVectorizeException();
            }

            VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
            vectorOp = OperatorFactory.getVectorOperator(
                op.getCompilationOpContext(), smbJoinSinkDesc, vContext, vectorSMBJoinDesc);
            isNative = false;
          }
        }
        break;
      case REDUCESINK:
        {
          if (!validateReduceSinkOperator((ReduceSinkOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          ReduceSinkDesc reduceDesc = (ReduceSinkDesc) op.getConf();
          VectorReduceSinkDesc vectorReduceSinkDesc = new VectorReduceSinkDesc();
          boolean specialize = canSpecializeReduceSink(reduceDesc, isTezOrSpark, vContext, vectorReduceSinkDesc);

          if (!specialize) {
            vectorOp = OperatorFactory.getVectorOperator(
                op.getCompilationOpContext(), reduceDesc, vContext, vectorReduceSinkDesc);
            isNative = false;
          } else {
            vectorOp = specializeReduceSinkOperator(op, vContext, reduceDesc, vectorReduceSinkDesc);
            isNative = true;

            if (vectorTaskColumnInfo != null) {
              VectorReduceSinkInfo vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo();
              if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
              if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
            }
          }
        }
        break;
      case FILTER:
        {
          if (!validateFilterOperator((FilterOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
          vectorOp = vectorizeFilterOperator(op, vContext, vectorFilterDesc);
          isNative = true;

          if (vectorTaskColumnInfo != null) {
            VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
            if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
        break;
      case TOPNKEY:
        {
          if (!validateTopNKeyOperator((TopNKeyOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          VectorTopNKeyDesc vectorTopNKeyDesc = new VectorTopNKeyDesc();
          vectorOp = vectorizeTopNKeyOperator(op, vContext, vectorTopNKeyDesc);
          isNative = true;

          if (vectorTaskColumnInfo != null) {
            VectorExpression[] keyExpressions = vectorTopNKeyDesc.getKeyExpressions();
            if (usesVectorUDFAdaptor(keyExpressions)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
        break;
      case SELECT:
        {
          if (!validateSelectOperator((SelectOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
          vectorOp = vectorizeSelectOperator(op, vContext, vectorSelectDesc);
          isNative = true;

          if (vectorTaskColumnInfo != null) {
            VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
            if (usesVectorUDFAdaptor(vectorSelectExprs)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
        break;
      case GROUPBY:
        {
          // The validateGroupByOperator method will update vectorGroupByDesc.
          VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
          if (!validateGroupByOperator((GroupByOperator) op, isReduce, isTezOrSpark, vectorGroupByDesc)) {
            throw new VectorizerCannotVectorizeException();
          }

          ImmutablePair<Operator<? extends OperatorDesc>, String> pair =
              doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc);
          if (pair.left == null) {
            setOperatorIssue(pair.right);
            throw new VectorizerCannotVectorizeException();
          }
          vectorOp = pair.left;
          isNative = false;

          if (vectorTaskColumnInfo != null) {
            VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
            if (usesVectorUDFAdaptor(vecKeyExpressions)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
            VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs();
            for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) {
              if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
            }
          }
        }
        break;
      case FILESINK:
        {
          if (!validateFileSinkOperator((FileSinkOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
          VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();

          boolean isArrowSpecialization = checkForArrowFileSink(fileSinkDesc, isTezOrSpark, vContext, vectorFileSinkDesc);
          if (isArrowSpecialization) {
            vectorOp = specializeArrowFileSinkOperator(op, vContext, fileSinkDesc, vectorFileSinkDesc);
            isNative = true;
          } else {
            vectorOp = OperatorFactory.getVectorOperator(
                op.getCompilationOpContext(), fileSinkDesc, vContext, vectorFileSinkDesc);
            isNative = false;
          }
        }
        break;
      case LIMIT:
        {
          // No validation.
          LimitDesc limitDesc = (LimitDesc) op.getConf();
          VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), limitDesc, vContext, vectorLimitDesc);
          isNative = true;
        }
        break;
      case EVENT:
        {
          // No validation.
          AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
          VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), eventDesc, vContext, vectorEventDesc);
          isNative = true;
        }
        break;
      case PTF:
        {
          // The validatePTFOperator method will update vectorPTFDesc.
          VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
          if (!validatePTFOperator((PTFOperator) op, vContext, vectorPTFDesc)) {
            throw new VectorizerCannotVectorizeException();
          }
          vectorOp = vectorizePTFOperator(op, vContext, vectorPTFDesc);
          isNative = true;
        }
        break;
      case HASHTABLESINK:
        {
          // No validation.
          SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
          VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext, vectorSparkHashTableSinkDesc);
          isNative = true;
        }
        break;
      case SPARKPRUNINGSINK:
        {
          // No validation.
          SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
          VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc =
              new VectorSparkPartitionPruningSinkDesc();
          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext,
              vectorSparkPartitionPruningSinkDesc);
          // Need to maintain the unique ID so that target map works can read the output.
          ((SparkPartitionPruningSinkOperator) vectorOp).setUniqueId(
              ((SparkPartitionPruningSinkOperator) op).getUniqueId());
          isNative = true;
        }
        break;
      default:
        setOperatorNotSupported(op);
        throw new VectorizerCannotVectorizeException();
    }
  } catch (HiveException e) {
    setOperatorIssue(e.getMessage());
    throw new VectorizerCannotVectorizeException();
  }

  Preconditions.checkState(vectorOp != null);
  if (vectorTaskColumnInfo != null && !isNative) {
    vectorTaskColumnInfo.setAllNative(false);
  }

  LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
  LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());

  // These operators need to be linked to enable runtime statistics to be gathered/used correctly.
  planMapper.link(op, vectorOp);

  return vectorOp;
}
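For orientation, callers of validateAndVectorizeOperator treat VectorizerCannotVectorizeException as a signal to keep the original row-mode operator; the "not vectorized" reason has already been recorded via setOperatorIssue or setOperatorNotSupported by the time the exception is thrown. The fragment below is a minimal, hypothetical call-site sketch — the surrounding variables and the fallback policy are illustrative assumptions, not the actual Vectorizer code:

// Hypothetical caller sketch; not the actual Vectorizer call site.
Operator<? extends OperatorDesc> result;
try {
  // Attempt to produce a vectorized replacement for 'op'.
  result = validateAndVectorizeOperator(op, vContext, isReduce, isTezOrSpark, vectorTaskColumnInfo);
} catch (VectorizerCannotVectorizeException e) {
  // The reason was already recorded; fall back to the original row-mode operator.
  result = op;
}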
Use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc in project hive by apache.
From the class Vectorizer, method validatePTFOperator.
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc)
    throws HiveException {
  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled (" +
        HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }

  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns",
          "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }

  boolean[] distinctEvaluator = vectorPTFDesc.getEvaluatorsAreDistinct();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    if (distinctEvaluator[i] && !supportedFunctionType.isSupportDistinct()) {
      setOperatorIssue(functionName + " distinct is not supported ");
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    final boolean isSingleParameter = (exprNodeDescList != null && exprNodeDescList.size() == 1);
    final ExprNodeDesc singleExprNodeDesc = (isSingleParameter ? exprNodeDescList.get(0) : null);
    final TypeInfo singleTypeInfo = (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null);
    final PrimitiveCategory singlePrimitiveCategory =
        (singleTypeInfo instanceof PrimitiveTypeInfo ?
            ((PrimitiveTypeInfo) singleTypeInfo).getPrimitiveCategory() : null);

    switch (windowFrameDef.getWindowType()) {
      case RANGE:
        if (!windowFrameDef.getEnd().isCurrentRow()) {
          setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
          return false;
        }
        break;
      case ROWS:
        {
          boolean isRowEndCurrent =
              (windowFrameDef.getEnd().isCurrentRow() &&
                  (supportedFunctionType == SupportedFunctionType.AVG ||
                   supportedFunctionType == SupportedFunctionType.MAX ||
                   supportedFunctionType == SupportedFunctionType.MIN ||
                   supportedFunctionType == SupportedFunctionType.SUM) &&
                  isSingleParameter &&
                  singlePrimitiveCategory != null);
          if (!isRowEndCurrent && !windowFrameDef.isEndUnbounded()) {
            setOperatorIssue(functionName + " UNBOUNDED end frame is required for ROWS window type");
            return false;
          }
        }
        break;
      default:
        throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }

    // RANK/DENSE_RANK don't care about columns.
    if (supportedFunctionType != SupportedFunctionType.RANK &&
        supportedFunctionType != SupportedFunctionType.DENSE_RANK) {
      if (exprNodeDescList != null) {
        // LEAD and LAG now support multiple arguments in vectorized mode.
        if (exprNodeDescList.size() > 1 &&
            supportedFunctionType != SupportedFunctionType.LAG &&
            supportedFunctionType != SupportedFunctionType.LEAD) {
          setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
          return false;
        }
        ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
        if (containsLeadLag(exprNodeDesc)) {
          setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " +
              functionName);
          return false;
        }
        if (supportedFunctionType != SupportedFunctionType.COUNT) {
          // COUNT does not care about column types. The rest do.
          TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
          Category category = typeInfo.getCategory();
          boolean isSupportedType;
          if (category != Category.PRIMITIVE) {
            isSupportedType = false;
          } else {
            ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            switch (colVecType) {
              case LONG:
              case DOUBLE:
              case DECIMAL:
                isSupportedType = true;
                break;
              default:
                isSupportedType = false;
                break;
            }
          }
          if (!isSupportedType) {
            setOperatorIssue(typeInfo.getTypeName() +
                " data type not supported in argument expression of aggregation function " + functionName);
            return false;
          }
        }
      }
    }
    if (vectorPTFDesc.getOrderExprNodeDescs().length > 1) {
      /*
       * Currently, we need to rule out here all cases where a range boundary scanner can run,
       * basically: 1. bounded start 2. bounded end which is not current row
       */
      if (windowFrameDef.getWindowType() == WindowType.RANGE &&
          (!windowFrameDef.isStartUnbounded() ||
              !(windowFrameDef.getEnd().isCurrentRow() || windowFrameDef.isEndUnbounded()))) {
        setOperatorIssue("Multi-column ordered RANGE boundary scanner is not supported in vectorized mode (window: " +
            windowFrameDef + ")");
        return false;
      }
    }
  }
  return true;
}
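Condensing the window-frame rules enforced above: a RANGE frame must end at CURRENT ROW, while a ROWS frame must either be end-unbounded or end at CURRENT ROW for a single-parameter primitive AVG/MAX/MIN/SUM evaluator. The standalone helper below is an illustrative restatement only — the method name isFrameSupported and its boolean parameters are assumptions standing in for the WindowFrameDef calls; the real logic lives inline in validatePTFOperator:

// Illustrative restatement of the frame checks in validatePTFOperator (hypothetical helper).
static boolean isFrameSupported(WindowType type, boolean endIsCurrentRow, boolean endIsUnbounded,
    boolean isSingleParamPrimitiveAvgMaxMinSum) {
  switch (type) {
    case RANGE:
      // Only frames ending at CURRENT ROW are vectorized for RANGE.
      return endIsCurrentRow;
    case ROWS:
      // ROWS needs an UNBOUNDED end, unless the evaluator is AVG/MAX/MIN/SUM
      // over a single primitive-typed argument and the frame ends at CURRENT ROW.
      return endIsUnbounded || (endIsCurrentRow && isSingleParamPrimitiveAvgMaxMinSum);
    default:
      return false;
  }
}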
Use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc in project hive by apache.
From the class Vectorizer, method vectorizePTFOperator.
/*
 * NOTE: The VectorPTFDesc has already been allocated and populated.
 */
public static Operator<? extends OperatorDesc> vectorizePTFOperator(Operator<? extends OperatorDesc> ptfOp,
    VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
  PTFDesc ptfDesc = (PTFDesc) ptfOp.getConf();

  VectorPTFInfo vectorPTFInfo = createVectorPTFInfo(ptfOp, ptfDesc, vContext, vectorPTFDesc);
  vectorPTFDesc.setVectorPTFInfo(vectorPTFInfo);

  Class<? extends Operator<?>> opClass = VectorPTFOperator.class;
  return OperatorFactory.getVectorOperator(
      opClass, ptfOp.getCompilationOpContext(), ptfOp.getConf(), vContext, vectorPTFDesc);
}
Use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc in project hive by apache.
From the class TestVectorPTFGroupBatches, method getFakeOperator.
private VectorPTFOperator getFakeOperator() throws HiveException {
  VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
  vectorPTFDesc.setVectorPTFInfo(new VectorPTFInfo());
  vectorPTFDesc.setOutputColumnNames(new String[0]);
  vectorPTFDesc.setEvaluatorFunctionNames(new String[0]);

  return new VectorPTFOperator(
      new CompilationOpContext(), new PTFDesc(), new VectorizationContext("fake"), vectorPTFDesc);
}
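A helper like this lets a test exercise PTF batch-grouping logic without building a full query plan. A minimal JUnit-style usage sketch, following the constructor signature shown above (the test name and assertion are illustrative assumptions; JUnit's @Test and assertNotNull are assumed to be imported):

// Hypothetical JUnit usage of the fake-operator helper above.
@Test
public void testFakeOperator() throws HiveException {
  VectorPTFOperator op = getFakeOperator();
  // getConf() should hand back the PTFDesc passed to the constructor.
  assertNotNull(op.getConf());
}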