Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
In class VectorizationContext, method getBetweenFilterExpression.
/* Get a [NOT] BETWEEN filter expression. This is treated as a special case
* because the NOT is actually specified in the expression tree as the first argument,
* and we don't want any runtime cost for that. So creating the VectorExpression
* needs to be done differently than the standard way where all arguments are
* passed to the VectorExpression constructor.
*/
private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
// [NOT] BETWEEN is only vectorized as a filter here; return null so the caller
// knows to revert to row-at-a-time execution.
return null;
}
boolean hasDynamicValues = false;
// We don't currently support the BETWEEN ends being columns. They must be scalars.
if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
hasDynamicValues = true;
} else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
return null;
}
boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
ExprNodeDesc colExpr = childExpr.get(1);
// The children after NOT might need a cast. Get common types for the two comparisons.
// Casting for 'between' is handled here as a special case, because the first child is the NOT flag and doesn't
// need a cast.
TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(childExpr.get(1).getTypeInfo(), childExpr.get(2).getTypeInfo());
if (commonType == null) {
// Can't vectorize
return null;
}
commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
if (commonType == null) {
// Can't vectorize
return null;
}
List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();
for (ExprNodeDesc desc : childExpr.subList(1, 4)) {
if (commonType.equals(desc.getTypeInfo())) {
castChildren.add(desc);
} else {
GenericUDF castUdf = getGenericUDFForCast(commonType);
ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf, Arrays.asList(new ExprNodeDesc[] { desc }));
castChildren.add(engfd);
}
}
String colType = commonType.getTypeName();
// prepare arguments for createVectorExpression
List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);
// determine class
Class<?> cl = null;
if (isIntFamily(colType) && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterLongColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
} else if (isIntFamily(colType) && notKeywordPresent) {
cl = FilterLongColumnNotBetween.class;
} else if (isFloatFamily(colType) && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterDoubleColumnBetweenDynamicValue.class : FilterDoubleColumnBetween.class);
} else if (isFloatFamily(colType) && notKeywordPresent) {
cl = FilterDoubleColumnNotBetween.class;
} else if (colType.equals("string") && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterStringColumnBetweenDynamicValue.class : FilterStringColumnBetween.class);
} else if (colType.equals("string") && notKeywordPresent) {
cl = FilterStringColumnNotBetween.class;
} else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterVarCharColumnBetweenDynamicValue.class : FilterVarCharColumnBetween.class);
} else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
cl = FilterVarCharColumnNotBetween.class;
} else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterCharColumnBetweenDynamicValue.class : FilterCharColumnBetween.class);
} else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
cl = FilterCharColumnNotBetween.class;
} else if (colType.equals("timestamp") && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterTimestampColumnBetweenDynamicValue.class : FilterTimestampColumnBetween.class);
} else if (colType.equals("timestamp") && notKeywordPresent) {
cl = FilterTimestampColumnNotBetween.class;
} else if (isDecimalFamily(colType) && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterDecimalColumnBetweenDynamicValue.class : FilterDecimalColumnBetween.class);
} else if (isDecimalFamily(colType) && notKeywordPresent) {
cl = FilterDecimalColumnNotBetween.class;
} else if (isDateFamily(colType) && !notKeywordPresent) {
cl = (hasDynamicValues ? FilterDateColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
} else if (isDateFamily(colType) && notKeywordPresent) {
cl = FilterLongColumnNotBetween.class;
}
return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
}
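The child layout this method expects is easy to miss: the NOT keyword arrives as a leading boolean constant rather than as a wrapping function, followed by the column and the two scalar endpoints. A minimal sketch of that layout for a hypothetical "col BETWEEN 10 AND 20" (the column, table alias, and values are illustrative, not taken from the Hive source):
private List<ExprNodeDesc> betweenChildrenSketch() {
  List<ExprNodeDesc> childExpr = new ArrayList<ExprNodeDesc>();
  childExpr.add(new ExprNodeConstantDesc(Boolean.FALSE));                                 // NOT keyword absent
  childExpr.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "col", "t", false));  // column under test
  childExpr.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 10));               // low endpoint, must be a scalar
  childExpr.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 20));               // high endpoint, must be a scalar
  return childExpr;
}
With notKeywordPresent read from index 0, the method then picks a Filter*ColumnBetween, Filter*ColumnNotBetween, or Filter*ColumnBetweenDynamicValue class based on the common comparison type.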
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
In class VectorizationContext, method getCustomUDFExpression.
/*
* Return vector expression for a custom (i.e. not built-in) UDF.
*/
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
// Assume.
boolean isFilter = false;
if (mode == VectorExpressionDescriptor.Mode.FILTER) {
// Is output type a BOOLEAN?
TypeInfo resultTypeInfo = expr.getTypeInfo();
if (resultTypeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
isFilter = true;
} else {
return null;
}
}
// GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
List<ExprNodeDesc> childExprList = expr.getChildren();
final int childrenCount = childExprList.size();
// argument descriptors
VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount];
for (int i = 0; i < argDescs.length; i++) {
argDescs[i] = new VectorUDFArgDesc();
}
// positions of variable arguments (columns or non-constant expressions)
List<Integer> variableArgPositions = new ArrayList<>();
// Column numbers of batch corresponding to expression result arguments
List<Integer> exprResultColumnNums = new ArrayList<>();
// Prepare children
List<VectorExpression> vectorExprs = new ArrayList<>();
TypeInfo[] inputTypeInfos = new TypeInfo[childrenCount];
DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[childrenCount];
for (int i = 0; i < childrenCount; i++) {
ExprNodeDesc child = childExprList.get(i);
inputTypeInfos[i] = child.getTypeInfo();
inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
if (child instanceof ExprNodeGenericFuncDesc) {
VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
vectorExprs.add(e);
variableArgPositions.add(i);
exprResultColumnNums.add(e.getOutputColumnNum());
argDescs[i].setVariable(e.getOutputColumnNum());
} else if (child instanceof ExprNodeColumnDesc) {
variableArgPositions.add(i);
argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
} else if (child instanceof ExprNodeConstantDesc) {
// this is a constant (or null)
if (child.getTypeInfo().getCategory() != Category.PRIMITIVE && child.getTypeInfo().getCategory() != Category.STRUCT) {
// Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
throw new HiveException("Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + child);
}
argDescs[i].setConstant((ExprNodeConstantDesc) child);
} else if (child instanceof ExprNodeDynamicValueDesc) {
VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION);
vectorExprs.add(e);
variableArgPositions.add(i);
exprResultColumnNums.add(e.getOutputColumnNum());
argDescs[i].setVariable(e.getOutputColumnNum());
} else if (child instanceof ExprNodeFieldDesc) {
// Get the GenericUDFStructField to process the field of Struct type
VectorExpression e = getGenericUDFStructField((ExprNodeFieldDesc) child, VectorExpressionDescriptor.Mode.PROJECTION, child.getTypeInfo());
vectorExprs.add(e);
variableArgPositions.add(i);
exprResultColumnNums.add(e.getOutputColumnNum());
argDescs[i].setVariable(e.getOutputColumnNum());
} else {
throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child);
}
}
// Allocate output column and get column number.
TypeInfo resultTypeInfo = expr.getTypeInfo();
String resultTypeName = resultTypeInfo.getTypeName();
final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo());
// Make vectorized operator
VectorUDFAdaptor ve = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs);
ve.setSuppressEvaluateExceptions(adaptorSuppressEvaluateExceptions);
// Set child expressions
VectorExpression[] childVEs = null;
if (exprResultColumnNums.size() != 0) {
childVEs = new VectorExpression[exprResultColumnNums.size()];
for (int i = 0; i < childVEs.length; i++) {
childVEs[i] = vectorExprs.get(i);
}
}
ve.setChildExpressions(childVEs);
ve.setInputTypeInfos(inputTypeInfos);
ve.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
ve.setOutputTypeInfo(resultTypeInfo);
ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
// Free output columns if inputs have non-leaf expression trees.
for (Integer i : exprResultColumnNums) {
ocm.freeOutputColumn(i);
}
if (isFilter) {
SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());
return filterVectorExpr;
} else {
return ve;
}
}
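The loop above reduces every argument to one of two VectorUDFArgDesc shapes: a constant prepared once up front, or a "variable" read from a batch column on every row (column references directly, and generic-function, dynamic-value, or struct-field children indirectly through the output column of a child vector expression). A hedged sketch of the two shapes, with made-up values:
VectorUDFArgDesc constantArg = new VectorUDFArgDesc();
constantArg.setConstant(new ExprNodeConstantDesc("abc"));  // literal: evaluated once, reused for every row
VectorUDFArgDesc variableArg = new VectorUDFArgDesc();
variableArg.setVariable(3);                                // variable: value is read from batch column 3 per row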
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
In class VectorizationContext, method createVectorExpression.
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType, DataTypePhysicalVariation returnDataTypePhysicalVariation) throws HiveException {
int numChildren = childExpr == null ? 0 : childExpr.size();
TypeInfo[] inputTypeInfos = new TypeInfo[numChildren];
DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];
List<VectorExpression> children = new ArrayList<>();
Object[] arguments = new Object[numChildren];
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
TypeInfo childTypeInfo = child.getTypeInfo();
inputTypeInfos[i] = childTypeInfo;
// Assume.
inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) {
VectorExpression vChild = getVectorExpression(child, childrenMode);
children.add(vChild);
arguments[i] = vChild.getOutputColumnNum();
// Update.
inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
} else if (child instanceof ExprNodeColumnDesc) {
int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
if (childTypeInfo instanceof DecimalTypeInfo) {
// In this method, we must only process non-Decimal64 column vectors.
// Convert Decimal64 columns to regular decimal.
DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex);
if (dataTypePhysicalVariation != null && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
// FUTURE: Can we reuse this conversion?
VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo);
children.add(vChild);
arguments[i] = vChild.getOutputColumnNum();
// Update.
inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
continue;
}
}
if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
// In filter mode, the column must be a boolean
SelectColumnIsTrue selectColumnIsTrue = new SelectColumnIsTrue(colIndex);
selectColumnIsTrue.setInputTypeInfos(childTypeInfo);
selectColumnIsTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
children.add(selectColumnIsTrue);
}
arguments[i] = colIndex;
} else if (child instanceof ExprNodeConstantDesc) {
Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue;
} else if (child instanceof ExprNodeDynamicValueDesc) {
arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
} else {
throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
}
}
VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, returnDataTypePhysicalVariation, arguments);
if (vectorExpression == null) {
handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
}
Objects.requireNonNull(vectorExpression).setInputTypeInfos(inputTypeInfos);
vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
if (!children.isEmpty()) {
vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
}
freeNonColumns(children.toArray(new VectorExpression[0]));
return vectorExpression;
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
In class VectorizationContext, method getVectorExpressionForUdf.
private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, Class<?> udfClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
int numChildren = (childExpr == null) ? 0 : childExpr.size();
if (numChildren > 2 && mode == VectorExpressionDescriptor.Mode.FILTER && ((genericUdf instanceof GenericUDFOPOr) || (genericUdf instanceof GenericUDFOPAnd))) {
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
String childTypeString = child.getTypeString();
if (childTypeString == null) {
throw new HiveException("Null child type name string");
}
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
if (columnVectorType != ColumnVector.Type.LONG) {
return null;
}
if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
return null;
}
}
Class<?> vclass;
if (genericUdf instanceof GenericUDFOPOr) {
vclass = FilterExprOrExpr.class;
} else {
vclass = FilterExprAndExpr.class;
}
VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
return createVectorExpression(vclass, childExpr, childrenMode, returnType, DataTypePhysicalVariation.NONE);
}
if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
return null;
}
// Intercept here for a possible Decimal64 vector expression class.
VectorExpression result = getDecimal64VectorExpressionForUdf(genericUdf, udfClass, childExpr, numChildren, mode, returnType);
if (result != null) {
return result;
}
// Otherwise, fall through and proceed with non-Decimal64 vector expression classes...
VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
builder.setNumArguments(numChildren);
builder.setMode(mode);
for (int i = 0; i < numChildren; i++) {
ExprNodeDesc child = childExpr.get(i);
TypeInfo childTypeInfo = child.getTypeInfo();
String childTypeString = childTypeInfo.toString();
if (childTypeString == null) {
throw new HiveException("Null child type name string");
}
String undecoratedTypeName = getUndecoratedName(childTypeString);
if (undecoratedTypeName == null) {
throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method");
}
builder.setArgumentType(i, undecoratedTypeName);
if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc) || (child instanceof ExprNodeFieldDesc)) {
builder.setInputExpressionType(i, InputExpressionType.COLUMN);
} else if (child instanceof ExprNodeConstantDesc) {
if (isNullConst(child)) {
builder.setInputExpressionType(i, InputExpressionType.NULLSCALAR);
} else {
builder.setInputExpressionType(i, InputExpressionType.SCALAR);
}
} else if (child instanceof ExprNodeDynamicValueDesc) {
builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE);
} else {
throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
}
}
VectorExpressionDescriptor.Descriptor descriptor = builder.build();
Class<?> vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions);
if (vclass == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("No vector udf found for " + udfClass.getSimpleName() + ", descriptor: " + descriptor);
}
return null;
}
VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
return createVectorExpression(vclass, childExpr, childrenMode, returnType, DataTypePhysicalVariation.NONE);
}
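Outside the multi-child AND/OR shortcut, the method's main job is filling in a VectorExpressionDescriptor and asking the expression map for a matching class. A sketch of what the builder calls above produce for a hypothetical two-argument filter such as "bigint_col = 10" (the argument types and expression types are illustrative only):
VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
builder.setNumArguments(2);
builder.setMode(VectorExpressionDescriptor.Mode.FILTER);
builder.setArgumentType(0, "bigint");                            // the column child
builder.setInputExpressionType(0, InputExpressionType.COLUMN);
builder.setArgumentType(1, "bigint");                            // the constant child
builder.setInputExpressionType(1, InputExpressionType.SCALAR);
VectorExpressionDescriptor.Descriptor descriptor = builder.build();
A dynamic-value child would instead be registered as InputExpressionType.DYNAMICVALUE, as in the loop above.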
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
In class TestVectorizationContext, method testInBloomFilter.
@Test
public void testInBloomFilter() throws Exception {
// Setup InBloomFilter() UDF
ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.getDecimalTypeInfo(10, 5), "a", "table", false);
ExprNodeDesc bfExpr = new ExprNodeDynamicValueDesc(new DynamicValue("id1", TypeInfoFactory.binaryTypeInfo));
ExprNodeGenericFuncDesc inBloomFilterExpr = new ExprNodeGenericFuncDesc();
GenericUDF inBloomFilterUdf = new GenericUDFInBloomFilter();
inBloomFilterExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
inBloomFilterExpr.setGenericUDF(inBloomFilterUdf);
List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
children1.add(colExpr);
children1.add(bfExpr);
inBloomFilterExpr.setChildren(children1);
// Setup VectorizationContext
List<String> columns = new ArrayList<String>();
columns.add("b");
columns.add("a");
VectorizationContext vc = new VectorizationContext("name", columns);
// Create vectorized expr
VectorExpression ve = vc.getVectorExpression(inBloomFilterExpr, VectorExpressionDescriptor.Mode.FILTER);
Assert.assertEquals(VectorInBloomFilterColDynamicValue.class, ve.getClass());
VectorInBloomFilterColDynamicValue vectorizedInBloomFilterExpr = (VectorInBloomFilterColDynamicValue) ve;
VectorExpression[] children = vectorizedInBloomFilterExpr.getChildExpressions();
// VectorInBloomFilterColDynamicValue should have all of the necessary information to vectorize.
// Should be no need for child vector expressions, which would imply casting/conversion.
Assert.assertNull(children);
}
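Once the test has the VectorInBloomFilterColDynamicValue instance, applying it follows the usual vectorized pattern. A minimal, hypothetical follow-on (not part of the original test, and assuming the DynamicValue "id1" has been made available at runtime) would evaluate it against a whole batch:
VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE);
// ... populate column 0 ("b") and column 1 ("a", decimal(10,5)) ...
ve.evaluate(batch);  // rows whose value misses the bloom filter are dropped from batch.selected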