Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
The class VectorizationContext, method getBetweenExpression.
/* Get a [NOT] BETWEEN filter or projection expression. This is treated as a special case
* because the NOT is actually specified in the expression tree as the first argument,
* and we don't want any runtime cost for that. So creating the VectorExpression
* needs to be done differently than the standard way where all arguments are
* passed to the VectorExpression constructor.
*/
private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  boolean hasDynamicValues = false;
  // We don't currently support the BETWEEN ends being columns. They must be scalars.
  if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) &&
      (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
    hasDynamicValues = true;
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      // Projection mode is not applicable.
      return null;
    }
  } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) ||
      !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
    return null;
  }
  boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
  ExprNodeDesc colExpr = childExpr.get(1);

  // The children after the NOT flag might need a cast, so get common types for the two
  // comparisons. Casting for BETWEEN is handled here as a special case because the first
  // child holds the NOT flag and does not need a cast.
  TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(
      childExpr.get(1).getTypeInfo(), childExpr.get(2).getTypeInfo());
  if (commonType == null) {
    // Can't vectorize
    return null;
  }
  commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
  if (commonType == null) {
    // Can't vectorize
    return null;
  }
  List<ExprNodeDesc> castChildren = new ArrayList<>();
  boolean wereCastUdfs = false;
  Category commonTypeCategory = commonType.getCategory();
  for (ExprNodeDesc desc : childExpr.subList(1, 4)) {
    TypeInfo childTypeInfo = desc.getTypeInfo();
    Category childCategory = childTypeInfo.getCategory();
    if (childCategory != commonTypeCategory) {
      return null;
    }
    final boolean isNeedsCast;
    if (commonTypeCategory == Category.PRIMITIVE) {
      // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the primitive
      // category. Otherwise, we generate unnecessary casts.
      isNeedsCast =
          ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() !=
          ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
    } else {
      isNeedsCast = !commonType.equals(desc.getTypeInfo());
    }
    if (!isNeedsCast) {
      castChildren.add(desc);
    } else {
      GenericUDF castUdf = getGenericUDFForCast(commonType);
      ExprNodeGenericFuncDesc engfd =
          new ExprNodeGenericFuncDesc(commonType, castUdf, Arrays.asList(desc));
      castChildren.add(engfd);
      wereCastUdfs = true;
    }
  }
  String colType = commonType.getTypeName();

  // prepare arguments for createVectorExpression
  List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);

  // determine class
  Class<?> cl = null;
  if (isIntFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterLongColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
    }
  } else if (isIntFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnNotBetween.class;
    } else {
      cl = FilterLongColumnNotBetween.class;
    }
  } else if (isFloatFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DoubleColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterDoubleColumnBetweenDynamicValue.class : FilterDoubleColumnBetween.class);
    }
  } else if (isFloatFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DoubleColumnNotBetween.class;
    } else {
      cl = FilterDoubleColumnNotBetween.class;
    }
  } else if (colType.equals("string") && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = StringColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterStringColumnBetweenDynamicValue.class : FilterStringColumnBetween.class);
    }
  } else if (colType.equals("string") && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = StringColumnNotBetween.class;
    } else {
      cl = FilterStringColumnNotBetween.class;
    }
  } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = VarCharColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterVarCharColumnBetweenDynamicValue.class : FilterVarCharColumnBetween.class);
    }
  } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = VarCharColumnNotBetween.class;
    } else {
      cl = FilterVarCharColumnNotBetween.class;
    }
  } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = CharColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterCharColumnBetweenDynamicValue.class : FilterCharColumnBetween.class);
    }
  } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = CharColumnNotBetween.class;
    } else {
      cl = FilterCharColumnNotBetween.class;
    }
  } else if (colType.equals("timestamp") && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = TimestampColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterTimestampColumnBetweenDynamicValue.class : FilterTimestampColumnBetween.class);
    }
  } else if (colType.equals("timestamp") && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = TimestampColumnNotBetween.class;
    } else {
      cl = FilterTimestampColumnNotBetween.class;
    }
  } else if (isDecimalFamily(colType) && !notKeywordPresent) {
    final boolean tryDecimal64 =
        checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
    if (tryDecimal64) {
      VectorExpression decimal64VecExpr =
          tryDecimal64Between(mode, /* isNot */ false, colExpr, childrenAfterNot, returnType);
      if (decimal64VecExpr != null) {
        return decimal64VecExpr;
      }
    }
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DecimalColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterDecimalColumnBetweenDynamicValue.class : FilterDecimalColumnBetween.class);
    }
  } else if (isDecimalFamily(colType) && notKeywordPresent) {
    final boolean tryDecimal64 =
        checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
    if (tryDecimal64) {
      VectorExpression decimal64VecExpr =
          tryDecimal64Between(mode, /* isNot */ true, colExpr, childrenAfterNot, returnType);
      if (decimal64VecExpr != null) {
        return decimal64VecExpr;
      }
    }
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DecimalColumnNotBetween.class;
    } else {
      cl = FilterDecimalColumnNotBetween.class;
    }
  } else if (isDateFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterDateColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
    }
  } else if (isDateFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnNotBetween.class;
    } else {
      cl = FilterLongColumnNotBetween.class;
    }
  }
  return createVectorExpression(cl, childrenAfterNot,
      VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
}
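For reference, here is a minimal sketch (not from the Hive sources; the table alias t, column c, and long-typed bounds are hypothetical) of the four-child argument list this method expects, for the predicate c BETWEEN 1 AND 10:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BetweenChildLayoutSketch {
  // Builds the argument list in the shape getBetweenExpression expects:
  // NOT flag, column, lower bound, upper bound.
  static List<ExprNodeDesc> betweenChildren() {
    return Arrays.asList(
        new ExprNodeConstantDesc(Boolean.FALSE),                               // child 0: NOT flag
        new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "c", "t", false), // child 1: column
        new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 1L),            // child 2: lower bound
        new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 10L));          // child 3: upper bound
  }
}

With dynamic values instead of constants in children 2 and 3 (as in the semi-join case below), only filter mode applies.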
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
The class SharedWorkOptimizer, method extractConjsIgnoringDPPPreds.
private static Multiset<String> extractConjsIgnoringDPPPreds(ExprNodeDesc predicate) {
  List<ExprNodeDesc> conjsOp = ExprNodeDescUtils.split(predicate);
  Multiset<String> conjsOpString = TreeMultiset.create();
  for (int i = 0; i < conjsOp.size(); i++) {
    if (conjsOp.get(i) instanceof ExprNodeGenericFuncDesc) {
      ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) conjsOp.get(i);
      if (GenericUDFInBloomFilter.class == func.getGenericUDF().getClass()) {
        // Skip runtime bloom-filter probes.
        continue;
      } else if (GenericUDFBetween.class == func.getGenericUDF().getClass() &&
          (func.getChildren().get(2) instanceof ExprNodeDynamicValueDesc ||
           func.getChildren().get(3) instanceof ExprNodeDynamicValueDesc)) {
        // Skip BETWEEN predicates whose bounds are runtime min/max dynamic values.
        continue;
      }
    } else if (conjsOp.get(i) instanceof ExprNodeDynamicListDesc) {
      // Skip dynamic partition pruning lists.
      continue;
    }
    conjsOpString.add(conjsOp.get(i).toString());
  }
  return conjsOpString;
}
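A hypothetical usage sketch (filterOp1 and filterOp2 are illustrative names, not part of this excerpt): two filters are candidates for shared-work merging when their conjunct multisets match once DPP predicates are ignored.

// Hypothetical usage; filterOp1 and filterOp2 are illustrative FilterOperators.
Multiset<String> conjs1 = extractConjsIgnoringDPPPreds(filterOp1.getConf().getPredicate());
Multiset<String> conjs2 = extractConjsIgnoringDPPPreds(filterOp2.getConf().getPredicate());
boolean equivalentModuloDPP = conjs1.equals(conjs2); // Multiset equality compares element counts.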
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
The class SemiJoinReductionMerge, method createSemiJoinPredicate.
/**
* Creates the multi-column semi-join predicate that is applied on the target relation.
*
* Assuming that the target columns of the semi-join are fname, lname, and age, the generated predicate is:
* <pre>
* fname BETWEEN ?min_fname AND ?max_fname and
* lname BETWEEN ?min_lname AND ?max_lname and
* age BETWEEN ?min_age AND ?max_age and
* IN_BLOOM_FILTER(HASH(fname,lname,age),?bloom_filter)
* </pre>
* where the question mark (?) indicates dynamic values bound at runtime.
*/
private static ExprNodeGenericFuncDesc createSemiJoinPredicate(List<ReduceSinkOperator> sjBranches,
    RuntimeValuesInfo sjValueInfo, ParseContext context) {
  // Performance note: to speed up evaluation, 'BETWEEN' predicates should come before the
  // 'IN_BLOOM_FILTER' predicate.
  Deque<String> dynamicIds = new ArrayDeque<>(sjValueInfo.getDynamicValueIDs());
  List<ExprNodeDesc> sjPredicates = new ArrayList<>();
  List<ExprNodeDesc> hashArgs = new ArrayList<>();
  for (ReduceSinkOperator rs : sjBranches) {
    RuntimeValuesInfo info = context.getRsToRuntimeValuesInfoMap().get(rs);
    checkState(info.getTargetColumns().size() == 1, "Cannot handle multi-column semijoin branches.");
    final ExprNodeDesc targetColumn = info.getTargetColumns().get(0);
    TypeInfo typeInfo = targetColumn.getTypeInfo();
    DynamicValue minDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
    DynamicValue maxDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
    List<ExprNodeDesc> betweenArgs = Arrays.asList(
        // Use false so the BETWEEN result is not inverted.
        new ExprNodeConstantDesc(Boolean.FALSE),
        targetColumn,
        new ExprNodeDynamicValueDesc(minDynamic),
        new ExprNodeDynamicValueDesc(maxDynamic));
    ExprNodeDesc betweenExp = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo, new GenericUDFBetween(), "between", betweenArgs);
    sjPredicates.add(betweenExp);
    hashArgs.add(targetColumn);
  }
  ExprNodeDesc hashExp = ExprNodeDescUtils.murmurHash(hashArgs);
  assert dynamicIds.size() == 1 : "There should be one dynamic value left untreated: the one for the bloom filter";
  DynamicValue bloomDynamic = new DynamicValue(dynamicIds.poll(), TypeInfoFactory.binaryTypeInfo);
  sjPredicates.add(new ExprNodeGenericFuncDesc(
      TypeInfoFactory.booleanTypeInfo, new GenericUDFInBloomFilter(), "in_bloom_filter",
      Arrays.asList(hashExp, new ExprNodeDynamicValueDesc(bloomDynamic))));
  return and(sjPredicates);
}
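The and(...) helper is not part of this excerpt; a plausible sketch of its shape (an assumption, the actual Hive helper may differ) builds a single AND node over the collected predicates, consistent with the call above:

// Assumed shape of the and(...) helper; not copied from the Hive sources.
private static ExprNodeGenericFuncDesc and(List<ExprNodeDesc> predicates) {
  return new ExprNodeGenericFuncDesc(
      TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", predicates);
}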
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
The class TezCompiler, method sortSemijoinFilters.
/**
 * Sort semijoin filters based on the benefit (computed from selectivity and cost) that they
 * provide. We create three blocks: first all normal predicates, second the BETWEEN clauses
 * for the min/max dynamic values, and finally the IN_BLOOM_FILTER predicates. The intuition
 * is that evaluating a BETWEEN clause is cheaper than evaluating a bloom filter predicate.
 * Hence, after this method runs, normal predicates come first (possibly sorted by Calcite),
 * then the sorted BETWEEN clauses, and finally the sorted IN_BLOOM_FILTER clauses.
 */
private static void sortSemijoinFilters(OptimizeTezProcContext procCtx,
    ListMultimap<FilterOperator, SemijoinOperatorInfo> globalReductionFactorMap) throws SemanticException {
  for (Entry<FilterOperator, Collection<SemijoinOperatorInfo>> e :
      globalReductionFactorMap.asMap().entrySet()) {
    FilterOperator filterOp = e.getKey();
    Collection<SemijoinOperatorInfo> semijoinInfos = e.getValue();
    ExprNodeDesc pred = filterOp.getConf().getPredicate();
    if (FunctionRegistry.isOpAnd(pred)) {
      LinkedHashSet<ExprNodeDesc> allPreds = new LinkedHashSet<>(pred.getChildren());
      List<ExprNodeDesc> betweenPreds = new ArrayList<>();
      List<ExprNodeDesc> inBloomFilterPreds = new ArrayList<>();
      // We check whether we can find semijoin predicates.
      for (SemijoinOperatorInfo roi : semijoinInfos) {
        for (ExprNodeDesc expr : pred.getChildren()) {
          if (FunctionRegistry.isOpBetween(expr) &&
              expr.getChildren().get(2) instanceof ExprNodeDynamicValueDesc) {
            // BETWEEN in SJ
            String dynamicValueIdFromExpr = ((ExprNodeDynamicValueDesc)
                expr.getChildren().get(2)).getDynamicValue().getId();
            List<String> dynamicValueIdsFromMap = procCtx.parseContext
                .getRsToRuntimeValuesInfoMap().get(roi.rsOperator).getDynamicValueIDs();
            for (String dynamicValueIdFromMap : dynamicValueIdsFromMap) {
              if (dynamicValueIdFromExpr.equals(dynamicValueIdFromMap)) {
                betweenPreds.add(expr);
                allPreds.remove(expr);
                break;
              }
            }
          } else if (FunctionRegistry.isOpInBloomFilter(expr) &&
              expr.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) {
            // IN_BLOOM_FILTER in SJ
            String dynamicValueIdFromExpr = ((ExprNodeDynamicValueDesc)
                expr.getChildren().get(1)).getDynamicValue().getId();
            List<String> dynamicValueIdsFromMap = procCtx.parseContext
                .getRsToRuntimeValuesInfoMap().get(roi.rsOperator).getDynamicValueIDs();
            for (String dynamicValueIdFromMap : dynamicValueIdsFromMap) {
              if (dynamicValueIdFromExpr.equals(dynamicValueIdFromMap)) {
                inBloomFilterPreds.add(expr);
                allPreds.remove(expr);
                break;
              }
            }
          }
        }
      }
      // First, the rest of the predicates
      List<ExprNodeDesc> newAndArgs = new ArrayList<>(allPreds);
      // Then the sorted BETWEEN predicates
      newAndArgs.addAll(betweenPreds);
      // Finally, the sorted IN_BLOOM_FILTER predicates
      newAndArgs.addAll(inBloomFilterPreds);
      ExprNodeDesc andExpr = ExprNodeGenericFuncDesc.newInstance(
          FunctionRegistry.getFunctionInfo("and").getGenericUDF(), newAndArgs);
      filterOp.getConf().setPredicate(andExpr);
    }
  }
}
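To make the three-block ordering concrete, here is a toy, self-contained illustration using plain strings instead of Hive expression nodes (the predicates shown are hypothetical):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

public class SemijoinPredicateOrderDemo {
  public static void main(String[] args) {
    // Original AND arguments, in plan order.
    LinkedHashSet<String> allPreds = new LinkedHashSet<>(Arrays.asList(
        "c IN_BLOOM_FILTER ?bf", "a = 1", "b BETWEEN ?min AND ?max"));
    List<String> betweenPreds = Arrays.asList("b BETWEEN ?min AND ?max");
    List<String> inBloomFilterPreds = Arrays.asList("c IN_BLOOM_FILTER ?bf");
    allPreds.removeAll(betweenPreds);
    allPreds.removeAll(inBloomFilterPreds);
    // Normal predicates first, then BETWEENs, then bloom-filter probes.
    List<String> newAndArgs = new ArrayList<>(allPreds);
    newAndArgs.addAll(betweenPreds);
    newAndArgs.addAll(inBloomFilterPreds);
    System.out.println(newAndArgs); // [a = 1, b BETWEEN ?min AND ?max, c IN_BLOOM_FILTER ?bf]
  }
}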
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc in project hive by apache.
The class VectorizationContext, method createVectorExpression.
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException {
  int numChildren = childExpr == null ? 0 : childExpr.size();
  TypeInfo[] inputTypeInfos = new TypeInfo[numChildren];
  DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];
  List<VectorExpression> children = new ArrayList<VectorExpression>();
  Object[] arguments = new Object[numChildren];
  for (int i = 0; i < numChildren; i++) {
    ExprNodeDesc child = childExpr.get(i);
    TypeInfo childTypeInfo = child.getTypeInfo();
    inputTypeInfos[i] = childTypeInfo;
    // Assume.
    inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
    if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) {
      VectorExpression vChild = getVectorExpression(child, childrenMode);
      children.add(vChild);
      arguments[i] = vChild.getOutputColumnNum();
      // Update.
      inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
    } else if (child instanceof ExprNodeColumnDesc) {
      int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
      if (childTypeInfo instanceof DecimalTypeInfo) {
        // In this method, we must only process non-Decimal64 column vectors.
        // Convert Decimal64 columns to regular decimal.
        DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex);
        if (dataTypePhysicalVariation != null &&
            dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
          // FUTURE: Can we reuse this conversion?
          VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo);
          children.add(vChild);
          arguments[i] = vChild.getOutputColumnNum();
          // Update.
          inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
          continue;
        }
      }
      if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
        // In filter mode, the column must be a boolean.
        SelectColumnIsTrue selectColumnIsTrue = new SelectColumnIsTrue(colIndex);
        selectColumnIsTrue.setInputTypeInfos(childTypeInfo);
        selectColumnIsTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
        children.add(selectColumnIsTrue);
      }
      arguments[i] = colIndex;
    } else if (child instanceof ExprNodeConstantDesc) {
      Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
      arguments[i] = (null == scalarValue) ?
          getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue;
    } else if (child instanceof ExprNodeDynamicValueDesc) {
      // Dynamic values are passed through as-is; they are bound at runtime.
      arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
    } else {
      throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
    }
  }
  VectorExpression vectorExpression =
      instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
  if (vectorExpression == null) {
    // Reports the failure by throwing; otherwise the dereference below would fail.
    handleCouldNotInstantiateVectorExpression(vectorClass, returnType,
        DataTypePhysicalVariation.NONE, arguments);
  }
  vectorExpression.setInputTypeInfos(inputTypeInfos);
  vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  if ((vectorExpression != null) && !children.isEmpty()) {
    vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
  }
  // Intermediate output columns used by the children are no longer needed.
  for (VectorExpression ve : children) {
    ocm.freeOutputColumn(ve.getOutputColumnNum());
  }
  return vectorExpression;
}
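For orientation, instantiateExpression is not shown in this excerpt. A simplified sketch of its plausible core (an assumption; the real implementation also handles output-column allocation and descriptor matching) reflectively matches the collected arguments against the constructors of the chosen class:

// Simplified, assumed sketch of instantiateExpression's core; not the actual Hive code.
private VectorExpression instantiateSketch(Class<?> vectorClass, Object... args) {
  for (java.lang.reflect.Constructor<?> ctor : vectorClass.getConstructors()) {
    if (ctor.getParameterCount() != args.length) {
      continue;
    }
    try {
      // Column indices arrive as Integer, scalar bounds as boxed values, and
      // dynamic values as DynamicValue objects (see the loop above).
      return (VectorExpression) ctor.newInstance(args);
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
      // Argument types did not match this constructor; try the next one.
    }
  }
  return null; // The caller reports failure via handleCouldNotInstantiateVectorExpression.
}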