Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class SemanticAnalyzer, method genSamplePredicate.
/**
 * Generates the sampling predicate from the TABLESAMPLE clause information.
 * This function uses the bucket column list to decide the expression inputs
 * to the predicate hash function when useBucketCols is set to true;
 * otherwise the expression list stored in the TableSample is used. The bucket
 * columns of the table are used to generate this predicate when no
 * expressions are provided in the TABLESAMPLE clause and the table has
 * clustering columns defined in its metadata. The predicate created has the
 * following structure:
 *
 * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
 *
 * @param ts
 *          TABLESAMPLE clause information
 * @param bucketCols
 *          The clustering columns of the table
 * @param useBucketCols
 *          Flag to indicate whether the bucketCols should be used as input to
 *          the hash function
 * @param alias
 *          The alias used for the table in the row resolver
 * @param rwsch
 *          The row resolver used to resolve column references
 * @param planExpr
 *          The plan tree for the expression. If the user specified this, the
 *          parse expressions are not used
 * @return the sampling predicate as an ExprNodeDesc
 * @exception SemanticException
 */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols,
    String alias, RowResolver rwsch, ExprNodeDesc planExpr, int bucketingVersion) throws SemanticException {
  ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
      Integer.valueOf(ts.getNumerator() - 1));
  ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
      Integer.valueOf(ts.getDenominator()));
  ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
      Integer.valueOf(Integer.MAX_VALUE));
  List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
  if (planExpr != null) {
    args.add(planExpr);
  } else if (useBucketCols) {
    for (String col : bucketCols) {
      ColumnInfo ci = rwsch.get(alias, col);
      // TODO: change type to the one in the table schema
      args.add(new ExprNodeColumnDesc(ci));
    }
  } else {
    for (ASTNode expr : ts.getExprs()) {
      args.add(genExprNodeDesc(expr, rwsch));
    }
  }
  ExprNodeDesc equalsExpr = null;
  {
    ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
        bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args);
    LOG.info("hashfnExpr = " + hashfnExpr);
    ExprNodeDesc andExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
        .getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
    LOG.info("andExpr = " + andExpr);
    ExprNodeDesc modExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
        .getFuncExprNodeDesc("%", andExpr, denominatorExpr);
    LOG.info("modExpr = " + modExpr);
    LOG.info("numeratorExpr = " + numeratorExpr);
    equalsExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
        .getFuncExprNodeDesc("==", modExpr, numeratorExpr);
    LOG.info("equalsExpr = " + equalsExpr);
  }
  return equalsExpr;
}
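For a query such as SELECT * FROM t TABLESAMPLE(BUCKET 2 OUT OF 4 ON userid), the generated predicate keeps a row exactly when ((hash(userid) & Integer.MAX_VALUE) % 4) == 1; note that the stored numerator is getNumerator() - 1 because SQL bucket numbers are 1-based. A minimal standalone sketch of that arithmetic (plain Java, not Hive code; the hash values are made up):

public class SamplePredicateDemo {
  public static void main(String[] args) {
    int numerator = 2 - 1;   // genSamplePredicate stores numerator - 1 (SQL buckets are 1-based)
    int denominator = 4;     // OUT OF 4
    int[] hashValues = { 17, -5, 42, 1001 };  // made-up stand-ins for hash(expressions)
    for (int h : hashValues) {
      // The & Integer.MAX_VALUE clears the sign bit, so the modulus is never negative.
      boolean selected = ((h & Integer.MAX_VALUE) % denominator) == numerator;
      System.out.printf("hash=%d -> selected=%b%n", h, selected);
    }
  }
}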
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class SemanticAnalyzer, method genFilterPlan.
/**
 * Create a filter plan. The condition and the inputs are specified.
 *
 * @param qb
 *          current query block
 * @param condn
 *          The condition to be resolved
 * @param input
 *          the input operator
 * @return the filter operator, or the unchanged input operator when the
 *         condition folds to a constant TRUE
 */
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching)
    throws SemanticException {
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted());
  if (filterCond instanceof ExprNodeConstantDesc) {
    ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond;
    if (Boolean.TRUE.equals(c.getValue())) {
      // If the filter condition is TRUE, we ignore it
      return input;
    }
    if (ExprNodeDescUtils.isNullConstant(c)) {
      // If the filter condition is NULL, transform it to FALSE
      filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false);
    }
  }
  if (!filterCond.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo)) {
    // Convert the result of the condition to a boolean value.
    if (filterCond.getTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE) {
      // For primitive types like string/double/timestamp, try to cast the result of
      // the child expression to a boolean.
      filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
          .createConversionCast(filterCond, TypeInfoFactory.booleanTypeInfo);
    } else {
      // For complex types like map/list/struct, create an isnotnull function on the child expression.
      filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
          .getFuncExprNodeDesc("isnotnull", filterCond);
    }
  }
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new FilterDesc(filterCond, false),
      new RowSchema(inputRR.getColumnInfos()), input), inputRR);
  ctx.getPlanMapper().link(condn, output);
  LOG.debug("Created Filter Plan for {} row schema: {}", qb.getId(), inputRR.toString());
  return output;
}
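The constant-folding and boolean-normalization rules above amount to a small decision procedure. The sketch below summarizes that ordering with simplified stand-in flags instead of real ExprNodeDesc objects (the method name is hypothetical, not part of Hive):

// Hypothetical summary of genFilterPlan's normalization order; returns a
// description of the plan action rather than building real Hive operators.
static String normalizeFilterCond(boolean isConstant, Object constValue,
    boolean isBooleanCompatible, boolean isPrimitive) {
  if (isConstant && Boolean.TRUE.equals(constValue)) {
    return "drop the filter; return the input operator unchanged";
  }
  if (isConstant && constValue == null) {
    return "replace the condition with the constant FALSE";
  }
  if (!isBooleanCompatible) {
    return isPrimitive
        ? "wrap the condition in a cast to BOOLEAN"
        : "wrap the condition in isnotnull(...)";
  }
  return "build the FilterOperator over the condition as-is";
}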
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class ShowPartitionAnalyzer, method replaceDefaultPartNameAndCastType.
private ExprNodeDesc replaceDefaultPartNameAndCastType(ExprNodeDesc nodeDesc, Map<String, String> colTypes,
    String defaultPartName) throws SemanticException {
  if (!(nodeDesc instanceof ExprNodeGenericFuncDesc)) {
    return nodeDesc;
  }
  ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) nodeDesc;
  if (FunctionRegistry.isOpAnd(funcDesc) || FunctionRegistry.isOpOr(funcDesc)) {
    List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc child : funcDesc.getChildren()) {
      newChildren.add(replaceDefaultPartNameAndCastType(child, colTypes, defaultPartName));
    }
    funcDesc.setChildren(newChildren);
    return funcDesc;
  }
  List<ExprNodeDesc> children = funcDesc.getChildren();
  int colIdx = -1, constIdx = -1;
  for (int i = 0; i < children.size(); i++) {
    ExprNodeDesc child = children.get(i);
    if (child instanceof ExprNodeColumnDesc) {
      String col = ((ExprNodeColumnDesc) child).getColumn().toLowerCase();
      String type = colTypes.get(col);
      if (!type.equals(child.getTypeString())) {
        child.setTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo(type));
      }
      colIdx = i;
    } else if (child instanceof ExprNodeConstantDesc) {
      constIdx = i;
    }
  }
  if (funcDesc.getGenericUDF() instanceof GenericUDFBaseCompare && children.size() == 2
      && colIdx > -1 && constIdx > -1) {
    ExprNodeConstantDesc constantDesc = (ExprNodeConstantDesc) children.get(constIdx);
    ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) children.get(colIdx);
    Object val = constantDesc.getValue();
    boolean isDefaultPartitionName = defaultPartName.equals(val);
    String type = colTypes.get(columnDesc.getColumn().toLowerCase());
    PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
    if (!isDefaultPartitionName) {
      if (!constantDesc.getTypeString().equals(type)) {
        Object converted = ObjectInspectorConverters.getConverter(
            TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(constantDesc.getTypeInfo()),
            TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(pti)).convert(val);
        if (converted == null) {
          throw new SemanticException("Cannot convert to " + type + " from "
              + constantDesc.getTypeString() + ", value: " + val);
        }
        ExprNodeConstantDesc newConstantDesc = new ExprNodeConstantDesc(pti, converted);
        children.set(constIdx, newConstantDesc);
      }
    } else {
      GenericUDF originalOp = funcDesc.getGenericUDF();
      String fnName;
      if (FunctionRegistry.isEq(originalOp)) {
        fnName = "isnull";
      } else if (FunctionRegistry.isNeq(originalOp)) {
        fnName = "isnotnull";
      } else {
        throw new SemanticException("Only '=' and '!=' are allowed for the default partition, function: "
            + originalOp.getUdfName());
      }
      funcDesc = PartitionUtils.makeUnaryPredicate(fnName, columnDesc);
    }
  }
  return funcDesc;
}
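Concretely, with the default value of hive.exec.default.partition.name, a filter such as ds = '__HIVE_DEFAULT_PARTITION__' is rewritten to isnull(ds), and ds != '__HIVE_DEFAULT_PARTITION__' to isnotnull(ds), since the default partition holds rows whose partition value is NULL. A hedged sketch of just that operator mapping (the method and its string inputs are illustrative; the real code checks FunctionRegistry.isEq/isNeq on the GenericUDF):

// Illustrative stand-in for the default-partition rewrite branch above.
static String rewriteOpForDefaultPartition(String comparisonOp) {
  switch (comparisonOp) {
    case "=":
      return "isnull";     // ds = '__HIVE_DEFAULT_PARTITION__'  -> isnull(ds)
    case "!=":
      return "isnotnull";  // ds != '__HIVE_DEFAULT_PARTITION__' -> isnotnull(ds)
    default:
      throw new IllegalArgumentException(
          "Only '=' and '!=' are allowed for the default partition, got: " + comparisonOp);
  }
}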
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class VectorizationContext, method getBetweenExpression.
/*
 * Get a [NOT] BETWEEN filter or projection expression. This is treated as a
 * special case because the NOT is actually specified in the expression tree
 * as the first argument, and we don't want any runtime cost for that. So
 * creating the VectorExpression needs to be done differently than the
 * standard way, where all arguments are passed to the VectorExpression
 * constructor.
 */
private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  boolean hasDynamicValues = false;
  // We don't currently support the BETWEEN ends being columns. They must be scalars.
  if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc)
      && (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
    hasDynamicValues = true;
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      // Projection mode is not applicable.
      return null;
    }
  } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc)
      || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
    return null;
  }
  boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
  ExprNodeDesc colExpr = childExpr.get(1);
  // The children after NOT might need a cast; get common types for the two comparisons.
  // Casting for BETWEEN is handled here as a special case, because the first child is
  // the NOT flag and doesn't need a cast.
  TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(
      childExpr.get(1).getTypeInfo(), childExpr.get(2).getTypeInfo());
  if (commonType == null) {
    // Can't vectorize
    return null;
  }
  commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
  if (commonType == null) {
    // Can't vectorize
    return null;
  }
  List<ExprNodeDesc> castChildren = new ArrayList<>();
  boolean wereCastUdfs = false;
  Category commonTypeCategory = commonType.getCategory();
  for (ExprNodeDesc desc : childExpr.subList(1, 4)) {
    TypeInfo childTypeInfo = desc.getTypeInfo();
    Category childCategory = childTypeInfo.getCategory();
    if (childCategory != commonTypeCategory) {
      return null;
    }
    final boolean isNeedsCast;
    if (commonTypeCategory == Category.PRIMITIVE) {
      // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the category.
      // Otherwise, we generate unnecessary casts.
      isNeedsCast = ((PrimitiveTypeInfo) commonType).getPrimitiveCategory()
          != ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
    } else {
      isNeedsCast = !commonType.equals(desc.getTypeInfo());
    }
    if (!isNeedsCast) {
      castChildren.add(desc);
    } else {
      GenericUDF castUdf = getGenericUDFForCast(commonType);
      ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf,
          Arrays.asList(new ExprNodeDesc[] { desc }));
      castChildren.add(engfd);
      wereCastUdfs = true;
    }
  }
  String colType = commonType.getTypeName();
  // prepare arguments for createVectorExpression
  List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);
  // determine class
  Class<?> cl = null;
  if (isIntFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterLongColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
    }
  } else if (isIntFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnNotBetween.class;
    } else {
      cl = FilterLongColumnNotBetween.class;
    }
  } else if (isFloatFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DoubleColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterDoubleColumnBetweenDynamicValue.class : FilterDoubleColumnBetween.class);
    }
  } else if (isFloatFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DoubleColumnNotBetween.class;
    } else {
      cl = FilterDoubleColumnNotBetween.class;
    }
  } else if (colType.equals("string") && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = StringColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterStringColumnBetweenDynamicValue.class : FilterStringColumnBetween.class);
    }
  } else if (colType.equals("string") && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = StringColumnNotBetween.class;
    } else {
      cl = FilterStringColumnNotBetween.class;
    }
  } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = VarCharColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterVarCharColumnBetweenDynamicValue.class : FilterVarCharColumnBetween.class);
    }
  } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = VarCharColumnNotBetween.class;
    } else {
      cl = FilterVarCharColumnNotBetween.class;
    }
  } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = CharColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterCharColumnBetweenDynamicValue.class : FilterCharColumnBetween.class);
    }
  } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = CharColumnNotBetween.class;
    } else {
      cl = FilterCharColumnNotBetween.class;
    }
  } else if (colType.equals("timestamp") && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = TimestampColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterTimestampColumnBetweenDynamicValue.class : FilterTimestampColumnBetween.class);
    }
  } else if (colType.equals("timestamp") && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = TimestampColumnNotBetween.class;
    } else {
      cl = FilterTimestampColumnNotBetween.class;
    }
  } else if (isDecimalFamily(colType) && !notKeywordPresent) {
    final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
    if (tryDecimal64) {
      VectorExpression decimal64VecExpr =
          tryDecimal64Between(mode, /* isNot */ false, colExpr, childrenAfterNot, returnType);
      if (decimal64VecExpr != null) {
        return decimal64VecExpr;
      }
    }
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DecimalColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterDecimalColumnBetweenDynamicValue.class : FilterDecimalColumnBetween.class);
    }
  } else if (isDecimalFamily(colType) && notKeywordPresent) {
    final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
    if (tryDecimal64) {
      VectorExpression decimal64VecExpr =
          tryDecimal64Between(mode, /* isNot */ true, colExpr, childrenAfterNot, returnType);
      if (decimal64VecExpr != null) {
        return decimal64VecExpr;
      }
    }
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DecimalColumnNotBetween.class;
    } else {
      cl = FilterDecimalColumnNotBetween.class;
    }
  } else if (isDateFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnBetween.class;
    } else {
      cl = (hasDynamicValues ? FilterDateColumnBetweenDynamicValue.class : FilterLongColumnBetween.class);
    }
  } else if (isDateFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnNotBetween.class;
    } else {
      cl = FilterLongColumnNotBetween.class;
    }
  }
  return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION,
      returnType, DataTypePhysicalVariation.NONE);
}
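The method relies on a fixed child layout: child 0 is the NOT-flag constant, child 1 the column expression, and children 2 and 3 the lower and upper bounds. A sketch of how the child list for c NOT BETWEEN 10 AND 20 would look, assuming the four-argument ExprNodeColumnDesc constructor (column name, table alias, and bounds are made up):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BetweenChildLayoutDemo {
  // Child ordering getBetweenExpression expects for "c NOT BETWEEN 10 AND 20".
  static List<ExprNodeDesc> notBetweenChildren() {
    List<ExprNodeDesc> children = new ArrayList<>();
    children.add(new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, true));       // 0: NOT flag
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c", "t", false));  // 1: column
    children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 10));             // 2: lower bound
    children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 20));             // 3: upper bound
    return children;
  }
}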
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class VectorizationContext, method getCoalesceExpression.
private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  int[] inputColumns = new int[childExpr.size()];
  VectorExpression[] vectorChildren = getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  final int size = vectorChildren.length;
  TypeInfo[] inputTypeInfos = new TypeInfo[size];
  DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size];
  DataTypePhysicalVariation outputDataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64;
  boolean fixConstants = false;
  for (int i = 0; i < vectorChildren.length; ++i) {
    VectorExpression ve = vectorChildren[i];
    inputColumns[i] = ve.getOutputColumnNum();
    inputTypeInfos[i] = ve.getOutputTypeInfo();
    inputDataTypePhysicalVariations[i] = ve.getOutputDataTypePhysicalVariation();
    if (inputDataTypePhysicalVariations[i] == DataTypePhysicalVariation.NONE
        || inputDataTypePhysicalVariations[i] == null) {
      if (childExpr.get(i) instanceof ExprNodeConstantDesc && inputTypeInfos[i] instanceof DecimalTypeInfo
          && ((DecimalTypeInfo) inputTypeInfos[i]).precision() <= 18) {
        fixConstants = true;
      } else {
        outputDataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
      }
    }
  }
  if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64 && fixConstants) {
    for (int i = 0; i < vectorChildren.length; ++i) {
      if ((inputDataTypePhysicalVariations[i] == DataTypePhysicalVariation.NONE
          || inputDataTypePhysicalVariations[i] == null)
          && vectorChildren[i] instanceof ConstantVectorExpression) {
        ConstantVectorExpression cve = ((ConstantVectorExpression) vectorChildren[i]);
        HiveDecimal hd = cve.getDecimalValue();
        Long longValue = new HiveDecimalWritable(hd).serialize64(
            ((DecimalTypeInfo) cve.getOutputTypeInfo()).getScale());
        ((ConstantVectorExpression) vectorChildren[i]).setLongValue(longValue);
        vectorChildren[i].setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.DECIMAL_64);
        int scratchColIndex = vectorChildren[i].getOutputColumnNum() - ocm.initialOutputCol;
        ocm.scratchDataTypePhysicalVariations[scratchColIndex] = DataTypePhysicalVariation.DECIMAL_64;
      }
    }
  }
  final int outputColumnNum = ocm.allocateOutputColumn(returnType, outputDataTypePhysicalVariation);
  VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outputColumnNum);
  vectorCoalesce.setChildExpressions(vectorChildren);
  vectorCoalesce.setInputTypeInfos(inputTypeInfos);
  vectorCoalesce.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  vectorCoalesce.setOutputTypeInfo(returnType);
  vectorCoalesce.setOutputDataTypePhysicalVariation(outputDataTypePhysicalVariation);
  freeNonColumns(vectorChildren);
  // Assume this is a projection unless FILTER mode proves otherwise below.
  boolean isFilter = false;
  if (mode == VectorExpressionDescriptor.Mode.FILTER) {
    // Is the output type a BOOLEAN?
    if (returnType.getCategory() == Category.PRIMITIVE
        && ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
      isFilter = true;
    } else {
      return null;
    }
  }
  if (isFilter) {
    // Wrap the PROJECTION COALESCE expression output with a filter.
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(vectorCoalesce.getOutputColumnNum());
    filterVectorExpr.setChildExpressions(new VectorExpression[] { vectorCoalesce });
    filterVectorExpr.setInputTypeInfos(vectorCoalesce.getOutputTypeInfo());
    filterVectorExpr.setInputDataTypePhysicalVariations(vectorCoalesce.getOutputDataTypePhysicalVariation());
    return filterVectorExpr;
  } else {
    return vectorCoalesce;
  }
}
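The DECIMAL_64 constant rewrite above works because any decimal with precision <= 18 fits in a long once scaled, and serialize64 performs exactly that scaling. A small standalone illustration using the same call (the literal value is made up):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

public class Decimal64Demo {
  public static void main(String[] args) {
    HiveDecimal hd = HiveDecimal.create("12.34");  // made-up DECIMAL(4,2) constant
    int scale = 2;                                 // scale of the output decimal type
    long scaled = new HiveDecimalWritable(hd).serialize64(scale);
    System.out.println(scaled);                    // prints 1234
  }
}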