Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class HiveIcebergStorageHandler, method collectColumnAndReplaceDummyValues.
/**
 * Recursively replaces the ExprNodeDynamicListDesc nodes with a dummy ExprNodeConstantDesc so we can test whether
 * the predicate can be converted to an Iceberg predicate later, when pruning the partitions. Also collects the
 * column names in the filter.
 * <p>
 * Please make sure that it is ok to change the input node (clone if needed).
 * @param node The node we are traversing
 * @param foundColumn The column we already found
 * @return The name of the single column referenced by the filter, or null if none was found
 */
private String collectColumnAndReplaceDummyValues(ExprNodeDesc node, String foundColumn) {
  String column = foundColumn;
  List<ExprNodeDesc> children = node.getChildren();
  if (children != null && !children.isEmpty()) {
    ListIterator<ExprNodeDesc> iterator = children.listIterator();
    while (iterator.hasNext()) {
      ExprNodeDesc child = iterator.next();
      if (child instanceof ExprNodeDynamicListDesc) {
        Object dummy;
        switch (((PrimitiveTypeInfo) child.getTypeInfo()).getPrimitiveCategory()) {
          case INT:
          case SHORT:
            dummy = 1;
            break;
          case LONG:
            dummy = 1L;
            break;
          case TIMESTAMP:
          case TIMESTAMPLOCALTZ:
            dummy = new Timestamp();
            break;
          case CHAR:
          case VARCHAR:
          case STRING:
            dummy = "1";
            break;
          case DOUBLE:
          case FLOAT:
          case DECIMAL:
            dummy = 1.1;
            break;
          case DATE:
            dummy = new Date();
            break;
          case BOOLEAN:
            dummy = true;
            break;
          default:
            throw new UnsupportedOperationException(
                "Not supported primitive type in partition pruning: " + child.getTypeInfo());
        }
        iterator.set(new ExprNodeConstantDesc(child.getTypeInfo(), dummy));
      } else {
        String newColumn;
        if (child instanceof ExprNodeColumnDesc) {
          newColumn = ((ExprNodeColumnDesc) child).getColumn();
        } else {
          newColumn = collectColumnAndReplaceDummyValues(child, column);
        }
        if (column != null && newColumn != null && !newColumn.equals(column)) {
          throw new UnsupportedOperationException("Partition pruning does not support filtering for more columns");
        }
        if (column == null) {
          column = newColumn;
        }
      }
    }
  }
  return column;
}
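The interesting move above is mutating the expression tree in place while iterating, via ListIterator.set. A minimal, self-contained sketch of the same pattern follows; the Node, DynamicListNode and ConstantNode types are hypothetical stand-ins for the Hive ExprNodeDesc hierarchy, not Hive classes:

import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;

// Hypothetical stand-ins for the ExprNodeDesc hierarchy, only to illustrate the traversal.
class Node {
  List<Node> children;
  Node(Node... cs) {
    children = Arrays.asList(cs);
  }
}

class DynamicListNode extends Node { }  // plays the role of ExprNodeDynamicListDesc

class ConstantNode extends Node {       // plays the role of ExprNodeConstantDesc
  final Object value;
  ConstantNode(Object v) {
    value = v;
  }
}

public class ReplaceDummies {
  // Same shape as collectColumnAndReplaceDummyValues: walk the children and swap every
  // dynamic-list node for a dummy constant, in place, via ListIterator.set.
  static void replaceDummies(Node node) {
    ListIterator<Node> iterator = node.children.listIterator();
    while (iterator.hasNext()) {
      Node child = iterator.next();
      if (child instanceof DynamicListNode) {
        iterator.set(new ConstantNode(1)); // dummy value standing in for the dynamic list
      } else {
        replaceDummies(child);             // recurse into ordinary subtrees
      }
    }
  }

  public static void main(String[] args) {
    Node root = new Node(new DynamicListNode(), new Node(new DynamicListNode()));
    replaceDummies(root);
    System.out.println(root.children.get(0) instanceof ConstantNode); // prints true
  }
}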
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class KafkaScanTrimmer, method pushLeaf.
/**
 * @param expr leaf node to push
 * @param operator operator
 * @param negation true if this is a negation; used to represent GenericUDFOPGreaterThan and
 *                 GenericUDFOPEqualOrGreaterThan as negations of PredicateLeaf.Operator.LESS_THAN_EQUALS
 *                 and PredicateLeaf.Operator.LESS_THAN, respectively
 *
 * @return the leaf scan, or null if the predicate cannot be pushed down
 */
@Nullable
private Map<TopicPartition, KafkaInputSplit> pushLeaf(ExprNodeGenericFuncDesc expr, PredicateLeaf.Operator operator,
    boolean negation) {
  if (expr.getChildren().size() != 2) {
    return null;
  }
  GenericUDF genericUDF = expr.getGenericUDF();
  if (!(genericUDF instanceof GenericUDFBaseCompare)) {
    return null;
  }
  ExprNodeDesc expr1 = expr.getChildren().get(0);
  ExprNodeDesc expr2 = expr.getChildren().get(1);
  // We may need to peel off the GenericUDFBridge that is added by CBO or the user
  if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
    expr1 = getColumnExpr(expr1);
    expr2 = getColumnExpr(expr2);
  }
  ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
  if (extracted == null || (extracted.length > 2)) {
    return null;
  }
  ExprNodeColumnDesc columnDesc;
  ExprNodeConstantDesc constantDesc;
  final boolean flip;
  if (extracted[0] instanceof ExprNodeColumnDesc) {
    columnDesc = (ExprNodeColumnDesc) extracted[0];
    constantDesc = (ExprNodeConstantDesc) extracted[1];
    flip = false;
  } else {
    flip = true;
    columnDesc = (ExprNodeColumnDesc) extracted[1];
    constantDesc = (ExprNodeConstantDesc) extracted[0];
  }
  if (columnDesc.getColumn().equals(MetadataColumn.PARTITION.getName())) {
    return buildScanFromPartitionPredicate(fullHouse, operator, ((Number) constantDesc.getValue()).intValue(), flip,
        negation);
  }
  if (columnDesc.getColumn().equals(MetadataColumn.OFFSET.getName())) {
    return buildScanFromOffsetPredicate(fullHouse, operator, ((Number) constantDesc.getValue()).longValue(), flip,
        negation);
  }
  if (columnDesc.getColumn().equals(MetadataColumn.TIMESTAMP.getName())) {
    long timestamp = ((Number) constantDesc.getValue()).longValue();
    // noinspection unchecked
    return buildScanForTimesPredicate(fullHouse, operator, timestamp, flip, negation, kafkaConsumer);
  }
  return null;
}
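The flip flag records that the constant appeared on the left-hand side of the comparison (e.g. 5 < offset), and negation lets the two greater-than UDFs be expressed through the less-than operators, since a > b is equivalent to NOT(a <= b) and a >= b to NOT(a < b). A hedged sketch of that normalization; the Op enum, eval method, and values are illustrative, not the Kafka handler's API:

// Illustrative sketch of the flip/negation normalization; Op, eval and the values are
// hypothetical, not part of KafkaScanTrimmer.
enum Op { LESS_THAN, LESS_THAN_EQUALS }

public class NormalizeCompare {
  // Evaluates a predicate recorded in normalized (less-than) form against concrete values.
  static boolean eval(long column, long constant, Op op, boolean flip, boolean negation) {
    long lhs = flip ? constant : column; // flip=true: the original had the constant on the left
    long rhs = flip ? column : constant;
    boolean result = (op == Op.LESS_THAN) ? lhs < rhs : lhs <= rhs;
    return negation ? !result : result;  // negation encodes > as NOT(<=) and >= as NOT(<)
  }

  public static void main(String[] args) {
    // offset > 5 is represented as NOT(offset <= 5):
    System.out.println(eval(7L, 5L, Op.LESS_THAN_EQUALS, false, true)); // true
    // 5 < offset is the flipped form of the same comparison:
    System.out.println(eval(7L, 5L, Op.LESS_THAN, true, false));        // true
  }
}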
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class PartitionPruner, method compactExpr.
/**
 * Takes a partition pruning expression and removes the null operands and non-partition columns.
 * Null operands are introduced by the ExprProcFactory classes, for example PPRColumnExprProcessor.
 * @param expr original partition pruning expression.
 * @return partition pruning expression that only contains partition columns.
 */
@VisibleForTesting
static ExprNodeDesc compactExpr(ExprNodeDesc expr) {
  if (expr == null) {
    return null;
  }
  // If this is a constant boolean expression, return the value.
  if (expr instanceof ExprNodeConstantDesc) {
    if (((ExprNodeConstantDesc) expr).getValue() == null) {
      return null;
    }
    if (!isBooleanExpr(expr)) {
      throw new IllegalStateException("Unexpected non-boolean ExprNodeConstantDesc: " + expr.getExprString(true));
    }
    return expr;
  } else if (expr instanceof ExprNodeColumnDesc) {
    return expr;
  } else if (expr instanceof ExprNodeGenericFuncDesc) {
    GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF();
    boolean isAnd = udf instanceof GenericUDFOPAnd;
    boolean isOr = udf instanceof GenericUDFOPOr;
    List<ExprNodeDesc> children = expr.getChildren();
    if (isAnd) {
      // Non-partition expressions are converted to nulls.
      List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
      boolean allTrue = true;
      for (ExprNodeDesc child : children) {
        ExprNodeDesc compactChild = compactExpr(child);
        if (compactChild != null) {
          if (!isTrueExpr(compactChild)) {
            newChildren.add(compactChild);
            allTrue = false;
          }
          if (isFalseExpr(compactChild)) {
            return new ExprNodeConstantDesc(Boolean.FALSE);
          }
        } else {
          allTrue = false;
        }
      }
      if (allTrue) {
        return new ExprNodeConstantDesc(Boolean.TRUE);
      }
      if (newChildren.size() == 0) {
        return null;
      }
      if (newChildren.size() == 1) {
        return newChildren.get(0);
      }
      // Nothing to compact; update expr with the compacted children.
      ((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
    } else if (isOr) {
      // Non-partition expressions are converted to nulls.
      List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
      boolean allFalse = true;
      boolean isNull = false;
      for (ExprNodeDesc child : children) {
        ExprNodeDesc compactChild = compactExpr(child);
        if (compactChild != null) {
          if (isTrueExpr(compactChild)) {
            return new ExprNodeConstantDesc(Boolean.TRUE);
          }
          if (!isNull && !isFalseExpr(compactChild)) {
            newChildren.add(compactChild);
            allFalse = false;
          }
        } else {
          isNull = true;
        }
      }
      if (isNull) {
        return null;
      }
      if (allFalse) {
        return new ExprNodeConstantDesc(Boolean.FALSE);
      }
      if (newChildren.size() == 1) {
        return newChildren.get(0);
      }
      // Nothing to compact; update expr with the compacted children.
      ((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
    }
    return expr;
  } else {
    throw new IllegalStateException("Unexpected type of ExprNodeDesc: " + expr.getExprString(true));
  }
}
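When only constants and unknowns are involved, the folding above behaves like Kleene three-valued logic, with null playing the role of unknown (a non-partition operand): FALSE dominates an AND, TRUE dominates an OR, and an unknown survives otherwise, which under OR makes the whole disjunction unusable for pruning. A small sketch of those rules; the helper methods are illustrative, not Hive code:

// Hedged sketch of the constant-folding rules compactExpr applies, using Boolean with null
// standing for an unknown (non-partition) operand; these helpers are illustrative, not Hive code.
public class ThreeValuedFold {
  static Boolean and(Boolean a, Boolean b) {
    if (Boolean.FALSE.equals(a) || Boolean.FALSE.equals(b)) {
      return false;                                   // a FALSE conjunct collapses the whole AND
    }
    return (a == null || b == null) ? null : true;    // unknowns survive unless forced by FALSE
  }

  static Boolean or(Boolean a, Boolean b) {
    if (Boolean.TRUE.equals(a) || Boolean.TRUE.equals(b)) {
      return true;                                    // a TRUE disjunct collapses the whole OR
    }
    return (a == null || b == null) ? null : false;   // an unknown disjunct poisons the OR
  }

  public static void main(String[] args) {
    System.out.println(and(null, false)); // false -> folded to the FALSE constant
    System.out.println(and(null, true));  // null  -> the unknown conjunct survives
    System.out.println(or(null, true));   // true  -> folded to the TRUE constant
    System.out.println(or(null, false));  // null  -> the pruner gives up (returns null)
  }
}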
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class PartitionPruner, method removeNonPartCols.
/**
 * See compactExpr. Some things in the expr are replaced with nulls for the pruner; however,
 * the virtual columns are not removed (ExprNodeColumnDesc cannot tell them apart from
 * partition columns), so we do it here.
 * The expression is only used to prune by partition name, so we have no business with VCs.
 * @param expr original partition pruning expression.
 * @param partCols list of partition columns for the table.
 * @param referred partition columns referred to by expr
 * @return partition pruning expression that only contains partition columns from the list.
 */
private static ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List<String> partCols, Set<String> referred) {
  if (expr instanceof ExprNodeFieldDesc) {
    // list or struct fields.
    return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
  } else if (expr instanceof ExprNodeColumnDesc) {
    String column = ((ExprNodeColumnDesc) expr).getColumn();
    if (!partCols.contains(column)) {
      // Column doesn't appear to be a partition column for the table.
      return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
    }
    referred.add(column);
  } else if (expr instanceof ExprNodeGenericFuncDesc) {
    List<ExprNodeDesc> children = expr.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      ExprNodeDesc other = removeNonPartCols(children.get(i), partCols, referred);
      if (ExprNodeDescUtils.isNullConstant(other)) {
        if (FunctionRegistry.isOpAnd(expr)) {
          // partcol=... AND nonpartcol=... is replaced with partcol=... AND TRUE,
          // which will be folded to partcol=...
          // This cannot be done for OR.
          Preconditions.checkArgument(expr.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo));
          other = new ExprNodeConstantDesc(expr.getTypeInfo(), true);
        } else {
          // For operators other than AND, replacing the null child with a constant could widen
          // or narrow the predicate and cause overaggressive pruning, missing data (incorrect result)
          return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
        }
      }
      children.set(i, other);
    }
  }
  return expr;
}
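The asymmetry in the AND branch is deliberate: replacing a non-partition conjunct with TRUE only widens the predicate, so the pruner may keep extra partitions but never drops a needed one, while no constant is safe in the other branches. A hedged demonstration with made-up boolean values, not Hive APIs:

// Hedged demonstration of why a non-partition conjunct may be replaced with TRUE under AND
// but not under OR; the boolean values are made-up examples.
public class PruneSafety {
  public static void main(String[] args) {
    boolean partMatches = false;    // partcol = ... is false for some partition
    boolean nonPartMatches = true;  // nonpartcol = ... matches rows inside that partition

    // Under AND, substituting TRUE only widens the predicate; the pruning decision stays safe:
    System.out.println((partMatches && nonPartMatches) + " vs " + (partMatches && true));
    // prints: false vs false -> the partition is pruned either way, correctly

    // Under OR, substituting a constant changes the answer:
    System.out.println((partMatches || nonPartMatches) + " vs " + (partMatches || false));
    // prints: true vs false -> the partition would be wrongly pruned, i.e. missing data
  }
}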
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
The class BaseSemanticAnalyzer, method validatePartColumnType.
public static void validatePartColumnType(Table tbl, Map<String, String> partSpec, ASTNode astNode, HiveConf conf)
    throws SemanticException {
  if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
    return;
  }
  Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
  if (!getPartExprNodeDesc(astNode, conf, astExprNodeMap)) {
    STATIC_LOG.warn("Dynamic partitioning is used; only validating " + astExprNodeMap.size() + " columns");
  }
  if (astExprNodeMap.isEmpty()) {
    // All columns are dynamic, nothing to do.
    return;
  }
  List<FieldSchema> parts = tbl.getPartitionKeys();
  Map<String, String> partCols = new HashMap<String, String>(parts.size());
  for (FieldSchema col : parts) {
    partCols.put(col.getName(), col.getType().toLowerCase());
  }
  for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
    String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
    if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
      astKeyName = ParseUtils.stripIdentifierQuotes(astKeyName);
    }
    String colType = partCols.get(astKeyName);
    ObjectInspector inputOI =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(astExprNodePair.getValue().getTypeInfo());
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(colType);
    ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
    // Since partVal is a constant, it is safe to cast ExprNodeDesc to ExprNodeConstantDesc.
    // Its value should be in normalized format (e.g. no leading zeros in integers, dates in
    // the format YYYY-MM-DD, etc.)
    Object value = ((ExprNodeConstantDesc) astExprNodePair.getValue()).getValue();
    Object convertedValue = value;
    if (!inputOI.getTypeName().equals(outputOI.getTypeName())) {
      convertedValue = ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
      if (convertedValue == null) {
        throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH, astKeyName, inputOI.getTypeName(),
            outputOI.getTypeName());
      }
      if (!convertedValue.toString().equals(value.toString())) {
        // The value might have been changed by the normalization performed during conversion.
        STATIC_LOG.warn("Partition " + astKeyName + " expects type " + outputOI.getTypeName()
            + " but input value is in type " + inputOI.getTypeName() + ". Convert " + value.toString()
            + " to " + convertedValue.toString());
      }
    }
    if (!convertedValue.toString().equals(partSpec.get(astKeyName))) {
      STATIC_LOG.warn("Partition Spec " + astKeyName + "=" + partSpec.get(astKeyName) + " has been changed to "
          + astKeyName + "=" + convertedValue.toString());
    }
    partSpec.put(astKeyName, convertedValue.toString());
  }
}
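The conversion path is the standard ObjectInspectorConverters round-trip already visible in the method, and the toString comparison is what detects normalization. A minimal sketch of the same round-trip, assuming the Hive serde classes used above are on the classpath; the class name and the value "0087" are made up for illustration:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartValueNormalization {
  public static void main(String[] args) {
    // The parser typed the partition value as a string; the table declares the column as int.
    ObjectInspector inputOI =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo);
    ObjectInspector outputOI =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
            TypeInfoUtils.getTypeInfoFromTypeString("int"));

    Object value = "0087"; // made-up example value with leading zeros
    Object converted = ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);

    // The toString comparison is how validatePartColumnType detects normalization:
    // "0087" != "87", so the partition spec would be rewritten to the normalized form.
    System.out.println(value + " -> " + converted); // prints: 0087 -> 87
  }
}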