Search in sources :

Example 66 with ExprNodeConstantDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

the class HiveIcebergStorageHandler method collectColumnAndReplaceDummyValues.

/**
 * Walks the expression tree below {@code node}, swapping every ExprNodeDynamicListDesc child for a placeholder
 * ExprNodeConstantDesc of the same type, so that the predicate can later be checked for convertibility to an
 * Iceberg predicate during partition pruning. While walking, the name of the column referenced by the filter
 * is collected.
 * <p>
 * The children of the input node are modified in place; clone the expression first if the original is still needed.
 * @param node The node we are traversing
 * @param foundColumn The column we already found, or null if none was found so far
 * @return The column name found in this subtree, or {@code foundColumn} if the subtree references no column
 * @throws UnsupportedOperationException if a dynamic list has an unsupported primitive type, or if the filter
 *         references more than one distinct column
 */
private String collectColumnAndReplaceDummyValues(ExprNodeDesc node, String foundColumn) {
    List<ExprNodeDesc> operands = node.getChildren();
    if (operands == null || operands.isEmpty()) {
        // Leaf node: nothing to replace and nothing new to collect.
        return foundColumn;
    }
    String knownColumn = foundColumn;
    for (ListIterator<ExprNodeDesc> cursor = operands.listIterator(); cursor.hasNext(); ) {
        ExprNodeDesc operand = cursor.next();
        if (operand instanceof ExprNodeDynamicListDesc) {
            // Compute the placeholder first so an unsupported type fails before anything is replaced.
            Object placeholder = dummyValueForDynamicList(operand);
            cursor.set(new ExprNodeConstantDesc(operand.getTypeInfo(), placeholder));
            continue;
        }
        String candidate = operand instanceof ExprNodeColumnDesc
            ? ((ExprNodeColumnDesc) operand).getColumn()
            : collectColumnAndReplaceDummyValues(operand, knownColumn);
        if (knownColumn == null) {
            knownColumn = candidate;
        } else if (candidate != null && !candidate.equals(knownColumn)) {
            throw new UnsupportedOperationException("Partition pruning does not support filtering for more columns");
        }
    }
    return knownColumn;
}

/**
 * Picks an arbitrary constant matching the primitive category of the given dynamic list node.
 * @param dynamicList The node whose type decides the placeholder value
 * @return A boxed placeholder value for the node's primitive category
 * @throws UnsupportedOperationException if the primitive category has no placeholder defined
 */
private static Object dummyValueForDynamicList(ExprNodeDesc dynamicList) {
    PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) dynamicList.getTypeInfo();
    switch (primitiveType.getPrimitiveCategory()) {
        case BOOLEAN:
            return true;
        case SHORT:
        case INT:
            return 1;
        case LONG:
            return 1L;
        case FLOAT:
        case DOUBLE:
        case DECIMAL:
            return 1.1;
        case STRING:
        case VARCHAR:
        case CHAR:
            return "1";
        case DATE:
            return new Date();
        case TIMESTAMP:
        case TIMESTAMPLOCALTZ:
            return new Timestamp();
        default:
            throw new UnsupportedOperationException("Not supported primitive type in partition pruning: " + dynamicList.getTypeInfo());
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeDynamicListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Date(org.apache.hadoop.hive.common.type.Date)

Example 67 with ExprNodeConstantDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

the class KafkaScanTrimmer method pushLeaf.

/**
 * Tries to turn a binary comparison leaf into a per-partition scan description.
 *
 * @param expr     leaf node to push
 * @param operator operator
 * @param negation true if it is a negation; this is how GenericUDFOPGreaterThan and
 *                 GenericUDFOPEqualOrGreaterThan are represented using
 *                 PredicateLeaf.Operator.LESS_THAN and PredicateLeaf.Operator.LESS_THAN_EQUALS
 *
 * @return leaf scan, or null if the push down can not be figured out
 */
@Nullable
private Map<TopicPartition, KafkaInputSplit> pushLeaf(ExprNodeGenericFuncDesc expr, PredicateLeaf.Operator operator, boolean negation) {
    // Only binary comparison functions are candidates for push down.
    if (expr.getChildren().size() != 2 || !(expr.getGenericUDF() instanceof GenericUDFBaseCompare)) {
        return null;
    }
    ExprNodeDesc left = expr.getChildren().get(0);
    ExprNodeDesc right = expr.getChildren().get(1);
    // The GenericUDFBridge added by CBO or by the user may need to be peeled off.
    if (left.getTypeInfo().equals(right.getTypeInfo())) {
        left = getColumnExpr(left);
        right = getColumnExpr(right);
    }
    ExprNodeDesc[] pair = ExprNodeDescUtils.extractComparePair(left, right);
    if (pair == null || pair.length > 2) {
        return null;
    }
    // When the column is the second operand, the comparison has to be flipped.
    final boolean columnFirst = pair[0] instanceof ExprNodeColumnDesc;
    final ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) pair[columnFirst ? 0 : 1];
    final ExprNodeConstantDesc constantDesc = (ExprNodeConstantDesc) pair[columnFirst ? 1 : 0];
    final boolean flip = !columnFirst;

    final String columnName = columnDesc.getColumn();
    if (columnName.equals(MetadataColumn.PARTITION.getName())) {
        int partition = ((Number) constantDesc.getValue()).intValue();
        return buildScanFromPartitionPredicate(fullHouse, operator, partition, flip, negation);
    }
    if (columnName.equals(MetadataColumn.OFFSET.getName())) {
        long offset = ((Number) constantDesc.getValue()).longValue();
        return buildScanFromOffsetPredicate(fullHouse, operator, offset, flip, negation);
    }
    if (columnName.equals(MetadataColumn.TIMESTAMP.getName())) {
        long timestamp = ((Number) constantDesc.getValue()).longValue();
        // noinspection unchecked
        return buildScanForTimesPredicate(fullHouse, operator, timestamp, flip, negation, kafkaConsumer);
    }
    // Any other column can not be used to trim the scan.
    return null;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Nullable(javax.annotation.Nullable)

Example 68 with ExprNodeConstantDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

the class PartitionPruner method compactExpr.

/**
 * Compacts a partition pruning expression by dropping null operands and non-partition columns.
 * Null operands exist because of the ExprProcFactory classes, for example PPRColumnExprProcessor.
 * AND / OR nodes may have their children replaced in place.
 * @param expr original partition pruning expression.
 * @return partition pruning expression that only contains partition columns.
 * @throws IllegalStateException on a non-null, non-boolean constant or an unexpected node type.
 */
@VisibleForTesting
static ExprNodeDesc compactExpr(ExprNodeDesc expr) {
    if (expr == null) {
        return null;
    }
    if (expr instanceof ExprNodeConstantDesc) {
        // A constant boolean expression is returned as is; a null constant compacts to null.
        if (((ExprNodeConstantDesc) expr).getValue() == null) {
            return null;
        }
        if (isBooleanExpr(expr)) {
            return expr;
        }
        throw new IllegalStateException("Unexpected non-boolean ExprNodeConstantDesc: " + expr.getExprString(true));
    }
    if (expr instanceof ExprNodeColumnDesc) {
        return expr;
    }
    if (!(expr instanceof ExprNodeGenericFuncDesc)) {
        throw new IllegalStateException("Unexpected type of ExprNodeDesc: " + expr.getExprString(true));
    }

    ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) expr;
    GenericUDF udf = funcExpr.getGenericUDF();
    List<ExprNodeDesc> operands = expr.getChildren();

    if (udf instanceof GenericUDFOPAnd) {
        // Non-partition expressions have been converted to nulls; they are dropped from a conjunction.
        List<ExprNodeDesc> kept = new ArrayList<>();
        boolean everyOperandTrue = true;
        for (ExprNodeDesc operand : operands) {
            ExprNodeDesc compacted = compactExpr(operand);
            if (compacted == null) {
                everyOperandTrue = false;
                continue;
            }
            if (!isTrueExpr(compacted)) {
                kept.add(compacted);
                everyOperandTrue = false;
            }
            if (isFalseExpr(compacted)) {
                // One false operand makes the whole conjunction false.
                return new ExprNodeConstantDesc(Boolean.FALSE);
            }
        }
        if (everyOperandTrue) {
            return new ExprNodeConstantDesc(Boolean.TRUE);
        }
        if (kept.isEmpty()) {
            return null;
        }
        if (kept.size() == 1) {
            return kept.get(0);
        }
        // Nothing more to compact, update expr with the compacted children.
        funcExpr.setChildren(kept);
        return expr;
    }

    if (udf instanceof GenericUDFOPOr) {
        // Non-partition expressions have been converted to nulls; a null operand nulls the disjunction
        // unless another operand is true.
        List<ExprNodeDesc> kept = new ArrayList<>();
        boolean sawNonFalse = false;
        boolean sawNull = false;
        for (ExprNodeDesc operand : operands) {
            ExprNodeDesc compacted = compactExpr(operand);
            if (compacted == null) {
                sawNull = true;
                continue;
            }
            if (isTrueExpr(compacted)) {
                // One true operand makes the whole disjunction true.
                return new ExprNodeConstantDesc(Boolean.TRUE);
            }
            if (!(sawNull || isFalseExpr(compacted))) {
                kept.add(compacted);
                sawNonFalse = true;
            }
        }
        if (sawNull) {
            return null;
        }
        if (!sawNonFalse) {
            return new ExprNodeConstantDesc(Boolean.FALSE);
        }
        if (kept.size() == 1) {
            return kept.get(0);
        }
        // Nothing more to compact, update expr with the compacted children.
        funcExpr.setChildren(kept);
        return expr;
    }

    // Any other function is returned untouched.
    return expr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) AbstractSequentialList(java.util.AbstractSequentialList) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 69 with ExprNodeConstantDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

the class PartitionPruner method removeNonPartCols.

/**
 * Companion of compactExpr. Some parts of the expression are replaced with nulls for the pruner, but
 * virtual columns survive that step (ExprNodeColumnDesc cannot tell them apart from partition columns),
 * so they are removed here. The expression is only used to prune by partition name, so virtual columns
 * are of no interest. Children of function nodes are replaced in place.
 * @param expr original partition pruning expression.
 * @param partCols list of partition columns for the table.
 * @param referred partition columns referred by expr; every partition column met is added to it.
 * @return partition pruning expression that only contains partition columns from the list.
 */
private static ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List<String> partCols, Set<String> referred) {
    if (expr instanceof ExprNodeFieldDesc) {
        // list or struct fields.
        return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
    }
    if (expr instanceof ExprNodeColumnDesc) {
        String name = ((ExprNodeColumnDesc) expr).getColumn();
        if (partCols.contains(name)) {
            referred.add(name);
            return expr;
        }
        // Column doesn't appear to be a partition column for the table.
        return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
    }
    if (!(expr instanceof ExprNodeGenericFuncDesc)) {
        return expr;
    }

    List<ExprNodeDesc> operands = expr.getChildren();
    for (int idx = 0; idx < operands.size(); idx++) {
        ExprNodeDesc replacement = removeNonPartCols(operands.get(idx), partCols, referred);
        if (!ExprNodeDescUtils.isNullConstant(replacement)) {
            operands.set(idx, replacement);
            continue;
        }
        if (!FunctionRegistry.isOpAnd(expr)) {
            // Outside of AND a null operand cannot be substituted: doing so could cause
            // over-aggressive pruning and missing data (incorrect result), so the whole
            // expression becomes a null constant instead.
            return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
        }
        // partcol=... AND nonpartcol=...   is replaced with partcol=... AND TRUE
        // which will be folded to partcol=...
        // This cannot be done also for OR
        Preconditions.checkArgument(expr.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo));
        operands.set(idx, new ExprNodeConstantDesc(expr.getTypeInfo(), true));
    }
    return expr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 70 with ExprNodeConstantDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

the class BaseSemanticAnalyzer method validatePartColumnType.

/**
 * Checks the constant partition values found under {@code astNode} against the declared types of the
 * table's partition columns and stores the (possibly converted) string form of each value back into
 * {@code partSpec}. Does nothing when HIVE_TYPE_CHECK_ON_INSERT is disabled.
 * @param tbl table whose partition keys supply the expected types
 * @param partSpec partition spec to validate; updated in place with the converted values
 * @param astNode AST node from which the partition expressions are extracted
 * @param conf configuration holding the type check switch
 * @throws SemanticException if a value cannot be converted to the type of its partition column
 */
public static void validatePartColumnType(Table tbl, Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
    boolean typeCheckEnabled = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT);
    if (!typeCheckEnabled) {
        return;
    }
    Map<ASTNode, ExprNodeDesc> partExprs = new HashMap<>();
    if (!getPartExprNodeDesc(astNode, conf, partExprs)) {
        STATIC_LOG.warn("Dynamic partitioning is used; only validating " + partExprs.size() + " columns");
    }
    if (partExprs.isEmpty()) {
        // All columns are dynamic, nothing to do.
        return;
    }

    // Lower-cased declared type of every partition column, keyed by column name.
    List<FieldSchema> partitionKeys = tbl.getPartitionKeys();
    Map<String, String> partColTypes = new HashMap<>(partitionKeys.size());
    for (FieldSchema key : partitionKeys) {
        partColTypes.put(key.getName(), key.getType().toLowerCase());
    }

    for (Entry<ASTNode, ExprNodeDesc> partExpr : partExprs.entrySet()) {
        ASTNode keyNode = partExpr.getKey();
        ExprNodeDesc valueDesc = partExpr.getValue();

        String astKeyName = keyNode.toString().toLowerCase();
        if (keyNode.getType() == HiveParser.Identifier) {
            astKeyName = ParseUtils.stripIdentifierQuotes(astKeyName);
        }
        String colType = partColTypes.get(astKeyName);
        ObjectInspector inputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(valueDesc.getTypeInfo());
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(colType);
        ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);

        // Since partVal is a constant, it is safe to cast ExprNodeDesc to ExprNodeConstantDesc.
        // Its value should be in normalized format (e.g. no leading zero in integer, date is in
        // format of YYYY-MM-DD etc)
        Object value = ((ExprNodeConstantDesc) valueDesc).getValue();
        Object convertedValue = value;
        String inputTypeName = inputOI.getTypeName();
        String outputTypeName = outputOI.getTypeName();
        if (!inputTypeName.equals(outputTypeName)) {
            convertedValue = ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
            if (convertedValue == null) {
                throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH, astKeyName, inputTypeName, outputTypeName);
            }
            if (!convertedValue.toString().equals(value.toString())) {
                // value might have been changed because of the normalization in conversion
                STATIC_LOG.warn("Partition " + astKeyName + " expects type " + outputTypeName + " but input value is in type " + inputTypeName + ". Convert " + value.toString() + " to " + convertedValue.toString());
            }
        }

        String normalized = convertedValue.toString();
        String previous = partSpec.get(astKeyName);
        if (!normalized.equals(previous)) {
            STATIC_LOG.warn("Partition Spec " + astKeyName + "=" + previous + " has been changed to " + astKeyName + "=" + normalized);
        }
        partSpec.put(astKeyName, normalized);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)208 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)178 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)134 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)123 ArrayList (java.util.ArrayList)97 Test (org.junit.Test)71 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)46 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)39 HashMap (java.util.HashMap)32 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)30 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)27 GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)27 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)27 List (java.util.List)23 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)22 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)21 GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)21 GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)20 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)20 DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)19