
Example 51 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class KafkaInputFormat method computeSplits.

private List<KafkaInputSplit> computeSplits(Configuration configuration) throws IOException, InterruptedException {
    // An ExecutorService is used to bound blocking Kafka calls and interrupt them after a timeout
    final ExecutorService execService = Executors.newSingleThreadExecutor();
    try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(KafkaUtils.consumerProperties(configuration))) {
        final String topic = configuration.get(KafkaTableProperties.HIVE_KAFKA_TOPIC.getName());
        final long timeoutMs = configuration.getLong(KafkaTableProperties.KAFKA_FETCH_METADATA_TIMEOUT.getName(), -1);
        final int maxTries = configuration.getInt(KafkaTableProperties.MAX_RETRIES.getName(), -1);
        // Hive depends on FileSplits
        JobConf jobConf = new JobConf(configuration);
        Path[] tablePaths = org.apache.hadoop.mapred.FileInputFormat.getInputPaths(jobConf);
        final Future<List<KafkaInputSplit>> futureFullHouse =
                execService.submit(() -> buildFullScanFromKafka(topic, consumer, tablePaths, maxTries));
        final List<KafkaInputSplit> fullHouse;
        try {
            fullHouse = futureFullHouse.get(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (TimeoutException | ExecutionException e) {
            futureFullHouse.cancel(true);
            LOG.error("can not generate full scan split", e);
            // at this point we can not go further fail split generation
            throw new IOException(e);
        }
        final ImmutableMap.Builder<TopicPartition, KafkaInputSplit> fullHouseMapBuilder = new ImmutableMap.Builder<>();
        fullHouse.forEach(input -> fullHouseMapBuilder.put(new TopicPartition(input.getTopic(), input.getPartition()), input));
        final KafkaScanTrimmer kafkaScanTrimmer = new KafkaScanTrimmer(fullHouseMapBuilder.build(), consumer);
        final String filterExprSerialized = configuration.get(TableScanDesc.FILTER_EXPR_CONF_STR);
        if (filterExprSerialized != null && !filterExprSerialized.isEmpty()) {
            ExprNodeGenericFuncDesc filterExpr = SerializationUtilities.deserializeExpression(filterExprSerialized);
            LOG.info("Kafka trimmer working on Filter tree {}", filterExpr.getExprString());
            Callable<List<KafkaInputSplit>> trimmerWorker = () -> kafkaScanTrimmer.computeOptimizedScan(filterExpr).entrySet().stream().map(Map.Entry::getValue).collect(Collectors.toList());
            Future<List<KafkaInputSplit>> futureTinyHouse = execService.submit(trimmerWorker);
            try {
                return futureTinyHouse.get(timeoutMs, TimeUnit.MILLISECONDS).stream().filter(split -> split.getStartOffset() < split.getEndOffset()).collect(Collectors.toList());
            } catch (ExecutionException | TimeoutException e) {
                futureTinyHouse.cancel(true);
                LOG.error("Had issue with trimmer will return full scan ", e);
                return fullHouse;
            }
        }
        // Null/empty filter: either the filter evaluated to false or there is no filter at all, so return the full scan
        return fullHouse;
    } finally {
        execService.shutdown();
    }
}
Also used : ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) NullWritable(org.apache.hadoop.io.NullWritable) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) SerializationUtilities(org.apache.hadoop.hive.ql.exec.SerializationUtilities) Callable(java.util.concurrent.Callable) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) VectorizedSupport(org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport) Future(java.util.concurrent.Future) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) ExecutorService(java.util.concurrent.ExecutorService) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TopicPartition(org.apache.kafka.common.TopicPartition) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) Reporter(org.apache.hadoop.mapred.Reporter) InputFormat(org.apache.hadoop.mapreduce.InputFormat) IOException(java.io.IOException) PartitionInfo(org.apache.kafka.common.PartitionInfo) VectorizedInputFormatInterface(org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) JobConf(org.apache.hadoop.mapred.JobConf) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) JobContext(org.apache.hadoop.mapreduce.JobContext) InputSplit(org.apache.hadoop.mapred.InputSplit) RecordReader(org.apache.hadoop.mapred.RecordReader) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
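
The timeout pattern in computeSplits (submit the blocking Kafka call to a single-thread executor, then cancel it if the deadline passes) is reusable on its own. Below is a minimal sketch of that pattern; BoundedCall and callWithTimeout are illustrative names, not Hive or Kafka API.

import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class BoundedCall {

    // Run a blocking task with a hard deadline; cancel (interrupt) it on timeout or failure.
    static <T> T callWithTimeout(Callable<T> task, long timeoutMs) throws IOException {
        final ExecutorService execService = Executors.newSingleThreadExecutor();
        try {
            final Future<T> future = execService.submit(task);
            try {
                return future.get(timeoutMs, TimeUnit.MILLISECONDS);
            } catch (TimeoutException | ExecutionException e) {
                future.cancel(true);
                throw new IOException("call did not finish within " + timeoutMs + " ms", e);
            } catch (InterruptedException e) {
                future.cancel(true);
                // restore the interrupt flag before surfacing the failure
                Thread.currentThread().interrupt();
                throw new IOException("interrupted while waiting for result", e);
            }
        } finally {
            execService.shutdown();
        }
    }
}

computeSplits above applies this shape twice with one shared executor: once for the full scan and once for the trimmed scan, falling back to the full scan when the trimmer fails or times out.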

Example 52 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class ConvertAstToSearchArg method parse.

/**
 * Recursively parse the Hive ExprNodeDesc into our ExpressionTree.
 * @param expression the Hive ExprNodeDesc
 */
private void parse(ExprNodeDesc expression) {
    // handle the special cases.
    if (expression.getClass() != ExprNodeGenericFuncDesc.class) {
        // if it is a reference to a boolean column, convert it to a truth test.
        if (expression instanceof ExprNodeColumnDesc) {
            ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression;
            if (columnDesc.getTypeString().equals("boolean")) {
                builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN, true);
                return;
            }
        }
        // otherwise, we don't know what to do, so mark it as a maybe
        builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
        return;
    }
    // get the kind of expression
    ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
    Class<?> op = expr.getGenericUDF().getClass();
    // handle the logical operators
    if (op == GenericUDFOPOr.class) {
        builder.startOr();
        addChildren(expr);
        builder.end();
    } else if (op == GenericUDFOPAnd.class) {
        builder.startAnd();
        addChildren(expr);
        builder.end();
    } else if (op == GenericUDFOPNot.class) {
        builder.startNot();
        addChildren(expr);
        builder.end();
    } else if (op == GenericUDFOPEqual.class) {
        createLeaf(PredicateLeaf.Operator.EQUALS, expr);
    } else if (op == GenericUDFOPNotEqual.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.EQUALS, expr);
        builder.end();
    } else if (op == GenericUDFOPEqualNS.class) {
        createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr);
    } else if (op == GenericUDFOPGreaterThan.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
        builder.end();
    } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
        builder.end();
    } else if (op == GenericUDFOPLessThan.class) {
        createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
    } else if (op == GenericUDFOPEqualOrLessThan.class) {
        createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
    } else if (op == GenericUDFIn.class) {
        createLeaf(PredicateLeaf.Operator.IN, expr, 0);
    } else if (op == GenericUDFBetween.class) {
        // The first child of GenericUDFBetween is TRUE for NOT BETWEEN, so start with a NOT operator
        if (Boolean.TRUE.equals(((ExprNodeConstantDesc) expression.getChildren().get(0)).getValue())) {
            builder.startNot();
            createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
            builder.end();
        } else {
            createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
        }
    } else if (op == GenericUDFOPNull.class) {
        createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
    } else if (op == GenericUDFOPNotNull.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
        builder.end();
    // otherwise, we didn't understand it, so mark it maybe
    } else {
        builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
    }
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) GenericUDFOPEqualNS(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull)
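
To see what the builder calls in parse amount to, the same tree can be constructed directly against the SearchArgument API. A minimal sketch, assuming hypothetical columns x (long) and y (string); note how x > 3 becomes NOT(x <= 3), mirroring the GenericUDFOPGreaterThan branch above.

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class SargSketch {
    public static void main(String[] args) {
        // WHERE x > 3 AND y = 'foo', expressed the way parse() would emit it
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
                .startNot()
                    .lessThanEquals("x", PredicateLeaf.Type.LONG, 3L)
                .end()
                .equals("y", PredicateLeaf.Type.STRING, "foo")
            .end()
            .build();
        System.out.println(sarg);
    }
}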

Example 53 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class ProjectionPusher method pushProjectionsAndFilters.

private void pushProjectionsAndFilters(final JobConf jobConf, final String splitPath, final String splitPathWithNoSchema) {
    if (mapWork == null || mapWork.getPathToAliases() == null) {
        return;
    }
    final Set<String> aliases = new HashSet<String>();
    try {
        List<String> a = HiveFileFormatUtils.getFromPathRecursively(mapWork.getPathToAliases(), new Path(splitPath), null, false, true);
        if (a != null) {
            aliases.addAll(a);
        }
        if (a == null || a.isEmpty()) {
            // TODO: not having aliases for path usually means some bug. Should it give up?
            LOG.warn("Couldn't find aliases for " + splitPath);
        }
    } catch (IllegalArgumentException | IOException e) {
        throw new RuntimeException(e);
    }
    // Collect the needed columns from all the aliases and create ORed filter
    // expression for the table.
    boolean allColumnsNeeded = false;
    boolean noFilters = false;
    Set<Integer> neededColumnIDs = new HashSet<Integer>();
    // To support nested column pruning, we need to track the path from the top to the nested
    // fields
    Set<String> neededNestedColumnPaths = new HashSet<String>();
    List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
    RowSchema rowSchema = null;
    for (String alias : aliases) {
        final Operator<? extends Serializable> op = mapWork.getAliasToWork().get(alias);
        if (op instanceof TableScanOperator) {
            final TableScanOperator ts = (TableScanOperator) op;
            if (ts.getNeededColumnIDs() == null) {
                allColumnsNeeded = true;
            } else {
                neededColumnIDs.addAll(ts.getNeededColumnIDs());
                if (ts.getNeededNestedColumnPaths() != null) {
                    neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
                }
            }
            rowSchema = ts.getSchema();
            ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
            // No filters overall if any TS lacks a filter expression
            noFilters = noFilters || filterExpr == null;
            filterExprs.add(filterExpr);
        }
    }
    ExprNodeGenericFuncDesc tableFilterExpr = null;
    if (!noFilters) {
        try {
            for (ExprNodeGenericFuncDesc filterExpr : filterExprs) {
                if (tableFilterExpr == null) {
                    tableFilterExpr = filterExpr;
                } else {
                    tableFilterExpr = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), Arrays.<ExprNodeDesc>asList(tableFilterExpr, filterExpr));
                }
            }
        } catch (UDFArgumentException ex) {
            LOG.debug("Turn off filtering due to " + ex);
            tableFilterExpr = null;
        }
    }
    // push down projections
    if (!allColumnsNeeded) {
        if (!neededColumnIDs.isEmpty()) {
            ColumnProjectionUtils.appendReadColumns(jobConf, new ArrayList<Integer>(neededColumnIDs));
            ColumnProjectionUtils.appendNestedColumnPaths(jobConf, new ArrayList<String>(neededNestedColumnPaths));
        }
    } else {
        ColumnProjectionUtils.setReadAllColumns(jobConf);
    }
    pushFilters(jobConf, rowSchema, tableFilterExpr);
}
Also used : Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) IOException(java.io.IOException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) HashSet(java.util.HashSet)
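
The only plan rewrite in pushProjectionsAndFilters is the OR-combination of per-alias filters. A minimal sketch of that construction, assuming two hypothetical boolean columns flag_a and flag_b standing in for full predicate trees:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class OrFilterSketch {
    public static void main(String[] args) throws UDFArgumentException {
        ExprNodeDesc colA = new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "flag_a", "t", false);
        ExprNodeDesc colB = new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "flag_b", "t", false);
        // newInstance type-checks the children and resolves the UDF's return type
        ExprNodeDesc combined = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), Arrays.asList(colA, colB));
        System.out.println(combined.getExprString());
    }
}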

Example 54 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class ConstantPropagateProcFactory method evaluateFunction.

/**
 * Evaluate a UDF over constant arguments.
 *
 * @param udf the UDF object
 * @param exprs the current (possibly folded) argument expressions
 * @param oldExprs the original argument expressions, used to preserve argument types
 * @return the evaluated ExprNodeConstantDesc if possible, or null if the expression
 *         cannot be evaluated (not all parameters are constants)
 */
private static ExprNodeDesc evaluateFunction(GenericUDF udf, List<ExprNodeDesc> exprs, List<ExprNodeDesc> oldExprs) {
    DeferredJavaObject[] arguments = new DeferredJavaObject[exprs.size()];
    ObjectInspector[] argois = new ObjectInspector[exprs.size()];
    for (int i = 0; i < exprs.size(); i++) {
        ExprNodeDesc desc = exprs.get(i);
        if (desc instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constant = (ExprNodeConstantDesc) exprs.get(i);
            if (!constant.getTypeInfo().equals(oldExprs.get(i).getTypeInfo())) {
                constant = typeCast(constant, oldExprs.get(i).getTypeInfo());
                if (constant == null) {
                    return null;
                }
            }
            if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
                // nested complex types cannot be folded cleanly
                return null;
            }
            Object value = constant.getValue();
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo();
            Object writableValue = null == value ? value : PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti).getPrimitiveWritableObject(value);
            arguments[i] = new DeferredJavaObject(writableValue);
            argois[i] = ObjectInspectorUtils.getConstantObjectInspector(constant.getWritableObjectInspector(), writableValue);
        } else if (desc instanceof ExprNodeGenericFuncDesc) {
            ExprNodeDesc evaluatedFn = foldExpr((ExprNodeGenericFuncDesc) desc);
            if (null == evaluatedFn || !(evaluatedFn instanceof ExprNodeConstantDesc)) {
                return null;
            }
            ExprNodeConstantDesc constant = (ExprNodeConstantDesc) evaluatedFn;
            if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
                // nested complex types cannot be folded cleanly
                return null;
            }
            Object writableValue = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) constant.getTypeInfo()).getPrimitiveWritableObject(constant.getValue());
            arguments[i] = new DeferredJavaObject(writableValue);
            argois[i] = ObjectInspectorUtils.getConstantObjectInspector(constant.getWritableObjectInspector(), writableValue);
        } else {
            return null;
        }
    }
    try {
        ObjectInspector oi = udf.initialize(argois);
        Object o = udf.evaluate(arguments);
        if (LOG.isDebugEnabled()) {
            LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o);
        }
        if (o == null) {
            return new ExprNodeConstantDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
        }
        Class<?> clz = o.getClass();
        if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) {
            PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
            TypeInfo typeInfo = poi.getTypeInfo();
            o = poi.getPrimitiveJavaObject(o);
            if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) || typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME) || typeInfo.getTypeName().contains(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
                return new ExprNodeConstantDesc(typeInfo, o);
            }
        } else if (udf instanceof GenericUDFStruct && oi instanceof StandardConstantStructObjectInspector) {
            // do not fold named_struct, only struct()
            ConstantObjectInspector coi = (ConstantObjectInspector) oi;
            TypeInfo structType = TypeInfoUtils.getTypeInfoFromObjectInspector(coi);
            return new ExprNodeConstantDesc(structType, ObjectInspectorUtils.copyToStandardJavaObject(o, coi));
        } else if (!PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
            if (LOG.isErrorEnabled()) {
                LOG.error("Unable to evaluate {}({}). Return value unrecoginizable.", udf.getClass().getName(), exprs);
            }
            return null;
        }
        String constStr = null;
        if (arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
            // remember original string representation of constant.
            constStr = arguments[0].get().toString();
        }
        return new ExprNodeConstantDesc(o).setFoldedFromVal(constStr);
    } catch (HiveException e) {
        LOG.error("Evaluation function {}({}) failed in Constant Propagation Optimizer.", udf.getClass().getName(), exprs);
        throw new RuntimeException(e);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFStruct(org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct)
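
The core mechanism in evaluateFunction — initialize a GenericUDF against constant ObjectInspectors, then evaluate it with DeferredJavaObjects — can be exercised in isolation. A minimal sketch folding concat('2001', '12') at compile time; the choice of GenericUDFConcat and the literal values are illustrative.

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class FoldConcatSketch {
    public static void main(String[] args) throws HiveException {
        GenericUDF udf = new GenericUDFConcat();
        // Constant OIs tell initialize() that the arguments are compile-time constants
        ObjectInspector[] argois = {
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector, new Text("2001")),
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector, new Text("12"))
        };
        udf.initialize(argois);
        DeferredObject[] arguments = {
            new DeferredJavaObject(new Text("2001")),
            new DeferredJavaObject(new Text("12"))
        };
        // prints 200112, the value evaluateFunction would wrap in an ExprNodeConstantDesc
        System.out.println(udf.evaluate(arguments));
    }
}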

Example 55 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class ConstantPropagateProcFactory method foldExprFull.

/**
 * Fold the input expression desc.
 *
 * This function recursively checks whether any subexpression of the specified expression
 * can be evaluated to a constant and replaces such a subexpression with the constant.
 * If the expression is a deterministic UDF and all of its subexpressions are constants,
 * the value is calculated immediately (at compile time rather than runtime).
 * e.g.:
 *   concat(year, month) => 200112 for year=2001, month=12, since concat is a deterministic UDF
 *   unix_timestamp(time) => unix_timestamp(123) for time=123, since unix_timestamp is a nondeterministic UDF
 * @param desc the expression to fold
 * @param constants current propagated constant map
 * @param cppCtx constant propagation context
 * @param op processing operator
 * @param propagate if true, assignment expressions will be added to constants
 * @return the folded expression
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants, ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag, boolean propagate) throws UDFArgumentException {
    // Combine NOT operator with the child operator. Otherwise, the following optimization
    // from bottom up could lead to incorrect result, such as not(x > 3 and x is not null),
    // should not be optimized to not(x > 3), but (x <=3 or x is null).
    desc = foldNegative(desc);
    if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
        GenericUDF udf = funcDesc.getGenericUDF();
        boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass());
        List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDesc childExpr : desc.getChildren()) {
            newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext));
        }
        // Don't evaluate a nondeterministic function, since its value can only be calculated at runtime.
        if (!isConstantFoldableUdf(udf, newExprs)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evaluate immediately.");
            }
            ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
            return desc;
        } else {
            // If all child expressions of deterministic function are constants, evaluate such UDF immediately
            ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren());
            if (constant != null) {
                LOG.debug("Folding expression: {} -> {}", desc, constant);
                return constant;
            } else {
                // Check if the function can be short cut.
                ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
                if (shortcut != null) {
                    LOG.debug("Folding expression: {} -> {}", desc, shortcut);
                    return shortcut;
                }
                ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
            }
            // Even when the expression cannot be fully folded, column-to-constant bindings
            // can still be added to colToConstants as half-deterministic columns.
            if (propagate) {
                propagate(udf, newExprs, op.getSchema(), constants);
            }
        }
        return desc;
    } else if (desc instanceof ExprNodeColumnDesc) {
        if (op.getParentOperators() == null || op.getParentOperators().isEmpty()) {
            return desc;
        }
        Operator<? extends Serializable> parent = op.getParentOperators().get(tag);
        ExprNodeDesc col = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent);
        if (col != null) {
            LOG.debug("Folding expression: {} -> {}", desc, col);
            return col;
        }
    }
    return desc;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Serializable(java.io.Serializable) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
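
Stripped of Hive's operator plumbing, foldExprFull is a standard bottom-up fold: fold the children first, then evaluate the parent only if it is deterministic and every folded child is a constant. A minimal sketch of that shape over a toy expression tree; Expr, Const, and Call are made-up types, not Hive classes.

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

public class ToyFoldSketch {
    interface Expr {}
    record Const(Object value) implements Expr {}
    record Call(Function<List<Object>, Object> fn, boolean deterministic, List<Expr> args) implements Expr {}

    // Bottom-up fold: children first, then the call itself if it is deterministic
    // and all folded children are constants (mirroring foldExprFull above)
    static Expr fold(Expr e) {
        if (!(e instanceof Call call)) {
            return e;
        }
        List<Expr> folded = new ArrayList<>();
        for (Expr arg : call.args()) {
            folded.add(fold(arg));
        }
        if (call.deterministic() && folded.stream().allMatch(a -> a instanceof Const)) {
            List<Object> values = folded.stream().map(a -> ((Const) a).value()).toList();
            return new Const(call.fn().apply(values));
        }
        // keep the call, but with its folded children, like setChildren(newExprs)
        return new Call(call.fn(), call.deterministic(), folded);
    }

    public static void main(String[] args) {
        Expr concat = new Call(vs -> "" + vs.get(0) + vs.get(1), true,
            List.of(new Const("2001"), new Const("12")));
        System.out.println(((Const) fold(concat)).value()); // 200112
    }
}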

Aggregations

ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 228
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 165
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 134
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 123
ArrayList (java.util.ArrayList): 106
Test (org.junit.Test): 92
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 49
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 44
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 38
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 37
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 30
List (java.util.List): 29
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 28
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 26
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 24
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 23
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 22
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 22
HashMap (java.util.HashMap): 21
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 21