Search in sources :

Example 16 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project hive by apache.

the class StatsUtils method getNDVFor.

private static long getNDVFor(ExprNodeGenericFuncDesc engfd, long numRows, Statistics parentStats) {
    GenericUDF udf = engfd.getGenericUDF();
    if (!FunctionRegistry.isDeterministic(udf) && !FunctionRegistry.isRuntimeConstant(udf)) {
        return numRows;
    }
    List<Long> ndvs = Lists.newArrayList();
    Class<?> udfClass = udf instanceof GenericUDFBridge ? ((GenericUDFBridge) udf).getUdfClass() : udf.getClass();
    NDV ndv = AnnotationUtils.getAnnotation(udfClass, NDV.class);
    long udfNDV = Long.MAX_VALUE;
    if (ndv != null) {
        udfNDV = ndv.maxNdv();
    } else {
        for (String col : engfd.getCols()) {
            ColStatistics stats = parentStats.getColumnStatisticsFromColName(col);
            if (stats != null) {
                ndvs.add(stats.getCountDistint());
            }
        }
    }
    long countDistincts = ndvs.isEmpty() ? numRows : addWithExpDecay(ndvs);
    return Collections.min(Lists.newArrayList(countDistincts, udfNDV, numRows));
}
Also used : NDV(org.apache.hadoop.hive.ql.udf.generic.NDV) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge)

Example 17 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project hive by apache.

the class TestAccumuloRangeGenerator method testCastExpression.

@Test
public void testCastExpression() throws Exception {
    // 40 and 50
    ExprNodeDesc fourty = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 40), fifty = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 50);
    // +
    GenericUDFOPPlus plus = new GenericUDFOPPlus();
    // 40 + 50
    ExprNodeGenericFuncDesc addition = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, plus, Arrays.asList(fourty, fifty));
    // cast(.... as string)
    UDFToString stringCast = new UDFToString();
    GenericUDFBridge stringCastBridge = new GenericUDFBridge("cast", false, stringCast.getClass().getName());
    // cast (40 + 50 as string)
    ExprNodeGenericFuncDesc cast = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, stringCastBridge, "cast", Collections.<ExprNodeDesc>singletonList(addition));
    ExprNodeDesc key = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", null, false);
    ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(key, cast));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "key");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(node);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    // Don't fail -- would be better to actually compute a range of [90,+inf)
    Object result = nodeOutput.get(node);
    Assert.assertNull(result);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) GenericUDFOPPlus(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Test(org.junit.Test)

Example 18 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project hive by apache.

the class ExprNodeDescUtils method foldConstant.

private static ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) {
    GenericUDF udf = func.getGenericUDF();
    if (!FunctionRegistry.isConsistentWithinQuery(udf)) {
        return null;
    }
    try {
        // resources may not be available at compile time.
        if (udf instanceof GenericUDFBridge) {
            UDF internal = ReflectionUtils.newInstance(((GenericUDFBridge) udf).getUdfClass(), null);
            if (internal.getRequiredFiles() != null || internal.getRequiredJars() != null) {
                return null;
            }
        } else {
            if (udf.getRequiredFiles() != null || udf.getRequiredJars() != null) {
                return null;
            }
        }
        if (func.getChildren() != null) {
            for (ExprNodeDesc child : func.getChildren()) {
                if (child instanceof ExprNodeConstantDesc) {
                    continue;
                }
                if (child instanceof ExprNodeGenericFuncDesc) {
                    if (foldConstant((ExprNodeGenericFuncDesc) child) != null) {
                        continue;
                    }
                }
                return null;
            }
        }
        ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(func);
        ObjectInspector output = evaluator.initialize(null);
        Object constant = evaluator.evaluate(null);
        Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
        return new ExprNodeConstantDesc(java);
    } catch (Exception e) {
        return null;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) UDF(org.apache.hadoop.hive.ql.exec.UDF) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Aggregations

GenericUDFBridge (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge)18 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)8 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)8 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)8 Test (org.junit.Test)7 ArrayList (java.util.ArrayList)6 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)6 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)4 UDF (org.apache.hadoop.hive.ql.exec.UDF)3 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)3 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)3 GenericUDFToUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp)3 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)2 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)2 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)2 GenericUDFToBinary (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary)2 GenericUDFToChar (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar)2 GenericUDFToDate (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate)2 GenericUDFToDecimal (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal)2