Use of org.apache.hadoop.hive.ql.udf.generic.NDV in the Apache Hive project.
In the class StatsUtils, the method getNDVFor.
/**
 * Estimates the number of distinct values (NDV) produced by a generic function expression.
 *
 * <ul>
 *   <li>If the function is not deterministic, {@code numRows} is returned.</li>
 *   <li>If the function's class carries an {@link NDV} annotation, the result is the smaller of
 *       the annotation's {@code maxNdv()} and {@code numRows}; column statistics are not
 *       consulted in this case.</li>
 *   <li>Otherwise the distinct counts of the referenced columns that have statistics in
 *       {@code parentStats} are combined via {@code addWithExpDecay} and capped at
 *       {@code numRows}. If no referenced column has statistics, {@code numRows} is returned.</li>
 * </ul>
 *
 * @param engfd the function expression to estimate
 * @param numRows row count used as the upper bound and as the fallback estimate
 * @param parentStats statistics used to look up per-column distinct counts by column name
 * @return the estimated NDV, never greater than {@code numRows}
 */
private static long getNDVFor(ExprNodeGenericFuncDesc engfd, long numRows, Statistics parentStats) {
  final GenericUDF function = engfd.getGenericUDF();
  if (!FunctionRegistry.isDeterministic(function)) {
    return numRows;
  }

  // For a bridged UDF the annotation is looked up on the class the bridge reports, not on the bridge.
  final Class<?> annotatedClass;
  if (function instanceof GenericUDFBridge) {
    annotatedClass = ((GenericUDFBridge) function).getUdfClass();
  } else {
    annotatedClass = function.getClass();
  }

  final NDV declaredNdv = AnnotationUtils.getAnnotation(annotatedClass, NDV.class);
  if (declaredNdv != null) {
    // A declared maximum takes precedence; only the row count can lower it further.
    return Math.min(declaredNdv.maxNdv(), numRows);
  }

  // No annotation: gather the distinct counts of every referenced column that has statistics.
  final List<Long> columnNdvs = Lists.newArrayList();
  for (String columnName : engfd.getCols()) {
    final ColStatistics columnStats = parentStats.getColumnStatisticsFromColName(columnName);
    if (columnStats == null) {
      continue;
    }
    columnNdvs.add(columnStats.getCountDistint());
  }

  if (columnNdvs.isEmpty()) {
    return numRows;
  }
  return Math.min(addWithExpDecay(columnNdvs), numRows);
}
Aggregations