Search in sources :

Example 1 with StatEstimatorProvider

Use of org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider in the Apache Hive project.

Class StatsUtils, method getColStatisticsFromExpression.

/**
 * Get column statistics for the value produced by an expression node.
 *
 * <p>Handling per expression kind:
 * <ul>
 *   <li>column reference - a clone of the statistics stored in {@code parentStats} under the
 *       column name; when none are stored, partition/virtual columns get a default estimate
 *       (NDV = number of rows) and every other column yields {@code null};</li>
 *   <li>constant - delegated to {@code buildColStatForConstant};</li>
 *   <li>generic function - a widening cast over a single column reuses that column's
 *       statistics; otherwise, if stat estimators are enabled and the UDF provides one, its
 *       estimate is used; otherwise NDV comes from {@code getNDVFor};</li>
 *   <li>column list / field of a complex type - default estimate with NDV = number of rows;</li>
 *   <li>dynamic parameter - {@code null}.</li>
 * </ul>
 *
 * @param conf
 *          - hive conf
 * @param parentStats
 *          - parent statistics
 * @param end
 *          - expression node; may be null
 * @return column statistics, or null when {@code end} is null or no statistics are produced
 *         for the expression
 * @throws IllegalArgumentException
 *          - if the expression node type is not supported
 */
public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, ExprNodeDesc end) {
    if (end == null) {
        return null;
    }
    String colName = null;
    String colType = null;
    long countDistincts = 0;
    ObjectInspector oi = end.getWritableObjectInspector();
    long numRows = parentStats.getNumRows();
    if (end instanceof ExprNodeColumnDesc) {
        // column projection
        ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
        colName = encd.getColumn();
        boolean partitionOrVirtualCol = encd.getIsPartitionColOrVirtualCol();
        ColStatistics existing = parentStats.getColumnStatisticsFromColName(colName);
        if (existing != null) {
            /* If statistics for the column already exist use a copy of them. */
            return existing.clone();
        }
        if (!partitionOrVirtualCol) {
            // regular column without recorded statistics: nothing to report
            return null;
        }
        // partition/virtual column without recorded statistics: fall through to the
        // default estimate built at the end of the method, with NDV = row count
        colType = encd.getTypeInfo().getTypeName();
        countDistincts = numRows;
    } else if (end instanceof ExprNodeConstantDesc) {
        return buildColStatForConstant(conf, numRows, (ExprNodeConstantDesc) end);
    } else if (end instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
        colName = engfd.getName();
        colType = engfd.getTypeString();
        // If it is a widening cast, we do not change NDV, min, max
        if (isWideningCast(engfd) && engfd.getChildren().get(0) instanceof ExprNodeColumnDesc) {
            // cast on single column
            ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
            if (stats != null) {
                ColStatistics newStats = stats.clone();
                newStats.setColumnName(colName);
                // Locale.ROOT keeps type-name normalization independent of the JVM default locale
                colType = colType.toLowerCase(java.util.Locale.ROOT);
                newStats.setColumnType(colType);
                newStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
                return newStats;
            }
        }
        if (conf.getBoolVar(ConfVars.HIVE_STATS_ESTIMATORS_ENABLE)) {
            Optional<StatEstimatorProvider> sep = engfd.getGenericUDF().adapt(StatEstimatorProvider.class);
            if (sep.isPresent()) {
                StatEstimator se = sep.get().getStatEstimator();
                List<ColStatistics> csList = new ArrayList<>();
                for (ExprNodeDesc child : engfd.getChildren()) {
                    ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, child);
                    if (cs == null) {
                        // one argument without statistics is enough to skip the estimator
                        break;
                    }
                    csList.add(cs);
                }
                // only estimate when statistics were obtained for every argument
                if (csList.size() == engfd.getChildren().size()) {
                    Optional<ColStatistics> res = se.estimate(csList);
                    if (res.isPresent()) {
                        ColStatistics newStats = res.get();
                        colType = colType.toLowerCase(java.util.Locale.ROOT);
                        newStats.setColumnType(colType);
                        newStats.setColumnName(colName);
                        return newStats;
                    }
                }
            }
        }
        // fallback to default
        countDistincts = getNDVFor(engfd, numRows, parentStats);
    } else if (end instanceof ExprNodeColumnListDesc) {
        // column list
        ExprNodeColumnListDesc encd = (ExprNodeColumnListDesc) end;
        colName = Joiner.on(",").join(encd.getCols());
        colType = serdeConstants.LIST_TYPE_NAME;
        countDistincts = numRows;
    } else if (end instanceof ExprNodeFieldDesc) {
        // field within complex type
        ExprNodeFieldDesc enfd = (ExprNodeFieldDesc) end;
        colName = enfd.getFieldName();
        colType = enfd.getTypeString();
        countDistincts = numRows;
    } else if (end instanceof ExprDynamicParamDesc) {
        // no column statistics are produced for dynamic parameters
        return null;
    } else {
        throw new IllegalArgumentException("not supported expr type " + end.getClass());
    }
    // default estimate: average length derived from the type, NDV as computed above, no nulls
    colType = colType.toLowerCase(java.util.Locale.ROOT);
    ColStatistics colStats = new ColStatistics(colName, colType);
    colStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
    colStats.setCountDistint(countDistincts);
    colStats.setNumNulls(0L);
    return colStats;
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) WritableTimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampLocalTZObjectInspector) StandardConstantListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector) StandardConstantMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 
StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) StandardConstantStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) StatEstimator(org.apache.hadoop.hive.ql.stats.estimator.StatEstimator) ExprNodeColumnListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc) ExprDynamicParamDesc(org.apache.hadoop.hive.ql.plan.ExprDynamicParamDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) StatEstimatorProvider(org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ArrayList (java.util.ArrayList)1 ColStatistics (org.apache.hadoop.hive.ql.plan.ColStatistics)1 ExprDynamicParamDesc (org.apache.hadoop.hive.ql.plan.ExprDynamicParamDesc)1 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)1 ExprNodeColumnListDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc)1 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc)1 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)1 StatEstimator (org.apache.hadoop.hive.ql.stats.estimator.StatEstimator)1 StatEstimatorProvider (org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider)1 ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 StandardConstantListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector)1 StandardConstantMapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector)1 StandardConstantStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector)1 StandardListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector)1 StandardMapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1 UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector)1