Search in sources :

Example 11 with ColumnStatistics

Use of org.apache.orc.ColumnStatistics in the Apache Hive project.

From the class OrcInputFormat, method isStripeSatisfyPredicate.

/**
 * Decides whether a stripe has to be read for the given search argument, using the
 * stripe-level column statistics.
 *
 * <p>One {@link TruthValue} is computed per predicate leaf:
 * <ul>
 *   <li>a leaf whose {@code filterColumns} entry is {@code -1} is not evaluated here and
 *       is assigned {@code YES_NO_NULL};</li>
 *   <li>a leaf whose column is reported by {@code evolution} as not safe for predicate
 *       push-down is assigned {@code YES_NO_NULL};</li>
 *   <li>every other leaf is evaluated against the statistics of its column via
 *       {@code RecordReaderImpl.evaluatePredicate}; if dynamic values are unavailable the
 *       leaf falls back to {@code YES_NO_NULL} or {@code YES_NO}.</li>
 * </ul>
 *
 * @param stripeStatistics statistics of the stripe under consideration
 * @param sarg             search argument whose leaves are evaluated
 * @param filterColumns    for each leaf (same order as {@code sarg.getLeaves()}), the index
 *                         into the stripe's column statistics, or {@code -1} when the leaf
 *                         must not be evaluated here
 * @param evolution        schema evolution used to check push-down safety; may be
 *                         {@code null}, in which case no safety check is performed
 * @return {@code sarg.evaluate(truthValues).isNeeded()} for the computed truth values
 */
private static boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) {
    List<PredicateLeaf> predLeaves = sarg.getLeaves();
    TruthValue[] truthValues = new TruthValue[predLeaves.size()];
    // Fetched lazily and at most once per call instead of once per predicate leaf.
    // NOTE(review): the ORC accessor is believed to rebuild the whole array on every
    // call, which made the old per-leaf lookup needlessly expensive - confirm.
    ColumnStatistics[] columnStats = null;
    for (int pred = 0; pred < truthValues.length; pred++) {
        final int column = filterColumns[pred];
        if (column == -1) {
            // partition column case.
            // partition filter will be evaluated by partition pruner so
            // we will not evaluate partition filter here.
            truthValues[pred] = TruthValue.YES_NO_NULL;
            continue;
        }
        if (evolution != null && !evolution.isPPDSafeConversion(column)) {
            // The conversion is not safe for predicate push-down: stay inconclusive.
            truthValues[pred] = TruthValue.YES_NO_NULL;
            continue;
        }
        if (columnStats == null) {
            columnStats = stripeStatistics.getColumnStatistics();
        }
        // column statistics at index 0 contains only the number of rows
        ColumnStatistics stats = columnStats[column];
        PredicateLeaf leaf = predLeaves.get(pred);
        try {
            truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, leaf, null);
        } catch (NoDynamicValuesException dve) {
            LOG.debug("Dynamic values are not available here {}", dve.getMessage());
            // Without the dynamic value the leaf cannot be decided; only the
            // null-safe-equals operator on a column without nulls excludes NULL.
            boolean hasNulls = stats.hasNull() || leaf.getOperator() != Operator.NULL_SAFE_EQUALS;
            truthValues[pred] = hasNulls ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
        }
    }
    return sarg.evaluate(truthValues).isNeeded();
}
Also used : ColumnStatistics(org.apache.orc.ColumnStatistics) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) TruthValue(org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue) NoDynamicValuesException(org.apache.hadoop.hive.ql.plan.DynamicValue.NoDynamicValuesException)

Aggregations

ColumnStatistics (org.apache.orc.ColumnStatistics)11 BinaryColumnStatistics (org.apache.orc.BinaryColumnStatistics)10 BooleanColumnStatistics (org.apache.orc.BooleanColumnStatistics)10 DoubleColumnStatistics (org.apache.orc.DoubleColumnStatistics)10 IntegerColumnStatistics (org.apache.orc.IntegerColumnStatistics)10 StringColumnStatistics (org.apache.orc.StringColumnStatistics)10 Test (org.junit.Test)10 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)9 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)9 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)9 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)8 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)6 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)6 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)6 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)6 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)6 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)6 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)6 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)6 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)6