Search in sources :

Example 1 with DoubleStatistics

use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by apache.

the class RangeExprEvaluator method getStatistics.

/**
 * Creates a new {@link DoubleStatistics} and sets its min and max to the given values.
 *
 * @param min value passed as the minimum to {@code setMinMax}
 * @param max value passed as the maximum to {@code setMinMax}
 * @return the newly created statistics instance
 */
private DoubleStatistics getStatistics(double min, double max) {
    final DoubleStatistics result = new DoubleStatistics();
    result.setMinMax(min, max);
    return result;
}
Also used : DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics)

Example 2 with DoubleStatistics

use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by apache.

the class ParquetMetaStatCollector method getStat.

/**
 * Builds column statistics from the given min, max and null-count values.
 *
 * <p>The statistics object is obtained from {@code Statistics.getStatsBasedOnType}
 * and the Drill type from {@code ParquetGroupScan.getType}. Min/max are only set
 * when both are non-null and the minor type is one of the handled cases; for DATE
 * the values are converted with {@code convertToDrillDateValue} and stored in a
 * separate {@link LongStatistics}.
 *
 * @param min             min value for statistics, may be null
 * @param max             max value for statistics, may be null
 * @param numNull         number of nulls, may be null
 * @param primitiveType   parquet primitive type used to pick the statistics class
 * @param originalType    parquet original type used to resolve the Drill type
 * @param repetitionLevel when non-null and greater than zero, the type is made REPEATED
 * @return column statistics wrapping the populated statistics and the resolved type
 */
private ColumnStatistics getStat(Object min, Object max, Long numNull, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, Integer repetitionLevel) {
    final Statistics rawStat = Statistics.getStatsBasedOnType(primitiveType);
    TypeProtos.MajorType majorType = ParquetGroupScan.getType(primitiveType, originalType);

    // Change to repeated if repetitionLevel > 0
    final boolean isRepeated = repetitionLevel != null && repetitionLevel > 0;
    if (isRepeated) {
        majorType = TypeProtos.MajorType.newBuilder()
            .setMinorType(majorType.getMinorType())
            .setMode(TypeProtos.DataMode.REPEATED)
            .build();
    }

    if (numNull != null) {
        rawStat.setNumNulls(numNull.longValue());
    }

    // Without both bounds there is nothing more to fill in.
    if (min == null || max == null) {
        return new ColumnStatistics(rawStat, majorType);
    }

    Statistics result = rawStat;
    switch (majorType.getMinorType()) {
        case INT:
        case TIME: {
            final IntStatistics intStat = (IntStatistics) rawStat;
            intStat.setMinMax(Integer.parseInt(min.toString()), Integer.parseInt(max.toString()));
            break;
        }
        case BIGINT:
        case TIMESTAMP: {
            final LongStatistics longStat = (LongStatistics) rawStat;
            longStat.setMinMax(Long.parseLong(min.toString()), Long.parseLong(max.toString()));
            break;
        }
        case FLOAT4: {
            final FloatStatistics floatStat = (FloatStatistics) rawStat;
            floatStat.setMinMax(Float.parseFloat(min.toString()), Float.parseFloat(max.toString()));
            break;
        }
        case FLOAT8: {
            final DoubleStatistics doubleStat = (DoubleStatistics) rawStat;
            doubleStat.setMinMax(Double.parseDouble(min.toString()), Double.parseDouble(max.toString()));
            break;
        }
        case DATE: {
            // DATE bounds are converted and kept in a dedicated LongStatistics.
            final LongStatistics dateStat = new LongStatistics();
            result = dateStat;
            dateStat.setNumNulls(rawStat.getNumNulls());
            final long lowerMillis = convertToDrillDateValue(Integer.parseInt(min.toString()));
            final long upperMillis = convertToDrillDateValue(Integer.parseInt(max.toString()));
            dateStat.setMinMax(lowerMillis, upperMillis);
            break;
        }
        default:
            // Other types: min/max are left unset.
    }
    return new ColumnStatistics(result, majorType);
}
Also used : LongStatistics(org.apache.parquet.column.statistics.LongStatistics) FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) BinaryStatistics(org.apache.parquet.column.statistics.BinaryStatistics) FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) Statistics(org.apache.parquet.column.statistics.Statistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) LongStatistics(org.apache.parquet.column.statistics.LongStatistics) TypeProtos(org.apache.drill.common.types.TypeProtos)

Example 3 with DoubleStatistics

use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by axbaretto.

the class RangeExprEvaluator method evalCastFunc.

/**
 * Applies a cast function to the min and max of the given statistics and
 * returns statistics for the destination type.
 *
 * <p>The source type is taken from the first argument of {@code holderExpr} and
 * the destination type from the expression itself. The cast is evaluated twice
 * through {@code InterpreterEvaluator.evaluateFunction}, once for the min value
 * and once for the max value.
 *
 * @param holderExpr cast function expression to evaluate
 * @param input      statistics of the expression's argument
 * @return {@code input} itself when source and destination types are equal;
 *         {@code null} when {@code CAST_FUNC} does not list the type pair or when
 *         the source or destination type is not handled here; otherwise new
 *         statistics built from the cast min and max
 * @throws DrillRuntimeException if any exception is raised while evaluating the
 *         function; the original exception is attached as the cause
 */
private Statistics evalCastFunc(FunctionHolderExpression holderExpr, Statistics input) {
    try {
        DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder) holderExpr.getHolder();
        DrillSimpleFunc interpreter = funcHolder.createInterpreter();
        final ValueHolder minHolder, maxHolder;
        TypeProtos.MinorType srcType = holderExpr.args.get(0).getMajorType().getMinorType();
        TypeProtos.MinorType destType = holderExpr.getMajorType().getMinorType();
        if (srcType.equals(destType)) {
            // same type cast ==> NoOp.
            return input;
        } else if (!CAST_FUNC.containsKey(srcType) || !CAST_FUNC.get(srcType).contains(destType)) {
            // cast func between srcType and destType is NOT allowed.
            return null;
        }
        // Wrap the source min/max in value holders matching the source type.
        switch(srcType) {
            case INT:
                minHolder = ValueHolderHelper.getIntHolder(((IntStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getIntHolder(((IntStatistics) input).getMax());
                break;
            case BIGINT:
                minHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics) input).getMax());
                break;
            case FLOAT4:
                minHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics) input).getMax());
                break;
            case FLOAT8:
                minHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics) input).getMax());
                break;
            case DATE:
                minHolder = ValueHolderHelper.getDateHolder(((LongStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getDateHolder(((LongStatistics) input).getMax());
                break;
            default:
                return null;
        }
        final ValueHolder[] args1 = { minHolder };
        final ValueHolder[] args2 = { maxHolder };
        final ValueHolder minFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
        final ValueHolder maxFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());
        // Unwrap the cast results according to the destination type.
        switch(destType) {
            // TODO : need handle # of nulls.
            case INT:
                return getStatistics(((IntHolder) minFuncHolder).value, ((IntHolder) maxFuncHolder).value);
            case BIGINT:
                return getStatistics(((BigIntHolder) minFuncHolder).value, ((BigIntHolder) maxFuncHolder).value);
            case FLOAT4:
                return getStatistics(((Float4Holder) minFuncHolder).value, ((Float4Holder) maxFuncHolder).value);
            case FLOAT8:
                return getStatistics(((Float8Holder) minFuncHolder).value, ((Float8Holder) maxFuncHolder).value);
            case TIMESTAMP:
                return getStatistics(((TimeStampHolder) minFuncHolder).value, ((TimeStampHolder) maxFuncHolder).value);
            default:
                return null;
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so the root failure is not lost.
        throw new DrillRuntimeException("Error in evaluating function of " + holderExpr.getName(), e);
    }
}
Also used : LongStatistics(org.apache.parquet.column.statistics.LongStatistics) FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) ValueHolder(org.apache.drill.exec.expr.holders.ValueHolder) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) TypeProtos(org.apache.drill.common.types.TypeProtos) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) DrillSimpleFuncHolder(org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder) DrillSimpleFunc(org.apache.drill.exec.expr.DrillSimpleFunc)

Example 4 with DoubleStatistics

use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by axbaretto.

the class ParquetMetaStatCollector method getStat.

/**
 * Creates column statistics for a column described by the given primitiveType,
 * originalType, scale and precision, populated with the supplied numNull, min
 * and max values.
 *
 * <p>Min and max are applied only when both are non-null and the resolved minor
 * type is one of the handled cases. DATE bounds are converted with
 * {@code convertToDrillDateValue} and stored in a separate {@link LongStatistics}.
 *
 * @param min             min value for statistics
 * @param max             max value for statistics
 * @param numNull         num_nulls for statistics
 * @param primitiveType   type that determines statistics class
 * @param originalType    type that determines statistics class
 * @param scale           scale value (used for DECIMAL type)
 * @param precision       precision value (used for DECIMAL type)
 * @return column statistics
 */
private ColumnStatistics getStat(Object min, Object max, Long numNull, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, int scale, int precision) {
    final Statistics baseStat = Statistics.getStatsBasedOnType(primitiveType);
    final TypeProtos.MajorType majorType = ParquetGroupScan.getType(primitiveType, originalType, scale, precision);

    if (numNull != null) {
        baseStat.setNumNulls(numNull);
    }

    // Both bounds are required before any min/max is recorded.
    if (min == null || max == null) {
        return new ColumnStatistics(baseStat, majorType);
    }

    Statistics effectiveStat = baseStat;
    switch (majorType.getMinorType()) {
        case INT:
        case TIME: {
            final IntStatistics ints = (IntStatistics) baseStat;
            ints.setMinMax(Integer.parseInt(min.toString()), Integer.parseInt(max.toString()));
            break;
        }
        case BIGINT:
        case TIMESTAMP: {
            final LongStatistics longs = (LongStatistics) baseStat;
            longs.setMinMax(Long.parseLong(min.toString()), Long.parseLong(max.toString()));
            break;
        }
        case FLOAT4: {
            final FloatStatistics floats = (FloatStatistics) baseStat;
            floats.setMinMax(Float.parseFloat(min.toString()), Float.parseFloat(max.toString()));
            break;
        }
        case FLOAT8: {
            final DoubleStatistics doubles = (DoubleStatistics) baseStat;
            doubles.setMinMax(Double.parseDouble(min.toString()), Double.parseDouble(max.toString()));
            break;
        }
        case DATE: {
            // DATE uses its own LongStatistics carrying the converted bounds.
            final LongStatistics dates = new LongStatistics();
            effectiveStat = dates;
            dates.setNumNulls(baseStat.getNumNulls());
            final long convertedMin = convertToDrillDateValue(Integer.parseInt(min.toString()));
            final long convertedMax = convertToDrillDateValue(Integer.parseInt(max.toString()));
            dates.setMinMax(convertedMin, convertedMax);
            break;
        }
        case BIT: {
            final BooleanStatistics booleans = (BooleanStatistics) baseStat;
            booleans.setMinMax(Boolean.parseBoolean(min.toString()), Boolean.parseBoolean(max.toString()));
            break;
        }
        default:
            // Remaining types: min/max stay unset.
    }
    return new ColumnStatistics(effectiveStat, majorType);
}
Also used : LongStatistics(org.apache.parquet.column.statistics.LongStatistics) FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) BooleanStatistics(org.apache.parquet.column.statistics.BooleanStatistics) BinaryStatistics(org.apache.parquet.column.statistics.BinaryStatistics) FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) Statistics(org.apache.parquet.column.statistics.Statistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) LongStatistics(org.apache.parquet.column.statistics.LongStatistics) BooleanStatistics(org.apache.parquet.column.statistics.BooleanStatistics) TypeProtos(org.apache.drill.common.types.TypeProtos)

Example 5 with DoubleStatistics

use of org.apache.parquet.column.statistics.DoubleStatistics in project parquet-mr by apache.

the class TestStatisticsFilter method testClearExceptionForNots.

/**
 * Checks that {@code canDrop} throws an {@link IllegalArgumentException} with
 * the expected message when the predicate still contains a {@code not}.
 */
@Test
public void testClearExceptionForNots() {
    final ColumnChunkMetaData doubleMeta = getDoubleColumnMeta(new DoubleStatistics(), 0L);
    final ColumnChunkMetaData intMeta = getIntColumnMeta(new IntStatistics(), 0L);
    final List<ColumnChunkMetaData> metas = Arrays.asList(doubleMeta, intMeta);

    final FilterPredicate predicateWithNot = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));

    final String expectedMessage = "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?" + " not(eq(double.column, 12.0))";
    try {
        canDrop(predicateWithNot, metas);
        fail("This should throw");
    } catch (IllegalArgumentException thrown) {
        assertEquals(expectedMessage, thrown.getMessage());
    }
}
Also used : IntStatistics(org.apache.parquet.column.statistics.IntStatistics) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Aggregations

DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics) 9, IntStatistics (org.apache.parquet.column.statistics.IntStatistics) 5, TypeProtos (org.apache.drill.common.types.TypeProtos) 4, FloatStatistics (org.apache.parquet.column.statistics.FloatStatistics) 4, LongStatistics (org.apache.parquet.column.statistics.LongStatistics) 4, DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException) 2, DrillSimpleFunc (org.apache.drill.exec.expr.DrillSimpleFunc) 2, DrillSimpleFuncHolder (org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder) 2, ValueHolder (org.apache.drill.exec.expr.holders.ValueHolder) 2, BinaryStatistics (org.apache.parquet.column.statistics.BinaryStatistics) 2, Statistics (org.apache.parquet.column.statistics.Statistics) 2, BooleanStatistics (org.apache.parquet.column.statistics.BooleanStatistics) 1, FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate) 1, ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) 1, Test (org.junit.Test) 1