use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by apache.
the class RangeExprEvaluator method getStatistics.
/**
 * Creates a new {@link DoubleStatistics} and records the given pair of
 * values on it through {@code setMinMax}.
 *
 * @param min value passed as the minimum
 * @param max value passed as the maximum
 * @return the newly created statistics instance
 */
private DoubleStatistics getStatistics(double min, double max) {
  final DoubleStatistics result = new DoubleStatistics();
  result.setMinMax(min, max);
  return result;
}
use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by apache.
the class ParquetMetaStatCollector method getStat.
/**
 * Builds column statistics from the given min, max and null-count values.
 *
 * <p>The statistics object is chosen from {@code primitiveType}; the Drill
 * major type is derived from {@code primitiveType} and {@code originalType}
 * and switched to REPEATED mode when {@code repetitionLevel} is positive.
 * Min/max are only applied when both are non-null and the minor type is one
 * of the handled cases; DATE values are converted and stored in a separate
 * {@link LongStatistics}.
 *
 * @param min min value for statistics, may be null
 * @param max max value for statistics, may be null
 * @param numNull number of nulls, may be null
 * @param primitiveType type that determines the statistics class
 * @param originalType original type used to resolve the major type
 * @param repetitionLevel repetition level, may be null
 * @return column statistics paired with the resolved major type
 */
private ColumnStatistics getStat(Object min, Object max, Long numNull, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, Integer repetitionLevel) {
  final Statistics stat = Statistics.getStatsBasedOnType(primitiveType);

  TypeProtos.MajorType type = ParquetGroupScan.getType(primitiveType, originalType);
  // Change to repeated if repetitionLevel > 0
  final boolean repeated = repetitionLevel != null && repetitionLevel > 0;
  if (repeated) {
    type = TypeProtos.MajorType.newBuilder()
        .setMinorType(type.getMinorType())
        .setMode(TypeProtos.DataMode.REPEATED)
        .build();
  }

  if (numNull != null) {
    stat.setNumNulls(numNull);
  }

  // Without both bounds there is nothing more to record.
  if (min == null || max == null) {
    return new ColumnStatistics(stat, type);
  }

  Statistics result = stat;
  switch (type.getMinorType()) {
    case INT:
    case TIME: {
      final IntStatistics intStat = (IntStatistics) stat;
      final int lo = Integer.parseInt(min.toString());
      final int hi = Integer.parseInt(max.toString());
      intStat.setMinMax(lo, hi);
      break;
    }
    case BIGINT:
    case TIMESTAMP: {
      final LongStatistics longStat = (LongStatistics) stat;
      final long lo = Long.parseLong(min.toString());
      final long hi = Long.parseLong(max.toString());
      longStat.setMinMax(lo, hi);
      break;
    }
    case FLOAT4: {
      final FloatStatistics floatStat = (FloatStatistics) stat;
      final float lo = Float.parseFloat(min.toString());
      final float hi = Float.parseFloat(max.toString());
      floatStat.setMinMax(lo, hi);
      break;
    }
    case FLOAT8: {
      final DoubleStatistics doubleStat = (DoubleStatistics) stat;
      final double lo = Double.parseDouble(min.toString());
      final double hi = Double.parseDouble(max.toString());
      doubleStat.setMinMax(lo, hi);
      break;
    }
    case DATE: {
      // DATE bounds go into a fresh LongStatistics that carries over the null count.
      final LongStatistics dateStat = new LongStatistics();
      dateStat.setNumNulls(stat.getNumNulls());
      final long lo = convertToDrillDateValue(Integer.parseInt(min.toString()));
      final long hi = convertToDrillDateValue(Integer.parseInt(max.toString()));
      dateStat.setMinMax(lo, hi);
      result = dateStat;
      break;
    }
    default:
      // Other minor types keep the statistics without min/max.
      break;
  }
  return new ColumnStatistics(result, type);
}
use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by axbaretto.
the class RangeExprEvaluator method evalCastFunc.
/**
 * Derives statistics for a cast expression by applying the cast function to
 * the min and max of the input statistics.
 *
 * <p>Returns {@code input} unchanged when source and destination minor types
 * are equal. Returns {@code null} when {@code CAST_FUNC} does not list the
 * source/destination pair, or when either type is not handled by the
 * switches below.
 *
 * @param holderExpr cast function expression; its first argument supplies the source type
 * @param input statistics of the expression being cast
 * @return statistics for the cast result, {@code input} for a same-type cast, or {@code null}
 * @throws DrillRuntimeException if anything fails while evaluating the function;
 *         the original exception is attached as the cause
 */
private Statistics evalCastFunc(FunctionHolderExpression holderExpr, Statistics input) {
  try {
    DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder) holderExpr.getHolder();
    DrillSimpleFunc interpreter = funcHolder.createInterpreter();
    final ValueHolder minHolder, maxHolder;
    TypeProtos.MinorType srcType = holderExpr.args.get(0).getMajorType().getMinorType();
    TypeProtos.MinorType destType = holderExpr.getMajorType().getMinorType();
    if (srcType.equals(destType)) {
      // same type cast ==> NoOp.
      return input;
    } else if (!CAST_FUNC.containsKey(srcType) || !CAST_FUNC.get(srcType).contains(destType)) {
      // cast func between srcType and destType is NOT allowed.
      return null;
    }
    // Wrap the input min/max in value holders matching the source type.
    switch (srcType) {
      case INT:
        minHolder = ValueHolderHelper.getIntHolder(((IntStatistics) input).getMin());
        maxHolder = ValueHolderHelper.getIntHolder(((IntStatistics) input).getMax());
        break;
      case BIGINT:
        minHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics) input).getMin());
        maxHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics) input).getMax());
        break;
      case FLOAT4:
        minHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics) input).getMin());
        maxHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics) input).getMax());
        break;
      case FLOAT8:
        minHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics) input).getMin());
        maxHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics) input).getMax());
        break;
      case DATE:
        minHolder = ValueHolderHelper.getDateHolder(((LongStatistics) input).getMin());
        maxHolder = ValueHolderHelper.getDateHolder(((LongStatistics) input).getMax());
        break;
      default:
        return null;
    }
    // Run the cast once on the min and once on the max.
    final ValueHolder[] args1 = { minHolder };
    final ValueHolder[] args2 = { maxHolder };
    final ValueHolder minFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
    final ValueHolder maxFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());
    switch (destType) {
      // TODO : need handle # of nulls.
      case INT:
        return getStatistics(((IntHolder) minFuncHolder).value, ((IntHolder) maxFuncHolder).value);
      case BIGINT:
        return getStatistics(((BigIntHolder) minFuncHolder).value, ((BigIntHolder) maxFuncHolder).value);
      case FLOAT4:
        return getStatistics(((Float4Holder) minFuncHolder).value, ((Float4Holder) maxFuncHolder).value);
      case FLOAT8:
        return getStatistics(((Float8Holder) minFuncHolder).value, ((Float8Holder) maxFuncHolder).value);
      case TIMESTAMP:
        return getStatistics(((TimeStampHolder) minFuncHolder).value, ((TimeStampHolder) maxFuncHolder).value);
      default:
        return null;
    }
  } catch (Exception e) {
    // Keep the original exception as the cause so the failure stays diagnosable.
    throw new DrillRuntimeException("Error in evaluating function of " + holderExpr.getName(), e);
  }
}
use of org.apache.parquet.column.statistics.DoubleStatistics in project drill by axbaretto.
the class ParquetMetaStatCollector method getStat.
/**
 * Creates column statistics for the given primitive/original type, applying
 * the supplied null count and, when both are present, the min and max values.
 *
 * @param min min value for statistics
 * @param max max value for statistics
 * @param numNull num_nulls for statistics
 * @param primitiveType type that determines statistics class
 * @param originalType type that determines statistics class
 * @param scale scale value (used for DECIMAL type)
 * @param precision precision value (used for DECIMAL type)
 * @return column statistics
 */
private ColumnStatistics getStat(Object min, Object max, Long numNull, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, int scale, int precision) {
  final Statistics stat = Statistics.getStatsBasedOnType(primitiveType);
  final TypeProtos.MajorType type = ParquetGroupScan.getType(primitiveType, originalType, scale, precision);

  if (numNull != null) {
    stat.setNumNulls(numNull.longValue());
  }

  // Min/max are applied only when both bounds are available.
  if (min == null || max == null) {
    return new ColumnStatistics(stat, type);
  }

  Statistics result = stat;
  switch (type.getMinorType()) {
    case INT:
    case TIME: {
      final IntStatistics intStat = (IntStatistics) stat;
      final int lo = Integer.parseInt(min.toString());
      final int hi = Integer.parseInt(max.toString());
      intStat.setMinMax(lo, hi);
      break;
    }
    case BIGINT:
    case TIMESTAMP: {
      final LongStatistics longStat = (LongStatistics) stat;
      final long lo = Long.parseLong(min.toString());
      final long hi = Long.parseLong(max.toString());
      longStat.setMinMax(lo, hi);
      break;
    }
    case FLOAT4: {
      final FloatStatistics floatStat = (FloatStatistics) stat;
      final float lo = Float.parseFloat(min.toString());
      final float hi = Float.parseFloat(max.toString());
      floatStat.setMinMax(lo, hi);
      break;
    }
    case FLOAT8: {
      final DoubleStatistics doubleStat = (DoubleStatistics) stat;
      final double lo = Double.parseDouble(min.toString());
      final double hi = Double.parseDouble(max.toString());
      doubleStat.setMinMax(lo, hi);
      break;
    }
    case DATE: {
      // DATE bounds are converted and kept in a new LongStatistics with the same null count.
      final LongStatistics dateStat = new LongStatistics();
      dateStat.setNumNulls(stat.getNumNulls());
      final long lo = convertToDrillDateValue(Integer.parseInt(min.toString()));
      final long hi = convertToDrillDateValue(Integer.parseInt(max.toString()));
      dateStat.setMinMax(lo, hi);
      result = dateStat;
      break;
    }
    case BIT: {
      final BooleanStatistics boolStat = (BooleanStatistics) stat;
      final boolean lo = Boolean.parseBoolean(min.toString());
      final boolean hi = Boolean.parseBoolean(max.toString());
      boolStat.setMinMax(lo, hi);
      break;
    }
    default:
      // Remaining minor types get statistics without min/max.
      break;
  }
  return new ColumnStatistics(result, type);
}
use of org.apache.parquet.column.statistics.DoubleStatistics in project parquet-mr by apache.
the class TestStatisticsFilter method testClearExceptionForNots.
/**
 * Checks that {@code canDrop} rejects a predicate still containing a
 * {@code not} with an {@link IllegalArgumentException} whose message names
 * the offending sub-predicate.
 */
@Test
public void testClearExceptionForNots() {
  final ColumnChunkMetaData doubleMeta = getDoubleColumnMeta(new DoubleStatistics(), 0L);
  final ColumnChunkMetaData intMeta = getIntColumnMeta(new IntStatistics(), 0L);
  final List<ColumnChunkMetaData> metas = Arrays.asList(doubleMeta, intMeta);

  final FilterPredicate predicateWithNot = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));

  final String expectedMessage =
      "This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?"
          + " not(eq(double.column, 12.0))";

  try {
    canDrop(predicateWithNot, metas);
    fail("This should throw");
  } catch (IllegalArgumentException thrown) {
    assertEquals(expectedMessage, thrown.getMessage());
  }
}
Aggregations