use of org.apache.parquet.column.statistics.IntStatistics in project drill by apache.
the class ParquetMetaStatCollector method getStat.
/**
 * Builds a {@link ColumnStatistics} for one column from raw metadata values.
 *
 * <p>The statistics object is created from the Parquet primitive type, and the Drill major type
 * is derived from the primitive/original type pair. When {@code repetitionLevel} is positive the
 * major type is replaced by one carrying the same minor type in REPEATED mode. Min and max are
 * only applied when both are non-null; they are read through their {@code toString()} form and
 * parsed according to the Drill minor type. DATE values are passed through
 * {@code convertToDrillDateValue} and stored in a fresh {@link LongStatistics}.
 *
 * @param min             minimum value, or {@code null} when unknown
 * @param max             maximum value, or {@code null} when unknown
 * @param numNull         null count, or {@code null} when unknown
 * @param primitiveType   Parquet primitive type of the column
 * @param originalType    Parquet original type of the column
 * @param repetitionLevel repetition level of the column, may be {@code null}
 * @return the statistics paired with the resolved Drill major type
 */
private ColumnStatistics getStat(Object min, Object max, Long numNull, PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType, Integer repetitionLevel) {
    final Statistics rawStat = Statistics.getStatsBasedOnType(primitiveType);
    final TypeProtos.MajorType baseType = ParquetGroupScan.getType(primitiveType, originalType);

    // A positive repetition level switches the column to REPEATED mode.
    final boolean repeated = repetitionLevel != null && repetitionLevel > 0;
    final TypeProtos.MajorType type = repeated
        ? TypeProtos.MajorType.newBuilder()
            .setMinorType(baseType.getMinorType())
            .setMode(TypeProtos.DataMode.REPEATED)
            .build()
        : baseType;

    if (numNull != null) {
        rawStat.setNumNulls(numNull);
    }

    // Without both bounds there is nothing more to record.
    if (min == null || max == null) {
        return new ColumnStatistics(rawStat, type);
    }

    Statistics result = rawStat;
    switch (type.getMinorType()) {
        case INT:
        case TIME: {
            final IntStatistics intStat = (IntStatistics) rawStat;
            intStat.setMinMax(Integer.parseInt(min.toString()), Integer.parseInt(max.toString()));
            break;
        }
        case BIGINT:
        case TIMESTAMP: {
            final LongStatistics longStat = (LongStatistics) rawStat;
            longStat.setMinMax(Long.parseLong(min.toString()), Long.parseLong(max.toString()));
            break;
        }
        case FLOAT4: {
            final FloatStatistics floatStat = (FloatStatistics) rawStat;
            floatStat.setMinMax(Float.parseFloat(min.toString()), Float.parseFloat(max.toString()));
            break;
        }
        case FLOAT8: {
            final DoubleStatistics doubleStat = (DoubleStatistics) rawStat;
            doubleStat.setMinMax(Double.parseDouble(min.toString()), Double.parseDouble(max.toString()));
            break;
        }
        case DATE: {
            // DATE bounds are converted and reported through a LongStatistics instead of rawStat.
            final LongStatistics dateStat = new LongStatistics();
            dateStat.setNumNulls(rawStat.getNumNulls());
            final long lowerBound = convertToDrillDateValue(Integer.parseInt(min.toString()));
            final long upperBound = convertToDrillDateValue(Integer.parseInt(max.toString()));
            dateStat.setMinMax(lowerBound, upperBound);
            result = dateStat;
            break;
        }
        default:
            // Other minor types keep the statistics without min/max.
            break;
    }
    return new ColumnStatistics(result, type);
}
use of org.apache.parquet.column.statistics.IntStatistics in project drill by apache.
the class RangeExprEvaluator method visitUnknown.
/**
 * Resolves statistics for an expression not handled by a more specific visit method.
 *
 * <p>Only {@link TypedFieldExpr} is handled: its path is looked up in {@code columnStatMap}.
 * When no entry exists, the field must be typed as {@code Types.OPTIONAL_INT} and is reported
 * as an {@link IntStatistics} whose null count equals {@code rowCount}.
 *
 * @param e     the expression being visited
 * @param value unused visitor argument
 * @return the statistics for the field, or {@code null} for any other expression kind
 */
@Override
public Statistics visitUnknown(LogicalExpression e, Void value) throws RuntimeException {
    if (!(e instanceof TypedFieldExpr)) {
        return null;
    }

    final TypedFieldExpr typedField = (TypedFieldExpr) e;
    final ColumnStatistics known = columnStatMap.get(typedField.getPath());
    if (known != null) {
        return known.getStatistics();
    }

    // Field does not exist: every value is treated as null.
    Preconditions.checkArgument(typedField.getMajorType().equals(Types.OPTIONAL_INT));
    final IntStatistics allNulls = new IntStatistics();
    allNulls.setNumNulls(rowCount);
    return allNulls;
}
use of org.apache.parquet.column.statistics.IntStatistics in project drill by apache.
the class RangeExprEvaluator method getStatistics.
/**
 * Wraps an int range in a new {@link IntStatistics}.
 *
 * @param min lower bound to record
 * @param max upper bound to record
 * @return a fresh statistics object with the given min and max set
 */
private IntStatistics getStatistics(int min, int max) {
    final IntStatistics range = new IntStatistics();
    range.setMinMax(min, max);
    return range;
}
use of org.apache.parquet.column.statistics.IntStatistics in project drill by apache.
the class RangeExprEvaluator method evalCastFunc.
/**
 * Applies a cast function to the min and max of the input statistics and returns statistics
 * of the destination type.
 *
 * <p>The source type is taken from the first argument of {@code holderExpr} and the destination
 * type from the expression itself. A same-type cast returns {@code input} unchanged. The cast
 * must be listed in {@code CAST_FUNC}, and both source and destination must be one of INT,
 * BIGINT, FLOAT4 or FLOAT8; otherwise {@code null} is returned. The function is evaluated once
 * for the minimum and once for the maximum through the interpreter.
 *
 * @param holderExpr the cast function expression
 * @param input      statistics of the cast's argument
 * @return statistics holding the cast min/max, {@code input} for a no-op cast, or {@code null}
 *         when the cast is not supported
 * @throws DrillRuntimeException if anything fails while evaluating the function; the original
 *         exception is attached as the cause
 */
private Statistics evalCastFunc(FunctionHolderExpression holderExpr, Statistics input) {
    try {
        DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder) holderExpr.getHolder();
        DrillSimpleFunc interpreter = funcHolder.createInterpreter();
        final ValueHolder minHolder, maxHolder;
        TypeProtos.MinorType srcType = holderExpr.args.get(0).getMajorType().getMinorType();
        TypeProtos.MinorType destType = holderExpr.getMajorType().getMinorType();
        if (srcType.equals(destType)) {
            // same type cast ==> NoOp.
            return input;
        } else if (!CAST_FUNC.containsKey(srcType) || !CAST_FUNC.get(srcType).contains(destType)) {
            // cast func between srcType and destType is NOT allowed.
            return null;
        }

        // Wrap the source min/max in value holders matching the source type.
        switch (srcType) {
            case INT:
                minHolder = ValueHolderHelper.getIntHolder(((IntStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getIntHolder(((IntStatistics) input).getMax());
                break;
            case BIGINT:
                minHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getBigIntHolder(((LongStatistics) input).getMax());
                break;
            case FLOAT4:
                minHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getFloat4Holder(((FloatStatistics) input).getMax());
                break;
            case FLOAT8:
                minHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics) input).getMin());
                maxHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics) input).getMax());
                break;
            default:
                return null;
        }

        // Evaluate the cast separately on the minimum and on the maximum.
        final ValueHolder[] args1 = { minHolder };
        final ValueHolder[] args2 = { maxHolder };
        final ValueHolder minFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
        final ValueHolder maxFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());

        switch (destType) {
            //TODO : need handle # of nulls.
            case INT:
                return getStatistics(((IntHolder) minFuncHolder).value, ((IntHolder) maxFuncHolder).value);
            case BIGINT:
                return getStatistics(((BigIntHolder) minFuncHolder).value, ((BigIntHolder) maxFuncHolder).value);
            case FLOAT4:
                return getStatistics(((Float4Holder) minFuncHolder).value, ((Float4Holder) maxFuncHolder).value);
            case FLOAT8:
                return getStatistics(((Float8Holder) minFuncHolder).value, ((Float8Holder) maxFuncHolder).value);
            default:
                return null;
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so the root failure is not lost.
        throw new DrillRuntimeException("Error in evaluating function of " + holderExpr.getName(), e);
    }
}
use of org.apache.parquet.column.statistics.IntStatistics in project drill by apache.
the class ParquetFooterStatCollector method convertDateStatIfNecessary.
/**
 * Converts int-based date statistics into {@link LongStatistics} holding Drill date values.
 *
 * <p>The input is cast to {@link IntStatistics}. When it is empty, an empty
 * {@link LongStatistics} is returned. Otherwise min and max are each passed through
 * {@code convertToDrillDateValue}, and the null count is copied over.
 *
 * @param stat                 date statistics; must be an {@link IntStatistics}
 * @param containsCorruptDates date corruption status; dates are treated as correct only when it
 *                             equals {@code META_SHOWS_NO_CORRUPTION}
 * @return a new {@link LongStatistics} with the converted bounds, or an empty one
 */
public static Statistics convertDateStatIfNecessary(Statistics stat, ParquetReaderUtility.DateCorruptionStatus containsCorruptDates) {
    final IntStatistics source = (IntStatistics) stat;
    final LongStatistics converted = new LongStatistics();

    // Nothing to convert when the source statistics are empty.
    if (source.isEmpty()) {
        return converted;
    }

    final boolean datesAreCorrect =
        containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_NO_CORRUPTION;
    final long convertedMin = convertToDrillDateValue(source.getMin(), datesAreCorrect);
    final long convertedMax = convertToDrillDateValue(source.getMax(), datesAreCorrect);
    converted.setMinMax(convertedMin, convertedMax);
    converted.setNumNulls(source.getNumNulls());
    return converted;
}
Aggregations