Use of org.apache.parquet.column.statistics.Statistics in the Apache parquet-mr project.
Class StatisticsFilter, method visit.
/**
 * Evaluates a user-defined predicate against the statistics of the column chunk it targets.
 *
 * <p>When the chunk is missing or reported as all-null, the predicate is probed with a
 * {@code null} value. When the statistics are empty or carry no non-null value,
 * {@code BLOCK_MIGHT_MATCH} is returned. Otherwise the predicate's own
 * {@code canDrop} / {@code inverseCanDrop} decides, based on the chunk's min and max.
 *
 * @param ud the user-defined filter node supplying the column and the predicate
 * @param inverted {@code true} to evaluate the inverse of the predicate
 * @return the drop decision for the block
 */
private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean visit(UserDefined<T, U> ud, boolean inverted) {
  Column<T> column = ud.getColumn();
  ColumnChunkMetaData chunkMeta = getColumnChunk(column.getColumnPath());
  U predicate = ud.getUserDefinedPredicate();

  if (chunkMeta == null) {
    // No chunk metadata for this column: run the predicate with a null value
    // to see whether it keeps null or not.
    return inverted ? predicate.keep(null) : !predicate.keep(null);
  }

  Statistics<T> chunkStats = chunkMeta.getStatistics();
  if (chunkStats.isEmpty()) {
    // No statistics available, so no chunk can be dropped.
    return BLOCK_MIGHT_MATCH;
  }

  if (isAllNulls(chunkMeta)) {
    // Chunk is reported as all nulls: run the predicate with a null value
    // to see whether it keeps null or not.
    return inverted ? predicate.keep(null) : !predicate.keep(null);
  }

  if (!chunkStats.hasNonNullValue()) {
    // Statistics carry no min/max values, so no chunk can be dropped.
    return BLOCK_MIGHT_MATCH;
  }

  // Hand the predicate a min/max/comparator view of the chunk statistics.
  org.apache.parquet.filter2.predicate.Statistics<T> minMax =
      new org.apache.parquet.filter2.predicate.Statistics<T>(
          chunkStats.genericGetMin(), chunkStats.genericGetMax(), chunkStats.comparator());
  return inverted ? predicate.inverseCanDrop(minMax) : predicate.canDrop(minMax);
}
Use of org.apache.parquet.column.statistics.Statistics in the Apache parquet-mr project.
Class DumpCommand, method dump.
/**
 * Prints a per-page summary of one column to {@code out}.
 *
 * <p>The header line shows the dotted column path, total value count and maximum
 * repetition/definition levels, followed by dictionary size and encoding when a dictionary
 * page is present. Each data page then gets one line with its encodings, statistics (or
 * {@code none}), uncompressed size and value count.
 *
 * @param out the writer receiving the formatted output
 * @param store the page store from which the column's page reader is obtained
 * @param column the column to dump
 * @throws IOException declared by the signature; propagated from the underlying calls
 */
public static void dump(final PrettyPrintWriter out, PageReadStore store, ColumnDescriptor column) throws IOException {
  PageReader pageReader = store.getPageReader(column);

  long totalValues = pageReader.getTotalValueCount();
  int maxRepetition = column.getMaxRepetitionLevel();
  int maxDefinition = column.getMaxDefinitionLevel();
  out.format("%s TV=%d RL=%d DL=%d",
      Joiner.on('.').skipNulls().join(column.getPath()), totalValues, maxRepetition, maxDefinition);

  DictionaryPage dictionary = pageReader.readDictionaryPage();
  if (dictionary != null) {
    out.format(" DS:%d", dictionary.getDictionarySize());
    out.format(" DE:%s", dictionary.getEncoding());
  }
  out.println();
  out.rule('-');

  long pageIndex = 0;
  DataPage page = pageReader.readPage();
  while (page != null) {
    out.format("page %d:", pageIndex);

    // The visitor prints the version-specific part of the page line.
    page.accept(new Visitor<Void>() {
      @Override
      public Void visit(DataPageV1 pageV1) {
        out.format(" DLE:%s", pageV1.getDlEncoding());
        out.format(" RLE:%s", pageV1.getRlEncoding());
        out.format(" VLE:%s", pageV1.getValueEncoding());
        Statistics<?> pageStats = pageV1.getStatistics();
        if (pageStats == null) {
          out.format(" ST:[none]");
        } else {
          out.format(" ST:[%s]", pageStats);
        }
        return null;
      }

      @Override
      public Void visit(DataPageV2 pageV2) {
        // V2 pages print fixed RLE labels for the level encodings.
        out.format(" DLE:RLE");
        out.format(" RLE:RLE");
        out.format(" VLE:%s", pageV2.getDataEncoding());
        Statistics<?> pageStats = pageV2.getStatistics();
        if (pageStats == null) {
          out.format(" ST:[none]");
        } else {
          out.format(" ST:[%s]", pageStats);
        }
        return null;
      }
    });

    out.format(" SZ:%d", page.getUncompressedSize());
    out.format(" VC:%d", page.getValueCount());
    out.println();

    page = pageReader.readPage();
    pageIndex++;
  }
}
Use of org.apache.parquet.column.statistics.Statistics in the Apache Drill project.
Class RangeExprEvaluator, method visitFunctionHolderExpression.
/**
 * Derives statistics for a function call expression.
 *
 * <p>Only cast functions held by a {@code DrillSimpleFuncHolder} are handled: the first
 * argument is visited and, if it yields non-empty statistics, those are passed to
 * {@code evalCastFunc}. Every other case returns {@code null}.
 *
 * @param holderExpr the function call expression to evaluate
 * @param value unused visitor payload
 * @return the statistics produced by {@code evalCastFunc}, or {@code null} when none can be derived
 */
@Override
public Statistics visitFunctionHolderExpression(FunctionHolderExpression holderExpr, Void value) throws RuntimeException {
  FuncHolder holder = holderExpr.getHolder();
  if (!(holder instanceof DrillSimpleFuncHolder)) {
    // Only Drill functions are allowed.
    return null;
  }

  final String registeredName = ((DrillSimpleFuncHolder) holder).getRegisteredNames()[0];
  if (!CastFunctions.isCastFunction(registeredName)) {
    return null;
  }

  Statistics argumentStats = holderExpr.args.get(0).accept(this, null);
  if (argumentStats == null || argumentStats.isEmpty()) {
    return null;
  }
  return evalCastFunc(holderExpr, argumentStats);
}
Use of org.apache.parquet.column.statistics.Statistics in the Apache parquet-mr project.
Class ParquetMetadataCommand, method printColumnChunk.
/**
 * Logs a one-line summary of a column chunk: name, type, codec, encodings, value count,
 * size per value, null count (blank when unavailable) and min/max.
 *
 * @param console the logger that receives the formatted line
 * @param width the width to which the column name is left-padded
 * @param column the column chunk metadata to describe
 * @param schema the schema used to resolve the column's primitive type and descriptor
 */
private void printColumnChunk(Logger console, int width, ColumnChunkMetaData column, MessageType schema) {
  String[] columnPath = column.getPath().toArray();
  PrimitiveType primitiveType = primitive(schema, columnPath);
  Preconditions.checkNotNull(primitiveType);
  ColumnDescriptor descriptor = schema.getColumnDescription(columnPath);

  long totalSize = column.getTotalSize();
  long valueCount = column.getValueCount();
  float bytesPerValue = ((float) totalSize) / valueCount;
  CompressionCodecName codec = column.getCodec();

  // Prefer the detailed encoding stats; fall back to the plain encoding set when absent.
  Set<Encoding> encodings = column.getEncodings();
  EncodingStats encodingStats = column.getEncodingStats();
  String encodingSummary;
  if (encodingStats == null) {
    encodingSummary = encodingsAsString(encodings, descriptor);
  } else {
    encodingSummary = encodingStatsAsString(encodingStats);
  }

  Statistics stats = column.getStatistics();
  String columnName = column.getPath().toDotString();
  PrimitiveType.PrimitiveTypeName typeName = primitiveType.getPrimitiveTypeName();

  // The null count is shown only when statistics exist and the count has been set.
  String nullCount = "";
  if (stats != null && stats.isNumNullsSet()) {
    nullCount = String.valueOf(stats.getNumNulls());
  }
  String minMax = minMaxAsString(stats, primitiveType.getOriginalType());

  String line;
  if (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    // Fixed-length columns print their declared length in place of the type name.
    line = String.format("%-" + width + "s FIXED[%d] %s %-7s %-9d %-8s %-7s %s",
        columnName, primitiveType.getTypeLength(), shortCodec(codec), encodingSummary,
        valueCount, humanReadable(bytesPerValue), nullCount, minMax);
  } else {
    line = String.format("%-" + width + "s %-9s %s %-7s %-9d %-10s %-7s %s",
        columnName, typeName, shortCodec(codec), encodingSummary,
        valueCount, humanReadable(bytesPerValue), nullCount, minMax);
  }
  console.info(line);
}
Use of org.apache.parquet.column.statistics.Statistics in the Apache parquet-mr project.
Class TestParquetMetadataConverter, method testIgnoreStatsWithSignedSortOrder.
/**
 * Converts binary statistics for a UTF8 column through the V1 Parquet statistics form and
 * back, and expects the result to be reported as empty.
 */
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter();

  // Build statistics from two values ("A" and "z") interleaved with three nulls.
  BinaryStatistics binaryStats = new BinaryStatistics();
  binaryStats.incrementNumNulls();
  binaryStats.updateStats(Binary.fromString("A"));
  binaryStats.incrementNumNulls();
  binaryStats.updateStats(Binary.fromString("z"));
  binaryStats.incrementNumNulls();

  PrimitiveType utf8Column =
      Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");

  // NOTE(review): presumably the converter discards these stats because of the sort order
  // used for UTF8 binary values - confirm against ParquetMetadataConverter.
  Statistics roundTripped = metadataConverter.fromParquetStatistics(
      Version.FULL_VERSION, StatsHelper.V1.toParquetStatistics(binaryStats), utf8Column);

  Assert.assertTrue("Stats should be empty: " + roundTripped, roundTripped.isEmpty());
}
Aggregations