Search in sources :

Example 81 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class AbstractOrcRecordReader method getStatisticsByColumnOrdinal.

/**
 * Maps each top-level field ordinal of the root struct to its file-level column statistics.
 *
 * <p>{@code fileStats} is looked up by the value of
 * {@code rootStructType.getFieldTypeIndex(ordinal)}, not by the field ordinal itself. A field is
 * omitted from the result when {@code fileStats} has no entry at that index or the entry is
 * {@code null}.
 *
 * @param rootStructType the root type; its kind must be {@code STRUCT}
 * @param fileStats the file-level statistics list
 * @return an immutable map from field ordinal to that field's statistics
 * @throws NullPointerException if either argument is {@code null}
 * @throws IllegalArgumentException if {@code rootStructType} is not of kind {@code STRUCT}
 */
private static Map<Integer, ColumnStatistics> getStatisticsByColumnOrdinal(OrcType rootStructType, List<ColumnStatistics> fileStats) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == STRUCT);
    requireNonNull(fileStats, "fileStats is null");
    ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder();
    int statsCount = fileStats.size();
    for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) {
        if (statsCount <= ordinal) {
            // Fewer statistics entries than fields seen so far: nothing to report for this field.
            continue;
        }
        int typeIndex = rootStructType.getFieldTypeIndex(ordinal);
        // The list is read at typeIndex, so typeIndex (not just ordinal) must be in range;
        // a truncated statistics list means "no statistics" rather than an error.
        if (typeIndex >= statsCount) {
            continue;
        }
        ColumnStatistics element = fileStats.get(typeIndex);
        if (element != null) {
            statistics.put(ordinal, element);
        }
    }
    return statistics.build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 82 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class ColumnWriterUtils method buildRowGroupIndexes.

/**
 * Builds one {@link RowGroupIndex} per entry of {@code rowGroupColumnStatistics}.
 *
 * <p>For row group {@code g}, the position list is the concatenation of the position lists of,
 * in this order: the {@code g}-th prepend checkpoint (if prepend checkpoints are supplied), the
 * {@code g}-th present-stream checkpoint (if the present stream reports checkpoints), and the
 * {@code g}-th checkpoint of every data stream in argument order.
 *
 * @param compressed passed through to {@code StreamCheckpoint.toPositionList}
 * @param rowGroupColumnStatistics statistics per row group; its size determines the number of indexes
 * @param prependCheckpoints optional per-row-group checkpoints placed before all others
 * @param presentStream stream whose optional checkpoints follow the prepend checkpoints
 * @param dataStreams streams whose checkpoints are appended last
 * @return an immutable list of row group indexes, one per row group
 */
@SafeVarargs
public static List<RowGroupIndex> buildRowGroupIndexes(boolean compressed, List<ColumnStatistics> rowGroupColumnStatistics, Optional<List<? extends StreamCheckpoint>> prependCheckpoints, PresentOutputStream presentStream, ValueOutputStream<? extends StreamCheckpoint>... dataStreams) {
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();
    List<List<? extends StreamCheckpoint>> checkpointsPerDataStream = Arrays.stream(dataStreams)
            .map(ValueOutputStream::getCheckpoints)
            .collect(Collectors.toList());

    ImmutableList.Builder<RowGroupIndex> result = ImmutableList.builder();
    for (int index = 0; index < rowGroupColumnStatistics.size(); index++) {
        // Lambdas below need an effectively final copy of the loop counter.
        final int rowGroup = index;
        Optional<StreamCheckpoint> prepend = prependCheckpoints.map(list -> list.get(rowGroup));
        Optional<StreamCheckpoint> present = presentCheckpoints.map(list -> list.get(rowGroup));

        ImmutableList.Builder<Integer> positionList = ImmutableList.builder();
        // Prepend and present positions are emitted ahead of any data stream positions.
        if (prepend.isPresent()) {
            positionList.addAll(prepend.get().toPositionList(compressed));
        }
        if (present.isPresent()) {
            positionList.addAll(present.get().toPositionList(compressed));
        }
        // Data stream positions follow, in the order the streams were passed in.
        for (List<? extends StreamCheckpoint> streamCheckpoints : checkpointsPerDataStream) {
            positionList.addAll(streamCheckpoints.get(rowGroup).toPositionList(compressed));
        }

        result.add(new RowGroupIndex(positionList.build(), rowGroupColumnStatistics.get(rowGroup)));
    }
    return result.build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) ImmutableList(com.google.common.collect.ImmutableList) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) List(java.util.List)

Example 83 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class DecimalColumnWriter method finishRowGroup.

/**
 * Finishes the current row group: builds its statistics from the active decimal statistics
 * builder, replaces that builder with a fresh one, records the statistics, and returns them
 * keyed by this writer's column.
 *
 * @return a single-entry immutable map from {@code column} to the row group's statistics
 * @throws IllegalStateException if the writer is already closed
 */
@Override
public Map<Integer, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    ColumnStatistics rowGroupStatistics;
    if (!type.isShort()) {
        // Long decimal: build, then start a new builder for the next row group.
        rowGroupStatistics = longDecimalStatisticsBuilder.buildColumnStatistics();
        longDecimalStatisticsBuilder = new LongDecimalStatisticsBuilder();
    } else {
        // Short decimal: the replacement builder is created with the type's scale.
        rowGroupStatistics = shortDecimalStatisticsBuilder.buildColumnStatistics();
        shortDecimalStatisticsBuilder = new ShortDecimalStatisticsBuilder(type.getScale());
    }

    rowGroupColumnStatistics.add(rowGroupStatistics);
    columnStatisticsRetainedSizeInBytes += rowGroupStatistics.getRetainedSizeInBytes();

    return ImmutableMap.of(column, rowGroupStatistics);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) LongDecimalStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.LongDecimalStatisticsBuilder) ShortDecimalStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.ShortDecimalStatisticsBuilder)

Example 84 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class DictionaryColumnWriter method finishRowGroup.

/**
 * Finishes the current row group and returns its statistics keyed by this writer's column.
 *
 * <p>When {@code directEncoded} is set, the call is forwarded to the direct column writer.
 * Otherwise a {@code DictionaryRowGroup} is built from the accumulated indexes and the new
 * statistics, appended to {@code rowGroups}, and the retained-size counter, row group offset
 * and row group builder are updated for the next row group.
 *
 * @return the statistics map for the finished row group
 * @throws IllegalStateException if the writer is closed or no row group is in progress
 */
@Override
public Map<Integer, ColumnStatistics> finishRowGroup() {
    checkState(!closed);
    checkState(inRowGroup);
    inRowGroup = false;

    if (directEncoded) {
        return getDirectColumnWriter().finishRowGroup();
    }

    ColumnStatistics rowGroupStatistics = createColumnStatistics();
    rowGroupBuilder.addIndexes(getDictionaryEntries() - 1, rowGroupIndexes, rowGroupOffset);
    DictionaryRowGroup finishedRowGroup = rowGroupBuilder.build(rowGroupStatistics);
    rowGroups.add(finishedRowGroup);

    if (!columnWriterOptions.isIgnoreDictionaryRowGroupSizes()) {
        // Account for the row group object itself plus the builder's index storage.
        rowGroupRetainedSizeInBytes += finishedRowGroup.getShallowRetainedSizeInBytes();
        rowGroupRetainedSizeInBytes += rowGroupBuilder.getIndexRetainedBytes();
    } else {
        // Option set: only the statistics' retained size is counted.
        rowGroupRetainedSizeInBytes += finishedRowGroup.getColumnStatistics().getRetainedSizeInBytes();
    }

    // Reset per-row-group state only after the sizes above have been read from the builder.
    rowGroupOffset = 0;
    rowGroupBuilder.reset();
    return ImmutableMap.of(column, rowGroupStatistics);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics)

Example 85 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project presto by prestodb.

the class FloatColumnWriter method finishRowGroup.

/**
 * Finishes the current row group: builds its statistics, starts a fresh statistics builder,
 * records the statistics and their retained size, and returns them keyed by this writer's
 * column.
 *
 * @return a single-entry immutable map from {@code column} to the row group's statistics
 * @throws IllegalStateException if the writer is already closed
 */
@Override
public Map<Integer, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    ColumnStatistics rowGroupStatistics = statisticsBuilder.buildColumnStatistics();
    // The next row group accumulates into a new builder.
    statisticsBuilder = new DoubleStatisticsBuilder();

    rowGroupColumnStatistics.add(rowGroupStatistics);
    columnStatisticsRetainedSizeInBytes += rowGroupStatistics.getRetainedSizeInBytes();

    return ImmutableMap.of(column, rowGroupStatistics);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) DoubleStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.DoubleStatisticsBuilder)

Aggregations

ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics): 99; ImmutableList (com.google.common.collect.ImmutableList): 46; Slice (io.airlift.slice.Slice): 46; List (java.util.List): 46; Stream (com.facebook.presto.orc.metadata.Stream): 38; ArrayList (java.util.ArrayList): 38; RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex): 32; StreamDataOutput (com.facebook.presto.orc.stream.StreamDataOutput): 32; BooleanStreamCheckpoint (com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint): 26; PresentOutputStream (com.facebook.presto.orc.stream.PresentOutputStream): 26; ImmutableMap (com.google.common.collect.ImmutableMap): 23; LongOutputStream (com.facebook.presto.orc.stream.LongOutputStream): 16; OrcType (com.facebook.presto.orc.metadata.OrcType): 15; LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint): 14; Map (java.util.Map): 14; Type (com.facebook.presto.common.type.Type): 13; ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 12; IOException (java.io.IOException): 12; HashMap (java.util.HashMap): 12; Optional (java.util.Optional): 12