Search in sources:

Example 61 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.

The class OrcWriteValidation, method validateColumnStatisticsEquivalent.

/**
 * Checks two parallel lists of column statistics for equivalence, position by position.
 * <p>
 * The lists must have the same length; each pair of entries at the same index is then
 * handed to the single-column overload of this method, labelled with
 * {@code name + " column " + index}.
 *
 * @param orcDataSourceId identifier passed through to any {@link OrcCorruptionException} raised
 * @param name label for the statistics being compared; used in error messages
 * @param actualColumnStatistics statistics under validation
 * @param expectedColumnStatistics statistics to compare against
 * @throws OrcCorruptionException if the two lists differ in size, or if the per-column check throws
 */
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, List<ColumnStatistics> actualColumnStatistics, List<ColumnStatistics> expectedColumnStatistics) throws OrcCorruptionException {
    requireNonNull(name, "name is null");
    requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
    requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");

    int columnCount = actualColumnStatistics.size();
    if (columnCount != expectedColumnStatistics.size()) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in %s statistics", name);
    }

    // Sizes match, so a single index walks both lists in lock step.
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        ColumnStatistics actualColumn = actualColumnStatistics.get(columnIndex);
        ColumnStatistics expectedColumn = expectedColumnStatistics.get(columnIndex);
        String columnLabel = name + " column " + columnIndex;
        validateColumnStatisticsEquivalent(orcDataSourceId, columnLabel, actualColumn, expectedColumn);
    }
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics)

Example 62 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.

The class OrcWriter, method bufferFileFooter.

/**
 * Collect the data for the file footer.  This is not the actual data, but
 * instead are functions that know how to write the data.
 * <p>
 * The returned list holds, in order: the stripe-statistics metadata section, the
 * DWRF stripe cache section, the footer, the postscript, and finally a single byte
 * carrying the postscript length.
 * <p>
 * Besides building the output, this method assigns {@code numberOfRows} and
 * {@code unencryptedStats} (both declared outside this method), records file
 * statistics and the metadata version on the write validation (via
 * {@code recordValidation}), and empties {@code closedStripes} while resetting
 * {@code closedStripesRetainedBytes} to zero.
 *
 * @return the data outputs for the file tail, in the order they must be written
 * @throws IOException declared by the signature; presumably raised by the metadata writer calls
 */
private List<DataOutput> bufferFileFooter() throws IOException {
    List<DataOutput> outputData = new ArrayList<>();
    // Metadata section: the statistics of every closed stripe, serialized by the metadata writer.
    Metadata metadata = new Metadata(closedStripes.stream().map(ClosedStripe::getStatistics).collect(toList()));
    Slice metadataSlice = metadataWriter.writeMetadata(metadata);
    outputData.add(createDataOutput(metadataSlice));
    // Total row count is the sum of the row counts of all closed stripes.
    numberOfRows = closedStripes.stream().mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()).sum();
    // File-level statistics are derived from the per-stripe column statistics.
    List<ColumnStatistics> fileStats = toFileStats(closedStripes.stream().map(ClosedStripe::getStatistics).map(StripeStatistics::getColumnStatistics).collect(toList()));
    recordValidation(validation -> validation.setFileStatistics(fileStats));
    // User metadata values are converted from String to Slice via utf8Slice.
    Map<String, Slice> userMetadata = this.userMetadata.entrySet().stream().collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));
    // Both collections are handed to addStatsRecursive and read afterwards; presumably it
    // fills them by splitting fileStats into unencrypted and per-group encrypted stats.
    unencryptedStats = new ArrayList<>();
    // As used below: outer key is the encryption group index, inner key is a node of that group.
    Map<Integer, Map<Integer, Slice>> encryptedStats = new HashMap<>();
    addStatsRecursive(fileStats, 0, new HashMap<>(), unencryptedStats, encryptedStats);
    Optional<DwrfEncryption> dwrfEncryption;
    if (dwrfWriterEncryption.isPresent()) {
        // Build one EncryptionGroup per writer encryption group, in the same order.
        ImmutableList.Builder<EncryptionGroup> encryptionGroupBuilder = ImmutableList.builder();
        List<WriterEncryptionGroup> writerEncryptionGroups = dwrfWriterEncryption.get().getWriterEncryptionGroups();
        for (int i = 0; i < writerEncryptionGroups.size(); i++) {
            WriterEncryptionGroup group = writerEncryptionGroups.get(i);
            // NOTE(review): if encryptedStats has no entry for group i, groupStats is null and
            // the groupStats::get method reference below throws NullPointerException -- confirm
            // that addStatsRecursive always creates an entry for every group.
            Map<Integer, Slice> groupStats = encryptedStats.get(i);
            // The group's statistics list is built by looking up each of the group's nodes in groupStats.
            encryptionGroupBuilder.add(new EncryptionGroup(group.getNodes(), // reader will just use key metadata from the stripe
            Optional.empty(), group.getNodes().stream().map(groupStats::get).collect(toList())));
        }
        dwrfEncryption = Optional.of(new DwrfEncryption(dwrfWriterEncryption.get().getKeyProvider(), encryptionGroupBuilder.build()));
    } else {
        dwrfEncryption = Optional.empty();
    }
    // Stripe cache section: the slice is written and added even when no stripe cache writer
    // is configured (the Optional is then empty).
    Optional<DwrfStripeCacheData> dwrfStripeCacheData = dwrfStripeCacheWriter.map(DwrfStripeCacheWriter::getDwrfStripeCacheData);
    Slice dwrfStripeCacheSlice = metadataWriter.writeDwrfStripeCache(dwrfStripeCacheData);
    outputData.add(createDataOutput(dwrfStripeCacheSlice));
    Optional<List<Integer>> dwrfStripeCacheOffsets = dwrfStripeCacheWriter.map(DwrfStripeCacheWriter::getOffsets);
    // The stripe information list is collected into the Footer here, before closedStripes is cleared.
    Footer footer = new Footer(numberOfRows, rowGroupMaxRowCount, OptionalLong.of(rawSize), closedStripes.stream().map(ClosedStripe::getStripeInformation).collect(toList()), orcTypes, ImmutableList.copyOf(unencryptedStats), userMetadata, dwrfEncryption, dwrfStripeCacheOffsets);
    // All stripe data needed for the footer has been captured above; drop the closed stripes.
    closedStripes.clear();
    closedStripesRetainedBytes = 0;
    Slice footerSlice = metadataWriter.writeFooter(footer);
    outputData.add(createDataOutput(footerSlice));
    recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion()));
    // The postscript records the lengths of the footer and metadata sections and the compression settings.
    Slice postscriptSlice = metadataWriter.writePostscript(footerSlice.length(), metadataSlice.length(), columnWriterOptions.getCompressionKind(), columnWriterOptions.getCompressionMaxBufferSize(), dwrfStripeCacheData);
    outputData.add(createDataOutput(postscriptSlice));
    // Last output is the postscript length narrowed to one byte.
    // NOTE(review): the cast silently truncates lengths above 255 -- presumably the format bounds the postscript size; confirm.
    outputData.add(createDataOutput(Slices.wrappedBuffer((byte) postscriptSlice.length())));
    return outputData;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) DwrfMetadataWriter.toFileStatistics(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) StreamLayout(com.facebook.presto.orc.writer.StreamLayout) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption) DataSink(com.facebook.presto.common.io.DataSink) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) DIRECT(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) Slices(io.airlift.slice.Slices) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) CompressedMetadataWriter(com.facebook.presto.orc.metadata.CompressedMetadataWriter) Footer(com.facebook.presto.orc.metadata.Footer) UNENCRYPTED(com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors(java.util.stream.Collectors) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) DEFAULT_SEQUENCE_ID(com.facebook.presto.orc.metadata.ColumnEncoding.DEFAULT_SEQUENCE_ID) DataSize(io.airlift.units.DataSize) List(java.util.List) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) 
ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) Metadata(com.facebook.presto.orc.metadata.Metadata) IntStream(java.util.stream.IntStream) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) HashMap(java.util.HashMap) CLOSED(com.facebook.presto.orc.FlushReason.CLOSED) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) MAGIC(com.facebook.presto.orc.metadata.PostScript.MAGIC) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) LastUsedCompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool.LastUsedCompressionBufferPool) OrcType(com.facebook.presto.orc.metadata.OrcType) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) Math.toIntExact(java.lang.Math.toIntExact) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) OrcReader.validateFile(com.facebook.presto.orc.OrcReader.validateFile) OrcWriteValidationBuilder(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder) IOException(java.io.IOException) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Stream(com.facebook.presto.orc.metadata.Stream) 
Consumer(java.util.function.Consumer) EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) OrcType.mapColumnToNode(com.facebook.presto.orc.metadata.OrcType.mapColumnToNode) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) CompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool) Closeable(java.io.Closeable) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataOutput(com.facebook.presto.common.io.DataOutput) Collections(java.util.Collections) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) DataOutput(com.facebook.presto.common.io.DataOutput) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Metadata(com.facebook.presto.orc.metadata.Metadata) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) 
Slice(io.airlift.slice.Slice) Footer(com.facebook.presto.orc.metadata.Footer) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption)

Example 63 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.

The class LongColumnWriter, method finishRowGroup.

/**
 * Finalizes the statistics of the row group currently being written.
 * <p>
 * The statistics built by the current builder are appended to the per-row-group
 * list, their retained size is added to the running total, and a new builder is
 * obtained from the supplier for the next row group.
 *
 * @return a single-entry map from this writer's column to the row group's statistics
 * @throws IllegalStateException if the writer is already closed
 */
@Override
public Map<Integer, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    ColumnStatistics rowGroupStatistics = statisticsBuilder.buildColumnStatistics();
    rowGroupColumnStatistics.add(rowGroupStatistics);
    columnStatisticsRetainedSizeInBytes += rowGroupStatistics.getRetainedSizeInBytes();

    // Swap in a new builder so the next row group starts from a clean slate.
    statisticsBuilder = statisticsBuilderSupplier.get();

    return ImmutableMap.of(column, rowGroupStatistics);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics)

Example 64 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.

The class LongDictionaryColumnWriter, method createColumnStatistics.

/**
 * Builds the statistics accumulated so far and replaces the builder with a new
 * {@link IntegerStatisticsBuilder}.
 *
 * @return the statistics produced by the builder that was active on entry
 */
@Override
protected ColumnStatistics createColumnStatistics() {
    ColumnStatistics builtStatistics = statisticsBuilder.buildColumnStatistics();

    // The old builder has been consumed; subsequent values go to a new one.
    statisticsBuilder = new IntegerStatisticsBuilder();

    return builtStatistics;
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) IntegerStatisticsBuilder(com.facebook.presto.orc.metadata.statistics.IntegerStatisticsBuilder)

Example 65 with ColumnStatistics

use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.

The class MapFlatColumnWriter, method getValueColumnStatistics.

/**
 * Gathers column statistics from every value writer and merges them per node.
 * <p>
 * {@code getStats} is applied to the underlying writer of each entry in
 * {@code valueWriters}; the resulting statistics are grouped by their map key and
 * each group is combined with {@code mergeColumnStatistics}. When there are no value
 * writers, the result of {@code getEmptyValueColumnStatistics()} is returned instead.
 *
 * @param getStats extracts the per-node statistics map from a single column writer
 * @return an immutable map with one merged statistics entry per key seen
 */
private Map<Integer, ColumnStatistics> getValueColumnStatistics(Function<ColumnWriter, Map<Integer, ColumnStatistics>> getStats) {
    if (valueWriters.isEmpty()) {
        return getEmptyValueColumnStatistics();
    }

    // Group the statistics of all value writers by node index.
    ImmutableListMultimap.Builder<Integer, ColumnStatistics> groupedBuilder = ImmutableListMultimap.builder();
    for (MapFlatValueWriter writer : valueWriters) {
        Map<Integer, ColumnStatistics> writerStats = getStats.apply(writer.getValueWriter());
        groupedBuilder.putAll(writerStats.entrySet());
    }
    ImmutableListMultimap<Integer, ColumnStatistics> statsByNode = groupedBuilder.build();

    // Merge each node's list of statistics into a single entry.
    ImmutableMap.Builder<Integer, ColumnStatistics> merged = ImmutableMap.builder();
    for (Integer nodeIndex : statsByNode.keySet()) {
        merged.put(nodeIndex, mergeColumnStatistics(statsByNode.get(nodeIndex)));
    }
    return merged.build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) ColumnStatistics.mergeColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics.mergeColumnStatistics) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) ImmutableMap(com.google.common.collect.ImmutableMap)

Aggregations

ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)99 ImmutableList (com.google.common.collect.ImmutableList)46 Slice (io.airlift.slice.Slice)46 List (java.util.List)46 Stream (com.facebook.presto.orc.metadata.Stream)38 ArrayList (java.util.ArrayList)38 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)32 StreamDataOutput (com.facebook.presto.orc.stream.StreamDataOutput)32 BooleanStreamCheckpoint (com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint)26 PresentOutputStream (com.facebook.presto.orc.stream.PresentOutputStream)26 ImmutableMap (com.google.common.collect.ImmutableMap)23 LongOutputStream (com.facebook.presto.orc.stream.LongOutputStream)16 OrcType (com.facebook.presto.orc.metadata.OrcType)15 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)14 Map (java.util.Map)14 Type (com.facebook.presto.common.type.Type)13 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)12 IOException (java.io.IOException)12 HashMap (java.util.HashMap)12 Optional (java.util.Optional)12