Search in sources :

Example 6 with ColumnEncoding

use of com.facebook.presto.orc.metadata.ColumnEncoding in project presto by prestodb.

the class SliceDictionaryColumnWriter method writeDictionary.

@Override
protected Optional<int[]> writeDictionary() {
    ColumnEncodingKind encodingKind = orcEncoding == DWRF ? DICTIONARY : DICTIONARY_V2;
    int dictionaryEntryCount = dictionary.getEntryCount();
    columnEncoding = new ColumnEncoding(encodingKind, dictionaryEntryCount);
    if (sortDictionaryKeys) {
        return writeSortedDictionary();
    } else {
        for (int i = 0; i < dictionaryEntryCount; i++) {
            writeDictionaryEntry(i);
        }
        return Optional.empty();
    }
}
Also used : ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) ColumnEncodingKind(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind)

Example 7 with ColumnEncoding

use of com.facebook.presto.orc.metadata.ColumnEncoding in project presto by prestodb.

the class OrcRecordReader method advanceToNextStripe.

private void advanceToNextStripe() throws IOException {
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();
    currentStripe++;
    if (currentStripe >= stripes.size()) {
        return;
    }
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }
    StripeInformation stripeInformation = stripes.get(currentStripe);
    Stripe stripe = stripeReader.readStripe(stripeInformation, currentStripeSystemMemoryContext);
    if (stripe != null) {
        // Give readers access to dictionary streams
        StreamSources dictionaryStreamSources = stripe.getDictionaryStreamSources();
        List<ColumnEncoding> columnEncodings = stripe.getColumnEncodings();
        for (StreamReader column : streamReaders) {
            if (column != null) {
                column.startStripe(dictionaryStreamSources, columnEncodings);
            }
        }
        rowGroups = stripe.getRowGroups().iterator();
    }
}
Also used : ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) StreamReader(com.facebook.presto.orc.reader.StreamReader) StreamSources(com.facebook.presto.orc.stream.StreamSources) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 8 with ColumnEncoding

use of com.facebook.presto.orc.metadata.ColumnEncoding in project presto by prestodb.

the class StripeReader method readStripe.

public Stripe readStripe(StripeInformation stripe, AggregatedMemoryContext systemMemoryUsage) throws IOException {
    // read the stripe footer
    StripeFooter stripeFooter = readStripeFooter(stripe, systemMemoryUsage);
    List<ColumnEncoding> columnEncodings = stripeFooter.getColumnEncodings();
    // get streams for selected columns
    Map<StreamId, Stream> streams = new HashMap<>();
    boolean hasRowGroupDictionary = false;
    for (Stream stream : stripeFooter.getStreams()) {
        if (includedOrcColumns.contains(stream.getColumn())) {
            streams.put(new StreamId(stream), stream);
            ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()).getColumnEncodingKind();
            if (columnEncoding == DICTIONARY && stream.getStreamKind() == StreamKind.IN_DICTIONARY) {
                hasRowGroupDictionary = true;
            }
        }
    }
    // handle stripes with more than one row group or a dictionary
    if ((stripe.getNumberOfRows() > rowsInRowGroup) || hasRowGroupDictionary) {
        // determine ranges of the stripe to read
        Map<StreamId, DiskRange> diskRanges = getDiskRanges(stripeFooter.getStreams());
        diskRanges = Maps.filterKeys(diskRanges, Predicates.in(streams.keySet()));
        // read the file regions
        Map<StreamId, OrcInputStream> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, systemMemoryUsage);
        // read the bloom filter for each column
        Map<Integer, List<HiveBloomFilter>> bloomFilterIndexes = readBloomFilterIndexes(streams, streamsData);
        // read the row index for each column
        Map<Integer, List<RowGroupIndex>> columnIndexes = readColumnIndexes(streams, streamsData, bloomFilterIndexes);
        // select the row groups matching the tuple domain
        Set<Integer> selectedRowGroups = selectRowGroups(stripe, columnIndexes);
        // if all row groups are skipped, return null
        if (selectedRowGroups.isEmpty()) {
            // set accounted memory usage to zero
            systemMemoryUsage.close();
            return null;
        }
        // value streams
        Map<StreamId, ValueStream<?>> valueStreams = createValueStreams(streams, streamsData, columnEncodings);
        // build the dictionary streams
        StreamSources dictionaryStreamSources = createDictionaryStreamSources(streams, valueStreams, columnEncodings);
        // build the row groups
        try {
            List<RowGroup> rowGroups = createRowGroups(stripe.getNumberOfRows(), streams, valueStreams, columnIndexes, selectedRowGroups, columnEncodings);
            return new Stripe(stripe.getNumberOfRows(), columnEncodings, rowGroups, dictionaryStreamSources);
        } catch (InvalidCheckpointException e) {
            // we must fail because the length of the row group dictionary is contained in the checkpoint stream.
            if (hasRowGroupDictionary) {
                throw new OrcCorruptionException(e, "ORC file %s has corrupt checkpoints", orcDataSource);
            }
        }
    }
    // stripe only has one row group and no dictionary
    ImmutableMap.Builder<StreamId, DiskRange> diskRangesBuilder = ImmutableMap.builder();
    for (Entry<StreamId, DiskRange> entry : getDiskRanges(stripeFooter.getStreams()).entrySet()) {
        StreamId streamId = entry.getKey();
        if (streamId.getStreamKind() != ROW_INDEX && streams.keySet().contains(streamId)) {
            diskRangesBuilder.put(entry);
        }
    }
    ImmutableMap<StreamId, DiskRange> diskRanges = diskRangesBuilder.build();
    // read the file regions
    Map<StreamId, OrcInputStream> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, systemMemoryUsage);
    // value streams
    Map<StreamId, ValueStream<?>> valueStreams = createValueStreams(streams, streamsData, columnEncodings);
    // build the dictionary streams
    StreamSources dictionaryStreamSources = createDictionaryStreamSources(streams, valueStreams, columnEncodings);
    // build the row group
    ImmutableMap.Builder<StreamId, StreamSource<?>> builder = ImmutableMap.builder();
    for (Entry<StreamId, ValueStream<?>> entry : valueStreams.entrySet()) {
        builder.put(entry.getKey(), new ValueStreamSource<>(entry.getValue()));
    }
    RowGroup rowGroup = new RowGroup(0, 0, stripe.getNumberOfRows(), new StreamSources(builder.build()));
    return new Stripe(stripe.getNumberOfRows(), columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources);
}
Also used : HashMap(java.util.HashMap) InvalidCheckpointException(com.facebook.presto.orc.checkpoint.InvalidCheckpointException) ValueStream(com.facebook.presto.orc.stream.ValueStream) ValueStream(com.facebook.presto.orc.stream.ValueStream) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) Stream(com.facebook.presto.orc.metadata.Stream) InputStream(java.io.InputStream) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ColumnEncodingKind(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) StreamSource(com.facebook.presto.orc.stream.StreamSource) CheckpointStreamSource.createCheckpointStreamSource(com.facebook.presto.orc.stream.CheckpointStreamSource.createCheckpointStreamSource) ValueStreamSource(com.facebook.presto.orc.stream.ValueStreamSource) ImmutableMap(com.google.common.collect.ImmutableMap) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) StreamSources(com.facebook.presto.orc.stream.StreamSources)

Example 9 with ColumnEncoding

use of com.facebook.presto.orc.metadata.ColumnEncoding in project presto by prestodb.

the class TestDictionaryColumnWriter method verifyDwrfDirectEncoding.

private void verifyDwrfDirectEncoding(int stripeCount, List<StripeFooter> stripeFooters) {
    assertEquals(stripeFooters.size(), stripeCount);
    for (StripeFooter footer : stripeFooters) {
        ColumnEncoding encoding = footer.getColumnEncodings().get(COLUMN_ID);
        assertEquals(encoding.getColumnEncodingKind(), DWRF_DIRECT);
    }
}
Also used : ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter)

Example 10 with ColumnEncoding

use of com.facebook.presto.orc.metadata.ColumnEncoding in project presto by prestodb.

the class OrcWriter method createEncryptedGroups.

private List<Slice> createEncryptedGroups(Multimap<Integer, Stream> encryptedStreams, Map<Integer, ColumnEncoding> encryptedColumnEncodings) throws IOException {
    ImmutableList.Builder<Slice> encryptedGroups = ImmutableList.builder();
    for (int i = 0; i < encryptedStreams.keySet().size(); i++) {
        int groupId = i;
        Map<Integer, ColumnEncoding> groupColumnEncodings = encryptedColumnEncodings.entrySet().stream().filter(entry -> dwrfEncryptionInfo.getGroupByNodeId(entry.getKey()).orElseThrow(() -> new VerifyError("missing group for encryptedColumn")) == groupId).collect(toImmutableMap(Entry::getKey, Entry::getValue));
        DwrfDataEncryptor dwrfDataEncryptor = dwrfEncryptionInfo.getEncryptorByGroupId(i);
        OrcOutputBuffer buffer = new OrcOutputBuffer(columnWriterOptions, Optional.of(dwrfDataEncryptor));
        toStripeEncryptionGroup(new StripeEncryptionGroup(ImmutableList.copyOf(encryptedStreams.get(i)), groupColumnEncodings)).writeTo(buffer);
        buffer.close();
        DynamicSliceOutput output = new DynamicSliceOutput(toIntExact(buffer.getOutputDataSize()));
        buffer.writeDataTo(output);
        encryptedGroups.add(output.slice());
    }
    return encryptedGroups.build();
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) DwrfMetadataWriter.toFileStatistics(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toFileStatistics) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) StreamLayout(com.facebook.presto.orc.writer.StreamLayout) ColumnWriter(com.facebook.presto.orc.writer.ColumnWriter) DwrfEncryption(com.facebook.presto.orc.metadata.DwrfEncryption) DataSink(com.facebook.presto.common.io.DataSink) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DictionaryColumnWriter(com.facebook.presto.orc.writer.DictionaryColumnWriter) DIRECT(com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) DwrfStripeCacheWriter(com.facebook.presto.orc.metadata.DwrfStripeCacheWriter) Slices(io.airlift.slice.Slices) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) CompressedMetadataWriter(com.facebook.presto.orc.metadata.CompressedMetadataWriter) Footer(com.facebook.presto.orc.metadata.Footer) UNENCRYPTED(com.facebook.presto.orc.DwrfEncryptionInfo.UNENCRYPTED) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Collectors(java.util.stream.Collectors) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) DataSize(io.airlift.units.DataSize) List(java.util.List) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) Metadata(com.facebook.presto.orc.metadata.Metadata) IntStream(java.util.stream.IntStream) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) HashMap(java.util.HashMap) CLOSED(com.facebook.presto.orc.FlushReason.CLOSED) Multimap(com.google.common.collect.Multimap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) MAGIC(com.facebook.presto.orc.metadata.PostScript.MAGIC) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) DataOutput.createDataOutput(com.facebook.presto.common.io.DataOutput.createDataOutput) LastUsedCompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool.LastUsedCompressionBufferPool) OrcType(com.facebook.presto.orc.metadata.OrcType) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) Math.toIntExact(java.lang.Math.toIntExact) Type(com.facebook.presto.common.type.Type) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) ColumnWriters.createColumnWriter(com.facebook.presto.orc.writer.ColumnWriters.createColumnWriter) StripeStatistics(com.facebook.presto.orc.metadata.statistics.StripeStatistics) OrcReader.validateFile(com.facebook.presto.orc.OrcReader.validateFile) OrcWriteValidationBuilder(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationBuilder) IOException(java.io.IOException) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Stream(com.facebook.presto.orc.metadata.Stream) Consumer(java.util.function.Consumer) EncryptionGroup(com.facebook.presto.orc.metadata.EncryptionGroup) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) CompressionBufferPool(com.facebook.presto.orc.writer.CompressionBufferPool) Closeable(java.io.Closeable) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataOutput(com.facebook.presto.common.io.DataOutput) DwrfEncryptionInfo.createNodeToGroupMap(com.facebook.presto.orc.DwrfEncryptionInfo.createNodeToGroupMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) StripeEncryptionGroup(com.facebook.presto.orc.metadata.StripeEncryptionGroup) DwrfMetadataWriter.toStripeEncryptionGroup(com.facebook.presto.orc.metadata.DwrfMetadataWriter.toStripeEncryptionGroup)

Aggregations

ColumnEncoding (com.facebook.presto.orc.metadata.ColumnEncoding)13 StripeFooter (com.facebook.presto.orc.metadata.StripeFooter)6 Stream (com.facebook.presto.orc.metadata.Stream)5 ImmutableList (com.google.common.collect.ImmutableList)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 HashMap (java.util.HashMap)5 List (java.util.List)5 StripeEncryptionGroup (com.facebook.presto.orc.metadata.StripeEncryptionGroup)4 StripeInformation (com.facebook.presto.orc.metadata.StripeInformation)4 ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)4 Slice (io.airlift.slice.Slice)4 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4 InvalidCheckpointException (com.facebook.presto.orc.checkpoint.InvalidCheckpointException)3 ColumnEncodingKind (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind)3 OrcType (com.facebook.presto.orc.metadata.OrcType)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)3 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 Preconditions.checkState (com.google.common.base.Preconditions.checkState)3 ImmutableSet (com.google.common.collect.ImmutableSet)3