Example 1 with OrcInputStream

Use of io.trino.orc.stream.OrcInputStream in project trino by trinodb.

Class StripeReader, method readStripe.

public Stripe readStripe(StripeInformation stripe, AggregatedMemoryContext memoryUsage) throws IOException {
    // read the stripe footer
    StripeFooter stripeFooter = readStripeFooter(stripe, memoryUsage);
    ColumnMetadata<ColumnEncoding> columnEncodings = stripeFooter.getColumnEncodings();
    if (writeValidation.isPresent()) {
        writeValidation.get().validateTimeZone(orcDataSource.getId(), stripeFooter.getTimeZone());
    }
    ZoneId fileTimeZone = stripeFooter.getTimeZone();
    // get streams for selected columns
    Map<StreamId, Stream> streams = new HashMap<>();
    for (Stream stream : stripeFooter.getStreams()) {
        if (includedOrcColumnIds.contains(stream.getColumnId()) && isSupportedStreamType(stream, types.get(stream.getColumnId()).getOrcTypeKind())) {
            streams.put(new StreamId(stream), stream);
        }
    }
    // handle stripes with more than one row group
    boolean invalidCheckPoint = false;
    if (rowsInRowGroup.isPresent() && stripe.getNumberOfRows() > rowsInRowGroup.getAsInt()) {
        // determine ranges of the stripe to read
        Map<StreamId, DiskRange> diskRanges = getDiskRanges(stripeFooter.getStreams());
        diskRanges = Maps.filterKeys(diskRanges, Predicates.in(streams.keySet()));
        // read the file regions
        Map<StreamId, OrcChunkLoader> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, memoryUsage);
        // read the bloom filter for each column
        Map<OrcColumnId, List<BloomFilter>> bloomFilterIndexes = readBloomFilterIndexes(streams, streamsData);
        // read the row index for each column
        Map<StreamId, List<RowGroupIndex>> columnIndexes = readColumnIndexes(streams, streamsData, bloomFilterIndexes);
        if (writeValidation.isPresent()) {
            writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripe.getOffset(), columnIndexes);
        }
        // select the row groups matching the tuple domain
        Set<Integer> selectedRowGroups = selectRowGroups(stripe, columnIndexes);
        // if all row groups are skipped, return null
        if (selectedRowGroups.isEmpty()) {
            // set accounted memory usage to zero
            memoryUsage.close();
            return null;
        }
        // value streams
        Map<StreamId, ValueInputStream<?>> valueStreams = createValueStreams(streams, streamsData, columnEncodings);
        // build the dictionary streams
        InputStreamSources dictionaryStreamSources = createDictionaryStreamSources(streams, valueStreams, columnEncodings);
        // build the row groups
        try {
            List<RowGroup> rowGroups = createRowGroups(stripe.getNumberOfRows(), streams, valueStreams, columnIndexes, selectedRowGroups, columnEncodings);
            return new Stripe(stripe.getNumberOfRows(), fileTimeZone, columnEncodings, rowGroups, dictionaryStreamSources);
        } catch (InvalidCheckpointException e) {
            // The ORC file contains a corrupt checkpoint stream; treat the stripe as a single row group.
            invalidCheckPoint = true;
        }
    }
    // stripe only has one row group
    ImmutableMap.Builder<StreamId, DiskRange> diskRangesBuilder = ImmutableMap.builder();
    for (Entry<StreamId, DiskRange> entry : getDiskRanges(stripeFooter.getStreams()).entrySet()) {
        StreamId streamId = entry.getKey();
        if (streams.containsKey(streamId)) {
            diskRangesBuilder.put(entry);
        }
    }
    ImmutableMap<StreamId, DiskRange> diskRanges = diskRangesBuilder.buildOrThrow();
    // read the file regions
    Map<StreamId, OrcChunkLoader> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, memoryUsage);
    long minAverageRowBytes = 0;
    for (Entry<StreamId, Stream> entry : streams.entrySet()) {
        if (entry.getKey().getStreamKind() == ROW_INDEX) {
            List<RowGroupIndex> rowGroupIndexes = metadataReader.readRowIndexes(hiveWriterVersion, new OrcInputStream(streamsData.get(entry.getKey())));
            checkState(rowGroupIndexes.size() == 1 || invalidCheckPoint, "expect a single row group or an invalid checkpoint");
            long totalBytes = 0;
            long totalRows = 0;
            for (RowGroupIndex rowGroupIndex : rowGroupIndexes) {
                ColumnStatistics columnStatistics = rowGroupIndex.getColumnStatistics();
                if (columnStatistics.hasMinAverageValueSizeInBytes()) {
                    totalBytes += columnStatistics.getMinAverageValueSizeInBytes() * columnStatistics.getNumberOfValues();
                    totalRows += columnStatistics.getNumberOfValues();
                }
            }
            if (totalRows > 0) {
                minAverageRowBytes += totalBytes / totalRows;
            }
        }
    }
    // value streams
    Map<StreamId, ValueInputStream<?>> valueStreams = createValueStreams(streams, streamsData, columnEncodings);
    // build the dictionary streams
    InputStreamSources dictionaryStreamSources = createDictionaryStreamSources(streams, valueStreams, columnEncodings);
    // build the row group
    ImmutableMap.Builder<StreamId, InputStreamSource<?>> builder = ImmutableMap.builder();
    for (Entry<StreamId, ValueInputStream<?>> entry : valueStreams.entrySet()) {
        builder.put(entry.getKey(), new ValueInputStreamSource<>(entry.getValue()));
    }
    RowGroup rowGroup = new RowGroup(0, 0, stripe.getNumberOfRows(), minAverageRowBytes, new InputStreamSources(builder.buildOrThrow()));
    return new Stripe(stripe.getNumberOfRows(), fileTimeZone, columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources);
}
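
The multi-row-group path above asks getDiskRanges for the byte range of every stream in the stripe, then filters to the selected streams. A minimal sketch of that layout arithmetic, assuming (as the reads above imply) that streams are stored back-to-back in footer order; this is an illustrative helper, not the exact Trino implementation:

private static Map<StreamId, DiskRange> diskRangesSketch(List<Stream> streams) {
    // streams occupy consecutive byte ranges, so a running offset over the
    // footer's stream list yields each stream's DiskRange within the stripe
    ImmutableMap.Builder<StreamId, DiskRange> ranges = ImmutableMap.builder();
    long offset = 0;
    for (Stream stream : streams) {
        if (stream.getLength() > 0) {
            ranges.put(new StreamId(stream), new DiskRange(offset, stream.getLength()));
        }
        offset += stream.getLength();
    }
    return ranges.buildOrThrow();
}

The stripe-relative offsets are later shifted by stripe.getOffset() when readDiskRanges issues the actual reads.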
Also used: OrcInputStream (io.trino.orc.stream.OrcInputStream), ValueInputStream (io.trino.orc.stream.ValueInputStream), ValueInputStreamSource (io.trino.orc.stream.ValueInputStreamSource), InputStreamSource (io.trino.orc.stream.InputStreamSource), InputStreamSources (io.trino.orc.stream.InputStreamSources), OrcChunkLoader (io.trino.orc.stream.OrcChunkLoader), InputStream (java.io.InputStream), OrcColumnId (io.trino.orc.metadata.OrcColumnId), ColumnEncoding (io.trino.orc.metadata.ColumnEncoding), Stream (io.trino.orc.metadata.Stream), StripeFooter (io.trino.orc.metadata.StripeFooter), RowGroupIndex (io.trino.orc.metadata.RowGroupIndex), ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics), InvalidCheckpointException (io.trino.orc.checkpoint.InvalidCheckpointException), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), List (java.util.List), ZoneId (java.time.ZoneId), ImmutableList (com.google.common.collect.ImmutableList), ImmutableMap (com.google.common.collect.ImmutableMap), ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)

Example 2 with OrcInputStream

Use of io.trino.orc.stream.OrcInputStream in project trino by trinodb.

Class StripeReader, method readBloomFilterIndexes.

private Map<OrcColumnId, List<BloomFilter>> readBloomFilterIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData) throws IOException {
    HashMap<OrcColumnId, List<BloomFilter>> bloomFilters = new HashMap<>();
    for (Entry<StreamId, Stream> entry : streams.entrySet()) {
        Stream stream = entry.getValue();
        if (stream.getStreamKind() == BLOOM_FILTER_UTF8) {
            OrcInputStream inputStream = new OrcInputStream(streamsData.get(entry.getKey()));
            bloomFilters.put(stream.getColumnId(), metadataReader.readBloomFilterIndexes(inputStream));
        }
    }
    for (Entry<StreamId, Stream> entry : streams.entrySet()) {
        Stream stream = entry.getValue();
        if (stream.getStreamKind() == BLOOM_FILTER && !bloomFilters.containsKey(stream.getColumnId())) {
            OrcInputStream inputStream = new OrcInputStream(streamsData.get(entry.getKey()));
            bloomFilters.put(entry.getKey().getColumnId(), metadataReader.readBloomFilterIndexes(inputStream));
        }
    }
    return ImmutableMap.copyOf(bloomFilters);
}
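
The two loops encode a preference order: BLOOM_FILTER_UTF8 streams are read first, and legacy BLOOM_FILTER streams are consulted only for columns that lack a UTF8 variant. That fallback condition could be factored into a small predicate; a hypothetical helper for illustration:

private static boolean useLegacyBloomFilter(Stream stream, Map<OrcColumnId, List<BloomFilter>> bloomFilters) {
    // a legacy bloom filter only applies when no UTF8 bloom filter was
    // already read for the same column
    return stream.getStreamKind() == BLOOM_FILTER && !bloomFilters.containsKey(stream.getColumnId());
}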
Also used: OrcInputStream (io.trino.orc.stream.OrcInputStream), ValueInputStream (io.trino.orc.stream.ValueInputStream), InputStream (java.io.InputStream), Stream (io.trino.orc.metadata.Stream), OrcColumnId (io.trino.orc.metadata.OrcColumnId), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList)

Example 3 with OrcInputStream

Use of io.trino.orc.stream.OrcInputStream in project trino by trinodb.

Class StripeReader, method readStripeFooter.

private StripeFooter readStripeFooter(StripeInformation stripe, AggregatedMemoryContext memoryUsage) throws IOException {
    long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
    int tailLength = toIntExact(stripe.getFooterLength());
    // read the footer
    Slice tailBuffer = orcDataSource.readFully(offset, tailLength);
    try (InputStream inputStream = new OrcInputStream(OrcChunkLoader.create(orcDataSource.getId(), tailBuffer, decompressor, memoryUsage))) {
        return metadataReader.readStripeFooter(types, inputStream, legacyFileTimeZone);
    }
}
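
The offset arithmetic reflects the on-disk stripe layout: index streams, then data streams, then the stripe footer. A sketch of that computation using the StripeInformation getters shown above (hypothetical helper):

private static long stripeFooterOffset(StripeInformation stripe) {
    // a stripe is laid out as [index][data][footer], so the footer starts
    // after the index and data sections
    return stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
}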
Also used: OrcInputStream (io.trino.orc.stream.OrcInputStream), ValueInputStream (io.trino.orc.stream.ValueInputStream), InputStream (java.io.InputStream), Slice (io.airlift.slice.Slice), StreamCheckpoint (io.trino.orc.checkpoint.StreamCheckpoint), Checkpoints.getDictionaryStreamCheckpoint (io.trino.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)

Example 4 with OrcInputStream

Use of io.trino.orc.stream.OrcInputStream in project trino by trinodb.

Class TestOrcWriter, method testWriteOutputStreamsInOrder.

@Test
public void testWriteOutputStreamsInOrder() throws IOException {
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        TempFile tempFile = new TempFile();
        List<String> columnNames = ImmutableList.of("test1", "test2", "test3", "test4", "test5");
        List<Type> types = ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR);
        OrcWriter writer = new OrcWriter(
                new OutputStreamOrcDataSink(new FileOutputStream(tempFile.getFile())),
                columnNames,
                types,
                OrcType.createRootOrcType(columnNames, types),
                NONE,
                new OrcWriterOptions()
                        .withStripeMinSize(DataSize.of(0, MEGABYTE))
                        .withStripeMaxSize(DataSize.of(32, MEGABYTE))
                        .withStripeMaxRowCount(ORC_STRIPE_SIZE)
                        .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
                        .withDictionaryMaxMemory(DataSize.of(32, MEGABYTE))
                        .withBloomFilterColumns(ImmutableSet.copyOf(columnNames)),
                ImmutableMap.of(),
                true,
                validationMode,
                new OrcWriterStats());
        // write down some data with unsorted streams
        String[] data = new String[] { "a", "bbbbb", "ccc", "dd", "eeee" };
        Block[] blocks = new Block[data.length];
        int entries = 65536;
        BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
        for (int i = 0; i < data.length; i++) {
            byte[] bytes = data[i].getBytes(UTF_8);
            for (int j = 0; j < entries; j++) {
                // mutate the first byte so each written value is different
                bytes[0] = (byte) ((bytes[0] + 1) % 128);
                blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                blockBuilder.closeEntry();
            }
            blocks[i] = blockBuilder.build();
            blockBuilder = blockBuilder.newBlockBuilderLike(null);
        }
        writer.write(new Page(blocks));
        writer.close();
        // read the footer and verify the streams are ordered by size
        OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), READER_OPTIONS);
        Footer footer = OrcReader.createOrcReader(orcDataSource, READER_OPTIONS).orElseThrow(() -> new RuntimeException("File is empty")).getFooter();
        // OrcReader closes the original data source because it buffers the full file, so we need to reopen
        orcDataSource = new FileOrcDataSource(tempFile.getFile(), READER_OPTIONS);
        for (StripeInformation stripe : footer.getStripes()) {
            // read the footer
            Slice tailBuffer = orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), toIntExact(stripe.getFooterLength()));
            try (InputStream inputStream = new OrcInputStream(OrcChunkLoader.create(orcDataSource.getId(), tailBuffer, Optional.empty(), newSimpleAggregatedMemoryContext()))) {
                StripeFooter stripeFooter = new OrcMetadataReader().readStripeFooter(footer.getTypes(), inputStream, ZoneId.of("UTC"));
                int size = 0;
                boolean dataStreamStarted = false;
                for (Stream stream : stripeFooter.getStreams()) {
                    if (isIndexStream(stream)) {
                        assertFalse(dataStreamStarted);
                        continue;
                    }
                    dataStreamStarted = true;
                    // verify sizes in order
                    assertGreaterThanOrEqual(stream.getLength(), size);
                    size = stream.getLength();
                }
            }
        }
    }
}
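
The assertion loop verifies a writer invariant: every index stream precedes every data stream, and data streams appear in non-decreasing length order. The same check, extracted as a standalone predicate for illustration (hypothetical helper, same logic as the loop above):

private static boolean streamsOrderedBySize(List<Stream> streams) {
    int previousLength = 0;
    boolean dataStreamStarted = false;
    for (Stream stream : streams) {
        if (isIndexStream(stream)) {
            // an index stream appearing after a data stream is out of order
            if (dataStreamStarted) {
                return false;
            }
            continue;
        }
        dataStreamStarted = true;
        if (stream.getLength() < previousLength) {
            return false;
        }
        previousLength = stream.getLength();
    }
    return true;
}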
Also used: OrcInputStream (io.trino.orc.stream.OrcInputStream), InputStream (java.io.InputStream), FileOutputStream (java.io.FileOutputStream), Stream (io.trino.orc.metadata.Stream), StripeReader.isIndexStream (io.trino.orc.StripeReader.isIndexStream), OrcWriteValidationMode (io.trino.orc.OrcWriteValidation.OrcWriteValidationMode), OrcMetadataReader (io.trino.orc.metadata.OrcMetadataReader), OrcType (io.trino.orc.metadata.OrcType), StripeFooter (io.trino.orc.metadata.StripeFooter), Footer (io.trino.orc.metadata.Footer), StripeInformation (io.trino.orc.metadata.StripeInformation), Type (io.trino.spi.type.Type), Page (io.trino.spi.Page), Block (io.trino.spi.block.Block), BlockBuilder (io.trino.spi.block.BlockBuilder), Slice (io.airlift.slice.Slice), Test (org.testng.annotations.Test)

Example 5 with OrcInputStream

Use of io.trino.orc.stream.OrcInputStream in project trino by trinodb.

Class StripeReader, method readColumnIndexes.

private Map<StreamId, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData, Map<OrcColumnId, List<BloomFilter>> bloomFilterIndexes) throws IOException {
    ImmutableMap.Builder<StreamId, List<RowGroupIndex>> columnIndexes = ImmutableMap.builder();
    for (Entry<StreamId, Stream> entry : streams.entrySet()) {
        Stream stream = entry.getValue();
        if (stream.getStreamKind() == ROW_INDEX) {
            OrcInputStream inputStream = new OrcInputStream(streamsData.get(entry.getKey()));
            List<BloomFilter> bloomFilters = bloomFilterIndexes.get(entry.getKey().getColumnId());
            List<RowGroupIndex> rowGroupIndexes = metadataReader.readRowIndexes(hiveWriterVersion, inputStream);
            if (bloomFilters != null && !bloomFilters.isEmpty()) {
                ImmutableList.Builder<RowGroupIndex> newRowGroupIndexes = ImmutableList.builder();
                for (int i = 0; i < rowGroupIndexes.size(); i++) {
                    RowGroupIndex rowGroupIndex = rowGroupIndexes.get(i);
                    ColumnStatistics columnStatistics = rowGroupIndex.getColumnStatistics().withBloomFilter(bloomFilters.get(i));
                    newRowGroupIndexes.add(new RowGroupIndex(rowGroupIndex.getPositions(), columnStatistics));
                }
                rowGroupIndexes = newRowGroupIndexes.build();
            }
            columnIndexes.put(entry.getKey(), rowGroupIndexes);
        }
    }
    return columnIndexes.buildOrThrow();
}
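
The bloom filter list is positional: filter i belongs to row group i, which is why the loop pairs the two lists by index. The merge step could be extracted into a hypothetical helper like this:

private static List<RowGroupIndex> attachBloomFilters(List<RowGroupIndex> indexes, List<BloomFilter> filters) {
    // attach the i-th bloom filter to the i-th row group's column
    // statistics, leaving the row group positions untouched
    ImmutableList.Builder<RowGroupIndex> merged = ImmutableList.builder();
    for (int i = 0; i < indexes.size(); i++) {
        ColumnStatistics statistics = indexes.get(i).getColumnStatistics().withBloomFilter(filters.get(i));
        merged.add(new RowGroupIndex(indexes.get(i).getPositions(), statistics));
    }
    return merged.build();
}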
Also used: OrcInputStream (io.trino.orc.stream.OrcInputStream), ValueInputStream (io.trino.orc.stream.ValueInputStream), InputStream (java.io.InputStream), Stream (io.trino.orc.metadata.Stream), RowGroupIndex (io.trino.orc.metadata.RowGroupIndex), ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics), BloomFilter (io.trino.orc.metadata.statistics.BloomFilter), StreamCheckpoint (io.trino.orc.checkpoint.StreamCheckpoint), Checkpoints.getDictionaryStreamCheckpoint (io.trino.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), ImmutableMap (com.google.common.collect.ImmutableMap), ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)

Aggregations

OrcInputStream (io.trino.orc.stream.OrcInputStream): 5
InputStream (java.io.InputStream): 5
Stream (io.trino.orc.metadata.Stream): 4
ValueInputStream (io.trino.orc.stream.ValueInputStream): 4
ImmutableList (com.google.common.collect.ImmutableList): 3
ArrayList (java.util.ArrayList): 3
List (java.util.List): 3
ImmutableMap (com.google.common.collect.ImmutableMap): 2
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 2
Slice (io.airlift.slice.Slice): 2
Checkpoints.getDictionaryStreamCheckpoint (io.trino.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint): 2
StreamCheckpoint (io.trino.orc.checkpoint.StreamCheckpoint): 2
OrcColumnId (io.trino.orc.metadata.OrcColumnId): 2
RowGroupIndex (io.trino.orc.metadata.RowGroupIndex): 2
StripeFooter (io.trino.orc.metadata.StripeFooter): 2
ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics): 2
HashMap (java.util.HashMap): 2
OrcWriteValidationMode (io.trino.orc.OrcWriteValidation.OrcWriteValidationMode): 1
StripeReader.isIndexStream (io.trino.orc.StripeReader.isIndexStream): 1
InvalidCheckpointException (io.trino.orc.checkpoint.InvalidCheckpointException): 1