Search in sources :

Example 1 with Footer

use of io.prestosql.orc.metadata.Footer in project hetu-core by openlookeng.

the class TestOrcReaderPositions method testReadUserMetadata.

/**
 * Verifies that the user metadata exposed by the reader's footer matches the
 * map handed to {@code createFileWithOnlyUserMetadata} when the file was created.
 *
 * @throws Exception if creating, reading or closing the temporary file fails
 */
@Test
public void testReadUserMetadata() throws Exception {
    Map<String, String> metadata = ImmutableMap.of("a", "ala", "b", "ma", "c", "kota");
    // A single size is used for every buffer-size argument of the data source and the reader.
    DataSize dataSize = new DataSize(1, MEGABYTE);
    try (TempFile tempFile = new TempFile()) {
        createFileWithOnlyUserMetadata(tempFile.getFile(), metadata);
        // The data source is closed before the temp file so that no handle on the
        // file is still open when the temp file itself is closed.
        try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), dataSize, dataSize, dataSize, true, tempFile.getFile().lastModified())) {
            OrcReader orcReader = new OrcReader(orcDataSource, dataSize, dataSize, dataSize);
            Footer footer = orcReader.getFooter();
            // Footer values are slices; decode them to strings before comparing with the input map.
            Map<String, String> readMetadata = Maps.transformValues(footer.getUserMetadata(), Slice::toStringAscii);
            assertEquals(readMetadata, metadata);
        }
    }
}
Also used : Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) Footer(io.prestosql.orc.metadata.Footer) Test(org.testng.annotations.Test)

Example 2 with Footer

use of io.prestosql.orc.metadata.Footer in project hetu-core by openlookeng.

the class OrcWriter method bufferFileFooter.

/**
 * Collect the data for the file footer.  This is not the actual data, but
 * instead are functions that know how to write the data.
 * <p>
 * The returned list contains, in order: the metadata section, the footer, the
 * postscript, and a single byte holding the postscript length.
 * <p>
 * Side effects: invokes the optional pre-close callback (failures are logged and
 * otherwise ignored), records file statistics and the metadata version for
 * validation, and clears {@code closedStripes} together with its retained-bytes counter.
 *
 * @return the outputs that make up the file tail, in write order
 * @throws IOException declared for the calls made here; presumably raised by the
 *         metadata writer (confirm against its API)
 */
private List<OrcDataOutput> bufferFileFooter() throws IOException {
    if (preCloseCallback.isPresent()) {
        try {
            preCloseCallback.get().call();
        } catch (Exception e) {
            // Best effort: a failing callback must not prevent the footer from being
            // written, but keep the cause in the log so the failure can be diagnosed.
            log.debug(e, "Call pre close call back error");
        }
    }
    List<OrcDataOutput> outputData = new ArrayList<>();

    // Metadata section: one statistics entry per closed stripe.
    Metadata metadata = new Metadata(closedStripes.stream().map(ClosedStripe::getStatistics).map(Optional::of).collect(toList()));
    Slice metadataSlice = metadataWriter.writeMetadata(metadata);
    outputData.add(createDataOutput(metadataSlice));

    // Footer section: row count, file-level statistics and user metadata.
    long numberOfRows = closedStripes.stream().mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()).sum();
    Optional<ColumnMetadata<ColumnStatistics>> fileStats = toFileStats(closedStripes.stream().map(ClosedStripe::getStatistics).map(StripeStatistics::getColumnStatistics).collect(toList()));
    recordValidation(validation -> validation.setFileStatistics(fileStats));
    Map<String, Slice> localUserMetadata = this.userMetadata.entrySet().stream().collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));
    Footer footer = new Footer(numberOfRows, rowGroupMaxRowCount, closedStripes.stream().map(ClosedStripe::getStripeInformation).collect(toImmutableList()), orcTypes, fileStats, localUserMetadata);

    // The stripe bookkeeping has been copied into the footer; reset it.
    closedStripes.clear();
    closedStripesRetainedBytes = 0;
    Slice footerSlice = metadataWriter.writeFooter(footer);
    outputData.add(createDataOutput(footerSlice));
    recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion()));

    // Postscript records the lengths of the footer and metadata sections.
    Slice postscriptSlice = metadataWriter.writePostscript(footerSlice.length(), metadataSlice.length(), compression, maxCompressionBufferSize);
    outputData.add(createDataOutput(postscriptSlice));
    // Final byte: the postscript length; checkedCast rejects lengths that do not fit in an unsigned byte.
    outputData.add(createDataOutput(Slices.wrappedBuffer(UnsignedBytes.checkedCast(postscriptSlice.length()))));
    return outputData;
}
Also used : Footer(io.prestosql.orc.metadata.Footer) CLOSED(io.prestosql.orc.OrcWriterStats.FlushReason.CLOSED) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Map(java.util.Map) ColumnWriters.createColumnWriter(io.prestosql.orc.writer.ColumnWriters.createColumnWriter) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Type(io.prestosql.spi.type.Type) Metadata(io.prestosql.orc.metadata.Metadata) ImmutableSet(com.google.common.collect.ImmutableSet) OrcMetadataWriter(io.prestosql.orc.metadata.OrcMetadataWriter) OrcDataOutput(io.prestosql.orc.stream.OrcDataOutput) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) OrcWriteValidationMode(io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode) MAX_BYTES(io.prestosql.orc.OrcWriterStats.FlushReason.MAX_BYTES) StreamDataOutput(io.prestosql.orc.stream.StreamDataOutput) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) Preconditions.checkState(com.google.common.base.Preconditions.checkState) StripeInformation(io.prestosql.orc.metadata.StripeInformation) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) CompressionKind(io.prestosql.orc.metadata.CompressionKind) Entry(java.util.Map.Entry) Optional(java.util.Optional) FlushReason(io.prestosql.orc.OrcWriterStats.FlushReason) OrcWriteValidationBuilder(io.prestosql.orc.OrcWriteValidation.OrcWriteValidationBuilder) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) MAX_ROWS(io.prestosql.orc.OrcWriterStats.FlushReason.MAX_ROWS) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) 
Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnWriter(io.prestosql.orc.writer.ColumnWriter) ROOT_COLUMN(io.prestosql.orc.metadata.OrcColumnId.ROOT_COLUMN) Math.toIntExact(java.lang.Math.toIntExact) Nullable(javax.annotation.Nullable) Integer.min(java.lang.Integer.min) OrcReader.validateFile(io.prestosql.orc.OrcReader.validateFile) UnsignedBytes(com.google.common.primitives.UnsignedBytes) DICTIONARY_FULL(io.prestosql.orc.OrcWriterStats.FlushReason.DICTIONARY_FULL) SliceDictionaryColumnWriter(io.prestosql.orc.writer.SliceDictionaryColumnWriter) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) OrcDataOutput.createDataOutput(io.prestosql.orc.stream.OrcDataOutput.createDataOutput) Page(io.prestosql.spi.Page) IOException(java.io.IOException) DIRECT(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Stream(io.prestosql.orc.metadata.Stream) Consumer(java.util.function.Consumer) Collectors.toList(java.util.stream.Collectors.toList) Closeable(java.io.Closeable) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) CompressedMetadataWriter(io.prestosql.orc.metadata.CompressedMetadataWriter) Collections(java.util.Collections) MAGIC(io.prestosql.orc.metadata.PostScript.MAGIC) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Optional(java.util.Optional) ArrayList(java.util.ArrayList) Metadata(io.prestosql.orc.metadata.Metadata) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) OrcDataOutput(io.prestosql.orc.stream.OrcDataOutput) IOException(java.io.IOException) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) Footer(io.prestosql.orc.metadata.Footer) StripeFooter(io.prestosql.orc.metadata.StripeFooter)

Example 3 with Footer

use of io.prestosql.orc.metadata.Footer in project hetu-core by openlookeng.

the class TestOrcWriter method testWriteOutputStreamsInOrder.

/**
 * For every write-validation mode, writes one page of five VARCHAR columns and then
 * checks each stripe footer: all index streams must be listed before any data
 * stream, and data stream lengths must be non-decreasing.
 *
 * @throws IOException if writing, reading or closing the temporary file fails
 */
@Test
public void testWriteOutputStreamsInOrder() throws IOException {
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        // Close the temp file for every validation mode, even when an assertion fails.
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = new OrcWriter(new OutputStreamOrcDataSink(new FileOutputStream(tempFile.getFile())), ImmutableList.of("test1", "test2", "test3", "test4", "test5"), ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR), NONE, new OrcWriterOptions().withStripeMinSize(new DataSize(0, MEGABYTE)).withStripeMaxSize(new DataSize(32, MEGABYTE)).withStripeMaxRowCount(ORC_STRIPE_SIZE).withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE).withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), false, ImmutableMap.of(), true, validationMode, new OrcWriterStats(), Optional.empty(), Optional.empty());
            // write down some data with unsorted streams
            String[] data = new String[] { "a", "bbbbb", "ccc", "dd", "eeee" };
            Block[] blocks = new Block[data.length];
            int entries = 65536;
            BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
            for (int i = 0; i < data.length; i++) {
                byte[] bytes = data[i].getBytes();
                for (int j = 0; j < entries; j++) {
                    // force to write different data
                    bytes[0] = (byte) ((bytes[0] + 1) % 128);
                    blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                    blockBuilder.closeEntry();
                }
                blocks[i] = blockBuilder.build();
                blockBuilder = blockBuilder.newBlockBuilderLike(null);
            }
            writer.write(new Page(blocks));
            writer.close();
            // read the footer and verify the streams are ordered by size
            DataSize dataSize = new DataSize(1, MEGABYTE);
            // The data source is closed before the temp file so no handle on the file outlives it.
            try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), dataSize, dataSize, dataSize, true, tempFile.getFile().lastModified())) {
                Footer footer = new OrcReader(orcDataSource, dataSize, dataSize, dataSize).getFooter();
                for (StripeInformation stripe : footer.getStripes()) {
                    // read the stripe footer, located after the stripe's index and data sections
                    Slice tailBuffer = orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), toIntExact(stripe.getFooterLength()));
                    try (InputStream inputStream = new OrcInputStream(OrcChunkLoader.create(orcDataSource.getId(), tailBuffer, Optional.empty(), newSimpleAggregatedMemoryContext()))) {
                        StripeFooter stripeFooter = new OrcMetadataReader().readStripeFooter(footer.getTypes(), inputStream, ZoneId.of("UTC"));
                        int size = 0;
                        boolean dataStreamStarted = false;
                        for (Stream stream : stripeFooter.getStreams()) {
                            if (isIndexStream(stream)) {
                                // index streams may only appear before the first data stream
                                assertFalse(dataStreamStarted);
                                continue;
                            }
                            dataStreamStarted = true;
                            // verify sizes in order
                            assertGreaterThanOrEqual(stream.getLength(), size);
                            size = stream.getLength();
                        }
                    }
                }
            }
        }
    }
}
Also used : Page(io.prestosql.spi.Page) DataSize(io.airlift.units.DataSize) OrcWriteValidationMode(io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode) StripeReader.isIndexStream(io.prestosql.orc.StripeReader.isIndexStream) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) FileOutputStream(java.io.FileOutputStream) Stream(io.prestosql.orc.metadata.Stream) InputStream(java.io.InputStream) BlockBuilder(io.prestosql.spi.block.BlockBuilder) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) InputStream(java.io.InputStream) OrcMetadataReader(io.prestosql.orc.metadata.OrcMetadataReader) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Slice(io.airlift.slice.Slice) FileOutputStream(java.io.FileOutputStream) Footer(io.prestosql.orc.metadata.Footer) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Block(io.prestosql.spi.block.Block) StripeInformation(io.prestosql.orc.metadata.StripeInformation) Test(org.testng.annotations.Test)

Aggregations

Slice (io.airlift.slice.Slice)3 Footer (io.prestosql.orc.metadata.Footer)3 DataSize (io.airlift.units.DataSize)2 OrcWriteValidationMode (io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode)2 Stream (io.prestosql.orc.metadata.Stream)2 StripeFooter (io.prestosql.orc.metadata.StripeFooter)2 StripeInformation (io.prestosql.orc.metadata.StripeInformation)2 Page (io.prestosql.spi.Page)2 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Verify.verify (com.google.common.base.Verify.verify)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 UnsignedBytes (com.google.common.primitives.UnsignedBytes)1 Logger (io.airlift.log.Logger)1 Slices (io.airlift.slice.Slices)1 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)1 OrcReader.validateFile (io.prestosql.orc.OrcReader.validateFile)1 OrcWriteValidationBuilder (io.prestosql.orc.OrcWriteValidation.OrcWriteValidationBuilder)1