Search in sources :

Example 1 with OrcWriteValidationMode

Use of io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode in project hetu-core by openlookeng.

From the class TestOrcWriter, method testWriteOutputStreamsInOrder.

/**
 * Writes one page of five VARCHAR columns with an {@code OrcWriter} for every
 * {@link OrcWriteValidationMode}, then re-reads each stripe footer and checks that
 * all index streams are listed before any other stream and that the remaining
 * streams appear in non-decreasing order of length.
 *
 * @throws IOException if writing or re-reading the temporary ORC file fails
 */
@Test
public void testWriteOutputStreamsInOrder() throws IOException {
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        // NOTE(review): assumes TempFile is Closeable (as in the upstream ORC test utilities) so that
        // each iteration cleans up its temporary file instead of leaking it -- confirm.
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = new OrcWriter(new OutputStreamOrcDataSink(new FileOutputStream(tempFile.getFile())), ImmutableList.of("test1", "test2", "test3", "test4", "test5"), ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR), NONE, new OrcWriterOptions().withStripeMinSize(new DataSize(0, MEGABYTE)).withStripeMaxSize(new DataSize(32, MEGABYTE)).withStripeMaxRowCount(ORC_STRIPE_SIZE).withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE).withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), false, ImmutableMap.of(), true, validationMode, new OrcWriterStats(), Optional.empty(), Optional.empty());
            // write down some data with unsorted streams: the seed values have different
            // lengths (1, 5, 3, 2, 4 bytes), one seed per column
            String[] data = new String[] { "a", "bbbbb", "ccc", "dd", "eeee" };
            Block[] blocks = new Block[data.length];
            int entries = 65536;
            BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
            for (int i = 0; i < data.length; i++) {
                // explicit charset: do not depend on the platform default encoding
                byte[] bytes = data[i].getBytes(java.nio.charset.StandardCharsets.UTF_8);
                for (int j = 0; j < entries; j++) {
                    // force to write different data by cycling the first byte through 0..127
                    bytes[0] = (byte) ((bytes[0] + 1) % 128);
                    blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                    blockBuilder.closeEntry();
                }
                blocks[i] = blockBuilder.build();
                // start a fresh builder for the next column
                blockBuilder = blockBuilder.newBlockBuilderLike(null);
            }
            writer.write(new Page(blocks));
            writer.close();
            // read the footer and verify the streams are ordered by size
            DataSize dataSize = new DataSize(1, MEGABYTE);
            // NOTE(review): assumes OrcDataSource is Closeable; it is closed here before the
            // enclosing try releases the temporary file -- confirm.
            try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), dataSize, dataSize, dataSize, true, tempFile.getFile().lastModified())) {
                Footer footer = new OrcReader(orcDataSource, dataSize, dataSize, dataSize).getFooter();
                for (StripeInformation stripe : footer.getStripes()) {
                    // read the stripe footer, located right after the stripe's index and data sections
                    Slice tailBuffer = orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), toIntExact(stripe.getFooterLength()));
                    try (InputStream inputStream = new OrcInputStream(OrcChunkLoader.create(orcDataSource.getId(), tailBuffer, Optional.empty(), newSimpleAggregatedMemoryContext()))) {
                        StripeFooter stripeFooter = new OrcMetadataReader().readStripeFooter(footer.getTypes(), inputStream, ZoneId.of("UTC"));
                        int size = 0;
                        boolean dataStreamStarted = false;
                        for (Stream stream : stripeFooter.getStreams()) {
                            if (isIndexStream(stream)) {
                                // an index stream must never follow a non-index stream
                                assertFalse(dataStreamStarted);
                                continue;
                            }
                            dataStreamStarted = true;
                            // verify sizes in order: each stream is at least as long as the previous one
                            assertGreaterThanOrEqual(stream.getLength(), size);
                            size = stream.getLength();
                        }
                    }
                }
            }
        }
    }
}
Also used : Page(io.prestosql.spi.Page) DataSize(io.airlift.units.DataSize) OrcWriteValidationMode(io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode) StripeReader.isIndexStream(io.prestosql.orc.StripeReader.isIndexStream) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) FileOutputStream(java.io.FileOutputStream) Stream(io.prestosql.orc.metadata.Stream) InputStream(java.io.InputStream) BlockBuilder(io.prestosql.spi.block.BlockBuilder) OrcMetadataReader(io.prestosql.orc.metadata.OrcMetadataReader) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Slice(io.airlift.slice.Slice) Footer(io.prestosql.orc.metadata.Footer) Block(io.prestosql.spi.block.Block) StripeInformation(io.prestosql.spi.block.StripeInformation) Test(org.testng.annotations.Test)

Aggregations

Slice (io.airlift.slice.Slice): 1, DataSize (io.airlift.units.DataSize): 1, OrcWriteValidationMode (io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode): 1, StripeReader.isIndexStream (io.prestosql.orc.StripeReader.isIndexStream): 1, Footer (io.prestosql.orc.metadata.Footer): 1, OrcMetadataReader (io.prestosql.orc.metadata.OrcMetadataReader): 1, Stream (io.prestosql.orc.metadata.Stream): 1, StripeFooter (io.prestosql.orc.metadata.StripeFooter): 1, StripeInformation (io.prestosql.orc.metadata.StripeInformation): 1, OrcInputStream (io.prestosql.orc.stream.OrcInputStream): 1, Page (io.prestosql.spi.Page): 1, Block (io.prestosql.spi.block.Block): 1, BlockBuilder (io.prestosql.spi.block.BlockBuilder): 1, FileOutputStream (java.io.FileOutputStream): 1, InputStream (java.io.InputStream): 1, Test (org.testng.annotations.Test): 1