Search in sources:

Example 1 with OrcWriterOptions

use of com.facebook.presto.orc.OrcWriterOptions in project presto by prestodb.

In class TestWriterBlockRawSize, method testFileMetadataRawSize.

/**
 * Checks the raw-size metadata recorded in the file footer and in each stripe.
 *
 * <p>For every {@link OrcEncoding} the test writes the same single-column page
 * {@code numBlocksPerFile} times, closes and validates the writer, then reads the footer back
 * and verifies:
 * <ul>
 *   <li>the footer raw size equals the per-block raw size times the number of pages written;</li>
 *   <li>the file contains {@code numStripes + 1} stripes;</li>
 *   <li>each stripe's raw data size equals the per-block raw size times the number of pages
 *       expected in that stripe.</li>
 * </ul>
 *
 * @throws IOException propagated from writing, closing, validating or reading the temp file
 */
@Test
public void testFileMetadataRawSize() throws IOException {
    Type type = INTEGER;
    List<Type> types = ImmutableList.of(type);

    // The stripe row limit is five times the row-group row limit; the file gets four full
    // stripes' worth of pages plus one extra page.
    int numBlocksPerRowGroup = 3;
    int numBlocksPerStripe = numBlocksPerRowGroup * 5;
    int numStripes = 4;
    int numBlocksPerFile = numBlocksPerStripe * numStripes + 1;

    // Each iteration appends one null followed by one value, so the block ends up with
    // NUM_ELEMENTS nulls interleaved with NUM_ELEMENTS values.
    BlockBuilder builder = type.createBlockBuilder(null, NUM_ELEMENTS * 2);
    int value = 0;
    while (value < NUM_ELEMENTS) {
        builder.appendNull();
        type.writeLong(builder, value);
        value++;
    }

    // Expected raw size of one block: fixed width per value plus one unit per null
    // (presumably one byte per null entry; confirm against the writer's accounting).
    long blockRawSize = ((FixedWidthType) type).getFixedSize() * NUM_ELEMENTS + NUM_ELEMENTS;

    Block block = builder.build();
    Block[] blocks = new Block[] { block };

    int rowGroupMaxRowCount = block.getPositionCount() * numBlocksPerRowGroup;
    int stripeMaxRowCount = block.getPositionCount() * numBlocksPerStripe;
    OrcWriterOptions writerOptions = OrcWriterOptions.builder()
            .withRowGroupMaxRowCount(rowGroupMaxRowCount)
            .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                    .withStripeMaxRowCount(stripeMaxRowCount)
                    .build())
            .build();

    for (OrcEncoding encoding : OrcEncoding.values()) {
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = createOrcWriter(
                    tempFile.getFile(),
                    encoding,
                    ZSTD,
                    Optional.empty(),
                    types,
                    writerOptions,
                    new OrcWriterStats());

            int pagesWritten = 0;
            while (pagesWritten < numBlocksPerFile) {
                writer.write(new Page(blocks));
                pagesWritten++;
            }
            writer.close();

            // Validation runs after close, against the file that was just written.
            writer.validate(new FileOrcDataSource(
                    tempFile.getFile(),
                    new DataSize(1, MEGABYTE),
                    new DataSize(1, MEGABYTE),
                    new DataSize(1, MEGABYTE),
                    true));

            Footer footer = OrcTester.getFileMetadata(tempFile.getFile(), encoding).getFooter();
            verifyValue(encoding, footer.getRawSize(), blockRawSize * numBlocksPerFile);
            assertEquals(footer.getStripes().size(), numStripes + 1);

            // Walk the stripes in order, taking a full stripe's worth of pages from the
            // remaining count each time; the final stripe gets whatever is left.
            int pagesLeft = numBlocksPerFile;
            for (StripeInformation stripe : footer.getStripes()) {
                int pagesInStripe = Math.min(pagesLeft, numBlocksPerStripe);
                verifyValue(encoding, stripe.getRawDataSize(), blockRawSize * pagesInStripe);
                pagesLeft -= pagesInStripe;
            }
        }
    }
}
Also used : OrcWriterStats(com.facebook.presto.orc.OrcWriterStats) OrcWriter(com.facebook.presto.orc.OrcWriter) OrcTester.createOrcWriter(com.facebook.presto.orc.OrcTester.createOrcWriter) Page(com.facebook.presto.common.Page) OrcEncoding(com.facebook.presto.orc.OrcEncoding) OrcWriterOptions(com.facebook.presto.orc.OrcWriterOptions) TestOrcMapNullKey.createMapType(com.facebook.presto.orc.TestOrcMapNullKey.createMapType) TimestampType(com.facebook.presto.common.type.TimestampType) ArrayType(com.facebook.presto.common.type.ArrayType) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) FixedWidthType(com.facebook.presto.common.type.FixedWidthType) RowType(com.facebook.presto.common.type.RowType) TempFile(com.facebook.presto.orc.TempFile) FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) DataSize(io.airlift.units.DataSize) Footer(com.facebook.presto.orc.metadata.Footer) RowBlock(com.facebook.presto.common.block.RowBlock) Block(com.facebook.presto.common.block.Block) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) FixedWidthType(com.facebook.presto.common.type.FixedWidthType) Test(org.testng.annotations.Test)

Example 2 with OrcWriterOptions

use of com.facebook.presto.orc.OrcWriterOptions in project presto by prestodb.

In class TestOrcFileWriterConfig, method testStreamLayoutOption.

/**
 * Verifies that the stream layout type set on {@code OrcFileWriterConfig} decides which
 * stream layout factory the built {@code OrcWriterOptions} exposes:
 * {@code BY_STREAM_SIZE} yields a {@code StreamSizeLayoutFactory} and
 * {@code BY_COLUMN_SIZE} yields a {@code ColumnSizeLayoutFactory}.
 */
@Test
public void testStreamLayoutOption() {
    OrcFileWriterConfig writerConfig = new OrcFileWriterConfig();

    writerConfig.setStreamLayoutType(BY_STREAM_SIZE);
    OrcWriterOptions streamSizeOptions = writerConfig.toOrcWriterOptionsBuilder().build();
    assertTrue(streamSizeOptions.getStreamLayoutFactory() instanceof StreamSizeLayoutFactory);

    // Reuse the same config object: changing the layout type must be reflected in options
    // built afterwards.
    writerConfig.setStreamLayoutType(BY_COLUMN_SIZE);
    OrcWriterOptions columnSizeOptions = writerConfig.toOrcWriterOptionsBuilder().build();
    assertTrue(columnSizeOptions.getStreamLayoutFactory() instanceof ColumnSizeLayoutFactory);
}
Also used : OrcWriterOptions(com.facebook.presto.orc.OrcWriterOptions) ColumnSizeLayoutFactory(com.facebook.presto.orc.writer.StreamLayoutFactory.ColumnSizeLayoutFactory) StreamSizeLayoutFactory(com.facebook.presto.orc.writer.StreamLayoutFactory.StreamSizeLayoutFactory) Test(org.testng.annotations.Test)

Example 3 with OrcWriterOptions

use of com.facebook.presto.orc.OrcWriterOptions in project presto by prestodb.

In class TestOrcFileWriterConfig, method testOrcWriterOptionsBuilder.

/**
 * Verifies that settings applied to {@code OrcFileWriterConfig} can be read back from the
 * config and are carried into the {@code OrcWriterOptions} built from it.
 *
 * <p>The test also checks that each call to {@code toOrcWriterOptionsBuilder()} returns a
 * distinct builder instance, and that with the DWRF stripe cache disabled the built options
 * expose no stripe cache options even though a cache size and mode were configured.
 *
 * <p>Assertions pass the observed value first and the expected value second, which is the
 * argument order of TestNG's {@code Assert.assertEquals(actual, expected)}, so that failure
 * messages label the two values correctly.
 * NOTE(review): confirm the statically imported {@code assertEquals} is TestNG's.
 */
@Test
public void testOrcWriterOptionsBuilder() {
    DataSize stripeMinSize = new DataSize(10, MEGABYTE);
    DataSize stripeMaxSize = new DataSize(50, MEGABYTE);
    int stripeMaxRowCount = 1_000_000;
    int rowGroupMaxRowCount = 15_000;
    DataSize dictionaryMaxMemory = new DataSize(20, MEGABYTE);
    DataSize stringStatisticsLimit = new DataSize(32, BYTE);
    DataSize maxCompressionBufferSize = new DataSize(512, KILOBYTE);
    StreamLayoutType streamLayoutType = BY_STREAM_SIZE;
    DataSize dwrfStripeCacheMaxSize = new DataSize(4, MEGABYTE);
    DwrfStripeCacheMode dwrfStripeCacheMode = INDEX;

    OrcFileWriterConfig config = new OrcFileWriterConfig()
            .setStripeMinSize(stripeMinSize)
            .setStripeMaxSize(stripeMaxSize)
            .setStripeMaxRowCount(stripeMaxRowCount)
            .setRowGroupMaxRowCount(rowGroupMaxRowCount)
            .setDictionaryMaxMemory(dictionaryMaxMemory)
            .setStringStatisticsLimit(stringStatisticsLimit)
            .setMaxCompressionBufferSize(maxCompressionBufferSize)
            .setStreamLayoutType(streamLayoutType)
            .setDwrfStripeCacheEnabled(false)
            .setDwrfStripeCacheMaxSize(dwrfStripeCacheMaxSize)
            .setDwrfStripeCacheMode(dwrfStripeCacheMode);

    // The config must return exactly what was set.
    assertEquals(config.getStripeMinSize(), stripeMinSize);
    assertEquals(config.getStripeMaxSize(), stripeMaxSize);
    assertEquals(config.getStripeMaxRowCount(), stripeMaxRowCount);
    assertEquals(config.getRowGroupMaxRowCount(), rowGroupMaxRowCount);
    assertEquals(config.getDictionaryMaxMemory(), dictionaryMaxMemory);
    assertEquals(config.getStringStatisticsLimit(), stringStatisticsLimit);
    assertEquals(config.getMaxCompressionBufferSize(), maxCompressionBufferSize);
    assertEquals(config.getStreamLayoutType(), streamLayoutType);
    assertFalse(config.isDwrfStripeCacheEnabled());
    assertEquals(config.getDwrfStripeCacheMaxSize(), dwrfStripeCacheMaxSize);
    assertEquals(config.getDwrfStripeCacheMode(), dwrfStripeCacheMode);

    // Every call must hand out a fresh builder rather than a shared instance.
    assertNotSame(config.toOrcWriterOptionsBuilder(), config.toOrcWriterOptionsBuilder());

    // The built options must reflect the configured values.
    OrcWriterOptions options = config.toOrcWriterOptionsBuilder().build();
    assertEquals(options.getFlushPolicy().getStripeMinBytes(), toIntExact(stripeMinSize.toBytes()));
    assertEquals(options.getFlushPolicy().getStripeMaxBytes(), toIntExact(stripeMaxSize.toBytes()));
    assertEquals(options.getFlushPolicy().getStripeMaxRowCount(), stripeMaxRowCount);
    assertEquals(options.getRowGroupMaxRowCount(), rowGroupMaxRowCount);
    assertEquals(options.getDictionaryMaxMemory(), dictionaryMaxMemory);
    assertEquals(options.getMaxStringStatisticsLimit(), stringStatisticsLimit);
    assertEquals(options.getMaxCompressionBufferSize(), maxCompressionBufferSize);
    assertTrue(options.getStreamLayoutFactory() instanceof StreamSizeLayoutFactory);

    // The stripe cache was disabled above, so no cache options are expected.
    assertEquals(options.getDwrfStripeCacheOptions(), Optional.empty());
}
Also used : OrcWriterOptions(com.facebook.presto.orc.OrcWriterOptions) DwrfStripeCacheMode(com.facebook.presto.orc.metadata.DwrfStripeCacheMode) DataSize(io.airlift.units.DataSize) StreamSizeLayoutFactory(com.facebook.presto.orc.writer.StreamLayoutFactory.StreamSizeLayoutFactory) StreamLayoutType(com.facebook.presto.hive.OrcFileWriterConfig.StreamLayoutType) Test(org.testng.annotations.Test)

Aggregations

OrcWriterOptions (com.facebook.presto.orc.OrcWriterOptions)3 Test (org.testng.annotations.Test)3 StreamSizeLayoutFactory (com.facebook.presto.orc.writer.StreamLayoutFactory.StreamSizeLayoutFactory)2 DataSize (io.airlift.units.DataSize)2 Page (com.facebook.presto.common.Page)1 Block (com.facebook.presto.common.block.Block)1 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)1 RowBlock (com.facebook.presto.common.block.RowBlock)1 ArrayType (com.facebook.presto.common.type.ArrayType)1 FixedWidthType (com.facebook.presto.common.type.FixedWidthType)1 RowType (com.facebook.presto.common.type.RowType)1 TimestampType (com.facebook.presto.common.type.TimestampType)1 Type (com.facebook.presto.common.type.Type)1 StreamLayoutType (com.facebook.presto.hive.OrcFileWriterConfig.StreamLayoutType)1 FileOrcDataSource (com.facebook.presto.orc.FileOrcDataSource)1 OrcEncoding (com.facebook.presto.orc.OrcEncoding)1 OrcTester.createOrcWriter (com.facebook.presto.orc.OrcTester.createOrcWriter)1 OrcWriter (com.facebook.presto.orc.OrcWriter)1 OrcWriterStats (com.facebook.presto.orc.OrcWriterStats)1 TempFile (com.facebook.presto.orc.TempFile)1