Use of com.facebook.presto.orc.OrcWriterOptions in the presto project by prestodb.
From the class TestWriterBlockRawSize, method testFileMetadataRawSize.
/**
 * Writes the same single-column page many times with every {@link OrcEncoding} and checks the
 * raw sizes recorded in the file footer and in each stripe against the expected totals.
 *
 * <p>The writer is configured so that a row group holds 3 blocks and a stripe holds 15 blocks.
 * The file receives 4 stripes' worth of blocks plus one extra block, so one more stripe than
 * {@code fullStripes} is expected.
 *
 * @throws IOException declared for the temp file / writer operations used by the test
 */
@Test
public void testFileMetadataRawSize() throws IOException {
    Type type = INTEGER;
    List<Type> types = ImmutableList.of(type);

    int blocksPerRowGroup = 3;
    int blocksPerStripe = blocksPerRowGroup * 5;
    int fullStripes = 4;
    // One block more than fits in the full stripes, forcing an additional trailing stripe.
    int blocksPerFile = blocksPerStripe * fullStripes + 1;

    // Alternate a null and a value NUM_ELEMENTS times: 2 * NUM_ELEMENTS positions in total.
    BlockBuilder builder = type.createBlockBuilder(null, NUM_ELEMENTS * 2);
    int written = 0;
    while (written < NUM_ELEMENTS) {
        builder.appendNull();
        type.writeLong(builder, written);
        written++;
    }
    Block block = builder.build();
    Block[] pageBlocks = new Block[] { block };

    // Expected raw size of one block: the fixed width for each of the NUM_ELEMENTS values plus
    // NUM_ELEMENTS more (presumably one unit per null position -- confirm against the writer).
    int fixedSize = ((FixedWidthType) type).getFixedSize();
    long blockRawSize = fixedSize * NUM_ELEMENTS + NUM_ELEMENTS;

    int rowsPerBlock = block.getPositionCount();
    OrcWriterOptions writerOptions = OrcWriterOptions.builder()
            .withRowGroupMaxRowCount(rowsPerBlock * blocksPerRowGroup)
            .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                    .withStripeMaxRowCount(rowsPerBlock * blocksPerStripe)
                    .build())
            .build();

    long expectedFileRawSize = blockRawSize * blocksPerFile;
    int expectedStripeCount = fullStripes + 1;

    for (OrcEncoding encoding : OrcEncoding.values()) {
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = createOrcWriter(
                    tempFile.getFile(),
                    encoding,
                    ZSTD,
                    Optional.empty(),
                    types,
                    writerOptions,
                    new OrcWriterStats());

            // Each write gets its own Page wrapping the same block array.
            for (int pending = blocksPerFile; pending > 0; pending--) {
                writer.write(new Page(pageBlocks));
            }
            writer.close();

            DataSize oneMegabyte = new DataSize(1, MEGABYTE);
            writer.validate(new FileOrcDataSource(tempFile.getFile(), oneMegabyte, oneMegabyte, oneMegabyte, true));

            Footer footer = OrcTester.getFileMetadata(tempFile.getFile(), encoding).getFooter();
            verifyValue(encoding, footer.getRawSize(), expectedFileRawSize);
            assertEquals(footer.getStripes().size(), expectedStripeCount);

            // Walk the stripes in order; each takes a full stripe's worth of blocks until
            // fewer than that remain.
            int unaccountedBlocks = blocksPerFile;
            for (StripeInformation stripe : footer.getStripes()) {
                int blocksInStripe = unaccountedBlocks < blocksPerStripe ? unaccountedBlocks : blocksPerStripe;
                verifyValue(encoding, stripe.getRawDataSize(), blockRawSize * blocksInStripe);
                unaccountedBlocks -= blocksInStripe;
            }
        }
    }
}
Use of com.facebook.presto.orc.OrcWriterOptions in the presto project by prestodb.
From the class TestOrcFileWriterConfig, method testStreamLayoutOption.
/**
 * Checks that the stream layout type set on {@link OrcFileWriterConfig} decides which stream
 * layout factory the built {@link OrcWriterOptions} exposes.
 */
@Test
public void testStreamLayoutOption() {
    OrcFileWriterConfig writerConfig = new OrcFileWriterConfig();

    // BY_STREAM_SIZE is expected to yield a StreamSizeLayoutFactory.
    writerConfig.setStreamLayoutType(BY_STREAM_SIZE);
    OrcWriterOptions streamSizeOptions = writerConfig.toOrcWriterOptionsBuilder().build();
    boolean usesStreamSizeLayout = streamSizeOptions.getStreamLayoutFactory() instanceof StreamSizeLayoutFactory;
    assertTrue(usesStreamSizeLayout);

    // Switching the same config to BY_COLUMN_SIZE is expected to yield a ColumnSizeLayoutFactory.
    writerConfig.setStreamLayoutType(BY_COLUMN_SIZE);
    OrcWriterOptions columnSizeOptions = writerConfig.toOrcWriterOptionsBuilder().build();
    boolean usesColumnSizeLayout = columnSizeOptions.getStreamLayoutFactory() instanceof ColumnSizeLayoutFactory;
    assertTrue(usesColumnSizeLayout);
}
Use of com.facebook.presto.orc.OrcWriterOptions in the presto project by prestodb.
From the class TestOrcFileWriterConfig, method testOrcWriterOptionsBuilder.
/**
 * Sets every option on an {@link OrcFileWriterConfig}, checks that the getters return the same
 * values, and then checks that the {@link OrcWriterOptions} built from the config carry them.
 *
 * <p>The DWRF stripe cache is disabled in the config, and the built options are expected to
 * report no stripe cache options.
 */
@Test
public void testOrcWriterOptionsBuilder() {
    DataSize minStripe = new DataSize(10, MEGABYTE);
    DataSize maxStripe = new DataSize(50, MEGABYTE);
    int maxStripeRows = 1_000_000;
    int maxRowGroupRows = 15_000;
    DataSize dictionaryMemory = new DataSize(20, MEGABYTE);
    DataSize stringStatsLimit = new DataSize(32, BYTE);
    DataSize compressionBuffer = new DataSize(512, KILOBYTE);
    StreamLayoutType layoutType = BY_STREAM_SIZE;
    DataSize stripeCacheMaxSize = new DataSize(4, MEGABYTE);
    DwrfStripeCacheMode stripeCacheMode = INDEX;

    OrcFileWriterConfig config = new OrcFileWriterConfig()
            .setStripeMinSize(minStripe)
            .setStripeMaxSize(maxStripe)
            .setStripeMaxRowCount(maxStripeRows)
            .setRowGroupMaxRowCount(maxRowGroupRows)
            .setDictionaryMaxMemory(dictionaryMemory)
            .setStringStatisticsLimit(stringStatsLimit)
            .setMaxCompressionBufferSize(compressionBuffer)
            .setStreamLayoutType(layoutType)
            .setDwrfStripeCacheEnabled(false)
            .setDwrfStripeCacheMaxSize(stripeCacheMaxSize)
            .setDwrfStripeCacheMode(stripeCacheMode);

    // The config getters must return exactly what was set.
    assertEquals(minStripe, config.getStripeMinSize());
    assertEquals(maxStripe, config.getStripeMaxSize());
    assertEquals(maxStripeRows, config.getStripeMaxRowCount());
    assertEquals(maxRowGroupRows, config.getRowGroupMaxRowCount());
    assertEquals(dictionaryMemory, config.getDictionaryMaxMemory());
    assertEquals(stringStatsLimit, config.getStringStatisticsLimit());
    assertEquals(compressionBuffer, config.getMaxCompressionBufferSize());
    assertEquals(layoutType, config.getStreamLayoutType());
    assertFalse(config.isDwrfStripeCacheEnabled());
    assertEquals(stripeCacheMaxSize, config.getDwrfStripeCacheMaxSize());
    assertEquals(stripeCacheMode, config.getDwrfStripeCacheMode());

    // Two calls must not return the same builder instance.
    assertNotSame(config.toOrcWriterOptionsBuilder(), config.toOrcWriterOptionsBuilder());

    // The built options must reflect the configured values; stripe sizes are compared in bytes.
    OrcWriterOptions options = config.toOrcWriterOptionsBuilder().build();
    int minStripeBytes = toIntExact(minStripe.toBytes());
    int maxStripeBytes = toIntExact(maxStripe.toBytes());
    assertEquals(minStripeBytes, options.getFlushPolicy().getStripeMinBytes());
    assertEquals(maxStripeBytes, options.getFlushPolicy().getStripeMaxBytes());
    assertEquals(maxStripeRows, options.getFlushPolicy().getStripeMaxRowCount());
    assertEquals(maxRowGroupRows, options.getRowGroupMaxRowCount());
    assertEquals(dictionaryMemory, options.getDictionaryMaxMemory());
    assertEquals(stringStatsLimit, options.getMaxStringStatisticsLimit());
    assertEquals(compressionBuffer, options.getMaxCompressionBufferSize());
    assertTrue(options.getStreamLayoutFactory() instanceof StreamSizeLayoutFactory);
    // The stripe cache was disabled above, so no stripe cache options are expected.
    assertEquals(Optional.empty(), options.getDwrfStripeCacheOptions());
}
Aggregations