Use of com.facebook.presto.orc.OrcWriterStats in project presto by prestodb.
Example 1: class TestWriterBlockRawSize, method testFileMetadataRawSize.
@Test
public void testFileMetadataRawSize()
        throws IOException
{
    Type type = INTEGER;
    List<Type> types = ImmutableList.of(type);
    int numBlocksPerRowGroup = 3;
    int numBlocksPerStripe = numBlocksPerRowGroup * 5;
    int numStripes = 4;
    int numBlocksPerFile = numBlocksPerStripe * numStripes + 1;

    // Build one block that alternates a null with a non-null integer, NUM_ELEMENTS of each
    BlockBuilder blockBuilder = type.createBlockBuilder(null, NUM_ELEMENTS * 2);
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        blockBuilder.appendNull();
        type.writeLong(blockBuilder, i);
    }
    // Expected raw size per block: getFixedSize() bytes per non-null value plus 1 byte per null position
    long blockRawSize = ((FixedWidthType) type).getFixedSize() * NUM_ELEMENTS + NUM_ELEMENTS;
    Block block = blockBuilder.build();
    Block[] blocks = new Block[] {block};

    OrcWriterOptions writerOptions = OrcWriterOptions.builder()
            .withRowGroupMaxRowCount(block.getPositionCount() * numBlocksPerRowGroup)
            .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                    .withStripeMaxRowCount(block.getPositionCount() * numBlocksPerStripe)
                    .build())
            .build();

    for (OrcEncoding encoding : OrcEncoding.values()) {
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = createOrcWriter(tempFile.getFile(), encoding, ZSTD, Optional.empty(), types, writerOptions, new OrcWriterStats());
            for (int i = 0; i < numBlocksPerFile; i++) {
                writer.write(new Page(blocks));
            }
            writer.close();
            writer.validate(new FileOrcDataSource(tempFile.getFile(), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true));

            Footer footer = OrcTester.getFileMetadata(tempFile.getFile(), encoding).getFooter();
            verifyValue(encoding, footer.getRawSize(), blockRawSize * numBlocksPerFile);

            // The single leftover block spills into one extra stripe beyond the numStripes full ones
            assertEquals(footer.getStripes().size(), numStripes + 1);
            int numBlocksRemaining = numBlocksPerFile;
            for (StripeInformation stripeInfo : footer.getStripes()) {
                int numBlocksInStripe = Math.min(numBlocksRemaining, numBlocksPerStripe);
                verifyValue(encoding, stripeInfo.getRawDataSize(), blockRawSize * numBlocksInStripe);
                numBlocksRemaining -= numBlocksInStripe;
            }
        }
    }
}
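As a quick cross-check of what the test asserts, the raw-size arithmetic can be reproduced on its own. The sketch below is not part of the Presto test: the NUM_ELEMENTS value and the class name RawSizeMath are assumptions made for illustration, while the 4-byte element size mirrors what Presto's INTEGER type reports from getFixedSize(), with each null position counted as one extra byte per the formula in the test.

// Standalone sketch of the test's raw-size arithmetic (not Presto code).
// NUM_ELEMENTS below is an assumed value; the real constant is defined in the test class.
public class RawSizeMath
{
    public static void main(String[] args)
    {
        int numElements = 10_000;                // assumed NUM_ELEMENTS
        int fixedSize = 4;                       // INTEGER is a 4-byte fixed-width type
        int numBlocksPerStripe = 3 * 5;          // numBlocksPerRowGroup * 5, as in the test
        int numStripes = 4;
        int numBlocksPerFile = numBlocksPerStripe * numStripes + 1;

        // One block holds numElements non-null values plus numElements nulls;
        // raw size counts fixedSize bytes per value and 1 byte per null.
        long blockRawSize = (long) fixedSize * numElements + numElements;

        System.out.println("per block:   " + blockRawSize);
        System.out.println("full stripe: " + blockRawSize * numBlocksPerStripe);
        System.out.println("whole file:  " + blockRawSize * numBlocksPerFile);
        // The one leftover block forms a final short stripe, which matches the
        // assertEquals(footer.getStripes().size(), numStripes + 1) check above.
    }
}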
Use of com.facebook.presto.orc.OrcWriterStats in project presto by prestodb.
Example 2: class IcebergFileWriterFactory, method createOrcWriter.
private IcebergFileWriter createOrcWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session)
{
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
        DataSink orcDataSink = hdfsEnvironment.doAs(session.getUser(), () -> new OutputStreamDataSink(fileSystem.create(outputPath)));
        // Rollback deletes the partially written file if the write is later aborted
        Callable<Void> rollbackAction = () -> {
            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.delete(outputPath, false));
            return null;
        };
        List<Types.NestedField> columnFields = icebergSchema.columns();
        List<String> fileColumnNames = columnFields.stream().map(Types.NestedField::name).collect(toImmutableList());
        List<Type> fileColumnTypes = columnFields.stream().map(Types.NestedField::type).map(type -> toPrestoType(type, typeManager)).collect(toImmutableList());
        // When validation is enabled, the writer re-reads the finished file through this data source
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(outputPath.toString()), hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.getFileStatus(outputPath).getLen()), getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false, hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(outputPath)), readStats);
                }
                catch (IOException e) {
                    throw new PrestoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        return new IcebergOrcFileWriter(
                icebergSchema, orcDataSink, rollbackAction, ORC,
                fileColumnNames, fileColumnTypes, toOrcType(icebergSchema),
                getCompressionCodec(session).getOrcCompressionKind(),
                orcFileWriterConfig.toOrcWriterOptionsBuilder()
                        .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                                .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                                .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                                .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                                .build())
                        .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session))
                        .build(),
                IntStream.range(0, fileColumnNames.size()).toArray(),
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .build(),
                UTC, validationInputFactory, getOrcOptimizedWriterValidateMode(session),
                orcWriterStats, dwrfEncryptionProvider, Optional.empty());
    }
    catch (IOException e) {
        throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
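The rollbackAction above is what keeps a failed write from leaving a partial file behind: the factory hands the Callable to IcebergOrcFileWriter, which can invoke it when its rollback path runs instead of committing the file. Below is a minimal local-filesystem sketch of the same pattern, not Presto code; LocalRollbackDemo and the temporary output file are hypothetical names chosen for illustration.

// Minimal sketch of the rollback-action pattern, using java.nio instead of HDFS.
// LocalRollbackDemo and the output path are hypothetical, for illustration only.
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.Callable;

public class LocalRollbackDemo
{
    public static void main(String[] args) throws Exception
    {
        Path outputPath = Files.createTempFile("demo", ".orc");
        OutputStream sink = Files.newOutputStream(outputPath);

        // Same shape as the factory's rollbackAction: close the sink and delete
        // whatever was partially written.
        Callable<Void> rollbackAction = () -> {
            sink.close();
            Files.deleteIfExists(outputPath);
            return null;
        };

        try {
            sink.write(new byte[] {1, 2, 3});
            throw new IOException("simulated write failure");
        }
        catch (IOException e) {
            rollbackAction.call();  // abort: remove the partial file instead of committing it
        }

        System.out.println("file still exists: " + Files.exists(outputPath));  // prints false
    }
}

The doAs wrappers in the real factory serve a separate purpose from rollback: they run each HDFS call under the session user's identity rather than the server's, which is why both the data sink creation and the delete are wrapped.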