Use of io.trino.orc.OutputStreamOrcDataSink in project trino by trinodb.
The following example is taken from the class IcebergFileWriterFactory, method createOrcWriter:
private IcebergFileWriter createOrcWriter(MetricsConfig metricsConfig, Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session)
{
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
        // Open the target file as the session user and wrap the resulting output stream in an OutputStreamOrcDataSink
        OrcDataSink orcDataSink = hdfsEnvironment.doAs(session.getIdentity(), () -> new OutputStreamOrcDataSink(fileSystem.create(outputPath)));
        // On rollback, delete the partially written file
        Callable<Void> rollbackAction = () -> {
            hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.delete(outputPath, false));
            return null;
        };

        List<Types.NestedField> columnFields = icebergSchema.columns();
        List<String> fileColumnNames = columnFields.stream()
                .map(Types.NestedField::name)
                .collect(toImmutableList());
        List<Type> fileColumnTypes = columnFields.stream()
                .map(Types.NestedField::type)
                .map(type -> toTrinoType(type, typeManager))
                .collect(toImmutableList());

        // When ORC write validation is enabled, the written file is read back through an HdfsOrcDataSource
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(
                            new OrcDataSourceId(outputPath.toString()),
                            hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.getFileStatus(outputPath).getLen()),
                            new OrcReaderOptions(),
                            hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.open(outputPath)),
                            readStats);
                }
                catch (IOException e) {
                    throw new TrinoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        return new IcebergOrcFileWriter(
                metricsConfig,
                icebergSchema,
                orcDataSink,
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                toOrcType(icebergSchema),
                getCompressionCodec(session).getOrcCompressionKind(),
                orcWriterOptions
                        .withStripeMinSize(getOrcWriterMinStripeSize(session))
                        .withStripeMaxSize(getOrcWriterMaxStripeSize(session))
                        .withStripeMaxRowCount(getOrcWriterMaxStripeRows(session))
                        .withDictionaryMaxMemory(getOrcWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)),
                IntStream.range(0, fileColumnNames.size()).toArray(),
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .buildOrThrow(),
                validationInputFactory,
                getOrcWriterValidateMode(session),
                orcWriterStats);
    }
    catch (IOException e) {
        throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
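In this snippet, OutputStreamOrcDataSink is constructed directly over the OutputStream returned by fileSystem.create(outputPath), so the sink itself is storage-agnostic. The sketch below shows the same pattern against a plain local stream; the helper class and method name are hypothetical, and it assumes the single-argument OutputStreamOrcDataSink(OutputStream) constructor used in the Trino version above (newer Trino releases may expose a different factory method).

import io.trino.orc.OrcDataSink;
import io.trino.orc.OutputStreamOrcDataSink;

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

public final class LocalOrcDataSinkExample
{
    private LocalOrcDataSinkExample() {}

    // Hypothetical helper: wrap a local file in an OrcDataSink, mirroring the
    // fileSystem.create(outputPath) call in createOrcWriter above.
    public static OrcDataSink createLocalSink(String path)
            throws IOException
    {
        return new OutputStreamOrcDataSink(new BufferedOutputStream(new FileOutputStream(path)));
    }
}

The resulting OrcDataSink would then be handed to an ORC writer (here, IcebergOrcFileWriter), which writes the serialized stripes through the sink and closes it, along with the underlying stream, when the writer itself is closed.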