Use of io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR in project trino by trinodb.
Class IcebergFileWriterFactory, method createOrcWriter:
private IcebergFileWriter createOrcWriter(
        MetricsConfig metricsConfig,
        Path outputPath,
        Schema icebergSchema,
        JobConf jobConf,
        ConnectorSession session)
{
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
        OrcDataSink orcDataSink = hdfsEnvironment.doAs(session.getIdentity(), () -> new OutputStreamOrcDataSink(fileSystem.create(outputPath)));
        Callable<Void> rollbackAction = () -> {
            hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.delete(outputPath, false));
            return null;
        };

        List<Types.NestedField> columnFields = icebergSchema.columns();
        List<String> fileColumnNames = columnFields.stream()
                .map(Types.NestedField::name)
                .collect(toImmutableList());
        List<Type> fileColumnTypes = columnFields.stream()
                .map(Types.NestedField::type)
                .map(type -> toTrinoType(type, typeManager))
                .collect(toImmutableList());

        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(
                            new OrcDataSourceId(outputPath.toString()),
                            hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.getFileStatus(outputPath).getLen()),
                            new OrcReaderOptions(),
                            hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.open(outputPath)),
                            readStats);
                }
                catch (IOException e) {
                    throw new TrinoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        return new IcebergOrcFileWriter(
                metricsConfig,
                icebergSchema,
                orcDataSink,
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                toOrcType(icebergSchema),
                getCompressionCodec(session).getOrcCompressionKind(),
                orcWriterOptions
                        .withStripeMinSize(getOrcWriterMinStripeSize(session))
                        .withStripeMaxSize(getOrcWriterMaxStripeSize(session))
                        .withStripeMaxRowCount(getOrcWriterMaxStripeRows(session))
                        .withDictionaryMaxMemory(getOrcWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)),
                IntStream.range(0, fileColumnNames.size()).toArray(),
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .buildOrThrow(),
                validationInputFactory,
                getOrcWriterValidateMode(session),
                orcWriterStats);
    }
    catch (IOException e) {
        throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
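The pattern here is the one this page documents: any IOException raised while obtaining the FileSystem or opening the output stream is wrapped in a TrinoException carrying ICEBERG_WRITER_OPEN_ERROR, and a rollbackAction that deletes the half-written file is captured alongside the data sink so the caller can clean up if the write fails later. Below is a minimal, dependency-free sketch of that open-and-register-rollback shape; WriterOpenException, SketchWriter, and openWriter are hypothetical names used only for illustration, not Trino APIs.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.Callable;

// Hypothetical stand-in for TrinoException + ICEBERG_WRITER_OPEN_ERROR (illustration only).
class WriterOpenException extends RuntimeException {
    WriterOpenException(String message, Throwable cause) {
        super(message, cause);
    }
}

public class WriterOpenSketch {
    // Pairs the opened sink with a rollback action that removes the partial file,
    // mirroring how createOrcWriter hands both to the file writer.
    record SketchWriter(OutputStream sink, Callable<Void> rollbackAction) {}

    static SketchWriter openWriter(Path outputPath) {
        try {
            OutputStream sink = Files.newOutputStream(outputPath);
            Callable<Void> rollbackAction = () -> {
                Files.deleteIfExists(outputPath);   // analogous to fileSystem.delete(outputPath, false)
                return null;
            };
            return new SketchWriter(sink, rollbackAction);
        }
        catch (IOException e) {
            // Same shape as: throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e)
            throw new WriterOpenException("Error creating ORC file", e);
        }
    }

    public static void main(String[] args) throws Exception {
        SketchWriter writer = openWriter(Path.of("example.orc"));
        try (OutputStream sink = writer.sink()) {
            sink.write(new byte[] {1, 2, 3});
        }
        catch (IOException e) {
            writer.rollbackAction().call();   // delete the partial file on failure
            throw e;
        }
    }
}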
Use of io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR in project trino by trinodb.
Class IcebergFileWriterFactory, method createParquetWriter:
private IcebergFileWriter createParquetWriter(
        Path outputPath,
        Schema icebergSchema,
        JobConf jobConf,
        ConnectorSession session,
        HdfsContext hdfsContext)
{
    List<String> fileColumnNames = icebergSchema.columns().stream()
            .map(Types.NestedField::name)
            .collect(toImmutableList());
    List<Type> fileColumnTypes = icebergSchema.columns().stream()
            .map(column -> toTrinoType(column.type(), typeManager))
            .collect(toImmutableList());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(outputPath, false);
            return null;
        };
        ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
                .setMaxPageSize(getParquetWriterPageSize(session))
                .setMaxBlockSize(getParquetWriterBlockSize(session))
                .setBatchSize(getParquetWriterBatchSize(session))
                .build();
        return new IcebergParquetFileWriter(
                hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.create(outputPath)),
                rollbackAction,
                fileColumnTypes,
                convert(icebergSchema, "table"),
                makeTypeMap(fileColumnTypes, fileColumnNames),
                parquetWriterOptions,
                IntStream.range(0, fileColumnNames.size()).toArray(),
                getCompressionCodec(session).getParquetCompressionCodec(),
                nodeVersion.toString(),
                outputPath,
                hdfsEnvironment,
                hdfsContext);
    }
    catch (IOException e) {
        throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
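Both createOrcWriter and createParquetWriter are private; this page does not show the caller, but a factory of this shape typically exposes a single entry point that selects the writer by the table's file format and lets each branch raise ICEBERG_WRITER_OPEN_ERROR as shown above. A self-contained sketch of that dispatch, assuming a simplified FileFormat enum and hypothetical WriterSketchFactory/createDataFileWriter names rather than the actual Trino signatures:

import java.nio.file.Path;

// Hypothetical, simplified dispatch sketch; not the real IcebergFileWriterFactory API.
public class WriterSketchFactory {
    enum FileFormat { ORC, PARQUET }

    interface FileWriter { /* write/commit/rollback omitted for brevity */ }

    FileWriter createDataFileWriter(FileFormat format, Path outputPath) {
        // Each branch opens the output stream and wraps any IOException
        // in an exception carrying a writer-open error code, as in the two methods above.
        return switch (format) {
            case ORC -> createOrcWriter(outputPath);
            case PARQUET -> createParquetWriter(outputPath);
        };
    }

    private FileWriter createOrcWriter(Path outputPath) {
        return new FileWriter() {};   // placeholder for the ORC path shown earlier
    }

    private FileWriter createParquetWriter(Path outputPath) {
        return new FileWriter() {};   // placeholder for the Parquet path shown earlier
    }
}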