Use of io.trino.parquet.writer.ParquetWriterOptions in project trino by trinodb.
The class ParquetFileWriterFactory, method createFileWriter.
@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf,
        ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind)
{
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .setBatchSize(HiveSessionProperties.getParquetBatchSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompression(conf);
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session)))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(fileColumnTypes, fileColumnNames);
        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path, false),
                rollbackAction,
                fileColumnTypes,
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName,
                nodeVersion.toString()));
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
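For comparison, ParquetWriterOptions can also be built with explicit sizes when there is no ConnectorSession to read them from. The following is a minimal, self-contained sketch (not taken from the Trino sources); the class name and the concrete DataSize values are illustrative only, while the builder setters are the same ones used above.

import io.airlift.units.DataSize;
import io.trino.parquet.writer.ParquetWriterOptions;

import static io.airlift.units.DataSize.Unit.MEGABYTE;

public class ParquetWriterOptionsSketch
{
    public static void main(String[] args)
    {
        // Illustrative sizes only; Trino resolves these from session properties in the factories shown on this page.
        ParquetWriterOptions options = ParquetWriterOptions.builder()
                .setMaxBlockSize(DataSize.of(128, MEGABYTE)) // max row-group ("block") size
                .setMaxPageSize(DataSize.of(1, MEGABYTE))    // max page size within a row group
                .setBatchSize(10_000)                        // rows buffered per write batch
                .build();
        // The resulting options object is what the factories above pass to the Parquet file writer.
        System.out.println(options.getClass().getSimpleName());
    }
}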
Use of io.trino.parquet.writer.ParquetWriterOptions in project trino by trinodb.
The class IcebergFileWriterFactory, method createParquetWriter.
private IcebergFileWriter createParquetWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session, HdfsContext hdfsContext)
{
    List<String> fileColumnNames = icebergSchema.columns().stream()
            .map(Types.NestedField::name).collect(toImmutableList());
    List<Type> fileColumnTypes = icebergSchema.columns().stream()
            .map(column -> toTrinoType(column.type(), typeManager)).collect(toImmutableList());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(outputPath, false);
            return null;
        };
        ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
                .setMaxPageSize(getParquetWriterPageSize(session))
                .setMaxBlockSize(getParquetWriterBlockSize(session))
                .setBatchSize(getParquetWriterBatchSize(session)).build();
        return new IcebergParquetFileWriter(
                hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.create(outputPath)),
                rollbackAction, fileColumnTypes, convert(icebergSchema, "table"),
                makeTypeMap(fileColumnTypes, fileColumnNames), parquetWriterOptions,
                IntStream.range(0, fileColumnNames.size()).toArray(),
                getCompressionCodec(session).getParquetCompressionCodec(),
                nodeVersion.toString(), outputPath, hdfsEnvironment, hdfsContext);
    }
    catch (IOException e) {
        throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
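Both factories above (and the Delta Lake page sink below) register the same rollback action: a Callable that deletes the partially written file if the write is aborted. Factored out as a hypothetical helper (the class and method names here are illustrative, not part of Trino), the pattern looks like this:

import java.util.concurrent.Callable;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class RollbackActions
{
    private RollbackActions() {}

    // Deletes the target file (non-recursively) when the writer is rolled back.
    static Callable<Void> deleteOnRollback(FileSystem fileSystem, Path path)
    {
        return () -> {
            fileSystem.delete(path, false);
            return null;
        };
    }
}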
Use of io.trino.parquet.writer.ParquetWriterOptions in project trino by trinodb.
The class DeltaLakePageSink, method createParquetFileWriter.
private FileWriter createParquetFileWriter(Path path)
{
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxBlockSize(getParquetWriterBlockSize(session))
            .setMaxPageSize(getParquetWriterPageSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompressionCodec(session).getParquetCompressionCodec();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        List<Type> parquetTypes = dataColumnTypes.stream()
                .map(type -> {
                    if (type instanceof TimestampWithTimeZoneType) {
                        verify(((TimestampWithTimeZoneType) type).getPrecision() == 3, "Unsupported type: %s", type);
                        return TIMESTAMP_MILLIS;
                    }
                    return type;
                })
                .collect(toImmutableList());
        // we use identity column mapping; the input page already contains only data columns per
        // DeltaLakePageSink.getDataPage()
        int[] identityMapping = new int[dataColumnTypes.size()];
        for (int i = 0; i < identityMapping.length; ++i) {
            identityMapping[i] = i;
        }
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(parquetTypes, dataColumnNames);
        return new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                parquetTypes,
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                identityMapping,
                compressionCodecName,
                trinoVersion);
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Error creating Parquet file", e);
    }
}
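The identity column mapping built with the explicit loop above produces the same array as the IntStream.range(...).toArray() call in the Iceberg example: the indexes [0, 1, ..., n - 1], meaning file column i is read from input channel i. A small standalone sketch (names are illustrative) showing the equivalence:

import java.util.Arrays;
import java.util.stream.IntStream;

public class IdentityMappingSketch
{
    public static void main(String[] args)
    {
        int n = 4; // stand-in for dataColumnTypes.size() or fileColumnNames.size()

        // Explicit loop, as in DeltaLakePageSink.createParquetFileWriter
        int[] loopMapping = new int[n];
        for (int i = 0; i < n; i++) {
            loopMapping[i] = i;
        }

        // Stream form, as in IcebergFileWriterFactory.createParquetWriter
        int[] streamMapping = IntStream.range(0, n).toArray();

        System.out.println(Arrays.equals(loopMapping, streamMapping)); // prints true
    }
}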