Use of io.trino.parquet.writer.ParquetSchemaConverter in project trino by trinodb.
In the class ParquetTester, method writeParquetColumnTrino:
private static void writeParquetColumnTrino(File outputFile, List<Type> types, List<String> columnNames, Iterator<?>[] values, int size, CompressionCodecName compressionCodecName)
        throws Exception
{
    checkArgument(types.size() == columnNames.size() && types.size() == values.length);
    ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(types, columnNames);
    ParquetWriter writer = new ParquetWriter(
            new FileOutputStream(outputFile),
            schemaConverter.getMessageType(),
            schemaConverter.getPrimitiveTypes(),
            ParquetWriterOptions.builder()
                    .setMaxPageSize(DataSize.ofBytes(100))
                    .setMaxBlockSize(DataSize.ofBytes(100000))
                    .build(),
            compressionCodecName,
            "test-version");
    PageBuilder pageBuilder = new PageBuilder(types);
    for (int i = 0; i < types.size(); ++i) {
        Type type = types.get(i);
        Iterator<?> iterator = values[i];
        BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
        for (int j = 0; j < size; ++j) {
            checkState(iterator.hasNext());
            Object value = iterator.next();
            writeValue(type, blockBuilder, value);
        }
    }
    pageBuilder.declarePositions(size);
    writer.write(pageBuilder.build());
    writer.close();
}
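For context, a call to this helper might look like the sketch below. It is illustrative only: the file name, the "id" column, the row count, and the GZIP codec are assumptions, and it presumes static imports of io.trino.spi.type.BigintType.BIGINT plus Guava's ImmutableList.

    // Illustrative invocation: write one BIGINT column of 10 rows with GZIP compression.
    File outputFile = File.createTempFile("parquet-schema-converter", ".parquet");
    List<Type> types = ImmutableList.of(BIGINT);
    List<String> columnNames = ImmutableList.of("id");
    Iterator<?>[] values = new Iterator<?>[] {LongStream.range(0, 10).boxed().iterator()};
    writeParquetColumnTrino(outputFile, types, columnNames, values, 10, CompressionCodecName.GZIP);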
Use of io.trino.parquet.writer.ParquetSchemaConverter in project trino by trinodb.
In the class ParquetFileWriterFactory, method createFileWriter:
@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind)
{
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .setBatchSize(HiveSessionProperties.getParquetBatchSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompression(conf);
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session)))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(fileColumnTypes, fileColumnNames);
        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path, false),
                rollbackAction,
                fileColumnTypes,
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName,
                nodeVersion.toString()));
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
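The schema-conversion step follows the same pattern in every caller shown here: build a ParquetSchemaConverter from the Trino column types and names, then pass getMessageType() (the Parquet schema written to the file) and getPrimitiveTypes() (the mapping of primitive columns back to Trino types) to the writer alongside the ParquetWriterOptions. A minimal sketch with assumed columns:

    // Assumed columns; only the conversion step is shown.
    List<Type> columnTypes = ImmutableList.of(BIGINT, VARCHAR);
    List<String> columnNames = ImmutableList.of("order_id", "comment");
    ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(columnTypes, columnNames);
    MessageType parquetSchema = schemaConverter.getMessageType();
    var primitiveTypes = schemaConverter.getPrimitiveTypes();
    // Both results are handed to ParquetFileWriter together with ParquetWriterOptions.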
Use of io.trino.parquet.writer.ParquetSchemaConverter in project trino by trinodb.
In the class DeltaLakePageSink, method createParquetFileWriter:
private FileWriter createParquetFileWriter(Path path)
{
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxBlockSize(getParquetWriterBlockSize(session))
            .setMaxPageSize(getParquetWriterPageSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompressionCodec(session).getParquetCompressionCodec();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        List<Type> parquetTypes = dataColumnTypes.stream()
                .map(type -> {
                    if (type instanceof TimestampWithTimeZoneType) {
                        verify(((TimestampWithTimeZoneType) type).getPrecision() == 3, "Unsupported type: %s", type);
                        return TIMESTAMP_MILLIS;
                    }
                    return type;
                })
                .collect(toImmutableList());
        // we use identity column mapping; input page already contains only data columns per
        // DeltaLakePageSink.getDataPage()
        int[] identityMapping = new int[dataColumnTypes.size()];
        for (int i = 0; i < identityMapping.length; ++i) {
            identityMapping[i] = i;
        }
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(parquetTypes, dataColumnNames);
        return new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                parquetTypes,
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                identityMapping,
                compressionCodecName,
                trinoVersion);
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Error creating Parquet file", e);
    }
}
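Once constructed, the returned FileWriter is driven by the page sink. The loop below is a simplified assumption about that call sequence, not the actual DeltaLakePageSink code; appendRows, commit, and rollback are the FileWriter interface methods, and dataPage stands in for a page holding only data columns.

    // Assumed driver loop for the writer created above.
    FileWriter fileWriter = createParquetFileWriter(path);
    try {
        fileWriter.appendRows(dataPage);   // page contains only data columns, per getDataPage()
        fileWriter.commit();
    }
    catch (RuntimeException e) {
        fileWriter.rollback();             // triggers rollbackAction, deleting the partially written file
        throw e;
    }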