Usage of org.apache.hudi.io.storage.HoodieParquetStreamWriter in the Apache Hudi project:
the serializeRecords method of the class HoodieParquetDataBlock.
@Override
protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
  // Serializes the given Avro records into an in-memory Parquet byte buffer.
  // Returns an empty array for an empty record list (nothing to encode).
  if (records.isEmpty()) {
    return new byte[0];
  }

  // The writer schema travels in the log-block header; parse it back into an Avro Schema.
  Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
  HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
      new AvroSchemaConverter().convert(writerSchema), writerSchema, Option.empty());

  HoodieAvroParquetConfig avroParquetConfig = new HoodieAvroParquetConfig(
      writeSupport,
      compressionCodecName.get(),
      ParquetWriter.DEFAULT_BLOCK_SIZE,
      ParquetWriter.DEFAULT_PAGE_SIZE,
      1024 * 1024 * 1024, // max file size cap: 1 GiB
      new Configuration(),
      // NOTE(review): hard-coded compression ratio; presumably should come from
      // HoodieStorageConfig.PARQUET_COMPRESSION_RATIO.defaultValue() — confirm.
      0.1);

  // Write the Parquet stream into memory; try-with-resources guarantees both
  // the Parquet writer and the wrapping FSDataOutputStream are closed.
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  try (FSDataOutputStream outputStream = new FSDataOutputStream(baos)) {
    try (HoodieParquetStreamWriter<IndexedRecord> parquetWriter =
             new HoodieParquetStreamWriter<>(outputStream, avroParquetConfig)) {
      for (IndexedRecord record : records) {
        // Record key may be absent (e.g. virtual keys); the writer accepts null.
        String recordKey = getRecordKey(record).orElse(null);
        parquetWriter.writeAvro(recordKey, record);
      }
      outputStream.flush();
    }
  }
  return baos.toByteArray();
}
Aggregations