Use of org.apache.hudi.io.storage.HoodieAvroParquetConfig in project hudi by apache.
From the class HoodieWriteableTestTable, method withInserts:
public Path withInserts(String partition, String fileId, List<HoodieRecord> records, TaskContextSupplier contextSupplier) throws Exception {
  // Make sure the partition directory carries the Hudi partition metadata file.
  FileCreateUtils.createPartitionMetaFile(basePath, partition);
  String fileName = baseFileName(currentInstantTime, fileId);
  Path baseFilePath = new Path(Paths.get(basePath, partition, fileName).toString());
  if (this.fs.exists(baseFilePath)) {
    LOG.warn("Deleting the existing base file " + baseFilePath);
    this.fs.delete(baseFilePath, true);
  }
  if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.PARQUET)) {
    HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, Option.of(filter));
    HoodieAvroParquetConfig config = new HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP,
        ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
        new Configuration(), Double.parseDouble(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue()));
    try (HoodieParquetWriter writer = new HoodieParquetWriter(currentInstantTime,
        new Path(Paths.get(basePath, partition, fileName).toString()), config, schema, contextSupplier, populateMetaFields)) {
      int seqId = 1;
      for (HoodieRecord record : records) {
        GenericRecord avroRecord = (GenericRecord) ((HoodieRecordPayload) record.getData()).getInsertValue(schema).get();
        if (populateMetaFields) {
          // Fill the Hudi meta columns (commit time, sequence number, record key, partition path, file name) before writing.
          HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, currentInstantTime, String.valueOf(seqId++));
          HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), fileName);
          writer.writeAvro(record.getRecordKey(), avroRecord);
          filter.add(record.getRecordKey());
        } else {
          writer.writeAvro(record.getRecordKey(), avroRecord);
        }
      }
    }
  } else if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.ORC)) {
    Configuration conf = new Configuration();
    int orcStripSize = Integer.parseInt(HoodieStorageConfig.ORC_STRIPE_SIZE.defaultValue());
    int orcBlockSize = Integer.parseInt(HoodieStorageConfig.ORC_BLOCK_SIZE.defaultValue());
    int maxFileSize = Integer.parseInt(HoodieStorageConfig.ORC_FILE_MAX_SIZE.defaultValue());
    HoodieOrcConfig config = new HoodieOrcConfig(conf, CompressionKind.ZLIB, orcStripSize, orcBlockSize, maxFileSize, filter);
    try (HoodieOrcWriter writer = new HoodieOrcWriter(currentInstantTime,
        new Path(Paths.get(basePath, partition, fileName).toString()), config, schema, contextSupplier)) {
      int seqId = 1;
      for (HoodieRecord record : records) {
        GenericRecord avroRecord = (GenericRecord) ((HoodieRecordPayload) record.getData()).getInsertValue(schema).get();
        HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, currentInstantTime, String.valueOf(seqId++));
        HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), fileName);
        writer.writeAvro(record.getRecordKey(), avroRecord);
        filter.add(record.getRecordKey());
      }
    }
  }
  return baseFilePath;
}
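
For reference, a call site for withInserts could look roughly like the sketch below. This is not taken from the Hudi tests: how the test table, the records, and the contextSupplier are obtained is an assumption about the surrounding test harness, and the partition path is illustrative; only the withInserts signature itself comes from the snippet above.

// Hypothetical helper showing how a test might invoke withInserts (a sketch, not Hudi code).
// testTable, records and contextSupplier are assumed to be provided by the test setup.
static Path writeBaseFile(HoodieWriteableTestTable testTable, List<HoodieRecord> records,
                          TaskContextSupplier contextSupplier) throws Exception {
  String partition = "2016/03/15";                        // illustrative partition path
  String fileId = java.util.UUID.randomUUID().toString(); // id of the new base file
  // Returns the path of the freshly written Parquet (or ORC) base file.
  return testTable.withInserts(partition, fileId, records, contextSupplier);
}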
Use of org.apache.hudi.io.storage.HoodieAvroParquetConfig in project hudi by apache.
From the class HoodieParquetDataBlock, method serializeRecords:
@Override
protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
  if (records.size() == 0) {
    return new byte[0];
  }
  Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
  HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(writerSchema), writerSchema, Option.empty());
  HoodieAvroParquetConfig avroParquetConfig = new HoodieAvroParquetConfig(writeSupport, compressionCodecName.get(),
      ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 1024 * 1024 * 1024, new Configuration(),
      Double.parseDouble(String.valueOf(0.1))); // HoodieStorageConfig.PARQUET_COMPRESSION_RATIO.defaultValue()
  // Write the records through the Parquet stream writer into an in-memory buffer.
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  try (FSDataOutputStream outputStream = new FSDataOutputStream(baos)) {
    try (HoodieParquetStreamWriter<IndexedRecord> parquetWriter = new HoodieParquetStreamWriter<>(outputStream, avroParquetConfig)) {
      for (IndexedRecord record : records) {
        String recordKey = getRecordKey(record).orElse(null);
        parquetWriter.writeAvro(recordKey, record);
      }
      outputStream.flush();
    }
  }
  return baos.toByteArray();
}
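
Since serializeRecords returns a complete Parquet file as a byte array, its content can be inspected with the standard parquet-avro reader. The sketch below is not from the Hudi codebase; it assumes the byte[] produced by serializeRecords is already available, and it simply dumps the bytes to a temporary file before reading them back (it relies on org.apache.parquet.avro.AvroParquetReader, org.apache.parquet.hadoop.ParquetReader and org.apache.avro.generic.GenericRecord being on the classpath).

// Hypothetical read-back helper for the bytes returned by serializeRecords (a sketch, not Hudi code).
static void dumpSerializedRecords(byte[] parquetBytes) throws Exception {
  java.nio.file.Path tmp = java.nio.file.Files.createTempFile("hoodie-data-block", ".parquet");
  java.nio.file.Files.write(tmp, parquetBytes);
  try (ParquetReader<GenericRecord> reader =
      AvroParquetReader.<GenericRecord>builder(new Path(tmp.toString())).build()) {
    GenericRecord record;
    while ((record = reader.read()) != null) {
      System.out.println(record); // prints each deserialized record
    }
  }
}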