Use of org.apache.parquet.column.impl.ColumnWriteStoreV2 in the Apache Drill project.
Class ParquetRecordWriter, method newSchema.
/**
 * Rebuilds the Parquet {@code MessageType} (named "root") from {@code batchSchema},
 * skipping the partition comparator field, and then re-creates the page store,
 * the column write store and the record consumer that write against that schema.
 *
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void newSchema() throws IOException {
  // Collect one Parquet type per batch field, leaving out the partition comparator column.
  final List<Type> fieldTypes = new ArrayList<>();
  for (MaterializedField column : batchSchema) {
    if (!column.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      fieldTypes.add(getType(column));
    }
  }
  schema = new MessageType("root", fieldTypes);

  // Block buffer: an even share of the block per column, divided by a further factor of 5,
  // but never below MINIMUM_BUFFER_SIZE. With no columns, fall back to the minimum.
  final int columnCount = this.schema.getColumns().size();
  final int initialBlockBufferSize;
  if (columnCount > 0) {
    initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / columnCount / 5);
  } else {
    initialBlockBufferSize = MINIMUM_BUFFER_SIZE;
  }

  // Page buffer: about 10% larger than the page size, capped by the block buffer size
  // and floored at MINIMUM_BUFFER_SIZE.
  final int paddedPageSize = pageSize + pageSize / 10;
  final int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(paddedPageSize, initialBlockBufferSize));

  // The values-writer factory and the column write store below both follow the writer version.
  final boolean isV1 = writerVersion == WriterVersion.PARQUET_1_0;
  final ValuesWriterFactory valWriterFactory;
  if (isV1) {
    valWriterFactory = new DefaultV1ValuesWriterFactory();
  } else {
    valWriterFactory = new DefaultV2ValuesWriterFactory();
  }

  final ParquetProperties parquetProperties = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryEncoding(enableDictionary)
      .withDictionaryPageSize(initialPageBufferSize)
      .withAllocator(new ParquetDirectByteBufferAllocator(oContext))
      .withValuesWriterFactory(valWriterFactory)
      .withWriterVersion(writerVersion)
      .build();

  // TODO: Replace ParquetColumnChunkPageWriteStore with ColumnChunkPageWriteStore from the
  // parquet library once DRILL-7906 (PARQUET-1006) is resolved.
  pageStore = new ParquetColumnChunkPageWriteStore(
      codecFactory.getCompressor(codec),
      schema,
      parquetProperties.getInitialSlabSize(),
      pageSize,
      parquetProperties.getAllocator(),
      parquetProperties.getColumnIndexTruncateLength(),
      parquetProperties.getPageWriteChecksumEnabled());

  if (isV1) {
    store = new ColumnWriteStoreV1(schema, pageStore, parquetProperties);
  } else {
    store = new ColumnWriteStoreV2(schema, pageStore, parquetProperties);
  }

  // Wire a record consumer over the new store and hand schema + consumer to setUp.
  final MessageColumnIO messageIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = messageIO.getRecordWriter(store);
  setUp(schema, consumer);
}
Aggregations