Search in sources :

Example 1 with ParquetWriterConfiguration

Use of org.apache.gobblin.parquet.writer.ParquetWriterConfiguration in the Apache incubator-gobblin project.

From the class ParquetDataWriterBuilder, method getVersionSpecificWriter.

/**
 * Creates the {@link ParquetWriter} that matches the record format reported by the given
 * {@link ParquetWriterConfiguration} and wraps it in a {@link ParquetWriterShim}.
 *
 * <p>The codec name and writer version are parsed from the configuration first; a fresh
 * Hadoop {@link Configuration} is then created and handed to the GROUP and AVRO writers
 * (the PROTOBUF writer is constructed without it). The builder's {@code schema} field is
 * cast to the type the selected writer expects.
 *
 * @param writerConfiguration supplies the staging file, codec name, writer version, record
 *                            format, block/page/dictionary-page sizes and the dictionary and
 *                            validation flags used to construct the writer
 * @return a shim whose {@code write} and {@code close} calls are forwarded to the writer built here
 * @throws IOException if one of the calls made while building the writer throws it
 * @throws RuntimeException if the configured record format is not GROUP, AVRO or PROTOBUF
 */
@Override
public ParquetWriterShim getVersionSpecificWriter(ParquetWriterConfiguration writerConfiguration) throws IOException {
    CompressionCodecName compression = CompressionCodecName.fromConf(writerConfiguration.getCodecName());
    ParquetProperties.WriterVersion version =
        ParquetProperties.WriterVersion.fromString(writerConfiguration.getWriterVersion());
    Configuration hadoopConf = new Configuration();

    // Assigned exactly once by whichever case matches; the default case throws, so the
    // variable is definitely assigned (and capturable) once the switch completes.
    final ParquetWriter delegate;
    switch (writerConfiguration.getRecordFormat()) {
        case GROUP:
            // The schema must be registered in the Hadoop configuration before the writer is created.
            GroupWriteSupport.setSchema((MessageType) this.schema, hadoopConf);
            WriteSupport groupSupport = new GroupWriteSupport();
            delegate = new ParquetWriter<Group>(
                writerConfiguration.getAbsoluteStagingFile(),
                groupSupport,
                compression,
                writerConfiguration.getBlockSize(),
                writerConfiguration.getPageSize(),
                writerConfiguration.getDictPageSize(),
                writerConfiguration.isDictionaryEnabled(),
                writerConfiguration.isValidate(),
                version,
                hadoopConf);
            break;
        case AVRO:
            delegate = new AvroParquetWriter(
                writerConfiguration.getAbsoluteStagingFile(),
                (Schema) this.schema,
                compression,
                writerConfiguration.getBlockSize(),
                writerConfiguration.getPageSize(),
                writerConfiguration.isDictionaryEnabled(),
                hadoopConf);
            break;
        case PROTOBUF:
            delegate = new ProtoParquetWriter(
                writerConfiguration.getAbsoluteStagingFile(),
                (Class<? extends Message>) this.schema,
                compression,
                writerConfiguration.getBlockSize(),
                writerConfiguration.getPageSize(),
                writerConfiguration.isDictionaryEnabled(),
                writerConfiguration.isValidate());
            break;
        default:
            throw new RuntimeException("Record format not supported");
    }

    // Thin adapter: both operations are passed straight through to the underlying writer.
    return new ParquetWriterShim() {

        @Override
        public void write(Object record) throws IOException {
            delegate.write(record);
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }
    };
}
Also used : ParquetWriterConfiguration(org.apache.gobblin.parquet.writer.ParquetWriterConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ProtoParquetWriter(org.apache.parquet.proto.ProtoParquetWriter) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) AvroParquetWriter(org.apache.parquet.avro.AvroParquetWriter) ParquetProperties(org.apache.parquet.column.ParquetProperties) GroupWriteSupport(org.apache.parquet.hadoop.example.GroupWriteSupport) WriteSupport(org.apache.parquet.hadoop.api.WriteSupport) AvroParquetWriter(org.apache.parquet.avro.AvroParquetWriter) ProtoParquetWriter(org.apache.parquet.proto.ProtoParquetWriter) GroupWriteSupport(org.apache.parquet.hadoop.example.GroupWriteSupport) ParquetWriterShim(org.apache.gobblin.parquet.writer.ParquetWriterShim) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

ParquetWriterConfiguration (org.apache.gobblin.parquet.writer.ParquetWriterConfiguration)1 ParquetWriterShim (org.apache.gobblin.parquet.writer.ParquetWriterShim)1 Configuration (org.apache.hadoop.conf.Configuration)1 AvroParquetWriter (org.apache.parquet.avro.AvroParquetWriter)1 ParquetProperties (org.apache.parquet.column.ParquetProperties)1 ParquetWriter (org.apache.parquet.hadoop.ParquetWriter)1 WriteSupport (org.apache.parquet.hadoop.api.WriteSupport)1 GroupWriteSupport (org.apache.parquet.hadoop.example.GroupWriteSupport)1 CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName)1 ProtoParquetWriter (org.apache.parquet.proto.ProtoParquetWriter)1 MessageType (org.apache.parquet.schema.MessageType)1