Use of parquet.hadoop.ParquetWriter in the project incubator-gobblin by Apache.
Class ParquetDataWriterBuilder, method getWriter.
/**
 * Creates a {@link ParquetWriter} of {@link Group} records that writes to the given staging file.
 *
 * <p>Page size, dictionary page size, dictionary encoding and validation are looked up in the
 * destination properties, each falling back to a default constant when the property is absent.
 * The staging file is resolved against the configured writer file system URI before the writer
 * is constructed.
 *
 * @param blockSize block size handed to the underlying {@link ParquetWriter}
 * @param stagingFile staging file path, resolved against the writer file system URI
 * @return a new {@link ParquetWriter} targeting the resolved staging file
 * @throws IOException if the writer cannot be created
 */
public ParquetWriter<Group> getWriter(int blockSize, Path stagingFile) throws IOException {
  final State props = this.destination.getProperties();

  // Writer tuning knobs, each with a default fallback.
  final int dataPageSize =
      props.getPropAsInt(getProperty(WRITER_PARQUET_PAGE_SIZE), DEFAULT_PAGE_SIZE);
  // NOTE(review): the dictionary page size falls back to DEFAULT_BLOCK_SIZE rather than
  // DEFAULT_PAGE_SIZE -- confirm this is intentional.
  final int dictionaryPageSize =
      props.getPropAsInt(getProperty(WRITER_PARQUET_DICTIONARY_PAGE_SIZE), DEFAULT_BLOCK_SIZE);
  final boolean dictionaryEnabled =
      props.getPropAsBoolean(getProperty(WRITER_PARQUET_DICTIONARY), DEFAULT_IS_DICTIONARY_ENABLED);
  final boolean validationEnabled =
      props.getPropAsBoolean(getProperty(WRITER_PARQUET_VALIDATE), DEFAULT_IS_VALIDATING_ENABLED);

  // Resolve the staging file against the configured file system root.
  final String fsRootUri = props.getProp(WRITER_FILE_SYSTEM_URI, LOCAL_FS_URI);
  final Path resolvedStagingPath = new Path(fsRootUri, stagingFile);

  final CompressionCodecName compression = getCodecFromConfig();

  // The schema is registered on the same Configuration that is passed to the writer below.
  final GroupWriteSupport writeSupport = new GroupWriteSupport();
  final Configuration hadoopConf = new Configuration();
  GroupWriteSupport.setSchema(this.schema, hadoopConf);

  final ParquetProperties.WriterVersion version = getWriterVersion();

  return new ParquetWriter<>(
      resolvedStagingPath,
      writeSupport,
      compression,
      blockSize,
      dataPageSize,
      dictionaryPageSize,
      dictionaryEnabled,
      validationEnabled,
      version,
      hadoopConf);
}
Aggregations