Use of parquet.hadoop.metadata.CompressionCodecName in the project incubator-gobblin by Apache.
From the class ParquetDataWriterBuilder, method getWriter:
/**
 * Creates a {@link ParquetWriter} that writes {@link Group} records to the given
 * staging file, configured from the destination's writer properties.
 *
 * @param blockSize   Parquet row-group (block) size in bytes
 * @param stagingFile staging file path, resolved against the configured writer file-system URI
 * @return a configured {@code ParquetWriter<Group>}
 * @throws IOException if the underlying Parquet writer cannot be created
 */
public ParquetWriter<Group> getWriter(int blockSize, Path stagingFile) throws IOException {
    State props = this.destination.getProperties();

    // Resolve the staging file against the writer file system (defaults to the local FS).
    String fsUri = props.getProp(WRITER_FILE_SYSTEM_URI, LOCAL_FS_URI);
    Path resolvedStagingFile = new Path(fsUri, stagingFile);

    int pageSize = props.getPropAsInt(getProperty(WRITER_PARQUET_PAGE_SIZE), DEFAULT_PAGE_SIZE);
    // NOTE(review): the fallback here is DEFAULT_BLOCK_SIZE rather than a dictionary-specific
    // default — possibly a copy-paste slip; confirm the intended default for the dictionary page size.
    int dictionaryPageSize = props.getPropAsInt(getProperty(WRITER_PARQUET_DICTIONARY_PAGE_SIZE), DEFAULT_BLOCK_SIZE);
    boolean dictionaryEnabled = props.getPropAsBoolean(getProperty(WRITER_PARQUET_DICTIONARY), DEFAULT_IS_DICTIONARY_ENABLED);
    boolean validating = props.getPropAsBoolean(getProperty(WRITER_PARQUET_VALIDATE), DEFAULT_IS_VALIDATING_ENABLED);

    // The Group write support reads the target schema from the Hadoop configuration.
    Configuration hadoopConf = new Configuration();
    GroupWriteSupport.setSchema(this.schema, hadoopConf);
    GroupWriteSupport writeSupport = new GroupWriteSupport();

    return new ParquetWriter<>(resolvedStagingFile, writeSupport, getCodecFromConfig(), blockSize,
        pageSize, dictionaryPageSize, dictionaryEnabled, validating, getWriterVersion(), hadoopConf);
}
Use of parquet.hadoop.metadata.CompressionCodecName in the project presto by prestodb.
From the class ParquetTester, method assertRoundTrip:
/**
 * Writes the given values to a temporary Parquet file and asserts that they read back as
 * the expected values, repeating the round trip for every configured writer version and
 * compression codec combination.
 *
 * @param objectInspector Hive object inspector describing the written column
 * @param writeValues     values to write into the Parquet file
 * @param readValues      values expected when the file is read back
 * @param type            Presto type of the column being verified
 * @throws Exception if writing, reading, or content verification fails
 */
public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> writeValues, Iterable<?> readValues, Type type) throws Exception {
    for (WriterVersion writerVersion : versions) {
        for (CompressionCodecName codec : compressions) {
            // TempFile is AutoCloseable; each combination gets its own file.
            try (TempFile file = new TempFile("test", "parquet")) {
                JobConf conf = new JobConf();
                conf.setEnum(ParquetOutputFormat.WRITER_VERSION, writerVersion);
                conf.setEnum(ParquetOutputFormat.COMPRESSION, codec);
                conf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);
                writeParquetColumn(conf, file.getFile(), codec, objectInspector, writeValues.iterator());
                assertFileContents(conf, file, readValues, type);
            }
        }
    }
}
Aggregations