Use of alluxio.job.plan.transform.format.JobPath in project alluxio by Alluxio.
The class ParquetWriter, method create.
/**
 * Creates a Parquet writer specifying a row group size.
 *
 * @param schema the schema
 * @param uri the URI to the output
 * @param rowGroupSize the row group size
 * @param enableDictionary whether to enable dictionary encoding
 * @param compressionCodec the compression codec name
 * @return the writer
 * @throws IOException when the writer cannot be created
 */
public static ParquetWriter create(TableSchema schema, AlluxioURI uri, int rowGroupSize,
    boolean enableDictionary, String compressionCodec) throws IOException {
  Configuration conf = ReadWriterUtils.writeThroughConf();
  ParquetSchema parquetSchema = schema.toParquet();
  return new ParquetWriter(AvroParquetWriter.<Record>builder(
      HadoopOutputFile.fromPath(
          new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath()), conf))
      .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
      .withConf(conf)
      .withCompressionCodec(CompressionCodecName.fromConf(compressionCodec))
      .withRowGroupSize(rowGroupSize)
      .withDictionaryPageSize(org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE)
      .withDictionaryEncoding(enableDictionary)
      .withPageSize(org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE)
      .withDataModel(GenericData.get())
      .withSchema(parquetSchema.getSchema())
      .build());
}
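A minimal usage sketch of this factory method, not a confirmed part of the Alluxio API: the URI, the 128 MB row group size, and the "SNAPPY" codec name are illustrative values; the TableSchema is taken as a parameter because its construction depends on the transform job definition; the import paths and the assumption that the wrapper is Closeable are inferred from the snippet above.

import alluxio.AlluxioURI;
import alluxio.job.plan.transform.format.TableSchema;
import alluxio.job.plan.transform.format.parquet.ParquetWriter;
import java.io.IOException;

public class ParquetWriterExample {
  // Creates a Parquet file at the given Alluxio path; in a real transform job,
  // rows would be written through the writer before it is closed.
  static void writeExample(TableSchema schema) throws IOException {
    AlluxioURI output = new AlluxioURI("alluxio://master:19998/output/part-0.parquet");
    ParquetWriter writer = ParquetWriter.create(schema, output,
        128 * 1024 * 1024 /* row group size */, true /* dictionary encoding */, "SNAPPY");
    writer.close(); // assumed Closeable, per the TableWriter-style wrappers in this package
  }
}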
Use of alluxio.job.plan.transform.format.JobPath in project alluxio by Alluxio.
The class ParquetReader, method create.
/**
 * Creates a Parquet reader.
 *
 * @param uri the URI to the input
 * @return the reader
 * @throws IOException when the reader cannot be created
 */
public static ParquetReader create(AlluxioURI uri) throws IOException {
  Path inputPath = new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath());
  Configuration conf = ReadWriterUtils.readNoCacheConf();
  InputFile inputFile = HadoopInputFile.fromPath(inputPath, conf);
  org.apache.parquet.hadoop.ParquetReader<Record> reader =
      AvroParquetReader.<Record>builder(inputFile)
          .disableCompatibility()
          .withDataModel(GenericData.get())
          .withConf(conf)
          .build();
  Schema schema;
  ParquetMetadata footer;
  try (ParquetFileReader r = new ParquetFileReader(inputFile, ParquetReadOptions.builder().build())) {
    footer = r.getFooter();
    schema = new AvroSchemaConverter().convert(footer.getFileMetaData().getSchema());
  }
  return new ParquetReader(reader, schema, footer);
}
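A similar hedged sketch for the reader side: the path is a placeholder, the import locations are inferred from the snippet above, and the loop assumes the usual TableReader contract in this package where read() returns null at end of input and the reader is Closeable.

import alluxio.AlluxioURI;
import alluxio.job.plan.transform.format.TableRow;
import alluxio.job.plan.transform.format.parquet.ParquetReader;
import java.io.IOException;

public class ParquetReaderExample {
  // Opens a reader on an Alluxio path and drains it row by row.
  static void readExample() throws IOException {
    AlluxioURI input = new AlluxioURI("alluxio://master:19998/input/part-0.parquet");
    try (ParquetReader reader = ParquetReader.create(input)) { // assumed Closeable
      TableRow row;
      while ((row = reader.read()) != null) { // read() assumed to return null at EOF
        // process the row here
      }
    }
  }
}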