Use of org.apache.parquet.avro.AvroSchemaConverter in the Alluxio project,
from the class ParquetReader, method create.
/**
 * Creates a parquet reader for the input at the given URI.
 *
 * @param uri the URI to the input
 * @return the reader
 * @throws IOException when failed to create the reader
 */
public static ParquetReader create(AlluxioURI uri) throws IOException {
  Path inputPath = new JobPath(uri.getScheme(), uri.getAuthority().toString(), uri.getPath());
  Configuration conf = ReadWriterUtils.readNoCacheConf();
  InputFile inputFile = HadoopInputFile.fromPath(inputPath, conf);

  // Read the footer and derive the Avro schema FIRST: if this fails we have not yet
  // opened the record reader, so nothing leaks. (Building the record reader before this
  // block would leak it whenever footer reading throws.)
  Schema schema;
  ParquetMetadata footer;
  try (ParquetFileReader r =
      new ParquetFileReader(inputFile, ParquetReadOptions.builder().build())) {
    footer = r.getFooter();
    schema = new AvroSchemaConverter().convert(footer.getFileMetaData().getSchema());
  }

  // Only now open the record reader; on success its ownership passes to ParquetReader.
  org.apache.parquet.hadoop.ParquetReader<Record> reader =
      AvroParquetReader.<Record>builder(inputFile)
          .disableCompatibility()
          .withDataModel(GenericData.get())
          .withConf(conf)
          .build();
  return new ParquetReader(reader, schema, footer);
}
Use of org.apache.parquet.avro.AvroSchemaConverter in the Apache parquet-mr project,
from the class Schemas, method fromParquet.
/**
 * Derives the Avro schema of the parquet file at the given location. Prefers the Avro
 * schema string stored in the file's key-value metadata; if none was recorded, converts
 * the parquet schema from the footer instead.
 *
 * @param conf the Hadoop configuration used to resolve the file system
 * @param location the URI of the parquet file
 * @return the Avro schema of the file
 * @throws IOException if the footer cannot be read
 */
public static Schema fromParquet(Configuration conf, URI location) throws IOException {
  Path path = new Path(location);
  FileSystem fs = path.getFileSystem(conf);
  ParquetMetadata footer = ParquetFileReader.readFooter(fs.getConf(), path);
  String schemaString = footer.getFileMetaData().getKeyValueMetaData().get("parquet.avro.schema");
  if (schemaString == null) {
    // Fall back to the older metadata key used by earlier parquet-avro versions.
    schemaString = footer.getFileMetaData().getKeyValueMetaData().get("avro.schema");
  }
  if (schemaString == null) {
    // No embedded Avro schema at all: convert the parquet schema directly.
    return new AvroSchemaConverter().convert(footer.getFileMetaData().getSchema());
  }
  return new Schema.Parser().parse(schemaString);
}
Aggregations