Use of org.apache.parquet.hadoop.api.ReadSupport in project parquet-mr by Apache.
The class InternalParquetRecordReader, method initialize.
/**
 * Prepares this record reader to read records from the given file.
 *
 * <p>Builds the {@link Configuration} handed to the {@link ReadSupport}, obtains a
 * {@code ReadContext} from the file schema and key/value metadata, creates the record
 * converter, and finally registers the requested schema on {@code reader}.
 *
 * @param reader the file reader supplying the footer and the total record count
 * @param options read options; every custom property is copied into the configuration
 *     passed to the {@link ReadSupport}
 */
public void initialize(ParquetFileReader reader, ParquetReadOptions options) {
  // Use the Hadoop configuration carried by the options when there is one; build a
  // fresh Configuration only otherwise, so no throwaway instance is allocated.
  final Configuration conf = options instanceof HadoopReadOptions
      ? ((HadoopReadOptions) options).getConf()
      : new Configuration();

  // copy custom configuration to the Configuration passed to the ReadSupport
  // (for HadoopReadOptions this writes into the options' own Configuration object)
  for (String property : options.getPropertyNames()) {
    conf.set(property, options.getProperty(property));
  }

  // initialize a ReadContext for this file
  this.reader = reader;
  FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData();
  this.fileSchema = parquetFileMetadata.getSchema();
  Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext =
      readSupport.init(new InitContext(conf, toSetMultiMap(fileMetadata), fileSchema));

  this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter =
      readSupport.prepareForRead(conf, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true);
  this.total = reader.getRecordCount();
  this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total);
  this.filterRecords = options.useRecordFilter();

  // tell the file reader which columns to read, now that the requested schema is known
  reader.setRequestedSchema(requestedSchema);
  LOG.info("RecordReader initialized will read a total of {} records.", total);
}
Aggregations