Example usage of org.apache.drill.exec.store.parquet.columnreaders.batchsizing.RecordBatchSizerManager in the Apache Drill project:
the ParquetRecordReader class, setup method.
/**
 * Prepares the Parquet reader. First determines the set of columns to read (the
 * schema for this read), then creates a state object that tracks the read across
 * calls to the reader's <tt>next()</tt> method. Finally, instantiates one of three
 * batch readers depending on whether this scan covers only fixed-width fields,
 * contains at least one variable-width field, or is a "mock" scan consisting
 * solely of null fields (fields named in the SELECT clause but absent from the
 * Parquet file.)
 *
 * @param operatorContext the operator context for this scan
 * @param output mutator through which output vectors are created
 * @throws ExecutionSetupException if schema construction or reader setup fails
 */
@Override
public void setup(OperatorContext operatorContext, OutputMutator output) throws ExecutionSetupException {
  this.operatorContext = operatorContext;

  // Star queries read every column, so no explicit projection list is handed to the schema.
  ParquetSchema schema = new ParquetSchema(fragmentContext.getOptions(), rowGroupIndex, footer,
      isStarQuery() ? null : getColumns());
  batchSizerMgr = new RecordBatchSizerManager(fragmentContext.getOptions(), schema, numRecordsToRead,
      new RecordBatchStatsContext(fragmentContext, operatorContext));

  logger.debug("Reading {} records from row group({}) in file {}.", numRecordsToRead, rowGroupIndex,
      hadoopPath.toUri().getPath());

  try {
    schema.buildSchema();
    batchSizerMgr.setup();
    readState = new ReadState(schema, batchSizerMgr, parquetReaderStats, numRecordsToRead, useAsyncColReader);
    readState.buildReader(this, output);
  } catch (Exception e) {
    throw handleAndRaise("Failure in setting up reader", e);
  }

  // Pick the batch reader. A null first column reader means none of the projected
  // columns exist in the file, so a mock reader produces null-filled batches.
  ColumnReader<?> leadColumnReader = readState.getFirstColumnReader();
  if (leadColumnReader == null) {
    batchReader = new BatchReader.MockBatchReader(readState);
  } else if (schema.allFieldsFixedLength()) {
    batchReader = new BatchReader.FixedWidthReader(readState);
  } else {
    batchReader = new BatchReader.VariableWidthReader(readState);
  }
}
Aggregations