Use of org.apache.drill.exec.ops.OperatorContext in project drill by apache.

Example 1: the getBatch method of the HiveDrillNativeParquetScanBatchCreator class.
@Override
public CloseableRecordBatch getBatch(ExecutorFragmentContext context,
    HiveDrillNativeParquetRowGroupScan rowGroupScan,
    List<RecordBatch> children) throws ExecutionSetupException {
  Preconditions.checkArgument(children.isEmpty());
  OperatorContext oContext = context.newOperatorContext(rowGroupScan);
  return getBatch(context, rowGroupScan, oContext);
}
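HiveDrillNativeParquetScanBatchCreator is an implementation of Drill's BatchCreator contract: it receives the fragment context and the physical operator config, asks the fragment for an operator-scoped OperatorContext, and passes that context on to the batch it constructs. The sketch below follows the same pattern for a hypothetical plugin; MySubScan and MyScanBatch are made-up placeholders, and only the Drill types and the newOperatorContext call are taken from this page.

import java.util.List;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.exec.ops.ExecutorFragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.impl.BatchCreator;
import org.apache.drill.exec.record.CloseableRecordBatch;
import org.apache.drill.exec.record.RecordBatch;

// Minimal sketch only: MySubScan and MyScanBatch are hypothetical classes.
public class MySubScanBatchCreator implements BatchCreator<MySubScan> {

  @Override
  public CloseableRecordBatch getBatch(ExecutorFragmentContext context,
      MySubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    // The OperatorContext is scoped to this one operator: it owns the
    // operator-level allocator (used in the CompliantTextBatchReader example
    // below) and can create a DrillFileSystem (used in the EasyFormatPlugin
    // example below).
    OperatorContext oContext = context.newOperatorContext(subScan);
    return new MyScanBatch(subScan, oContext);
  }
}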
Example 2: the open method of the CompliantTextBatchReader class.
/**
 * Performs the initial setup required for the record reader.
 * Initializes the input stream, the handling of the output record batch,
 * and the actual reader to be used.
 *
 * @param schemaNegotiator used to create the schema in the output record batch
 * @return true if the reader opens successfully, false if the output is null
 */
@Override
public boolean open(ColumnsSchemaNegotiator schemaNegotiator) {
  final OperatorContext context = schemaNegotiator.context();
  dfs = schemaNegotiator.fileSystem();
  split = schemaNegotiator.split();

  // Note: DO NOT use managed buffers here. They remain in existence
  // until the fragment is shut down. The buffers here are large.
  // If we scan 1000 files, and allocate 1 MB for each, we end up
  // holding onto 1 GB of memory in managed buffers.
  // Instead, we allocate the buffers explicitly, and must free
  // them.
  readBuffer = context.getAllocator().buffer(READ_BUFFER);
  whitespaceBuffer = context.getAllocator().buffer(WHITE_SPACE_BUFFER);
  schemaNegotiator.batchSize(MAX_RECORDS_PER_BATCH);

  // Set up the output, input, and reader.
  try {
    TextOutput output;
    if (settings.isHeaderExtractionEnabled()) {
      output = openWithHeaders(schemaNegotiator);
    } else {
      output = openWithoutHeaders(schemaNegotiator);
    }
    if (output == null) {
      return false;
    }
    openReader(output);
    return true;
  } catch (final IOException e) {
    throw UserException.dataReadError(e)
        .addContext("File Path", split.getPath().toString())
        .build(logger);
  }
}
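Because readBuffer and whitespaceBuffer bypass the managed-buffer mechanism, the reader itself must release them when it is closed. The sketch below is a hedged approximation of what that cleanup needs to do, not a verbatim copy of the Drill source; it assumes the standard release() semantics of DrillBuf.

@Override
public void close() {
  // Close the underlying text reader first, if it was ever opened.
  try {
    if (reader != null) {
      reader.close();
      reader = null;
    }
  } catch (final IOException e) {
    logger.warn("Exception while closing text reader", e);
  }
  // Release the explicitly allocated buffers; they are not managed
  // buffers, so nothing else will free them for us. Nulling the fields
  // makes a second close() harmless.
  if (readBuffer != null) {
    readBuffer.release();
    readBuffer = null;
  }
  if (whitespaceBuffer != null) {
    whitespaceBuffer.release();
    whitespaceBuffer = null;
  }
}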
Example 3: the buildScanBatch method of the EasyFormatPlugin class.
/**
 * Use the original scanner based on the {@link RecordReader} interface.
 * Requires that the storage plugin roll its own solutions for null columns.
 * Is not able to limit vector or batch sizes. Retained for backward
 * compatibility with "classic" format plugins which have not yet been
 * upgraded to use the new framework.
 */
private CloseableRecordBatch buildScanBatch(FragmentContext context,
    EasySubScan scan) throws ExecutionSetupException {
  final ColumnExplorer columnExplorer =
      new ColumnExplorer(context.getOptions(), scan.getColumns());

  if (!columnExplorer.isStarQuery()) {
    scan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(),
        scan.getFormatPlugin(), columnExplorer.getTableColumns(),
        scan.getSelectionRoot(), scan.getPartitionDepth(),
        scan.getSchema(), scan.getMaxRecords());
    scan.setOperatorId(scan.getOperatorId());
  }

  final OperatorContext oContext = context.newOperatorContext(scan);
  final DrillFileSystem dfs;
  try {
    dfs = oContext.newFileSystem(easyConfig().fsConf);
  } catch (final IOException e) {
    throw new ExecutionSetupException(
        String.format("Failed to create FileSystem: %s", e.getMessage()), e);
  }

  final List<RecordReader> readers = new LinkedList<>();
  final List<Map<String, String>> implicitColumns = Lists.newArrayList();
  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  final boolean supportsFileImplicitColumns = scan.getSelectionRoot() != null;

  for (final FileWork work : scan.getWorkUnits()) {
    final RecordReader recordReader =
        getRecordReader(context, dfs, work, scan.getColumns(), scan.getUserName());
    readers.add(recordReader);
    final List<String> partitionValues =
        ColumnExplorer.listPartitionValues(work.getPath(), scan.getSelectionRoot(), false);
    final Map<String, String> implicitValues =
        columnExplorer.populateColumns(work.getPath(), partitionValues,
            supportsFileImplicitColumns, dfs);
    implicitColumns.add(implicitValues);
    if (implicitValues.size() > mapWithMaxColumns.size()) {
      mapWithMaxColumns = implicitValues;
    }
  }

  // All readers should have the same number of implicit columns; add missing ones with value null.
  final Map<String, String> diff =
      Maps.transformValues(mapWithMaxColumns, Functions.constant(null));
  for (final Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
  }

  return new ScanBatch(context, oContext, readers, implicitColumns);
}
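The implicit-column padding at the end of buildScanBatch is easy to miss: the map with the most implicit columns becomes a template whose values are all null, and Maps.difference(...).entriesOnlyOnRight() then yields exactly the keys a given reader's map is missing. The following self-contained sketch demonstrates that idiom with made-up column names, using plain Guava (Drill itself may shade Guava under a different package).

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.base.Functions;
import com.google.common.collect.Maps;

public class ImplicitColumnPadding {
  public static void main(String[] args) {
    // One reader saw a partitioned file (dir0 present), the other did not.
    Map<String, String> full = new HashMap<>();
    full.put("filename", "a.csv");
    full.put("dir0", "2023");

    Map<String, String> partial = new HashMap<>();
    partial.put("filename", "b.csv");

    List<Map<String, String>> implicitColumns = Arrays.asList(full, partial);

    // Template: every known implicit column mapped to null.
    Map<String, String> diff = Maps.transformValues(full, Functions.constant(null));

    // entriesOnlyOnRight() holds the keys the current map lacks; putAll
    // fills them in with null so all maps end up with the same key set.
    for (Map<String, String> map : implicitColumns) {
      map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }

    // Prints something like:
    // [{filename=a.csv, dir0=2023}, {filename=b.csv, dir0=null}]
    // (HashMap key order may vary.)
    System.out.println(implicitColumns);
  }
}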