Use of org.apache.drill.exec.store.RecordReader in project drill by apache.
The class EasyFormatPlugin, method getReaderBatch:
@SuppressWarnings("resource")
CloseableRecordBatch getReaderBatch(FragmentContext context, EasySubScan scan) throws ExecutionSetupException {
  final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, scan.getColumns());
  if (!columnExplorer.isStarQuery()) {
    // Rebuild the sub-scan with the table columns only; implicit columns are handled separately.
    scan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(), scan.getFormatPlugin(),
        columnExplorer.getTableColumns(), scan.getSelectionRoot());
    scan.setOperatorId(scan.getOperatorId());
  }

  OperatorContext oContext = context.newOperatorContext(scan);
  final DrillFileSystem dfs;
  try {
    dfs = oContext.newFileSystem(fsConf);
  } catch (IOException e) {
    throw new ExecutionSetupException(String.format("Failed to create FileSystem: %s", e.getMessage()), e);
  }

  // Create one RecordReader per file work unit and collect that unit's implicit column values.
  List<RecordReader> readers = Lists.newArrayList();
  List<Map<String, String>> implicitColumns = Lists.newArrayList();
  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  for (FileWork work : scan.getWorkUnits()) {
    RecordReader recordReader = getRecordReader(context, dfs, work, scan.getColumns(), scan.getUserName());
    readers.add(recordReader);
    Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(work, scan.getSelectionRoot());
    implicitColumns.add(implicitValues);
    if (implicitValues.size() > mapWithMaxColumns.size()) {
      mapWithMaxColumns = implicitValues;
    }
  }

  // All readers should expose the same set of implicit columns; add any missing ones with a null value.
  Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
  for (Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
  }

  return new ScanBatch(scan, context, oContext, readers.iterator(), implicitColumns);
}
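The trickiest step above is padding each reader's implicit-column map so every reader reports the same key set. The following standalone sketch uses only Guava (no Drill classes) and hypothetical file names and directory values to show what Maps.transformValues, Functions.constant, and Maps.difference(...).entriesOnlyOnRight() do in that final loop:

import java.util.List;
import java.util.Map;

import com.google.common.base.Functions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

public class ImplicitColumnPadding {
  public static void main(String[] args) {
    // Hypothetical implicit-column maps: the second file sits one directory deeper,
    // so it carries an extra "dir1" entry.
    Map<String, String> fileA = Maps.newLinkedHashMap();
    fileA.put("filename", "a.csv");
    fileA.put("dir0", "2017");

    Map<String, String> fileB = Maps.newLinkedHashMap();
    fileB.put("filename", "b.csv");
    fileB.put("dir0", "2017");
    fileB.put("dir1", "01");

    List<Map<String, String>> implicitColumns = Lists.newArrayList(fileA, fileB);
    Map<String, String> mapWithMaxColumns = fileB; // the widest map seen so far

    // Same normalization as in getReaderBatch: take the widest key set with null values,
    // then copy into each map only the keys it is missing.
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
      map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }

    System.out.println(implicitColumns);
    // [{filename=a.csv, dir0=2017, dir1=null}, {filename=b.csv, dir0=2017, dir1=01}]
  }
}

After the loop, every map contains the full key set, so ScanBatch can emit a uniform schema of implicit columns across all readers.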
Use of org.apache.drill.exec.store.RecordReader in project drill by apache.
The class HiveScanBatchCreator, method getBatch:
@Override
public ScanBatch getBatch(FragmentContext context, HiveSubScan config, List<RecordBatch> children) throws ExecutionSetupException {
  List<RecordReader> readers = Lists.newArrayList();
  HiveTableWithColumnCache table = config.getTable();
  List<InputSplit> splits = config.getInputSplits();
  List<HivePartition> partitions = config.getPartitions();
  boolean hasPartitions = (partitions != null && partitions.size() > 0);
  int i = 0;
  final UserGroupInformation proxyUgi = ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName());
  final HiveConf hiveConf = config.getHiveConf();

  // Pick a reader implementation based on the table's input format; fall back to HiveDefaultReader.
  final String formatName = table.getSd().getInputFormat();
  Class<? extends HiveAbstractReader> readerClass = HiveDefaultReader.class;
  if (readerMap.containsKey(formatName)) {
    readerClass = readerMap.get(formatName);
  }

  Constructor<? extends HiveAbstractReader> readerConstructor = null;
  try {
    readerConstructor = readerClass.getConstructor(HiveTableWithColumnCache.class, HivePartition.class,
        InputSplit.class, List.class, FragmentContext.class, HiveConf.class, UserGroupInformation.class);
    // One reader per input split; pass the matching partition when the table is partitioned.
    for (InputSplit split : splits) {
      readers.add(readerConstructor.newInstance(table, (hasPartitions ? partitions.get(i++) : null),
          split, config.getColumns(), context, hiveConf, proxyUgi));
    }
    // If there were no splits, create a single reader with no split so the scan still produces a schema.
    if (readers.size() == 0) {
      readers.add(readerConstructor.newInstance(table, null, null, config.getColumns(), context, hiveConf, proxyUgi));
    }
  } catch (Exception e) {
    logger.error("No constructor for {}, thrown {}", readerClass.getName(), e);
  }
  return new ScanBatch(config, context, readers.iterator());
}
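The reader selection above follows a simple reflection pattern: resolve a constructor with a known parameter list once, then instantiate one reader per split, falling back to a default implementation for unknown formats. Below is a minimal, self-contained sketch of that pattern; the Reader, TextReader, DefaultReader, and READER_MAP names are hypothetical placeholders standing in for HiveAbstractReader, its per-format subclasses, and the plugin's readerMap, not actual Drill classes.

import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReaderFactoryDemo {

  // Hypothetical stand-in for HiveAbstractReader.
  public abstract static class Reader {
    final String split;
    Reader(String split) { this.split = split; }
    abstract String describe();
  }

  // Hypothetical per-format readers.
  public static class TextReader extends Reader {
    public TextReader(String split) { super(split); }
    String describe() { return "text reader for " + split; }
  }

  public static class DefaultReader extends Reader {
    public DefaultReader(String split) { super(split); }
    String describe() { return "default reader for " + split; }
  }

  // Analogue of the readerMap keyed by input-format class name.
  static final Map<String, Class<? extends Reader>> READER_MAP = new HashMap<>();
  static {
    READER_MAP.put("org.apache.hadoop.mapred.TextInputFormat", TextReader.class);
  }

  public static void main(String[] args) throws Exception {
    String formatName = "org.apache.hadoop.mapred.TextInputFormat";
    List<String> splits = Arrays.asList("split-0", "split-1");

    // Fall back to the default reader when the format is unknown, as the Hive plugin does.
    Class<? extends Reader> readerClass = READER_MAP.getOrDefault(formatName, DefaultReader.class);

    // Resolve the constructor once, then create one reader per split.
    Constructor<? extends Reader> ctor = readerClass.getConstructor(String.class);
    List<Reader> readers = new ArrayList<>();
    for (String split : splits) {
      readers.add(ctor.newInstance(split));
    }

    readers.forEach(r -> System.out.println(r.describe()));
  }
}

Resolving the constructor outside the split loop keeps the reflective lookup cost constant regardless of how many splits (and therefore readers) the scan creates.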