Example 6 with RecordReader

Use of org.apache.drill.exec.store.RecordReader in project drill by apache.

The class KuduScanBatchCreator, method getBatch:

@Override
public ScanBatch getBatch(FragmentContext context, KuduSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = Lists.newArrayList();
    List<SchemaPath> columns = null;
    for (KuduSubScan.KuduSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new KuduRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns, context));
        } catch (Exception e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    return new ScanBatch(subScan, context, readers.iterator());
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordReader(org.apache.drill.exec.store.RecordReader) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)
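
Every list-style creator in these examples follows the same shape: assert the scan has no child operators, resolve the projected columns (falling back to GroupScan.ALL_COLUMNS when none are given), build one RecordReader per sub-scan spec, and hand the readers to a ScanBatch. A minimal sketch of that shared pattern, using only the APIs visible in the snippets above; MySubScan, MySubScanSpec, and MyRecordReader are placeholder names for a hypothetical plugin, not Drill classes:

@Override
public ScanBatch getBatch(FragmentContext context, MySubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    // Scans are leaf operators and never receive incoming batches.
    Preconditions.checkArgument(children.isEmpty());
    // A null projection list means the query selects every column.
    List<SchemaPath> columns = subScan.getColumns() != null ? subScan.getColumns() : GroupScan.ALL_COLUMNS;
    List<RecordReader> readers = Lists.newArrayList();
    for (MySubScan.MySubScanSpec spec : subScan.getScanSpecList()) {
        try {
            // One reader per spec; each spec is one unit of scan parallelism
            // (a tablet, a region, a row group, ...).
            readers.add(new MyRecordReader(spec, columns, context));
        } catch (Exception e) {
            throw new ExecutionSetupException(e);
        }
    }
    return new ScanBatch(subScan, context, readers.iterator());
}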

Example 7 with RecordReader

Use of org.apache.drill.exec.store.RecordReader in project drill by apache.

The class JdbcBatchCreator, method getBatch:

@Override
public ScanBatch getBatch(FragmentContext context, JdbcSubScan config, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    JdbcStoragePlugin plugin = config.getPlugin();
    RecordReader reader = new JdbcRecordReader(context, plugin.getSource(), config.getSql(), plugin.getName());
    return new ScanBatch(config, context, Collections.singletonList(reader).iterator());
}
Also used : RecordReader(org.apache.drill.exec.store.RecordReader) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)
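
Unlike the Kudu and HBase creators, which build one reader per tablet or region, a JDBC sub-scan carries a single SQL statement, so a one-element iterator over a single JdbcRecordReader is all the ScanBatch needs.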

Example 8 with RecordReader

Use of org.apache.drill.exec.store.RecordReader in project drill by apache.

The class ParquetScanBatchCreator, method getBatch:

@Override
public ScanBatch getBatch(FragmentContext context, ParquetRowGroupScan rowGroupScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    OperatorContext oContext = context.newOperatorContext(rowGroupScan);
    final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, rowGroupScan.getColumns());
    if (!columnExplorer.isStarQuery()) {
        // Rebuild the scan with the materialized table column list, carrying the
        // operator id over from the original scan.
        ParquetRowGroupScan newScan = new ParquetRowGroupScan(rowGroupScan.getUserName(), rowGroupScan.getStorageEngine(), rowGroupScan.getRowGroupReadEntries(), columnExplorer.getTableColumns(), rowGroupScan.getSelectionRoot(), rowGroupScan.getFilter());
        newScan.setOperatorId(rowGroupScan.getOperatorId());
        rowGroupScan = newScan;
    }
    DrillFileSystem fs;
    try {
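        // Pick the file system implementation based on the async page reader option.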
        boolean useAsyncPageReader = context.getOptions().getOption(ExecConstants.PARQUET_PAGEREADER_ASYNC).bool_val;
        if (useAsyncPageReader) {
            fs = oContext.newNonTrackingFileSystem(rowGroupScan.getStorageEngine().getFsConf());
        } else {
            fs = oContext.newFileSystem(rowGroupScan.getStorageEngine().getFsConf());
        }
    } catch (IOException e) {
        throw new ExecutionSetupException(String.format("Failed to create DrillFileSystem: %s", e.getMessage()), e);
    }
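    // Work on a copy of the file system configuration with Hadoop's
    // bytes-read, bytes-total, and time-read counters switched off.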
    Configuration conf = new Configuration(fs.getConf());
    conf.setBoolean(ENABLE_BYTES_READ_COUNTER, false);
    conf.setBoolean(ENABLE_BYTES_TOTAL_COUNTER, false);
    conf.setBoolean(ENABLE_TIME_READ_COUNTER, false);
    // keep footers in a map to avoid re-reading them
    Map<String, ParquetMetadata> footers = Maps.newHashMap();
    List<RecordReader> readers = Lists.newArrayList();
    List<Map<String, String>> implicitColumns = Lists.newArrayList();
    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    for (RowGroupReadEntry e : rowGroupScan.getRowGroupReadEntries()) {
        /*
         * The footers map above caches each file's footer so it is read only once,
         * even when several row groups of the same file land in this fragment.
         * TODO - to avoid reading the footer yet again in the Parquet record reader
         * (it is read earlier in the ParquetStorageEngine), more of the file
         * metadata should be added to the RowGroupInfo, populated on the first
         * read, and passed to the reader constructors below.
         */
        try {
            Stopwatch timer = Stopwatch.createUnstarted();
            if (!footers.containsKey(e.getPath())) {
                timer.start();
                ParquetMetadata footer = ParquetFileReader.readFooter(conf, new Path(e.getPath()));
                long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
                logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", "", e.getPath(), "", 0, 0, 0, timeToRead);
                footers.put(e.getPath(), footer);
            }
            boolean autoCorrectCorruptDates = rowGroupScan.formatConfig.autoCorrectCorruptDates;
            ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility.detectCorruptDates(footers.get(e.getPath()), rowGroupScan.getColumns(), autoCorrectCorruptDates);
            if (logger.isDebugEnabled()) {
                logger.debug(containsCorruptDates.toString());
            }
            if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()))) {
                readers.add(new ParquetRecordReader(context, e.getPath(), e.getRowGroupIndex(), e.getNumRecordsToRead(), fs, CodecFactory.createDirectCodecFactory(fs.getConf(), new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0), footers.get(e.getPath()), rowGroupScan.getColumns(), containsCorruptDates));
            } else {
                ParquetMetadata footer = footers.get(e.getPath());
                readers.add(new DrillParquetReader(context, footer, e, columnExplorer.getTableColumns(), fs, containsCorruptDates));
            }
            Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(e, rowGroupScan.getSelectionRoot());
            implicitColumns.add(implicitValues);
            if (implicitValues.size() > mapWithMaxColumns.size()) {
                mapWithMaxColumns = implicitValues;
            }
        } catch (IOException e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    // all readers should have the same number of implicit columns, add missing ones with value null
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }
    return new ScanBatch(rowGroupScan, context, oContext, readers.iterator(), implicitColumns);
}
Also used : ImplicitColumnExplorer(org.apache.drill.exec.store.ImplicitColumnExplorer) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetRecordReader(org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader) RecordReader(org.apache.drill.exec.store.RecordReader) Stopwatch(com.google.common.base.Stopwatch) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) OperatorContext(org.apache.drill.exec.ops.OperatorContext) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch) Path(org.apache.hadoop.fs.Path) DrillParquetReader(org.apache.drill.exec.store.parquet2.DrillParquetReader) IOException(java.io.IOException) Map(java.util.Map)
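
The implicit-column padding at the end of the Parquet creator is easy to miss. Each reader records the implicit columns (filename, directory levels, and so on) it produced; the widest map then serves as a template so every reader ends up with the same key set, with null for columns it did not see. A standalone sketch of the same Guava idiom, runnable outside Drill; the maps and column values here are made up for illustration:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.base.Functions;
import com.google.common.collect.Maps;

public class ImplicitColumnPadding {
    public static void main(String[] args) {
        // One reader saw two implicit columns, the other only one.
        Map<String, String> first = new HashMap<>();
        first.put("filename", "a.parquet");
        first.put("dir0", "2017");
        Map<String, String> second = new HashMap<>();
        second.put("filename", "b.parquet");

        List<Map<String, String>> implicitColumns = new ArrayList<>();
        implicitColumns.add(first);
        implicitColumns.add(second);
        Map<String, String> mapWithMaxColumns = first;

        // Same idiom as above: a view of the widest map with every value nulled out...
        Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
        // ...and each map copies in only the keys it is missing.
        for (Map<String, String> map : implicitColumns) {
            map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
        }
        // second now carries dir0=null, so both readers expose the same column set.
        System.out.println(implicitColumns);
    }
}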

Example 9 with RecordReader

Use of org.apache.drill.exec.store.RecordReader in project drill by apache.

The class SystemTableBatchCreator, method getBatch:

//  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SystemTableBatchCreator.class);
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public ScanBatch getBatch(final FragmentContext context, final SystemTableScan scan, final List<RecordBatch> children) throws ExecutionSetupException {
    final SystemTable table = scan.getTable();
    final Iterator<Object> iterator = table.getIterator(context);
    final RecordReader reader = new PojoRecordReader(table.getPojoClass(), iterator);
    return new ScanBatch(scan, context, Collections.singleton(reader).iterator());
}
Also used : RecordReader(org.apache.drill.exec.store.RecordReader) PojoRecordReader(org.apache.drill.exec.store.pojo.PojoRecordReader) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)
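
PojoRecordReader materializes plain Java objects as records, reflecting over the POJO's public fields to derive the columns; this is how Drill exposes its system tables. A hedged sketch of constructing one directly, mirroring the raw, unchecked call above; VersionTableExample, VersionRow, and the row values are invented for illustration:

import java.util.Arrays;
import java.util.Iterator;

import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.pojo.PojoRecordReader;

public class VersionTableExample {
    // Hypothetical POJO: each public field becomes an output column.
    public static class VersionRow {
        public String version;
        public String commit;

        public VersionRow(String version, String commit) {
            this.version = version;
            this.commit = commit;
        }
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static RecordReader versionReader() {
        Iterator<Object> rows = Arrays.<Object>asList(
                new VersionRow("1.9.0", "aaaa111"),
                new VersionRow("1.10.0", "bbbb222")).iterator();
        // Raw construction, as in the system-table creator above.
        return new PojoRecordReader(VersionRow.class, rows);
    }
}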

Example 10 with RecordReader

Use of org.apache.drill.exec.store.RecordReader in project drill by apache.

The class HBaseScanBatchCreator, method getBatch:

@Override
public ScanBatch getBatch(FragmentContext context, HBaseSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = Lists.newArrayList();
    List<SchemaPath> columns = null;
    for (HBaseSubScan.HBaseSubScanSpec scanSpec : subScan.getRegionScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new HBaseRecordReader(subScan.getStorageEngine().getConnection(), scanSpec, columns, context));
        } catch (Exception e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    return new ScanBatch(subScan, context, readers.iterator());
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordReader(org.apache.drill.exec.store.RecordReader) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)
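
This creator is structurally identical to the Kudu one in Example 6, with one reader per region scan spec instead of per tablet; the generic sketch after that example covers both.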

Aggregations

RecordReader (org.apache.drill.exec.store.RecordReader): 12
ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 11
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 8
IOException (java.io.IOException): 4
SchemaPath (org.apache.drill.common.expression.SchemaPath): 4
Map (java.util.Map): 3
OperatorContext (org.apache.drill.exec.ops.OperatorContext): 3
ImplicitColumnExplorer (org.apache.drill.exec.store.ImplicitColumnExplorer): 2
DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem): 2
ParquetRecordReader (org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader): 2
Path (org.apache.hadoop.fs.Path): 2
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2
InputSplit (org.apache.hadoop.mapred.InputSplit): 2
ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata): 2
JsonNode (com.fasterxml.jackson.databind.JsonNode): 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
Stopwatch (com.google.common.base.Stopwatch): 1
ArrayList (java.util.ArrayList): 1
List (java.util.List): 1
Matcher (java.util.regex.Matcher): 1