
Example 21 with OperatorContext

Use of org.apache.drill.exec.ops.OperatorContext in project drill by apache.

In class HiveDrillNativeParquetScanBatchCreator, method getBatch:

@Override
public CloseableRecordBatch getBatch(ExecutorFragmentContext context, HiveDrillNativeParquetRowGroupScan rowGroupScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    OperatorContext oContext = context.newOperatorContext(rowGroupScan);
    return getBatch(context, rowGroupScan, oContext);
}
Also used: OperatorContext(org.apache.drill.exec.ops.OperatorContext)
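
The snippet above delegates to a three-argument getBatch overload that receives the newly created OperatorContext; that overload is not part of this example. The following is only a rough sketch, assuming the overload follows the usual scan-batch pattern shown in Example 23 below (build one RecordReader per unit of work, then wrap the readers in a ScanBatch); the helper steps are elided and illustrative, not the actual Hive plugin implementation.

protected CloseableRecordBatch getBatch(ExecutorFragmentContext context, HiveDrillNativeParquetRowGroupScan rowGroupScan, OperatorContext oContext) throws ExecutionSetupException {
    final List<RecordReader> readers = new LinkedList<>();
    final List<Map<String, String>> implicitColumns = new ArrayList<>();
    // Build one reader per Parquet row group; a DrillFileSystem obtained via
    // oContext.newFileSystem(...) is tracked against this operator's stats.
    // ... reader construction elided ...
    // Hand the readers and their per-file implicit columns to a ScanBatch
    // built on this operator context.
    return new ScanBatch(context, oContext, readers, implicitColumns);
}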

Example 22 with OperatorContext

Use of org.apache.drill.exec.ops.OperatorContext in project drill by apache.

In class CompliantTextBatchReader, method open:

/**
 * Performs the initial setup required for the record reader.
 * Initializes the input stream, handling of the output record batch
 * and the actual reader to be used.
 *
 * @param schemaNegotiator Used to create the schema in the output record batch
 * @return true if opens successfully, false if output is null
 */
@Override
public boolean open(ColumnsSchemaNegotiator schemaNegotiator) {
    final OperatorContext context = schemaNegotiator.context();
    dfs = schemaNegotiator.fileSystem();
    split = schemaNegotiator.split();
    // Note: DO NOT use managed buffers here. They remain in existence
    // until the fragment is shut down. The buffers here are large.
    // If we scan 1000 files, and allocate 1 MB for each, we end up
    // holding onto 1 GB of memory in managed buffers.
    // Instead, we allocate the buffers explicitly, and must free
    // them.
    readBuffer = context.getAllocator().buffer(READ_BUFFER);
    whitespaceBuffer = context.getAllocator().buffer(WHITE_SPACE_BUFFER);
    schemaNegotiator.batchSize(MAX_RECORDS_PER_BATCH);
    // Set up the output, the input, and the reader.
    try {
        TextOutput output;
        if (settings.isHeaderExtractionEnabled()) {
            output = openWithHeaders(schemaNegotiator);
        } else {
            output = openWithoutHeaders(schemaNegotiator);
        }
        if (output == null) {
            return false;
        }
        openReader(output);
        return true;
    } catch (final IOException e) {
        throw UserException.dataReadError(e).addContext("File Path", split.getPath().toString()).build(logger);
    }
}
Also used: OperatorContext(org.apache.drill.exec.ops.OperatorContext), IOException(java.io.IOException)
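
Because readBuffer and whitespaceBuffer are allocated directly from the operator's allocator rather than as managed buffers, the reader must release them itself when it is done. A minimal sketch of the matching close(), assuming the usual reader lifecycle; this is not the verbatim Drill implementation.

@Override
public void close() {
    // Free the explicitly allocated buffers. Unlike managed buffers, these are
    // not released automatically when the fragment shuts down.
    if (readBuffer != null) {
        readBuffer.release();
        readBuffer = null;
    }
    if (whitespaceBuffer != null) {
        whitespaceBuffer.release();
        whitespaceBuffer = null;
    }
}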

Example 23 with OperatorContext

Use of org.apache.drill.exec.ops.OperatorContext in project drill by apache.

In class EasyFormatPlugin, method buildScanBatch:

/**
 * Use the original scanner based on the {@link RecordReader} interface.
 * Requires that the storage plugin roll its own solutions for null columns.
 * Is not able to limit vector or batch sizes. Retained for backward
 * compatibility with "classic" format plugins which have not yet been
 * upgraded to use the new framework.
 */
private CloseableRecordBatch buildScanBatch(FragmentContext context, EasySubScan scan) throws ExecutionSetupException {
    final ColumnExplorer columnExplorer = new ColumnExplorer(context.getOptions(), scan.getColumns());
    if (!columnExplorer.isStarQuery()) {
        final EasySubScan subScan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(), scan.getFormatPlugin(), columnExplorer.getTableColumns(), scan.getSelectionRoot(), scan.getPartitionDepth(), scan.getSchema(), scan.getMaxRecords());
        // Carry the operator id of the original sub-scan over to the narrowed copy.
        subScan.setOperatorId(scan.getOperatorId());
        scan = subScan;
    }
    final OperatorContext oContext = context.newOperatorContext(scan);
    final DrillFileSystem dfs;
    try {
        dfs = oContext.newFileSystem(easyConfig().fsConf);
    } catch (final IOException e) {
        throw new ExecutionSetupException(String.format("Failed to create FileSystem: %s", e.getMessage()), e);
    }
    final List<RecordReader> readers = new LinkedList<>();
    final List<Map<String, String>> implicitColumns = Lists.newArrayList();
    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    final boolean supportsFileImplicitColumns = scan.getSelectionRoot() != null;
    for (final FileWork work : scan.getWorkUnits()) {
        final RecordReader recordReader = getRecordReader(context, dfs, work, scan.getColumns(), scan.getUserName());
        readers.add(recordReader);
        final List<String> partitionValues = ColumnExplorer.listPartitionValues(work.getPath(), scan.getSelectionRoot(), false);
        final Map<String, String> implicitValues = columnExplorer.populateColumns(work.getPath(), partitionValues, supportsFileImplicitColumns, dfs);
        implicitColumns.add(implicitValues);
        if (implicitValues.size() > mapWithMaxColumns.size()) {
            mapWithMaxColumns = implicitValues;
        }
    }
    // All readers should expose the same set of implicit columns; add the missing ones with value null (see the standalone demo after this listing).
    final Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant(null));
    for (final Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }
    return new ScanBatch(context, oContext, readers, implicitColumns);
}
Also used: ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException), RecordReader(org.apache.drill.exec.store.RecordReader), CompleteFileWork(org.apache.drill.exec.store.schedule.CompleteFileWork), IOException(java.io.IOException), LinkedList(java.util.LinkedList), ColumnExplorer(org.apache.drill.exec.store.ColumnExplorer), DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem), OperatorContext(org.apache.drill.exec.ops.OperatorContext), ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch), Map(java.util.Map)
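
To make the normalization loop at the end of buildScanBatch concrete, here is a small, self-contained demo with hypothetical data. It uses plain collections instead of the Guava transformValues/difference helpers, but produces the same result: readers that resolved fewer implicit columns are padded with null values so every reader exposes the same column set.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ImplicitColumnsDemo {
    public static void main(String[] args) {
        // Reader A resolved two implicit columns, reader B only one.
        Map<String, String> readerA = new LinkedHashMap<>();
        readerA.put("dir0", "2020");
        readerA.put("filename", "a.csv");
        Map<String, String> readerB = new LinkedHashMap<>();
        readerB.put("dir0", "2021");

        List<Map<String, String>> implicitColumns = Arrays.asList(readerA, readerB);
        // The widest map drives the padding, exactly like mapWithMaxColumns above.
        Map<String, String> mapWithMaxColumns = readerA;
        List<String> allKeys = new ArrayList<>(mapWithMaxColumns.keySet());

        // Equivalent of transformValues(..., constant(null)) followed by
        // difference(...).entriesOnlyOnRight(): every key the widest map has
        // and a reader is missing is added with a null value.
        for (Map<String, String> map : implicitColumns) {
            for (String key : allKeys) {
                map.putIfAbsent(key, null);
            }
        }
        System.out.println(readerB); // prints {dir0=2021, filename=null}
    }
}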

Aggregations

OperatorContext (org.apache.drill.exec.ops.OperatorContext): 23
IOException (java.io.IOException): 9
Map (java.util.Map): 8
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 8
Test (org.junit.Test): 8
ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 7
RecordReader (org.apache.drill.exec.store.RecordReader): 7
DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem): 6
LinkedList (java.util.LinkedList): 5
OperatorTest (org.apache.drill.categories.OperatorTest): 4
PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator): 4
RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 4
VectorContainer (org.apache.drill.exec.record.VectorContainer): 4
ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata): 4
DrillBuf (io.netty.buffer.DrillBuf): 3
ArrayList (java.util.ArrayList): 3
SchemaPath (org.apache.drill.common.expression.SchemaPath): 3
OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 3
Sort (org.apache.drill.exec.physical.config.Sort): 3
MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch): 3