Example 56 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.

the class CompliantTextRecordReader method setup.

/**
   * Performs the initial setup required for the record reader.
   * Initializes the input stream, the handling of the output record batch,
   * and the reader to be used.
   * @param context  operator context from which buffers will be allocated and managed
   * @param outputMutator  used to create the schema in the output record batch
   * @throws ExecutionSetupException if the input stream, output vectors, or text reader cannot be set up
   */
@SuppressWarnings("resource")
@Override
public void setup(OperatorContext context, OutputMutator outputMutator) throws ExecutionSetupException {
    oContext = context;
    // Note: DO NOT use managed buffers here. They remain in existence
    // until the fragment is shut down. The buffers here are large.
    // If we scan 1000 files, and allocate 1 MB for each, we end up
    // holding onto 1 GB of memory in managed buffers.
    // Instead, we allocate the buffers explicitly, and must free
    // them.
    //    readBuffer = context.getManagedBuffer(READ_BUFFER);
    //    whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);
    readBuffer = context.getAllocator().buffer(READ_BUFFER);
    whitespaceBuffer = context.getAllocator().buffer(WHITE_SPACE_BUFFER);
    // setup Output, Input, and Reader
    try {
        TextOutput output = null;
        TextInput input = null;
        InputStream stream = null;
        // setup Output using OutputMutator
        if (settings.isHeaderExtractionEnabled()) {
            //extract header and use that to setup a set of VarCharVectors
            String[] fieldNames = extractHeader();
            output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery());
        } else {
            //simply use RepeatedVarCharVector
            output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery());
        }
        // setup Input using InputStream
        logger.trace("Opening file {}", split.getPath());
        stream = dfs.openPossiblyCompressedStream(split.getPath());
        input = new TextInput(settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());
        // setup Reader using Input and Output
        reader = new TextReader(settings, input, output, whitespaceBuffer);
        reader.start();
    } catch (SchemaChangeException | IOException e) {
        throw new ExecutionSetupException(String.format("Failure while setting up text reader for file %s", split.getPath()), e);
    } catch (IllegalArgumentException e) {
        throw UserException.dataReadError(e).addContext("File Path", split.getPath().toString()).build(logger);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) InputStream(java.io.InputStream) IOException(java.io.IOException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException)
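
The comment above is only half of the bargain: buffers taken straight from the operator allocator are not tracked by the fragment, so the reader itself has to release them. Below is a minimal sketch of the matching cleanup, assuming the reader's close() hook is the right place and that releasing a DrillBuf returns its direct memory to the allocator; the actual Drill class has its own close logic, so treat this as illustrative only.

@Override
public void close() {
    try {
        if (reader != null) {
            // Close the TextReader and, through it, the underlying input stream.
            reader.close();
            reader = null;
        }
    } catch (Exception e) {
        logger.warn("Exception while closing text reader for file {}", split.getPath(), e);
    } finally {
        // Release the explicitly allocated buffers from setup(); they are not managed buffers.
        if (readBuffer != null) {
            readBuffer.release();
            readBuffer = null;
        }
        if (whitespaceBuffer != null) {
            whitespaceBuffer.release();
            whitespaceBuffer = null;
        }
    }
}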

Example 57 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.

the class KuduScanBatchCreator method getBatch.

@Override
public ScanBatch getBatch(FragmentContext context, KuduSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = Lists.newArrayList();
    List<SchemaPath> columns = null;
    for (KuduSubScan.KuduSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new KuduRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns, context));
        } catch (Exception e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    return new ScanBatch(subScan, context, readers.iterator());
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordReader(org.apache.drill.exec.store.RecordReader) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)

Example 58 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.

the class ParquetScanBatchCreator method getBatch.

@Override
public ScanBatch getBatch(FragmentContext context, ParquetRowGroupScan rowGroupScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    OperatorContext oContext = context.newOperatorContext(rowGroupScan);
    final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, rowGroupScan.getColumns());
    if (!columnExplorer.isStarQuery()) {
        rowGroupScan = new ParquetRowGroupScan(rowGroupScan.getUserName(), rowGroupScan.getStorageEngine(), rowGroupScan.getRowGroupReadEntries(), columnExplorer.getTableColumns(), rowGroupScan.getSelectionRoot(), rowGroupScan.getFilter());
        rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
    }
    DrillFileSystem fs;
    try {
        boolean useAsyncPageReader = context.getOptions().getOption(ExecConstants.PARQUET_PAGEREADER_ASYNC).bool_val;
        if (useAsyncPageReader) {
            fs = oContext.newNonTrackingFileSystem(rowGroupScan.getStorageEngine().getFsConf());
        } else {
            fs = oContext.newFileSystem(rowGroupScan.getStorageEngine().getFsConf());
        }
    } catch (IOException e) {
        throw new ExecutionSetupException(String.format("Failed to create DrillFileSystem: %s", e.getMessage()), e);
    }
    Configuration conf = new Configuration(fs.getConf());
    conf.setBoolean(ENABLE_BYTES_READ_COUNTER, false);
    conf.setBoolean(ENABLE_BYTES_TOTAL_COUNTER, false);
    conf.setBoolean(ENABLE_TIME_READ_COUNTER, false);
    // keep footers in a map to avoid re-reading them
    Map<String, ParquetMetadata> footers = Maps.newHashMap();
    List<RecordReader> readers = Lists.newArrayList();
    List<Map<String, String>> implicitColumns = Lists.newArrayList();
    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    for (RowGroupReadEntry e : rowGroupScan.getRowGroupReadEntries()) {
        /*
          Here we could store a map from file names to footers, to prevent re-reading the footer for each row group in a file.
          TODO - to prevent reading the footer again in the parquet record reader (it is read earlier in the ParquetStorageEngine),
          we should add more information to the RowGroupInfo that will be populated upon the first read to
          provide the reader with all of the file metadata it needs.
          These fields will be added to the constructor below.
        */
        try {
            Stopwatch timer = Stopwatch.createUnstarted();
            if (!footers.containsKey(e.getPath())) {
                timer.start();
                ParquetMetadata footer = ParquetFileReader.readFooter(conf, new Path(e.getPath()));
                long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
                logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", "", e.getPath(), "", 0, 0, 0, timeToRead);
                footers.put(e.getPath(), footer);
            }
            boolean autoCorrectCorruptDates = rowGroupScan.formatConfig.autoCorrectCorruptDates;
            ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility.detectCorruptDates(footers.get(e.getPath()), rowGroupScan.getColumns(), autoCorrectCorruptDates);
            if (logger.isDebugEnabled()) {
                logger.debug(containsCorruptDates.toString());
            }
            if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()))) {
                readers.add(new ParquetRecordReader(context, e.getPath(), e.getRowGroupIndex(), e.getNumRecordsToRead(), fs, CodecFactory.createDirectCodecFactory(fs.getConf(), new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0), footers.get(e.getPath()), rowGroupScan.getColumns(), containsCorruptDates));
            } else {
                ParquetMetadata footer = footers.get(e.getPath());
                readers.add(new DrillParquetReader(context, footer, e, columnExplorer.getTableColumns(), fs, containsCorruptDates));
            }
            Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(e, rowGroupScan.getSelectionRoot());
            implicitColumns.add(implicitValues);
            if (implicitValues.size() > mapWithMaxColumns.size()) {
                mapWithMaxColumns = implicitValues;
            }
        } catch (IOException e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    // all readers should have the same number of implicit columns, add missing ones with value null
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }
    return new ScanBatch(rowGroupScan, context, oContext, readers.iterator(), implicitColumns);
}
Also used : ImplicitColumnExplorer(org.apache.drill.exec.store.ImplicitColumnExplorer) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetRecordReader(org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader) RecordReader(org.apache.drill.exec.store.RecordReader) Stopwatch(com.google.common.base.Stopwatch) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) OperatorContext(org.apache.drill.exec.ops.OperatorContext) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch) Path(org.apache.hadoop.fs.Path) DrillParquetReader(org.apache.drill.exec.store.parquet2.DrillParquetReader) IOException(java.io.IOException) Map(java.util.Map)
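
The final loop of the method pads each reader's implicit-column map so that every reader exposes the same set of keys, using Maps.transformValues, Functions.constant and Maps.difference from Guava. The standalone class below is a small illustration of that idiom; the class name and sample values are invented, only the Guava calls mirror the code above.

import java.util.Map;

import com.google.common.base.Functions;
import com.google.common.collect.Maps;

public class ImplicitColumnPaddingDemo {
    public static void main(String[] args) {
        // Pretend this reader produced the most implicit columns (e.g. a file in a partition directory).
        Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
        mapWithMaxColumns.put("filename", "a.parquet");
        mapWithMaxColumns.put("dir0", "2017");

        // Another reader only produced a subset of those columns.
        Map<String, String> smaller = Maps.newLinkedHashMap();
        smaller.put("filename", "b.parquet");

        // Template map: every known implicit-column key mapped to null.
        Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));

        // Keys that exist only in the template are copied into the smaller map with value null.
        smaller.putAll(Maps.difference(smaller, diff).entriesOnlyOnRight());

        System.out.println(smaller); // {filename=b.parquet, dir0=null}
    }
}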

Example 59 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.

the class PojoRecordReader method setup.

@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    operatorContext = context;
    try {
        Field[] fields = pojoClass.getDeclaredFields();
        List<PojoWriter> writers = Lists.newArrayList();
        for (int i = 0; i < fields.length; i++) {
            Field f = fields[i];
            if (Modifier.isStatic(f.getModifiers())) {
                continue;
            }
            Class<?> type = f.getType();
            PojoWriter w = null;
            if (type == int.class) {
                w = new IntWriter(f);
            } else if (type == Integer.class) {
                w = new NIntWriter(f);
            } else if (type == Long.class) {
                w = new NBigIntWriter(f);
            } else if (type == Boolean.class) {
                w = new NBooleanWriter(f);
            } else if (type == double.class) {
                w = new DoubleWriter(f);
            } else if (type == Double.class) {
                w = new NDoubleWriter(f);
            } else if (type.isEnum()) {
                w = new EnumWriter(f, output.getManagedBuffer());
            } else if (type == boolean.class) {
                w = new BitWriter(f);
            } else if (type == long.class) {
                w = new LongWriter(f);
            } else if (type == String.class) {
                w = new StringWriter(f, output.getManagedBuffer());
            } else if (type == Timestamp.class) {
                w = new NTimeStampWriter(f);
            } else {
                throw new ExecutionSetupException(String.format("PojoRecord reader doesn't yet support conversions from type [%s].", type));
            }
            writers.add(w);
            w.init(output);
        }
        this.writers = writers.toArray(new PojoWriter[writers.size()]);
    } catch (SchemaChangeException e) {
        throw new ExecutionSetupException("Failure while setting up schema for PojoRecordReader.", e);
    }
    currentIterator = pojoObjects.iterator();
}
Also used : BitWriter(org.apache.drill.exec.store.pojo.Writers.BitWriter) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) EnumWriter(org.apache.drill.exec.store.pojo.Writers.EnumWriter) LongWriter(org.apache.drill.exec.store.pojo.Writers.LongWriter) IntWriter(org.apache.drill.exec.store.pojo.Writers.IntWriter) NIntWriter(org.apache.drill.exec.store.pojo.Writers.NIntWriter) NBigIntWriter(org.apache.drill.exec.store.pojo.Writers.NBigIntWriter) NTimeStampWriter(org.apache.drill.exec.store.pojo.Writers.NTimeStampWriter) Field(java.lang.reflect.Field) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) StringWriter(org.apache.drill.exec.store.pojo.Writers.StringWriter) NDoubleWriter(org.apache.drill.exec.store.pojo.Writers.NDoubleWriter) DoubleWriter(org.apache.drill.exec.store.pojo.Writers.DoubleWriter) NBooleanWriter(org.apache.drill.exec.store.pojo.Writers.NBooleanWriter)
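
The chain of type checks above decides which PojoWriter backs each declared field. As a hypothetical illustration (the class below is not part of Drill, and it assumes the Timestamp branch refers to java.sql.Timestamp), the comments show the writer each field would receive under that dispatch:

public class SampleRow {
    public int id;                        // IntWriter        (primitive int)
    public Integer parentId;              // NIntWriter       (nullable Integer)
    public long rowCount;                 // LongWriter       (primitive long)
    public Long byteCount;                // NBigIntWriter    (nullable Long)
    public boolean active;                // BitWriter        (primitive boolean)
    public Boolean verified;              // NBooleanWriter   (nullable Boolean)
    public double score;                  // DoubleWriter     (primitive double)
    public Double ratio;                  // NDoubleWriter    (nullable Double)
    public String name;                   // StringWriter     (backed by a managed buffer)
    public SampleState state;             // EnumWriter       (any enum, also uses a managed buffer)
    public java.sql.Timestamp created;    // NTimeStampWriter (nullable Timestamp)
    public static final int IGNORED = 0;  // static fields are skipped by the loop

    public enum SampleState { NEW, DONE }

    // Any other field type (e.g. java.util.Date) would make setup() throw
    // ExecutionSetupException, since no PojoWriter is registered for it.
}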

Example 60 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by apache.

the class HiveScanBatchCreator method getBatch.

@Override
public ScanBatch getBatch(FragmentContext context, HiveSubScan config, List<RecordBatch> children) throws ExecutionSetupException {
    List<RecordReader> readers = Lists.newArrayList();
    HiveTableWithColumnCache table = config.getTable();
    List<InputSplit> splits = config.getInputSplits();
    List<HivePartition> partitions = config.getPartitions();
    boolean hasPartitions = (partitions != null && partitions.size() > 0);
    int i = 0;
    final UserGroupInformation proxyUgi = ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName());
    final HiveConf hiveConf = config.getHiveConf();
    final String formatName = table.getSd().getInputFormat();
    Class<? extends HiveAbstractReader> readerClass = HiveDefaultReader.class;
    if (readerMap.containsKey(formatName)) {
        readerClass = readerMap.get(formatName);
    }
    Constructor<? extends HiveAbstractReader> readerConstructor = null;
    try {
        readerConstructor = readerClass.getConstructor(HiveTableWithColumnCache.class, HivePartition.class, InputSplit.class, List.class, FragmentContext.class, HiveConf.class, UserGroupInformation.class);
        for (InputSplit split : splits) {
            readers.add(readerConstructor.newInstance(table, (hasPartitions ? partitions.get(i++) : null), split, config.getColumns(), context, hiveConf, proxyUgi));
        }
        if (readers.size() == 0) {
            readers.add(readerConstructor.newInstance(table, null, null, config.getColumns(), context, hiveConf, proxyUgi));
        }
    } catch (Exception e) {
        logger.error("No constructor for {}, thrown {}", readerClass.getName(), e);
    }
    return new ScanBatch(config, context, readers.iterator());
}
Also used : FragmentContext(org.apache.drill.exec.ops.FragmentContext) RecordReader(org.apache.drill.exec.store.RecordReader) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch) HiveConf(org.apache.hadoop.hive.conf.HiveConf) List(java.util.List) InputSplit(org.apache.hadoop.mapred.InputSplit) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
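
The Hive creator picks a reader class by the table's input format name and then instantiates it reflectively, once per input split. The following stripped-down sketch shows that dispatch pattern in isolation; Reader, CsvReader, DefaultReader and ReflectiveReaderDemo are placeholders invented for this illustration, not Drill or Hive classes.

import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.Map;

public class ReflectiveReaderDemo {

    public interface Reader { }
    public static class DefaultReader implements Reader { public DefaultReader(String split) { } }
    public static class CsvReader implements Reader { public CsvReader(String split) { } }

    // Format name -> reader class, with a fallback default (mirrors readerMap/HiveDefaultReader above).
    private static final Map<String, Class<? extends Reader>> READER_MAP = new HashMap<>();
    static {
        READER_MAP.put("csv", CsvReader.class);
    }

    public static Reader createReader(String formatName, String split) throws Exception {
        Class<? extends Reader> readerClass = READER_MAP.getOrDefault(formatName, DefaultReader.class);
        // Look up the constructor reflectively; the Drill code resolves it once, outside the split loop.
        Constructor<? extends Reader> ctor = readerClass.getConstructor(String.class);
        return ctor.newInstance(split);
    }

    public static void main(String[] args) throws Exception {
        System.out.println(createReader("csv", "split-0").getClass().getSimpleName()); // CsvReader
        System.out.println(createReader("orc", "split-1").getClass().getSimpleName()); // DefaultReader
    }
}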

Aggregations

ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 94
IOException (java.io.IOException): 43
ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 26
SchemaPath (org.apache.drill.common.expression.SchemaPath): 25
RecordReader (org.apache.drill.exec.store.RecordReader): 24
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 22
LinkedList (java.util.LinkedList): 16
Map (java.util.Map): 14
MaterializedField (org.apache.drill.exec.record.MaterializedField): 13
ExecutionException (java.util.concurrent.ExecutionException): 10
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 10
OperatorContext (org.apache.drill.exec.ops.OperatorContext): 8
UserException (org.apache.drill.common.exceptions.UserException): 7
MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 7
JobConf (org.apache.hadoop.mapred.JobConf): 7
HashMap (java.util.HashMap): 6
List (java.util.List): 6
OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 6
VectorContainerWriter (org.apache.drill.exec.vector.complex.impl.VectorContainerWriter): 6
Path (org.apache.hadoop.fs.Path): 6