
Example 26 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by axbaretto.

In class SequenceFileRecordReader, the setup method.

@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    final SequenceFileAsBinaryInputFormat inputFormat = new SequenceFileAsBinaryInputFormat();
    final JobConf jobConf = new JobConf(dfs.getConf());
    jobConf.setInputFormat(inputFormat.getClass());
    reader = getRecordReader(inputFormat, jobConf);
    final MaterializedField keyField = MaterializedField.create(keySchema, KEY_TYPE);
    final MaterializedField valueField = MaterializedField.create(valueSchema, VALUE_TYPE);
    try {
        keyVector = output.addField(keyField, NullableVarBinaryVector.class);
        valueVector = output.addField(valueField, NullableVarBinaryVector.class);
    } catch (SchemaChangeException sce) {
        throw new ExecutionSetupException("Error in setting up sequencefile reader.", sce);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) SequenceFileAsBinaryInputFormat(org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat) MaterializedField(org.apache.drill.exec.record.MaterializedField) JobConf(org.apache.hadoop.mapred.JobConf)
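Because the SchemaChangeException is wrapped rather than swallowed, callers can still reach it through the cause chain. A minimal sketch of a hypothetical caller, not part of the Drill sources (the names recordReader, operatorContext, outputMutator, and logger are assumed):

try {
    recordReader.setup(operatorContext, outputMutator);
} catch (ExecutionSetupException e) {
    // The original SchemaChangeException is preserved as the cause.
    if (e.getCause() instanceof SchemaChangeException) {
        logger.warn("Schema problem while preparing the sequence-file reader", e.getCause());
    }
    // Re-throw so the fragment fails through the normal error path.
    throw e;
}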

Example 27 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by axbaretto.

In class CompliantTextRecordReader, the setup method.

/**
 * Performs the initial setup required for the record reader.
 * Initializes the input stream, handling of the output record batch
 * and the actual reader to be used.
 * @param context  operator context from which buffers will be allocated and managed
 * @param outputMutator  Used to create the schema in the output record batch
 * @throws ExecutionSetupException
 */
@SuppressWarnings("resource")
@Override
public void setup(OperatorContext context, OutputMutator outputMutator) throws ExecutionSetupException {
    oContext = context;
    // Note: DO NOT use managed buffers here. They remain in existence
    // until the fragment is shut down. The buffers here are large.
    // If we scan 1000 files, and allocate 1 MB for each, we end up
    // holding onto 1 GB of memory in managed buffers.
    // Instead, we allocate the buffers explicitly, and must free
    // them.
    // readBuffer = context.getManagedBuffer(READ_BUFFER);
    // whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);
    readBuffer = context.getAllocator().buffer(READ_BUFFER);
    whitespaceBuffer = context.getAllocator().buffer(WHITE_SPACE_BUFFER);
    // setup Output, Input, and Reader
    try {
        TextOutput output = null;
        TextInput input = null;
        InputStream stream = null;
        // setup Output using OutputMutator
        if (settings.isHeaderExtractionEnabled()) {
            // extract header and use that to setup a set of VarCharVectors
            String[] fieldNames = extractHeader();
            output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery());
        } else {
            // simply use RepeatedVarCharVector
            output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery());
        }
        // setup Input using InputStream
        logger.trace("Opening file {}", split.getPath());
        stream = dfs.openPossiblyCompressedStream(split.getPath());
        input = new TextInput(settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());
        // setup Reader using Input and Output
        reader = new TextReader(settings, input, output, whitespaceBuffer);
        reader.start();
    } catch (SchemaChangeException | IOException e) {
        throw new ExecutionSetupException(String.format("Failure while setting up text reader for file %s", split.getPath()), e);
    } catch (IllegalArgumentException e) {
        throw UserException.dataReadError(e).addContext("File Path", split.getPath().toString()).build(logger);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) InputStream(java.io.InputStream) IOException(java.io.IOException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException)
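The comment in this setup method stresses that the explicitly allocated buffers must be freed by the reader itself. A minimal sketch of what the matching close() can look like, assuming readBuffer and whitespaceBuffer are reference-counted DrillBuf instances freed via release(); this is a sketch under those assumptions, not a copy of the project's implementation:

@Override
public void close() {
    // Release the buffers allocated in setup(). Null the fields so a
    // repeated close() call is harmless.
    if (readBuffer != null) {
        readBuffer.release();
        readBuffer = null;
    }
    if (whitespaceBuffer != null) {
        whitespaceBuffer.release();
        whitespaceBuffer = null;
    }
    // Close the reader, which in turn closes the underlying input stream.
    try {
        if (reader != null) {
            reader.close();
            reader = null;
        }
    } catch (IOException e) {
        logger.warn("Exception while closing the text reader", e);
    }
}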

Example 28 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by axbaretto.

In class JSONRecordReader, the setup method.

@Override
public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException {
    try {
        if (hadoopPath != null) {
            this.stream = fileSystem.openPossiblyCompressedStream(hadoopPath);
        }
        this.writer = new VectorContainerWriter(output, unionEnabled);
        if (isSkipQuery()) {
            this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf);
        } else {
            this.jsonReader = new JsonReader.Builder(fragmentContext.getManagedBuffer()).schemaPathColumns(ImmutableList.copyOf(getColumns())).allTextMode(enableAllTextMode).skipOuterList(true).readNumbersAsDouble(readNumbersAsDouble).enableNanInf(enableNanInf).build();
        }
        setupParser();
    } catch (final Exception e) {
        handleAndRaise("Failure reading JSON file", e);
    }
}
Also used : VectorContainerWriter(org.apache.drill.exec.vector.complex.impl.VectorContainerWriter) CountingJsonReader(org.apache.drill.exec.store.easy.json.reader.CountingJsonReader) JsonReader(org.apache.drill.exec.vector.complex.fn.JsonReader) UserException(org.apache.drill.common.exceptions.UserException) IOException(java.io.IOException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) JsonParseException(com.fasterxml.jackson.core.JsonParseException)
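The catch-all above routes every failure (including the OutOfMemoryException and JsonParseException visible in the imports) through handleAndRaise. A hedged sketch of what such a helper typically does, reusing only builder calls that appear elsewhere in these examples; the exact signature and body in the Drill sources may differ:

private void handleAndRaise(String message, Exception e) {
    // Convert any setup or parse failure into a UserException that carries
    // the file being read as context, so the error surfaces to the user
    // instead of as a bare stack trace.
    String path = hadoopPath == null ? "<embedded JSON>" : hadoopPath.toString();
    throw UserException.dataReadError(e)
        .message(message)
        .addContext("File Path", path)
        .build(logger);
}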

Example 29 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by axbaretto.

In class ParquetScanBatchCreator, the getBatch method.

@SuppressWarnings("resource")
@Override
public ScanBatch getBatch(ExecutorFragmentContext context, ParquetRowGroupScan rowGroupScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    OperatorContext oContext = context.newOperatorContext(rowGroupScan);
    final ColumnExplorer columnExplorer = new ColumnExplorer(context.getOptions(), rowGroupScan.getColumns());
    if (!columnExplorer.isStarQuery()) {
        rowGroupScan = new ParquetRowGroupScan(rowGroupScan.getUserName(), rowGroupScan.getStorageEngine(), rowGroupScan.getRowGroupReadEntries(), columnExplorer.getTableColumns(), rowGroupScan.getSelectionRoot(), rowGroupScan.getFilter());
        rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
    }
    DrillFileSystem fs;
    try {
        boolean useAsyncPageReader = context.getOptions().getOption(ExecConstants.PARQUET_PAGEREADER_ASYNC).bool_val;
        if (useAsyncPageReader) {
            fs = oContext.newNonTrackingFileSystem(rowGroupScan.getStorageEngine().getFsConf());
        } else {
            fs = oContext.newFileSystem(rowGroupScan.getStorageEngine().getFsConf());
        }
    } catch (IOException e) {
        throw new ExecutionSetupException(String.format("Failed to create DrillFileSystem: %s", e.getMessage()), e);
    }
    Configuration conf = new Configuration(fs.getConf());
    conf.setBoolean(ENABLE_BYTES_READ_COUNTER, false);
    conf.setBoolean(ENABLE_BYTES_TOTAL_COUNTER, false);
    conf.setBoolean(ENABLE_TIME_READ_COUNTER, false);
    // keep footers in a map to avoid re-reading them
    Map<String, ParquetMetadata> footers = Maps.newHashMap();
    List<RecordReader> readers = new LinkedList<>();
    List<Map<String, String>> implicitColumns = Lists.newArrayList();
    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    for (RowGroupReadEntry e : rowGroupScan.getRowGroupReadEntries()) {
        /*
      Here we could store a map from file names to footers, to prevent re-reading the footer for each row group in a file
      TODO - to prevent reading the footer again in the parquet record reader (it is read earlier in the ParquetStorageEngine)
      we should add more information to the RowGroupInfo that will be populated upon the first read to
      provide the reader with all of the file meta-data it needs
      These fields will be added to the constructor below
      */
        try {
            Stopwatch timer = Stopwatch.createUnstarted();
            if (!footers.containsKey(e.getPath())) {
                timer.start();
                ParquetMetadata footer = ParquetFileReader.readFooter(conf, new Path(e.getPath()));
                long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
                logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", "", e.getPath(), "", 0, 0, 0, timeToRead);
                footers.put(e.getPath(), footer);
            }
            boolean autoCorrectCorruptDates = rowGroupScan.getFormatConfig().areCorruptDatesAutoCorrected();
            ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility.detectCorruptDates(footers.get(e.getPath()), rowGroupScan.getColumns(), autoCorrectCorruptDates);
            if (logger.isDebugEnabled()) {
                logger.debug(containsCorruptDates.toString());
            }
            if (!context.getOptions().getBoolean(ExecConstants.PARQUET_NEW_RECORD_READER) && !isComplex(footers.get(e.getPath()))) {
                readers.add(new ParquetRecordReader(context, e.getPath(), e.getRowGroupIndex(), e.getNumRecordsToRead(), fs, CodecFactory.createDirectCodecFactory(fs.getConf(), new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0), footers.get(e.getPath()), rowGroupScan.getColumns(), containsCorruptDates));
            } else {
                ParquetMetadata footer = footers.get(e.getPath());
                readers.add(new DrillParquetReader(context, footer, e, columnExplorer.getTableColumns(), fs, containsCorruptDates));
            }
            Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(e, rowGroupScan.getSelectionRoot());
            implicitColumns.add(implicitValues);
            if (implicitValues.size() > mapWithMaxColumns.size()) {
                mapWithMaxColumns = implicitValues;
            }
        } catch (IOException e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    // all readers should have the same number of implicit columns, add missing ones with value null
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }
    return new ScanBatch(context, oContext, readers, implicitColumns);
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetRecordReader(org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader) RecordReader(org.apache.drill.exec.store.RecordReader) Stopwatch(com.google.common.base.Stopwatch) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) OperatorContext(org.apache.drill.exec.ops.OperatorContext) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch) Path(org.apache.hadoop.fs.Path) DrillParquetReader(org.apache.drill.exec.store.parquet2.DrillParquetReader) IOException(java.io.IOException) LinkedList(java.util.LinkedList) ColumnExplorer(org.apache.drill.exec.store.ColumnExplorer) Map(java.util.Map)
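The final loop in getBatch pads every reader's implicit-column map so all readers expose the same column set, filling missing columns with null. A small self-contained illustration of that Guava idiom with made-up values (the file names and the "dir0" key are hypothetical, not taken from the example above):

import java.util.LinkedHashMap;
import java.util.Map;
import com.google.common.base.Functions;
import com.google.common.collect.Maps;

public class ImplicitColumnPaddingDemo {
    public static void main(String[] args) {
        // One reader produced only "filename"; the widest reader also produced "dir0".
        Map<String, String> narrow = new LinkedHashMap<>();
        narrow.put("filename", "a.parquet");

        Map<String, String> widest = new LinkedHashMap<>();
        widest.put("filename", "b.parquet");
        widest.put("dir0", "2018");

        // Map every key of the widest map to null ...
        Map<String, String> diff = Maps.transformValues(widest, Functions.constant((String) null));
        // ... then copy over only the keys the narrow map is missing.
        narrow.putAll(Maps.difference(narrow, diff).entriesOnlyOnRight());

        // Prints {filename=a.parquet, dir0=null}: both maps now share the same keys.
        System.out.println(narrow);
    }
}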

Example 30 with ExecutionSetupException

use of org.apache.drill.common.exceptions.ExecutionSetupException in project drill by axbaretto.

In class ExtendedMockRecordReader, the setup method.

@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    try {
        final int estimateRowSize = getEstimatedRecordSize();
        valueVectors = new ValueVector[fields.length];
        int batchSize = config.getBatchSize();
        if (batchSize == 0) {
            batchSize = 10 * 1024 * 1024;
        }
        batchRecordCount = Math.max(1, batchSize / estimateRowSize);
        batchRecordCount = Math.min(batchRecordCount, Character.MAX_VALUE);
        for (int i = 0; i < fields.length; i++) {
            final ColumnDef col = fields[i];
            final MajorType type = col.getConfig().getMajorType();
            final MaterializedField field = MaterializedField.create(col.getName(), type);
            final Class<? extends ValueVector> vvClass = TypeHelper.getValueVectorClass(field.getType().getMinorType(), field.getDataMode());
            valueVectors[i] = output.addField(field, vvClass);
        }
    } catch (SchemaChangeException e) {
        throw new ExecutionSetupException("Failure while setting up fields", e);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MaterializedField(org.apache.drill.exec.record.MaterializedField)
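The sizing logic above targets roughly batchSize bytes per batch but caps the row count at Character.MAX_VALUE (65,535). A short worked example with assumed numbers (the 400-byte estimated row width is hypothetical):

int batchSize = 10 * 1024 * 1024;                                    // 10 MiB, the fallback when config.getBatchSize() == 0
int estimateRowSize = 400;                                           // hypothetical estimated row width in bytes
int batchRecordCount = Math.max(1, batchSize / estimateRowSize);     // 26214 rows
batchRecordCount = Math.min(batchRecordCount, Character.MAX_VALUE);  // unchanged here; the 65535-row cap only binds for very narrow rows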

Aggregations

Classes used together with ExecutionSetupException across the examples, with usage counts:

ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 94
IOException (java.io.IOException): 43
ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch): 26
SchemaPath (org.apache.drill.common.expression.SchemaPath): 25
RecordReader (org.apache.drill.exec.store.RecordReader): 24
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 22
LinkedList (java.util.LinkedList): 16
Map (java.util.Map): 14
MaterializedField (org.apache.drill.exec.record.MaterializedField): 13
ExecutionException (java.util.concurrent.ExecutionException): 10
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 10
OperatorContext (org.apache.drill.exec.ops.OperatorContext): 8
UserException (org.apache.drill.common.exceptions.UserException): 7
MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 7
JobConf (org.apache.hadoop.mapred.JobConf): 7
HashMap (java.util.HashMap): 6
List (java.util.List): 6
OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException): 6
VectorContainerWriter (org.apache.drill.exec.vector.complex.impl.VectorContainerWriter): 6
Path (org.apache.hadoop.fs.Path): 6