
Example 1 with NullRowsRecordReader

Use of org.apache.hadoop.hive.ql.io.NullRowsInputFormat.NullRowsRecordReader in the Apache Hive project.

From the class HiveInputFormat, method getRecordReader:

@Override
public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    HiveInputSplit hsplit = (HiveInputSplit) split;
    String inputFormatClassName = null;
    Class inputFormatClass = null;
    try {
        inputFormatClassName = hsplit.inputFormatClassName();
        inputFormatClass = job.getClassByName(inputFormatClassName);
    } catch (Exception e) {
        throw new IOException("cannot find class " + inputFormatClassName, e);
    }
    if (this.mrwork == null || pathToPartitionInfo == null) {
        init(job);
    }
    boolean nonNative = false;
    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo, hsplit.getPath(), null);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Found spec for " + hsplit.getPath() + " " + part + " from " + pathToPartitionInfo);
    }
    try {
        if ((part != null) && (part.getTableDesc() != null)) {
            Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), job);
            nonNative = part.getTableDesc().isNonNative();
        }
    } catch (HiveException e) {
        throw new IOException(e);
    }
    Path splitPath = hsplit.getPath();
    pushProjectionsAndFiltersAndAsOf(job, splitPath);
    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
    if (HiveConf.getBoolVar(job, ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon())) {
        try {
            inputFormat = HiveInputFormat.wrapForLlap(inputFormat, job, part);
        } catch (HiveException e) {
            throw new IOException(e);
        }
    }
    RecordReader innerReader = null;
    try {
        // Handle the special header/footer skipping cases here.
        innerReader = RecordReaderWrapper.create(inputFormat, hsplit, part.getTableDesc(), job, reporter);
    } catch (Exception e) {
        Throwable rootCause = JavaUtils.findRootCause(e);
        if (checkLimitReached(job) && (rootCause instanceof InterruptedException || rootCause instanceof ClosedByInterruptException)) {
            LOG.info("Ignoring exception while getting record reader as limit is reached", rootCause);
            innerReader = new NullRowsRecordReader(job, split);
        } else {
            innerReader = HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(e, job);
        }
    }
    HiveRecordReader<K, V> rr = new HiveRecordReader(innerReader, job);
    rr.initIOContext(hsplit, job, inputFormatClass, innerReader);
    return rr;
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException), ClosedByInterruptException (java.nio.channels.ClosedByInterruptException), ExecutionException (java.util.concurrent.ExecutionException), NullRowsRecordReader (org.apache.hadoop.hive.ql.io.NullRowsInputFormat.NullRowsRecordReader), RecordReader (org.apache.hadoop.mapred.RecordReader), InputFormat (org.apache.hadoop.mapred.InputFormat), FileInputFormat (org.apache.hadoop.mapred.FileInputFormat), TextInputFormat (org.apache.hadoop.mapred.TextInputFormat), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc)
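
Both examples follow the same fallback pattern: if creating the real record reader fails because the task was interrupted after the query's LIMIT was already satisfied, a NullRowsRecordReader is returned instead of propagating the failure. Below is a minimal sketch of that pattern detached from HiveInputFormat; the ReaderFactory interface, the helper name, and the limitReached flag are hypothetical, a plain getCause() loop stands in for Hive's JavaUtils.findRootCause, and only the NullRowsRecordReader(job, split) construction mirrors the examples above.

// Hypothetical sketch of the limit-reached fallback shown above (not Hive source).
import java.io.IOException;
import java.nio.channels.ClosedByInterruptException;

import org.apache.hadoop.hive.ql.io.NullRowsInputFormat.NullRowsRecordReader;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;

public class LimitFallbackSketch {

    /** Factory for the "real" record reader; stands in for inputFormat.getRecordReader(...). */
    interface ReaderFactory {
        RecordReader<?, ?> create() throws Exception;
    }

    static RecordReader<?, ?> createWithLimitFallback(ReaderFactory factory, JobConf job,
            InputSplit split, boolean limitReached) throws IOException {
        try {
            return factory.create();
        } catch (Exception e) {
            // Walk to the root cause (a stand-in for JavaUtils.findRootCause in the examples).
            Throwable rootCause = e;
            while (rootCause.getCause() != null && rootCause.getCause() != rootCause) {
                rootCause = rootCause.getCause();
            }
            boolean interrupted = rootCause instanceof InterruptedException
                    || rootCause instanceof ClosedByInterruptException;
            if (limitReached && interrupted) {
                // LIMIT is already satisfied, so substitute a NullRowsRecordReader
                // instead of failing the task.
                return new NullRowsRecordReader(job, split);
            }
            throw new IOException(e);
        }
    }
}

The key design point, visible in both snippets, is that the fallback only triggers when the failure's root cause is an interrupt (InterruptedException or ClosedByInterruptException) and the limit check passes; any other creation failure is still surfaced to the caller.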

Example 2 with NullRowsRecordReader

Use of org.apache.hadoop.hive.ql.io.NullRowsInputFormat.NullRowsRecordReader in the Apache Hive project.

From the class LlapInputFormat, method getRecordReader:

@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    // Check LLAP-aware split (e.g. OrcSplit) to make sure it's compatible.
    RecordReader<NullWritable, VectorizedRowBatch> noLlap = checkLlapSplit(split, job, reporter);
    if (noLlap != null)
        return noLlap;
    FileSplit fileSplit = (FileSplit) split;
    reporter.setStatus(fileSplit.toString());
    try {
        // At this entry point, we are going to assume that these are logical table columns.
        // Perhaps we should go thru the code and clean this up to be more explicit; for now, we
        // will start with this single assumption and maintain clear semantics from here.
        List<Integer> tableIncludedCols = ColumnProjectionUtils.isReadAllColumns(job) ? null : ColumnProjectionUtils.getReadColumnIDs(job);
        LlapRecordReader rr = LlapRecordReader.create(job, fileSplit, tableIncludedCols, hostName, cvp, executor, sourceInputFormat, sourceSerDe, reporter, daemonConf);
        if (rr == null) {
            // Reader-specific incompatibility like SMB or schema evolution.
            return sourceInputFormat.getRecordReader(split, job, reporter);
        }
        // For non-vectorized operator case, wrap the reader if possible.
        RecordReader<NullWritable, VectorizedRowBatch> result = rr;
        if (!Utilities.getIsVectorized(job)) {
            result = null;
            if (HiveConf.getBoolVar(job, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED)) {
                result = wrapLlapReader(tableIncludedCols, rr, split);
            }
            if (result == null) {
                // Cannot wrap a reader for non-vectorized pipeline.
                return sourceInputFormat.getRecordReader(split, job, reporter);
            }
        }
        // This starts the reader in the background.
        rr.start();
        return result;
    } catch (Exception ex) {
        Throwable rootCause = JavaUtils.findRootCause(ex);
        if (checkLimitReached(job) && (rootCause instanceof InterruptedException || rootCause instanceof ClosedByInterruptException)) {
            LlapIoImpl.LOG.info("Ignoring exception while getting record reader as limit is reached", rootCause);
            return new NullRowsRecordReader(job, split);
        } else {
            throw new IOException(ex);
        }
    }
}
Also used: IOException (java.io.IOException), ClosedByInterruptException (java.nio.channels.ClosedByInterruptException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), NullRowsRecordReader (org.apache.hadoop.hive.ql.io.NullRowsInputFormat.NullRowsRecordReader), FileSplit (org.apache.hadoop.mapred.FileSplit), NullWritable (org.apache.hadoop.io.NullWritable), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)
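
Whichever branch produced it, the reader returned by getRecordReader above is consumed through the standard mapred RecordReader contract (createKey/createValue/next/close). The sketch below shows that consumption loop for a RecordReader<NullWritable, VectorizedRowBatch>; the class and method names are hypothetical, and the InputFormat, split, JobConf, and Reporter are assumed to be supplied by the caller or framework.

// Minimal consumption sketch (not Hive source) for a vectorized reader like the one above.
import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class BatchReaderSketch {

    static long countRows(InputFormat<NullWritable, VectorizedRowBatch> inputFormat,
            InputSplit split, JobConf job, Reporter reporter) throws IOException {
        RecordReader<NullWritable, VectorizedRowBatch> reader =
            inputFormat.getRecordReader(split, job, reporter);
        NullWritable key = reader.createKey();
        VectorizedRowBatch batch = reader.createValue();
        long rows = 0;
        try {
            // next() returns false once the reader is exhausted. When the limit-reached
            // fallback substituted a NullRowsRecordReader, the query has already produced
            // enough rows, so whatever this reader yields is inconsequential downstream.
            while (reader.next(key, batch)) {
                rows += batch.size; // number of valid rows in this vectorized batch
            }
        } finally {
            reader.close();
        }
        return rows;
    }
}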

Aggregations

IOException (java.io.IOException): 2
ClosedByInterruptException (java.nio.channels.ClosedByInterruptException): 2
NullRowsRecordReader (org.apache.hadoop.hive.ql.io.NullRowsInputFormat.NullRowsRecordReader): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
ExecutionException (java.util.concurrent.ExecutionException): 1
Path (org.apache.hadoop.fs.Path): 1
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 1
NullWritable (org.apache.hadoop.io.NullWritable): 1
FileInputFormat (org.apache.hadoop.mapred.FileInputFormat): 1
FileSplit (org.apache.hadoop.mapred.FileSplit): 1
InputFormat (org.apache.hadoop.mapred.InputFormat): 1
RecordReader (org.apache.hadoop.mapred.RecordReader): 1
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 1