Example 1 with AcidOperationalProperties

Use of org.apache.hadoop.hive.ql.io.AcidUtils.AcidOperationalProperties in project hive by apache.

From the class OrcInputFormat, method getReader:

@Override
public RowReader<OrcStruct> getReader(InputSplit inputSplit, Options options) throws IOException {
    final OrcSplit split = (OrcSplit) inputSplit;
    // Retrieve the acidOperationalProperties for the table, initialized in HiveInputFormat.
    AcidUtils.AcidOperationalProperties acidOperationalProperties = AcidUtils.getAcidOperationalProperties(options.getConfiguration());
    if (!acidOperationalProperties.isSplitUpdate()) {
        throw new IllegalStateException("Expected SplitUpdate table: " + split.getPath());
    }
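    // Delete deltas hold the keys of deleted rows; the merger uses them to mask deletions.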
    final Path[] deltas = VectorizedOrcAcidRowBatchReader.getDeleteDeltaDirsFromSplit(split);
    final Configuration conf = options.getConfiguration();
    final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, split);
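    // Set up the merger for a regular read (not compaction), anchored at the table/partition
    // root and at this split's bucket file.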
    OrcRawRecordMerger.Options mergerOptions = new OrcRawRecordMerger.Options().isCompacting(false);
    mergerOptions.rootPath(split.getRootDir());
    mergerOptions.bucketPath(split.getPath());
    final int bucket;
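    // With a base, the bucket ID comes from the file name; without one, fall back to the
    // split's start offset (which should not occur for full ACID 2.0, per the assert below).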
    if (split.hasBase()) {
        AcidOutputFormat.Options acidIOOptions = AcidUtils.parseBaseOrDeltaBucketFilename(split.getPath(), conf);
        if (acidIOOptions.getBucketId() < 0) {
            LOG.warn("Can't determine bucket ID for " + split.getPath() + "; ignoring");
        }
        bucket = acidIOOptions.getBucketId();
        if (split.isOriginal()) {
            mergerOptions.copyIndex(acidIOOptions.getCopyNumber()).bucketPath(split.getPath());
        }
    } else {
        bucket = (int) split.getStart();
        assert false : "We should never have a split w/o base in acid 2.0 for full acid: " + split.getPath();
    }
    // todo: createOptionsForReader() assumes it's !isOriginal.... why?
    final Reader.Options readOptions = OrcInputFormat.createOptionsForReader(conf);
    readOptions.range(split.getStart(), split.getLength());
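    // Build the snapshot of valid write IDs from the job conf; rows written outside this
    // snapshot are not visible to the reader.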
    String txnString = conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
    ValidWriteIdList validWriteIdList = (txnString == null) ? new ValidReaderWriteIdList() : new ValidReaderWriteIdList(txnString);
    LOG.debug("getReader:: Read ValidWriteIdList: " + validWriteIdList.toString() + " isTransactionalTable: " + HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN));
    final OrcRawRecordMerger records = new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket, validWriteIdList, readOptions, deltas, mergerOptions);
    return new RowReader<OrcStruct>() {

        OrcStruct innerRecord = records.createValue();

        @Override
        public ObjectInspector getObjectInspector() {
            return OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(readOptions.getSchema()));
        }

        @Override
        public boolean next(RecordIdentifier recordIdentifier, OrcStruct orcStruct) throws IOException {
            boolean result;
            // filter out the deleted records
            do {
                result = records.next(recordIdentifier, innerRecord);
            } while (result && OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION);
            if (result) {
                // swap the fields with the passed in orcStruct
                orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
            }
            return result;
        }

        @Override
        public RecordIdentifier createKey() {
            return records.createKey();
        }

        @Override
        public OrcStruct createValue() {
            return new OrcStruct(records.getColumns());
        }

        @Override
        public long getPos() throws IOException {
            return records.getPos();
        }

        @Override
        public void close() throws IOException {
            records.close();
        }

        @Override
        public float getProgress() throws IOException {
            return records.getProgress();
        }
    };
}
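For context, a caller drives the returned RowReader through the key/value objects it creates, and the next() loop above already filters out deleted records. A minimal sketch of such a driver loop is below; the inputFormat, split, and options variables are hypothetical stand-ins for objects set up elsewhere, not part of the snippet above:

// Hypothetical driver; assumes inputFormat, split, and options are already configured.
AcidInputFormat.RowReader<OrcStruct> rowReader = inputFormat.getReader(split, options);
RecordIdentifier key = rowReader.createKey();
OrcStruct value = rowReader.createValue();
while (rowReader.next(key, value)) {
    // process value; key identifies the row by (writeId, bucket, rowId)
}
rowReader.close();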
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) StatsProvidingRecordReader(org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader) BatchToRowReader(org.apache.hadoop.hive.ql.io.BatchToRowReader) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList) AcidOperationalProperties(org.apache.hadoop.hive.ql.io.AcidUtils.AcidOperationalProperties) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)

Aggregations

Configuration (org.apache.hadoop.conf.Configuration) 1
Path (org.apache.hadoop.fs.Path) 1
ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList) 1
ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList) 1
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat) 1
AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils) 1
AcidOperationalProperties (org.apache.hadoop.hive.ql.io.AcidUtils.AcidOperationalProperties) 1
BatchToRowReader (org.apache.hadoop.hive.ql.io.BatchToRowReader) 1
RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier) 1
StatsProvidingRecordReader (org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader) 1