Use of org.apache.hadoop.hive.ql.io.AcidUtils.AcidOperationalProperties in project hive by apache.
The example below is the getReader method of the OrcInputFormat class.
@Override
public RowReader<OrcStruct> getReader(InputSplit inputSplit, Options options) throws IOException {
  final OrcSplit split = (OrcSplit) inputSplit;
  // Retrieve the acidOperationalProperties for the table, initialized in HiveInputFormat.
  AcidUtils.AcidOperationalProperties acidOperationalProperties =
      AcidUtils.getAcidOperationalProperties(options.getConfiguration());
  if (!acidOperationalProperties.isSplitUpdate()) {
    throw new IllegalStateException("Expected SplitUpdate table: " + split.getPath());
  }
  final Path[] deltas = VectorizedOrcAcidRowBatchReader.getDeleteDeltaDirsFromSplit(split);
  final Configuration conf = options.getConfiguration();
  final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, split);
  OrcRawRecordMerger.Options mergerOptions = new OrcRawRecordMerger.Options().isCompacting(false);
  mergerOptions.rootPath(split.getRootDir());
  mergerOptions.bucketPath(split.getPath());
  final int bucket;
  if (split.hasBase()) {
    AcidOutputFormat.Options acidIOOptions =
        AcidUtils.parseBaseOrDeltaBucketFilename(split.getPath(), conf);
    if (acidIOOptions.getBucketId() < 0) {
      LOG.warn("Can't determine bucket ID for " + split.getPath() + "; ignoring");
    }
    bucket = acidIOOptions.getBucketId();
    if (split.isOriginal()) {
      mergerOptions.copyIndex(acidIOOptions.getCopyNumber()).bucketPath(split.getPath());
    }
  } else {
    bucket = (int) split.getStart();
    assert false : "We should never have a split w/o base in acid 2.0 for full acid: " + split.getPath();
  }
  // todo: createOptionsForReader() assumes it's !isOriginal.... why?
  final Reader.Options readOptions = OrcInputFormat.createOptionsForReader(conf);
  readOptions.range(split.getStart(), split.getLength());
  String txnString = conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
  ValidWriteIdList validWriteIdList = (txnString == null) ?
      new ValidReaderWriteIdList() : new ValidReaderWriteIdList(txnString);
  LOG.debug("getReader:: Read ValidWriteIdList: " + validWriteIdList.toString()
      + " isTransactionalTable: " + HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN));
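  // Merge the split's base/original data with the delete delta directories collected above.
  // Deleted rows still come out of the merger as DELETE events; they are filtered out in
  // next() below, so callers of this RowReader only ever see live rows.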
  final OrcRawRecordMerger records = new OrcRawRecordMerger(conf, true, reader, split.isOriginal(),
      bucket, validWriteIdList, readOptions, deltas, mergerOptions);
  return new RowReader<OrcStruct>() {
    OrcStruct innerRecord = records.createValue();

    @Override
    public ObjectInspector getObjectInspector() {
      return OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(readOptions.getSchema()));
    }

    @Override
    public boolean next(RecordIdentifier recordIdentifier, OrcStruct orcStruct) throws IOException {
      boolean result;
      // filter out the deleted records
      do {
        result = records.next(recordIdentifier, innerRecord);
      } while (result && OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION);
      if (result) {
        // swap the fields with the passed in orcStruct
        orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
      }
      return result;
    }

    @Override
    public RecordIdentifier createKey() {
      return records.createKey();
    }

    @Override
    public OrcStruct createValue() {
      return new OrcStruct(records.getColumns());
    }

    @Override
    public long getPos() throws IOException {
      return records.getPos();
    }

    @Override
    public void close() throws IOException {
      records.close();
    }

    @Override
    public float getProgress() throws IOException {
      return records.getProgress();
    }
  };
}
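For reference, here is a minimal sketch of how the RowReader returned by getReader might be consumed. The orcInputFormat, split, and options variables and the process() callback are placeholders for illustration and are not part of the Hive sources.

// Minimal usage sketch; setup of orcInputFormat, split, and options is assumed.
AcidInputFormat.RowReader<OrcStruct> rowReader = orcInputFormat.getReader(split, options);
RecordIdentifier key = rowReader.createKey();   // reusable row-identity holder
OrcStruct value = rowReader.createValue();      // reusable row, sized to the merger's column count
try {
  while (rowReader.next(key, value)) {
    process(key, value); // hypothetical per-row callback; DELETE events never reach this point
  }
} finally {
  rowReader.close();
}

Because next() skips any record whose operation is OrcRecordUpdater.DELETE_OPERATION, this loop only observes live rows.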