Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.
Class OrcInputFormat, method getReader.
@Override
public RowReader<OrcStruct> getReader(InputSplit inputSplit, Options options) throws IOException {
  final OrcSplit split = (OrcSplit) inputSplit;
  final Path path = split.getPath();
  Path root;
  if (split.hasBase()) {
    if (split.isOriginal()) {
      root = path.getParent();
    } else {
      root = path.getParent().getParent();
    }
  } else {
    root = path;
  }
  // Retrieve the acidOperationalProperties for the table, initialized in HiveInputFormat.
  AcidUtils.AcidOperationalProperties acidOperationalProperties =
      AcidUtils.getAcidOperationalProperties(options.getConfiguration());
  // The deltas are decided based on whether split-update has been turned on for the table or not.
  // When split-update is turned off, everything in the delta_x_y/ directory should be treated
  // as delta. However if split-update is turned on, only the files in delete_delta_x_y/ directory
  // need to be considered as delta, because files in delta_x_y/ will be processed as base files
  // since they only have insert events in them.
  final Path[] deltas = acidOperationalProperties.isSplitUpdate()
      ? AcidUtils.deserializeDeleteDeltas(root, split.getDeltas())
      : AcidUtils.deserializeDeltas(root, split.getDeltas());
  final Configuration conf = options.getConfiguration();
  final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, split);
  final int bucket = OrcInputFormat.getBucketForSplit(conf, split);
  final Reader.Options readOptions = OrcInputFormat.createOptionsForReader(conf);
  readOptions.range(split.getStart(), split.getLength());
  String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
  ValidTxnList validTxnList = txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString);
  final OrcRawRecordMerger records =
      new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket, validTxnList, readOptions, deltas);
  return new RowReader<OrcStruct>() {
    OrcStruct innerRecord = records.createValue();

    @Override
    public ObjectInspector getObjectInspector() {
      return OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(readOptions.getSchema()));
    }

    @Override
    public boolean next(RecordIdentifier recordIdentifier, OrcStruct orcStruct) throws IOException {
      boolean result;
      // filter out the deleted records
      do {
        result = records.next(recordIdentifier, innerRecord);
      } while (result && OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION);
      if (result) {
        // swap the fields with the passed in orcStruct
        orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
      }
      return result;
    }

    @Override
    public RecordIdentifier createKey() {
      return records.createKey();
    }

    @Override
    public OrcStruct createValue() {
      return new OrcStruct(records.getColumns());
    }

    @Override
    public long getPos() throws IOException {
      return records.getPos();
    }

    @Override
    public void close() throws IOException {
      records.close();
    }

    @Override
    public float getProgress() throws IOException {
      return records.getProgress();
    }
  };
}
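For context, here is a minimal sketch of the round trip behind ValidTxnList.VALID_TXNS_KEY that getReader relies on: the coordinator serializes a transaction list into the configuration, and the task side rebuilds it exactly as the method above does. The transaction IDs, the class name, and the ValidReadTxnList(long[], long) constructor used on the producer side are assumptions for illustration; only the string-based rebuild is taken from the snippet.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;

public class ValidTxnListRoundTripSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Hypothetical snapshot: high watermark 10, transactions 7 and 8 not yet committed.
    ValidTxnList produced = new ValidReadTxnList(new long[] { 7, 8 }, 10);

    // The query coordinator would publish the list in the job configuration ...
    conf.set(ValidTxnList.VALID_TXNS_KEY, produced.writeToString());

    // ... and the task side rebuilds it, falling back to an "everything valid"
    // list when the key is absent, exactly as getReader() does above.
    String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
    ValidTxnList validTxnList =
        txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString);

    System.out.println(validTxnList.isTxnValid(5));  // true: committed, below the high watermark
    System.out.println(validTxnList.isTxnValid(7));  // false: listed as an exception
    System.out.println(validTxnList.isTxnValid(12)); // false: above the high watermark
  }
}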
Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.
Class TestCompactor, method checkExpectedTxnsPresent.
private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty,
    String columnTypesProperty, int bucket, long min, long max) throws IOException {
  ValidTxnList txnList = new ValidTxnList() {
    @Override
    public boolean isTxnValid(long txnid) {
      return true;
    }

    @Override
    public RangeResponse isTxnRangeValid(long minTxnId, long maxTxnId) {
      return RangeResponse.ALL;
    }

    @Override
    public String writeToString() {
      return "";
    }

    @Override
    public void readFromString(String src) {
    }

    @Override
    public long getHighWatermark() {
      return Long.MAX_VALUE;
    }

    @Override
    public long[] getInvalidTransactions() {
      return new long[0];
    }

    @Override
    public boolean isValidBase(long txnid) {
      return true;
    }
  };
  OrcInputFormat aif = new OrcInputFormat();
  Configuration conf = new Configuration();
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
  AcidInputFormat.RawReader<OrcStruct> reader = aif.getRawReader(conf, false, bucket, txnList, base, deltas);
  RecordIdentifier identifier = reader.createKey();
  OrcStruct value = reader.createValue();
  long currentTxn = min;
  boolean seenCurrentTxn = false;
  while (reader.next(identifier, value)) {
    if (!seenCurrentTxn) {
      Assert.assertEquals(currentTxn, identifier.getTransactionId());
      seenCurrentTxn = true;
    }
    if (currentTxn != identifier.getTransactionId()) {
      Assert.assertEquals(currentTxn + 1, identifier.getTransactionId());
      currentTxn++;
    }
  }
  Assert.assertEquals(max, currentTxn);
}
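The anonymous ValidTxnList above reports every transaction, every range, and every base as valid, so the raw reader surfaces all events and the loop can assert that the transaction IDs from min through max appear contiguously. A hypothetical call of the helper might look like the following; the warehouse path, column schema, bucket number, and transaction range are invented for illustration and do not come from TestCompactor.

// Hypothetical: after a major compaction, expect transactions 1 through 4 to be
// present in the compacted base file for bucket 0 (no remaining deltas).
Path base = new Path("/warehouse/acid_table/base_0000004");
Path[] deltas = new Path[0];
checkExpectedTxnsPresent(base, deltas, "a,b", "int:string", 0, 1L, 4L);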
Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.
Class TestValidCompactorTxnList, method maxTxnLow.
@Test
public void maxTxnLow() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[] { 13, 14 }, 12);
  ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
  Assert.assertEquals(ValidTxnList.RangeResponse.ALL, rsp);
}
Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.
Class TestValidCompactorTxnList, method minTxnHigh.
@Test
public void minTxnHigh() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[] { 3, 4 }, 2);
  ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
  Assert.assertEquals(ValidTxnList.RangeResponse.NONE, rsp);
}
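Read together, these two tests pin down the compactor-side range semantics: a range that lies entirely at or below the high watermark (with no relevant exceptions) is reported ALL valid, while a range that lies entirely above it is reported NONE. A small sketch of both cases side by side, with illustrative variable names:

import org.apache.hadoop.hive.common.ValidCompactorTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;

// High watermark 12: the whole range [7, 9] is below it and not excepted, so the
// compactor may merge those transactions.
ValidTxnList low = new ValidCompactorTxnList(new long[] { 13, 14 }, 12);
System.out.println(low.isTxnRangeValid(7, 9));   // ALL

// High watermark 2: nothing in [7, 9] is committed yet, so none of it may be compacted.
ValidTxnList high = new ValidCompactorTxnList(new long[] { 3, 4 }, 2);
System.out.println(high.isTxnRangeValid(7, 9));  // NONE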
Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.
Class TestValidCompactorTxnList, method writeToString.
@Test
public void writeToString() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[] { 9, 7, 10, Long.MAX_VALUE }, 8);
  Assert.assertEquals("8:" + Long.MAX_VALUE + ":7", txns.writeToString());
  txns = new ValidCompactorTxnList();
  Assert.assertEquals(Long.toString(Long.MAX_VALUE) + ":" + Long.MAX_VALUE + ":", txns.writeToString());
  txns = new ValidCompactorTxnList(new long[0], 23);
  Assert.assertEquals("23:" + Long.MAX_VALUE + ":", txns.writeToString());
}
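The three assertions above document the serialized layout: the high watermark, a second colon-separated field (Long.MAX_VALUE in every case shown), and then the comma-separated exceptions, with exceptions above the high watermark dropped. Below is a hedged round-trip sketch, under the assumption that readFromString() accepts exactly what writeToString() produces; the variable names are illustrative.

import org.apache.hadoop.hive.common.ValidCompactorTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;

// Serialize: high watermark 8; of the exceptions only 7 survives (9, 10 and
// Long.MAX_VALUE are above the high watermark, as the first assertion shows).
ValidTxnList out = new ValidCompactorTxnList(new long[] { 9, 7, 10, Long.MAX_VALUE }, 8);
String serialized = out.writeToString();      // "8:9223372036854775807:7"

// Deserialize on the receiving side, e.g. from a string carried in the job conf.
ValidTxnList in = new ValidCompactorTxnList();
in.readFromString(serialized);
System.out.println(in.getHighWatermark());    // 8
System.out.println(in.isTxnValid(7));         // false: 7 is listed as an exception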