Use of org.apache.hadoop.hive.common.ValidReaderWriteIdList in project hive by apache: class TestOrcRawRecordMerger, method testOriginalReaderPair.
@Test
public void testOriginalReaderPair() throws Exception {
  int BUCKET = 10;
  ReaderKey key = new ReaderKey();
  Configuration conf = new Configuration();
  int bucketProperty = OrcRawRecordMerger.encodeBucketId(conf, BUCKET, 0);
  Reader reader = createMockOriginalReader();
  RecordIdentifier minKey = new RecordIdentifier(0, bucketProperty, 1);
  RecordIdentifier maxKey = new RecordIdentifier(0, bucketProperty, 3);
  boolean[] includes = new boolean[] { true, true };
  FileSystem fs = FileSystem.getLocal(conf);
  Path root = new Path(tmpDir, "testOriginalReaderPair");
  fs.makeQualified(root);
  fs.create(root);
  ReaderPair pair = new OrcRawRecordMerger.OriginalReaderPairToRead(key, reader, BUCKET, minKey, maxKey,
      new Reader.Options().include(includes), new OrcRawRecordMerger.Options().rootPath(root), conf,
      new ValidReaderWriteIdList(), 0);
  RecordReader recordReader = pair.getRecordReader();
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(2, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  assertEquals("third", value(pair.nextRecord()));
  pair.next(pair.nextRecord());
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(3, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  assertEquals("fourth", value(pair.nextRecord()));
  pair.next(pair.nextRecord());
  assertEquals(null, pair.nextRecord());
  Mockito.verify(recordReader).close();
}
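The test above hands the reader pair an empty ValidReaderWriteIdList. As a hedged illustration of why that is enough, the sketch below (not part of TestOrcRawRecordMerger; the method name is made up) assumes the no-argument constructor builds a list with no open or aborted write ids, so every write id, including the synthetic write id 0 assigned to rows of the original file, reads as committed.

// Illustrative only -- not Hive source. Assumes the default-constructed list has no
// exception entries and an unbounded high watermark.
private void sketchEmptyWriteIdList() {
  ValidWriteIdList everythingValid = new ValidReaderWriteIdList();
  boolean originalRowsVisible = everythingValid.isWriteIdValid(0L);   // expected: true
  boolean laterWritesVisible = everythingValid.isWriteIdValid(42L);   // expected: true
}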
Use of org.apache.hadoop.hive.common.ValidReaderWriteIdList in project hive by apache: class TestOrcRawRecordMerger, method testOriginalReaderPairNoMin.
@Test
public void testOriginalReaderPairNoMin() throws Exception {
  int BUCKET = 10;
  ReaderKey key = new ReaderKey();
  Reader reader = createMockOriginalReader();
  Configuration conf = new Configuration();
  int bucketProperty = OrcRawRecordMerger.encodeBucketId(conf, BUCKET, 0);
  FileSystem fs = FileSystem.getLocal(conf);
  Path root = new Path(tmpDir, "testOriginalReaderPairNoMin");
  fs.makeQualified(root);
  fs.create(root);
  ReaderPair pair = new OrcRawRecordMerger.OriginalReaderPairToRead(key, reader, BUCKET, null, null,
      new Reader.Options(), new OrcRawRecordMerger.Options().rootPath(root), conf,
      new ValidReaderWriteIdList(), 0);
  assertEquals("first", value(pair.nextRecord()));
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(0, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  pair.next(pair.nextRecord());
  assertEquals("second", value(pair.nextRecord()));
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(1, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  pair.next(pair.nextRecord());
  assertEquals("third", value(pair.nextRecord()));
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(2, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  pair.next(pair.nextRecord());
  assertEquals("fourth", value(pair.nextRecord()));
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(3, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  pair.next(pair.nextRecord());
  assertEquals("fifth", value(pair.nextRecord()));
  assertEquals(0, key.getWriteId());
  assertEquals(bucketProperty, key.getBucketProperty());
  assertEquals(4, key.getRowId());
  assertEquals(0, key.getCurrentWriteId());
  pair.next(pair.nextRecord());
  assertEquals(null, pair.nextRecord());
  Mockito.verify(pair.getRecordReader()).close();
}
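For contrast with the bounded variant above, this hedged sketch (not part of the Hive tests; the method and parameter are illustrative) spells out the assumption behind the two tests: RecordIdentifier keys appear to order by write id, then bucket, then row id, and the assertions in testOriginalReaderPair suggest the minimum key is treated as exclusive and the maximum as inclusive, so bounds at row ids 1 and 3 yield rows 2 and 3, while the null bounds here let the pair scan all five rows.

// Illustrative only -- not Hive source. bucketProperty stands in for the value
// produced by OrcRawRecordMerger.encodeBucketId(conf, BUCKET, 0) above.
private void sketchKeyBounds(int bucketProperty) {
  RecordIdentifier minKey = new RecordIdentifier(0, bucketProperty, 1);
  RecordIdentifier maxKey = new RecordIdentifier(0, bucketProperty, 3);
  // Same write id and bucket, so ordering falls through to the row id.
  boolean ordered = minKey.compareTo(maxKey) < 0;   // expected: true
}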
Use of org.apache.hadoop.hive.common.ValidReaderWriteIdList in project hive by apache: class OrcInputFormat, method getReader.
@Override
public RowReader<OrcStruct> getReader(InputSplit inputSplit, Options options) throws IOException {
  final OrcSplit split = (OrcSplit) inputSplit;
  // Retrieve the acidOperationalProperties for the table, initialized in HiveInputFormat.
  AcidUtils.AcidOperationalProperties acidOperationalProperties =
      AcidUtils.getAcidOperationalProperties(options.getConfiguration());
  if (!acidOperationalProperties.isSplitUpdate()) {
    throw new IllegalStateException("Expected SplitUpdate table: " + split.getPath());
  }
  final Path[] deltas = VectorizedOrcAcidRowBatchReader.getDeleteDeltaDirsFromSplit(split);
  final Configuration conf = options.getConfiguration();
  final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, split);
  OrcRawRecordMerger.Options mergerOptions = new OrcRawRecordMerger.Options().isCompacting(false);
  mergerOptions.rootPath(split.getRootDir());
  mergerOptions.bucketPath(split.getPath());
  final int bucket;
  if (split.hasBase()) {
    AcidOutputFormat.Options acidIOOptions =
        AcidUtils.parseBaseOrDeltaBucketFilename(split.getPath(), conf);
    if (acidIOOptions.getBucketId() < 0) {
      LOG.warn("Can't determine bucket ID for " + split.getPath() + "; ignoring");
    }
    bucket = acidIOOptions.getBucketId();
    if (split.isOriginal()) {
      mergerOptions.copyIndex(acidIOOptions.getCopyNumber()).bucketPath(split.getPath());
    }
  } else {
    bucket = (int) split.getStart();
    assert false : "We should never have a split w/o base in acid 2.0 for full acid: " + split.getPath();
  }
  // todo: createOptionsForReader() assumes it's !isOriginal.... why?
  final Reader.Options readOptions = OrcInputFormat.createOptionsForReader(conf);
  readOptions.range(split.getStart(), split.getLength());
  String txnString = conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
  ValidWriteIdList validWriteIdList = (txnString == null)
      ? new ValidReaderWriteIdList() : new ValidReaderWriteIdList(txnString);
  LOG.debug("getReader:: Read ValidWriteIdList: " + validWriteIdList.toString()
      + " isTransactionalTable: " + HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN));
  final OrcRawRecordMerger records = new OrcRawRecordMerger(conf, true, reader, split.isOriginal(),
      bucket, validWriteIdList, readOptions, deltas, mergerOptions);
  return new RowReader<OrcStruct>() {
    OrcStruct innerRecord = records.createValue();

    @Override
    public ObjectInspector getObjectInspector() {
      return OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(readOptions.getSchema()));
    }

    @Override
    public boolean next(RecordIdentifier recordIdentifier, OrcStruct orcStruct) throws IOException {
      boolean result;
      // filter out the deleted records
      do {
        result = records.next(recordIdentifier, innerRecord);
      } while (result && OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION);
      if (result) {
        // swap the fields with the passed in orcStruct
        orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
      }
      return result;
    }

    @Override
    public RecordIdentifier createKey() {
      return records.createKey();
    }

    @Override
    public OrcStruct createValue() {
      return new OrcStruct(records.getColumns());
    }

    @Override
    public long getPos() throws IOException {
      return records.getPos();
    }

    @Override
    public void close() throws IOException {
      records.close();
    }

    @Override
    public float getProgress() throws IOException {
      return records.getProgress();
    }
  };
}
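getReader() only consumes the write id list; something upstream must have serialized it into the job configuration under ValidWriteIdList.VALID_WRITEIDS_KEY. The sketch below (not Hive source; the helper name is made up) shows that round trip under the assumption that writeToString() and the String constructor of ValidReaderWriteIdList are inverses, and mirrors the null-check fallback used above.

// Illustrative only -- not Hive source.
private ValidWriteIdList roundTripWriteIdList(Configuration conf, ValidWriteIdList writeIds) {
  // Producer side: publish the list into the job configuration.
  conf.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.writeToString());
  // Consumer side: same pattern as getReader() -- fall back to an
  // "everything valid" list when nothing was published.
  String txnString = conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
  return (txnString == null) ? new ValidReaderWriteIdList() : new ValidReaderWriteIdList(txnString);
}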
Use of org.apache.hadoop.hive.common.ValidReaderWriteIdList in project hive by apache: class Cleaner, method clean.
private void clean(CompactionInfo ci) throws MetaException {
  LOG.info("Starting cleaning for " + ci.getFullPartitionName());
  try {
    Table t = resolveTable(ci);
    if (t == null) {
      // The table was dropped before we got around to cleaning it.
      LOG.info("Unable to find table " + ci.getFullTableName() + ", assuming it was dropped");
      txnHandler.markCleaned(ci);
      return;
    }
    Partition p = null;
    if (ci.partName != null) {
      p = resolvePartition(ci);
      if (p == null) {
        // The partition was dropped before we got around to cleaning it.
        LOG.info("Unable to find partition " + ci.getFullPartitionName() + ", assuming it was dropped");
        txnHandler.markCleaned(ci);
        return;
      }
    }
    StorageDescriptor sd = resolveStorageDescriptor(t, p);
    final String location = sd.getLocation();
    /**
     * Each Compaction only compacts as far as the highest txn id such that all txns below it
     * are resolved (i.e. not opened). This is what "highestWriteId" tracks. This is only tracked
     * since Hive 1.3.0/2.0 - thus may be 0. See ValidCompactorWriteIdList and uses for more info.
     *
     * We only want to clean up to the highestWriteId - otherwise we risk deleting deltas from
     * under an active reader.
     *
     * Suppose we have deltas D2 D3 for table T, i.e. the last compaction created D3 so now there is a
     * clean request for D2.
     * Cleaner checks existing locks and finds none.
     * Between that check and removeFiles() a query starts (it will be reading D3) and another compaction
     * completes which creates D4.
     * Now removeFiles() (more specifically AcidUtils.getAcidState()) will declare D3 to be obsolete
     * unless ValidTxnList is "capped" at highestWriteId.
     */
    final ValidWriteIdList txnList = (ci.highestWriteId > 0)
        ? new ValidReaderWriteIdList(ci.getFullTableName(), new long[0], new BitSet(), ci.highestWriteId)
        : new ValidReaderWriteIdList();
    if (runJobAsSelf(ci.runAs)) {
      removeFiles(location, txnList);
    } else {
      LOG.info("Cleaning as user " + ci.runAs + " for " + ci.getFullPartitionName());
      UserGroupInformation ugi = UserGroupInformation.createProxyUser(ci.runAs, UserGroupInformation.getLoginUser());
      ugi.doAs(new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws Exception {
          removeFiles(location, txnList);
          return null;
        }
      });
      try {
        FileSystem.closeAllForUGI(ugi);
      } catch (IOException exception) {
        LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " + ci.getFullPartitionName(), exception);
      }
    }
    txnHandler.markCleaned(ci);
  } catch (Exception e) {
    LOG.error("Caught exception when cleaning, unable to complete cleaning of " + ci + " " + StringUtils.stringifyException(e));
    txnHandler.markFailed(ci);
  }
}
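The capped list built in clean() is what keeps the Cleaner from touching deltas written after the compaction's high watermark. A hedged sketch (not Hive source; the table name and write ids are made up) of the intended semantics of the four-argument constructor used above:

// Illustrative only -- not Hive source. Assumes java.util.BitSet is imported and that
// write ids above the high watermark are reported as invalid, which is what the comment
// in clean() relies on to protect later deltas from removal.
private void sketchCappedWriteIdList() {
  long highestWriteId = 7L;
  ValidWriteIdList capped =
      new ValidReaderWriteIdList("default.t", new long[0], new BitSet(), highestWriteId);
  boolean compactedRange = capped.isWriteIdValid(7L);   // expected: true -- at the cap
  boolean laterDelta = capped.isWriteIdValid(8L);       // expected: false -- written after the cap
}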
Use of org.apache.hadoop.hive.common.ValidReaderWriteIdList in project hive by apache: class FetchOperator, method extractValidWriteIdList.
private ValidWriteIdList extractValidWriteIdList() {
  if (currDesc.getTableName() == null || !org.apache.commons.lang.StringUtils.isBlank(currDesc.getTableName())) {
    String txnString = job.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
    LOG.debug("FetchOperator get writeIdStr: " + txnString);
    return txnString == null ? new ValidReaderWriteIdList() : new ValidReaderWriteIdList(txnString);
  }
  // not fetching from a table directly but from a temp location
  return null;
}
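The guard in extractValidWriteIdList() reads oddly at first sight; the sketch below (not Hive source; the helper name is made up) just restates its behavior: only a non-null but blank table name, which FetchOperator is assumed to use for a temp-location fetch, skips the write id lookup.

// Illustrative only -- not Hive source.
private static boolean consultsWriteIdList(String tableName) {
  return tableName == null || !org.apache.commons.lang.StringUtils.isBlank(tableName);
}
// consultsWriteIdList(null)    -> true   (no table name recorded; still read VALID_WRITEIDS_KEY)
// consultsWriteIdList("db.t")  -> true   (ordinary table fetch)
// consultsWriteIdList("  ")    -> false  (blank name: temp location, no write id list)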