Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestOrcRawRecordMerger, method testNewBaseAndDelta.
private void testNewBaseAndDelta(boolean use130Format) throws Exception {
final int BUCKET = 10;
String[] values = new String[] { "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth" };
Configuration conf = new Configuration();
OrcOutputFormat of = new OrcOutputFormat();
FileSystem fs = FileSystem.getLocal(conf);
Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
fs.delete(root, true);
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
// write the base
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).inspector(inspector).bucket(BUCKET).finalDestination(root);
final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
if (!use130Format) {
options.statementId(-1);
}
RecordUpdater ru = of.getRecordUpdater(root, options.writingBase(true).maximumWriteId(100));
for (String v : values) {
ru.insert(0, new MyRow(v));
}
ru.close(false);
// write a delta
ru = of.getRecordUpdater(root, options.writingBase(false).minimumWriteId(200).maximumWriteId(200).recordIdColumn(1));
ru.update(200, new MyRow("update 1", 0, 0, BUCKET_PROPERTY));
ru.update(200, new MyRow("update 2", 2, 0, BUCKET_PROPERTY));
ru.update(200, new MyRow("update 3", 3, 0, BUCKET_PROPERTY));
ru.delete(200, new MyRow("", 7, 0, BUCKET_PROPERTY));
ru.delete(200, new MyRow("", 8, 0, BUCKET_PROPERTY));
ru.close(false);
conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testNewBaseAndDelta:200:" + Long.MAX_VALUE);
AcidDirectory directory = AcidUtils.getAcidState(fs, root, conf, writeIdList, null, use130Format);
assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
assertEquals(new Path(root, use130Format ? AcidUtils.deleteDeltaSubdir(200, 200, 0) : AcidUtils.deleteDeltaSubdir(200, 200)), directory.getCurrentDirectories().get(0).getPath());
assertEquals(new Path(root, use130Format ? AcidUtils.deltaSubdir(200, 200, 0) : AcidUtils.deltaSubdir(200, 200)), directory.getCurrentDirectories().get(1).getPath());
Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
Path deltaPath = AcidUtils.createBucketFile(directory.getCurrentDirectories().get(1).getPath(), BUCKET);
Path deleteDeltaDir = directory.getCurrentDirectories().get(0).getPath();
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
AcidUtils.setAcidOperationalProperties(conf, true, null);
conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
// the first "split" is for base/
Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
assertEquals(null, merger.getMinKey());
assertEquals(null, merger.getMaxKey());
RecordIdentifier id = merger.createKey();
OrcStruct event = merger.createValue();
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
assertEquals("second", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
assertEquals("fifth", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
assertEquals("sixth", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
assertEquals("seventh", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
assertEquals("tenth", getValue(event));
assertEquals(false, merger.next(id, event));
merger.close();
// second "split" is delta_200_200
baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
assertEquals(null, merger.getMinKey());
assertEquals(null, merger.getMaxKey());
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
assertEquals("update 1", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
assertEquals("update 2", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
assertEquals("update 3", getValue(event));
assertEquals(false, merger.next(id, event));
merger.close();
// now run as if it's a minor Compaction so we don't collapse events
// here there is only 1 "split" since we only have data for 1 bucket
merger = new OrcRawRecordMerger(conf, false, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true));
assertEquals(null, merger.getMinKey());
assertEquals(null, merger.getMaxKey());
assertEquals(true, merger.next(id, event));
// minor compaction, so we ignore the 'base_0000100' files; all deletes end up first since
// they all modify primordial rows
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
// data from delta_200_200
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
assertEquals("update 1", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
assertEquals("update 2", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
assertEquals("update 3", getValue(event));
assertEquals(false, merger.next(id, event));
merger.close();
// now run as if it's a major Compaction so we collapse events
// here there is only 1 "split" since we only have data for 1 bucket
baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
merger = new OrcRawRecordMerger(conf, true, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true).isMajorCompaction(true).baseDir(new Path(root, "base_0000100")));
assertEquals(null, merger.getMinKey());
assertEquals(null, merger.getMaxKey());
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
assertEquals("second", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
assertEquals("fifth", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
assertEquals("sixth", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
assertEquals("seventh", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
assertNull(OrcRecordUpdater.getRow(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
assertEquals("tenth", getValue(event));
// data from delta_200_200
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
assertEquals("update 1", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
assertEquals("update 2", getValue(event));
assertEquals(true, merger.next(id, event));
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
assertEquals("update 3", getValue(event));
assertEquals(false, merger.next(id, event));
merger.close();
// try ignoring the 200 transaction and make sure it still works
ValidWriteIdList writeIds = new ValidReaderWriteIdList("testNewBaseAndDelta:2000:200:200");
// again 1st split is for base/
baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds, new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
assertEquals(null, merger.getMinKey());
assertEquals(null, merger.getMaxKey());
for (int i = 0; i < values.length; ++i) {
assertEquals(true, merger.next(id, event));
LOG.info("id = " + id + "event = " + event);
assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
assertEquals(new ReaderKey(0, BUCKET_PROPERTY, i, 0), id);
assertEquals(values[i], getValue(event));
}
assertEquals(false, merger.next(id, event));
merger.close();
// 2nd split is for delta_200_200 which is filtered out entirely by "txns"
baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds, new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
assertEquals(null, merger.getMinKey());
assertEquals(null, merger.getMaxKey());
assertEquals(false, merger.next(id, event));
merger.close();
}
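The ordering asserted above follows from how RecordIdentifier keys compare: write id first, then bucket property, then row id, so a delete event for an early row surfaces before the insert of a later row in the same bucket. Below is a minimal sketch of that comparison, assuming only the three RecordIdentifier fields (the actual ReaderKey used by the merger adds further tie-breaking fields); it is illustrative, not the Hive implementation.

import java.util.Comparator;

final class RecordIdOrderSketch {
    // Hypothetical stand-in for the (writeId, bucketProperty, rowId) triple.
    static final class Key {
        final long writeId;
        final int bucketProperty;
        final long rowId;
        Key(long writeId, int bucketProperty, long rowId) {
            this.writeId = writeId;
            this.bucketProperty = bucketProperty;
            this.rowId = rowId;
        }
    }

    // Compare write id, then bucket property, then row id.
    static final Comparator<Key> ORDER = Comparator
            .<Key>comparingLong(k -> k.writeId)
            .thenComparingInt(k -> k.bucketProperty)
            .thenComparingLong(k -> k.rowId);

    public static void main(String[] args) {
        Key deleteOfRow0 = new Key(0, 10, 0);  // delete event keyed on the original row 0
        Key insertOfRow1 = new Key(0, 10, 1);  // surviving insert "second"
        System.out.println(ORDER.compare(deleteOfRow0, insertOfRow1) < 0);  // true: delete sorts first
    }
}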
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestOrcRawRecordMerger, method generateKeyInterval.
private static OrcRawRecordMerger.KeyInterval generateKeyInterval(Long minRowId, Long maxRowId) {
RecordIdentifier min = null;
if (minRowId != null) {
min = new RecordIdentifier(1, 100, minRowId);
}
RecordIdentifier max = null;
if (maxRowId != null) {
max = new RecordIdentifier(1, 100, maxRowId);
}
return new OrcRawRecordMerger.KeyInterval(min, max);
}
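A brief usage note: the helper fixes the write id at 1 and the bucket at 100 and varies only the row id, and a null bound leaves that end of the interval open, for example:

// Both bounds set: [RecordIdentifier(1, 100, 5), RecordIdentifier(1, 100, 10)]
OrcRawRecordMerger.KeyInterval bounded = generateKeyInterval(5L, 10L);
// Max side left open: [RecordIdentifier(1, 100, 5), null]
OrcRawRecordMerger.KeyInterval openAbove = generateKeyInterval(5L, null);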
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestOrcRawRecordMerger, method testReaderPair.
@Test
public void testReaderPair() throws Exception {
ReaderKey key = new ReaderKey();
Reader reader = createMockReader();
RecordIdentifier minKey = new RecordIdentifier(10, 20, 30);
RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60);
ReaderPair pair = new OrcRawRecordMerger.ReaderPairAcid(key, reader, minKey, maxKey, new Reader.Options(), new HiveConf());
RecordReader recordReader = pair.getRecordReader();
assertEquals(10, key.getWriteId());
assertEquals(20, key.getBucketProperty());
assertEquals(40, key.getRowId());
assertEquals(120, key.getCurrentWriteId());
assertEquals("third", value(pair.nextRecord()));
pair.next(pair.nextRecord());
assertEquals(40, key.getWriteId());
assertEquals(50, key.getBucketProperty());
assertEquals(60, key.getRowId());
assertEquals(130, key.getCurrentWriteId());
assertEquals("fourth", value(pair.nextRecord()));
pair.next(pair.nextRecord());
assertEquals(null, pair.nextRecord());
Mockito.verify(recordReader).close();
}
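A hedged reading of the bounds in this test: the pair skips rows whose key is at or below minKey and stops once keys exceed maxKey, which is why iteration starts at (10, 20, 40) and ends exactly at (40, 50, 60). Since RecordIdentifier is comparable, the boundary check can be illustrated directly:

RecordIdentifier minKey = new RecordIdentifier(10, 20, 30);
RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60);
RecordIdentifier firstReturned = new RecordIdentifier(10, 20, 40);
// strictly after the lower bound, at or before the upper bound
System.out.println(firstReturned.compareTo(minKey) > 0);   // true
System.out.println(firstReturned.compareTo(maxKey) <= 0);  // true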
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class OrcRecordUpdater, method parseKeyIndex.
private static RecordIdentifier[] parseKeyIndex(ByteBuffer val) {
String[] stripes;
try {
CharsetDecoder utf8Decoder = UTF8.newDecoder();
stripes = utf8Decoder.decode(val).toString().split(";");
} catch (CharacterCodingException e) {
throw new IllegalArgumentException("Bad string encoding for " + OrcRecordUpdater.ACID_KEY_INDEX_NAME, e);
}
RecordIdentifier[] result = new RecordIdentifier[stripes.length];
for (int i = 0; i < stripes.length; ++i) {
if (stripes[i].length() != 0) {
String[] parts = stripes[i].split(",");
result[i] = new RecordIdentifier();
result[i].setValues(Long.parseLong(parts[0]), Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
}
}
return result;
}
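For context, the string being parsed is the OrcRecordUpdater.ACID_KEY_INDEX_NAME entry from the ORC user metadata: one "writeId,bucketProperty,rowId" triple per stripe, with stripes separated by ';' and empty entries permitted. The sketch below is the inverse operation under that assumption, using only the RecordIdentifier getters seen elsewhere on this page; it is illustrative and not the actual writer code in OrcRecordUpdater.

// Illustrative inverse of parseKeyIndex: serialize one "writeId,bucketProperty,rowId"
// triple per stripe, ';'-separated, leaving unknown stripes empty.
private static String formatKeyIndex(RecordIdentifier[] keys) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < keys.length; ++i) {
        if (i > 0) {
            sb.append(';');
        }
        if (keys[i] != null) {
            sb.append(keys[i].getWriteId()).append(',')
              .append(keys[i].getBucketProperty()).append(',')
              .append(keys[i].getRowId());
        }
    }
    return sb.toString();
}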
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class CompactorTestUtil, method checkExpectedTxnsPresent.
static void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty, String columnTypesProperty, int bucket, long min, long max, List<Integer> invalidWriteIDs, int numBuckets) throws IOException {
ValidWriteIdList writeIdList = new ValidWriteIdList() {
@Override
public String getTableName() {
return "AcidTable";
}
@Override
public boolean isWriteIdValid(long writeid) {
return true;
}
@Override
public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
return RangeResponse.ALL;
}
@Override
public String writeToString() {
return "";
}
@Override
public void readFromString(String src) {
}
@Override
public Long getMinOpenWriteId() {
return null;
}
@Override
public long getHighWatermark() {
return Long.MAX_VALUE;
}
@Override
public long[] getInvalidWriteIds() {
return new long[0];
}
@Override
public boolean isValidBase(long writeid) {
return true;
}
@Override
public boolean isWriteIdAborted(long writeid) {
return true;
}
@Override
public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) {
return RangeResponse.ALL;
}
};
OrcInputFormat aif = new OrcInputFormat();
Configuration conf = new Configuration();
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
conf.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(numBuckets));
conf.setBoolean("orc.schema.evolution.case.sensitive", false);
HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
AcidInputFormat.RawReader<OrcStruct> reader = aif.getRawReader(conf, true, bucket, writeIdList, base, deltas, new HashMap<String, Integer>());
RecordIdentifier identifier = reader.createKey();
OrcStruct value = reader.createValue();
long currentTxn = min;
boolean seenCurrentTxn = false;
while (reader.next(identifier, value)) {
if (!seenCurrentTxn) {
Assert.assertEquals(currentTxn, identifier.getWriteId());
seenCurrentTxn = true;
}
if (currentTxn != identifier.getWriteId()) {
if (invalidWriteIDs != null) {
Assert.assertFalse(invalidWriteIDs.contains((int) identifier.getWriteId())); // cast so the List<Integer> lookup can actually match
}
currentTxn++;
}
}
Assert.assertEquals(max, currentTxn);
}
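A hypothetical invocation is shown below; the paths, column properties, and write-id range are placeholders chosen for illustration and do not come from the listing above.

// Verify that bucket 0 of the compacted output contains every write id from 1 through 4,
// with no write ids from the "must not appear" list. All literals here are placeholders.
Path base = new Path("/warehouse/acid_table/base_0000004");
Path[] deltas = new Path[0];
CompactorTestUtil.checkExpectedTxnsPresent(
    base, deltas,
    "a,b",            // columnNamesProperty
    "int:string",     // columnTypesProperty
    0,                // bucket to read
    1L, 4L,           // expected min / max write ids
    null,             // no write ids that must be absent
    1);               // numBuckets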