
Example 1 with ReaderKey

Use of org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey in project hive by Apache, from the class TestOrcRawRecordMerger, method testOriginalReaderPair.

@Test
public void testOriginalReaderPair() throws Exception {
    int BUCKET = 10;
    ReaderKey key = new ReaderKey();
    Configuration conf = new Configuration();
    int bucketProperty = OrcRawRecordMerger.encodeBucketId(conf, BUCKET, 0);
    Reader reader = createMockOriginalReader();
    RecordIdentifier minKey = new RecordIdentifier(0, bucketProperty, 1);
    RecordIdentifier maxKey = new RecordIdentifier(0, bucketProperty, 3);
    boolean[] includes = new boolean[] { true, true };
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testOriginalReaderPair");
    fs.makeQualified(root);
    fs.create(root);
    ReaderPair pair = new OrcRawRecordMerger.OriginalReaderPairToRead(key, reader, BUCKET, minKey, maxKey,
        new Reader.Options().include(includes), new OrcRawRecordMerger.Options().rootPath(root), conf,
        new ValidReaderWriteIdList(), 0);
    RecordReader recordReader = pair.getRecordReader();
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(2, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    assertEquals("third", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(3, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    assertEquals("fourth", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(null, pair.nextRecord());
    Mockito.verify(recordReader).close();
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier), ReaderPair (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair), FileSystem (org.apache.hadoop.fs.FileSystem), ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey), ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList), Test (org.junit.Test)
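
The bucketProperty asserted above is the packed int form of the bucket id that ACID row ids carry. As a minimal, hedged sketch of how such a value can be derived with BucketCodec (the helper name encodeBucket is illustrative; it is not claimed to be the merger's actual implementation of encodeBucketId):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.BucketCodec;

public class BucketPropertySketch {
    // Illustrative helper: packs a bucket id and statement id into the single
    // int "bucket property" that shows up in RecordIdentifier / ReaderKey.
    static int encodeBucket(Configuration conf, int bucketId, int statementId) {
        AcidOutputFormat.Options options =
            new AcidOutputFormat.Options(conf).bucket(bucketId).statementId(statementId);
        return BucketCodec.V1.encode(options);
    }

    public static void main(String[] args) {
        // Same bucket id and statement id as the test above.
        System.out.println(encodeBucket(new Configuration(), 10, 0));
    }
}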

Example 2 with ReaderKey

Use of org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey in project hive by Apache, from the class TestOrcRawRecordMerger, method testReaderPairNoMin.

@Test
public void testReaderPairNoMin() throws Exception {
    ReaderKey key = new ReaderKey();
    Reader reader = createMockReader();
    ReaderPair pair = new OrcRawRecordMerger.ReaderPairAcid(key, reader, null, null, new Reader.Options(), new HiveConf());
    RecordReader recordReader = pair.getRecordReader();
    assertEquals(10, key.getWriteId());
    assertEquals(20, key.getBucketProperty());
    assertEquals(20, key.getRowId());
    assertEquals(100, key.getCurrentWriteId());
    assertEquals("first", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(10, key.getWriteId());
    assertEquals(20, key.getBucketProperty());
    assertEquals(30, key.getRowId());
    assertEquals(110, key.getCurrentWriteId());
    assertEquals("second", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(10, key.getWriteId());
    assertEquals(20, key.getBucketProperty());
    assertEquals(40, key.getRowId());
    assertEquals(120, key.getCurrentWriteId());
    assertEquals("third", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(40, key.getWriteId());
    assertEquals(50, key.getBucketProperty());
    assertEquals(60, key.getRowId());
    assertEquals(130, key.getCurrentWriteId());
    assertEquals("fourth", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(40, key.getWriteId());
    assertEquals(50, key.getBucketProperty());
    assertEquals(61, key.getRowId());
    assertEquals(140, key.getCurrentWriteId());
    assertEquals("fifth", value(pair.nextRecord()));
    pair.next(pair.nextRecord());
    assertEquals(null, pair.nextRecord());
    Mockito.verify(recordReader).close();
}
Also used: ReaderPair (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair), HiveConf (org.apache.hadoop.hive.conf.HiveConf), ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey), Test (org.junit.Test)
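
As the assertions above suggest, a ReaderKey is a RecordIdentifier (original write id, bucket property, row id) extended with the write id of the event currently describing that row. A minimal sketch of constructing one directly, assuming it is compiled in the same package as OrcRawRecordMerger in case the nested class is package-visible:

package org.apache.hadoop.hive.ql.io.orc;

public class ReaderKeySketch {
    public static void main(String[] args) {
        // Arguments: (originalWriteId, bucketProperty, rowId, currentWriteId)
        OrcRawRecordMerger.ReaderKey key = new OrcRawRecordMerger.ReaderKey(10, 20, 20, 100);
        System.out.println(key.getWriteId());         // 10
        System.out.println(key.getBucketProperty());  // 20
        System.out.println(key.getRowId());           // 20
        System.out.println(key.getCurrentWriteId());  // 100
    }
}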

Example 3 with ReaderKey

Use of org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey in project hive by Apache, from the class TestOrcRawRecordMerger, method testOriginalReaderPairNoMin.

@Test
public void testOriginalReaderPairNoMin() throws Exception {
    int BUCKET = 10;
    ReaderKey key = new ReaderKey();
    Reader reader = createMockOriginalReader();
    Configuration conf = new Configuration();
    int bucketProperty = OrcRawRecordMerger.encodeBucketId(conf, BUCKET, 0);
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testOriginalReaderPairNoMin");
    fs.makeQualified(root);
    fs.create(root);
    ReaderPair pair = new OrcRawRecordMerger.OriginalReaderPairToRead(key, reader, BUCKET, null, null,
        new Reader.Options(), new OrcRawRecordMerger.Options().rootPath(root), conf,
        new ValidReaderWriteIdList(), 0);
    assertEquals("first", value(pair.nextRecord()));
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(0, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    pair.next(pair.nextRecord());
    assertEquals("second", value(pair.nextRecord()));
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(1, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    pair.next(pair.nextRecord());
    assertEquals("third", value(pair.nextRecord()));
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(2, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    pair.next(pair.nextRecord());
    assertEquals("fourth", value(pair.nextRecord()));
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(3, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    pair.next(pair.nextRecord());
    assertEquals("fifth", value(pair.nextRecord()));
    assertEquals(0, key.getWriteId());
    assertEquals(bucketProperty, key.getBucketProperty());
    assertEquals(4, key.getRowId());
    assertEquals(0, key.getCurrentWriteId());
    pair.next(pair.nextRecord());
    assertEquals(null, pair.nextRecord());
    Mockito.verify(pair.getRecordReader()).close();
}
Also used: Path (org.apache.hadoop.fs.Path), ReaderPair (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey), ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList), Test (org.junit.Test)
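
testOriginalReaderPairNoMin pins down the behaviour for pre-ACID ("original") files: every row gets a synthetic id with write id 0, the encoded bucket property, and a row id equal to its position in the bucket file. A tiny sketch of that numbering, using a placeholder bucket property rather than a real encoded value:

import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class OriginalRowIdSketch {
    public static void main(String[] args) {
        // Placeholder only; in the test this comes from OrcRawRecordMerger.encodeBucketId(conf, bucket, 0).
        int bucketProperty = 0;
        // Original (non-ACID) rows get write id 0 and a row id that simply counts
        // rows from the start of the bucket file, matching the assertions above.
        for (int row = 0; row < 5; row++) {
            RecordIdentifier id = new RecordIdentifier(0, bucketProperty, row);
            System.out.println(id);
        }
    }
}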

Example 4 with ReaderKey

Use of org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey in project hive by Apache, from the class TestOrcRawRecordMerger, method testOrdering.

@Test
public void testOrdering() throws Exception {
    ReaderKey left = new ReaderKey(100, 200, 1200, 300);
    ReaderKey right = new ReaderKey();
    right.setValues(100, 200, 1000, 200, false);
    assertTrue(right.compareTo(left) < 0);
    assertTrue(left.compareTo(right) > 0);
    assertEquals(false, left.equals(right));
    left.set(right);
    assertTrue(right.compareTo(left) == 0);
    assertEquals(true, right.equals(left));
    right.setRowId(2000);
    assertTrue(right.compareTo(left) > 0);
    left.setValues(1, 2, 3, 4, false);
    right.setValues(100, 2, 3, 4, false);
    assertTrue(left.compareTo(right) < 0);
    assertTrue(right.compareTo(left) > 0);
    left.setValues(1, 2, 3, 4, false);
    right.setValues(1, 100, 3, 4, false);
    assertTrue(left.compareTo(right) < 0);
    assertTrue(right.compareTo(left) > 0);
    left.setValues(1, 2, 3, 100, false);
    right.setValues(1, 2, 3, 4, false);
    assertTrue(left.compareTo(right) < 0);
    assertTrue(right.compareTo(left) > 0);
    // ensure that we are consistent when comparing to the base class
    RecordIdentifier ri = new RecordIdentifier(1, 2, 3);
    assertEquals(1, ri.compareTo(left));
    assertEquals(-1, left.compareTo(ri));
    assertEquals(false, ri.equals(left));
    assertEquals(false, left.equals(ri));
}
Also used: RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier), ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey), Test (org.junit.Test)
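
The ordering test fixes the sort contract: keys compare first on the original row id (write id, then bucket property, then row id), and when two events describe the same row, the one with the larger current write id sorts first, so the newest event is seen before older ones. A small sketch of that ordering, under the same package-visibility assumption as above:

package org.apache.hadoop.hive.ql.io.orc;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey;

public class ReaderKeyOrderingSketch {
    public static void main(String[] args) {
        List<ReaderKey> keys = new ArrayList<>();
        keys.add(new ReaderKey(1, 2, 3, 4));     // older event for row (1, 2, 3)
        keys.add(new ReaderKey(1, 2, 3, 100));   // newer event for the same row
        keys.add(new ReaderKey(1, 2, 5, 4));     // event for a later row
        Collections.sort(keys);
        // Per the assertions in testOrdering, the expected order is:
        //   (1, 2, 3) currentWriteId=100, then (1, 2, 3) currentWriteId=4, then (1, 2, 5) currentWriteId=4
        for (ReaderKey key : keys) {
            System.out.println(key);
        }
    }
}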

Example 5 with ReaderKey

Use of org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey in project hive by Apache, from the class TestOrcRawRecordMerger, method testNewBaseAndDelta.

private void testNewBaseAndDelta(boolean use130Format) throws Exception {
    final int BUCKET = 10;
    String[] values = new String[] { "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth" };
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).inspector(inspector).bucket(BUCKET).finalDestination(root);
    final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
    if (!use130Format) {
        options.statementId(-1);
    }
    RecordUpdater ru = of.getRecordUpdater(root, options.writingBase(true).maximumWriteId(100));
    for (String v : values) {
        ru.insert(0, new MyRow(v));
    }
    ru.close(false);
    // write a delta
    ru = of.getRecordUpdater(root, options.writingBase(false).minimumWriteId(200).maximumWriteId(200).recordIdColumn(1));
    ru.update(200, new MyRow("update 1", 0, 0, BUCKET_PROPERTY));
    ru.update(200, new MyRow("update 2", 2, 0, BUCKET_PROPERTY));
    ru.update(200, new MyRow("update 3", 3, 0, BUCKET_PROPERTY));
    ru.delete(200, new MyRow("", 7, 0, BUCKET_PROPERTY));
    ru.delete(200, new MyRow("", 8, 0, BUCKET_PROPERTY));
    ru.close(false);
    conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
    ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testNewBaseAndDelta:200:" + Long.MAX_VALUE);
    AcidDirectory directory = AcidUtils.getAcidState(fs, root, conf, writeIdList, null, use130Format);
    assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
    assertEquals(new Path(root, use130Format ? AcidUtils.deleteDeltaSubdir(200, 200, 0) : AcidUtils.deleteDeltaSubdir(200, 200)), directory.getCurrentDirectories().get(0).getPath());
    assertEquals(new Path(root, use130Format ? AcidUtils.deltaSubdir(200, 200, 0) : AcidUtils.deltaSubdir(200, 200)), directory.getCurrentDirectories().get(1).getPath());
    Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
    Path deltaPath = AcidUtils.createBucketFile(directory.getCurrentDirectories().get(1).getPath(), BUCKET);
    Path deleteDeltaDir = directory.getCurrentDirectories().get(0).getPath();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(conf, true, null);
    conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    // the first "split" is for base/
    Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
        createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir },
        new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    RecordIdentifier id = merger.createKey();
    OrcStruct event = merger.createValue();
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
    assertEquals("second", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
    assertEquals("fifth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
    assertEquals("sixth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
    assertEquals("seventh", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
    assertEquals("tenth", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // second "split" is delta_200_200
    baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(),
        new Reader.Options(), new Path[] { deleteDeltaDir },
        new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // now run as if it's a minor compaction so we don't collapse events;
    // here there is only one "split" since we only have data for one bucket
    merger = new OrcRawRecordMerger(conf, false, null, false, BUCKET, createMaximalTxnList(),
        new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()),
        new OrcRawRecordMerger.Options().isCompacting(true));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    // minor compaction ignores the 'base_0000100' files, so all deletes end up first since
    // they all modify primordial rows
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    // data from delta_200_200
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // now run as if it's a major compaction so we collapse events;
    // here there is only one "split" since we only have data for one bucket
    baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, true, null, false, BUCKET, createMaximalTxnList(),
        new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()),
        new OrcRawRecordMerger.Options().isCompacting(true).isMajorCompaction(true)
            .baseDir(new Path(root, "base_0000100")));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
    assertEquals("second", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
    assertEquals("fifth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
    assertEquals("sixth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
    assertEquals("seventh", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
    assertEquals("tenth", getValue(event));
    // data from delta_200_200
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // try ignoring the 200 transaction and make sure it works still
    ValidWriteIdList writeIds = new ValidReaderWriteIdList("testNewBaseAndDelta:2000:200:200");
    // again 1st split is for base/
    baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds,
        new Reader.Options(), new Path[] { deleteDeltaDir },
        new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    for (int i = 0; i < values.length; ++i) {
        assertEquals(true, merger.next(id, event));
        LOG.info("id = " + id + " event = " + event);
        assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
        assertEquals(new ReaderKey(0, BUCKET_PROPERTY, i, 0), id);
        assertEquals(values[i], getValue(event));
    }
    assertEquals(false, merger.next(id, event));
    merger.close();
    // 2nd split is for delta_200_200 which is filtered out entirely by "txns"
    baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds,
        new Reader.Options(), new Path[] { deleteDeltaDir },
        new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(false, merger.next(id, event));
    merger.close();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), ValidReadTxnList (org.apache.hadoop.hive.common.ValidReadTxnList), AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat), RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier), FileSystem (org.apache.hadoop.fs.FileSystem), AcidDirectory (org.apache.hadoop.hive.ql.io.AcidDirectory), ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey), RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater), Path (org.apache.hadoop.fs.Path), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), BitSet (java.util.BitSet), ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList), ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList)
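
Across all of the merger configurations in this test, the consumption pattern is the same: create a reusable key and value, loop on next(), inspect the operation and row, and close the merger. A stripped-down sketch of that loop, assuming a merger that has already been constructed exactly as in the test above and that the class is compiled in the same package so the package-level helpers on OrcRecordUpdater resolve:

package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;

import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class MergerScanSketch {
    // Drains a merger built as in testNewBaseAndDelta and prints every event.
    static void printAllEvents(OrcRawRecordMerger merger) throws IOException {
        RecordIdentifier id = merger.createKey();
        OrcStruct event = merger.createValue();
        while (merger.next(id, event)) {
            int operation = OrcRecordUpdater.getOperation(event);  // e.g. INSERT_OPERATION, DELETE_OPERATION
            OrcStruct row = OrcRecordUpdater.getRow(event);        // null for delete events
            System.out.println(id + " operation=" + operation + " row=" + row);
        }
        merger.close();
    }
}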

Aggregations

ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey): 6
Test (org.junit.Test): 5
RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier): 4
ReaderPair (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair): 4
Configuration (org.apache.hadoop.conf.Configuration): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
Path (org.apache.hadoop.fs.Path): 3
ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList): 3
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2
BitSet (java.util.BitSet): 1
ValidReadTxnList (org.apache.hadoop.hive.common.ValidReadTxnList): 1
ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList): 1
AcidDirectory (org.apache.hadoop.hive.ql.io.AcidDirectory): 1
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat): 1
RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater): 1
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 1
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1