Example 31 with RecordIdentifier

use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.

the class TestOrcRecordUpdater method testConcurrentParseKeyIndex.

/*
    CharsetDecoder instances are not thread safe, so a shared decoder can end up in an
    inconsistent state when multiple buffers are decoded in parallel.
    E.g.:
    java.lang.IllegalStateException: Current state = FLUSHED, new state = CODING_END
  */
@Test
public void testConcurrentParseKeyIndex() throws Exception {
    // Given
    Reader mockReader = mock(Reader.class);
    when(mockReader.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)).thenReturn(true);
    // Create a large buffer
    final StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 3000; i++) {
        sb.append("100000,200000,300000;");
    }
    when(mockReader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME)).thenReturn(ByteBuffer.wrap(sb.toString().getBytes()));
    // When
    // Hit OrcRecordUpdater.parseKeyIndex with large parallelism
    final int parallelism = 4000;
    Callable<RecordIdentifier[]>[] r = new Callable[parallelism];
    for (int i = 0; i < parallelism; i++) {
        r[i] = () -> {
            return OrcRecordUpdater.parseKeyIndex(mockReader);
        };
    }
    ExecutorService executorService = Executors.newFixedThreadPool(parallelism);
    List<Future<RecordIdentifier[]>> res = executorService.invokeAll(Arrays.asList(r));
    // Check for exceptions
    for (Future<RecordIdentifier[]> ri : res) {
        ri.get();
    }
    executorService.shutdown();
}
Also used : RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier), ExecutorService(java.util.concurrent.ExecutorService), Future(java.util.concurrent.Future), Callable(java.util.concurrent.Callable), Test(org.junit.Test)
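
The failure mode guarded against here is a CharsetDecoder (and a ByteBuffer position) shared across threads. A minimal sketch of the safe pattern, independent of Hive's actual parseKeyIndex implementation and with a hypothetical helper name: create a fresh decoder per call and decode a duplicate of the buffer so no mutable state is shared.

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;

public class ThreadSafeDecodeSketch {

    // Hypothetical helper, not Hive code: a new CharsetDecoder per call means no
    // thread can observe another thread's decoder state (e.g. FLUSHED), and
    // duplicate() gives each caller its own independent position and limit.
    static String decode(ByteBuffer shared) throws CharacterCodingException {
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
        return decoder.decode(shared.duplicate()).toString();
    }
}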

Example 32 with RecordIdentifier

use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.

the class TestVectorizedOrcAcidRowBatchReader method testDeleteEventFiltering.

/**
 * Tests that we can figure out min/max ROW__ID for each split and then use
 * that to only load delete events between min/max.
 * This test doesn't actually check what is read - that is done in more
 * end-to-end (E2E) unit tests.
 * @throws Exception
 */
private void testDeleteEventFiltering() throws Exception {
    boolean filterOn = HiveConf.getBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS);
    int bucket = 0;
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).filesystem(fs).bucket(bucket).writingBase(false).minimumWriteId(1).maximumWriteId(1).inspector(inspector).reporter(Reporter.NULL).recordIdColumn(1).finalDestination(root);
    int bucketProperty = BucketCodec.V1.encode(options);
    // create 3 insert deltas so that we have 3 splits
    RecordUpdater updater = new OrcRecordUpdater(root, options);
    // In the first delta add 2000 recs to simulate recs in multiple stripes.
    int numRows = 2000;
    for (int i = 1; i <= numRows; i++) {
        updater.insert(options.getMinimumWriteId(), new DummyRow(i, i - 1, options.getMinimumWriteId(), bucket));
    }
    updater.close(false);
    options.minimumWriteId(2).maximumWriteId(2);
    updater = new OrcRecordUpdater(root, options);
    updater.insert(options.getMinimumWriteId(), new DummyRow(4, 0, options.getMinimumWriteId(), bucket));
    updater.insert(options.getMinimumWriteId(), new DummyRow(5, 1, options.getMinimumWriteId(), bucket));
    updater.insert(options.getMinimumWriteId(), new DummyRow(6, 2, options.getMinimumWriteId(), bucket));
    updater.close(false);
    options.minimumWriteId(3).maximumWriteId(3);
    updater = new OrcRecordUpdater(root, options);
    updater.insert(options.getMinimumWriteId(), new DummyRow(7, 0, options.getMinimumWriteId(), bucket));
    updater.insert(options.getMinimumWriteId(), new DummyRow(8, 1, options.getMinimumWriteId(), bucket));
    updater.insert(options.getMinimumWriteId(), new DummyRow(9, 2, options.getMinimumWriteId(), bucket));
    updater.close(false);
    // delete 1 row from each of the insert deltas
    options.minimumWriteId(4).maximumWriteId(4);
    updater = new OrcRecordUpdater(root, options);
    updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 0, 1, bucket));
    updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 1, 2, bucket));
    updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 2, 3, bucket));
    updater.close(false);
    conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
    // HWM is not important - just make sure deltas created above are read as
    // if committed
    conf.set(ValidWriteIdList.VALID_WRITEIDS_KEY, "tbl:5:" + Long.MAX_VALUE + "::");
    // now we have 3 delete events total, but for each split we should only
    // load 1 into DeleteRegistry (if filtering is on)
    List<OrcInputFormat.SplitStrategy<?>> splitStrategies = getSplitStrategies();
    assertEquals(1, splitStrategies.size());
    List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(3, splits.size());
    assertEquals(root.toUri().toString() + File.separator + "delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString());
    assertFalse(splits.get(0).isOriginal());
    assertEquals(root.toUri().toString() + File.separator + "delta_0000002_0000002_0000/bucket_00000", splits.get(1).getPath().toUri().toString());
    assertFalse(splits.get(1).isOriginal());
    assertEquals(root.toUri().toString() + File.separator + "delta_0000003_0000003_0000/bucket_00000", splits.get(2).getPath().toUri().toString());
    assertFalse(splits.get(2).isOriginal());
    VectorizedOrcAcidRowBatchReader vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(0), conf, Reporter.NULL, new VectorizedRowBatchCtx());
    ColumnizedDeleteEventRegistry deleteEventRegistry = (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry();
    assertEquals("number of delete events for stripe 1", filterOn ? 1 : 3, deleteEventRegistry.size());
    OrcRawRecordMerger.KeyInterval keyInterval = vectorizedReader.getKeyInterval();
    if (filterOn) {
        assertEquals(new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(1, bucketProperty, 0), new RecordIdentifier(1, bucketProperty, numRows - 1)), keyInterval);
    } else {
        assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval);
    }
    vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(1), conf, Reporter.NULL, new VectorizedRowBatchCtx());
    deleteEventRegistry = (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry();
    assertEquals("number of delete events for stripe 2", filterOn ? 1 : 3, deleteEventRegistry.size());
    keyInterval = vectorizedReader.getKeyInterval();
    if (filterOn) {
        assertEquals(new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(2, bucketProperty, 0), new RecordIdentifier(2, bucketProperty, 2)), keyInterval);
    } else {
        assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval);
    }
    vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(2), conf, Reporter.NULL, new VectorizedRowBatchCtx());
    deleteEventRegistry = (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry();
    assertEquals("number of delete events for stripe 3", filterOn ? 1 : 3, deleteEventRegistry.size());
    keyInterval = vectorizedReader.getKeyInterval();
    if (filterOn) {
        assertEquals(new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(3, bucketProperty, 0), new RecordIdentifier(3, bucketProperty, 2)), keyInterval);
    } else {
        assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval);
    }
}
Also used : BitSet(java.util.BitSet), ValidReadTxnList(org.apache.hadoop.hive.common.ValidReadTxnList), AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat), RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier), VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx), RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater), ColumnizedDeleteEventRegistry(org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry)
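
The asserted key intervals rely on RecordIdentifier being comparable (originalTransaction, then bucket, then rowId). A hedged sketch of the filtering idea the test verifies, written only against that public compareTo (the class and method names are made up, not the reader's internal code):

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class DeleteEventFilterSketch {

    // Keep only delete events whose ROW__ID lies inside the split's [min, max]
    // key interval; deletes outside it cannot affect rows of this split.
    static List<RecordIdentifier> inInterval(List<RecordIdentifier> deletes,
                                             RecordIdentifier min,
                                             RecordIdentifier max) {
        return deletes.stream()
                .filter(id -> id.compareTo(min) >= 0 && id.compareTo(max) <= 0)
                .collect(Collectors.toList());
    }
}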

Example 33 with RecordIdentifier

use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.

the class TestVectorizedOrcAcidRowBatchReader method testDeleteEventOriginalFiltering2.

private void testDeleteEventOriginalFiltering2() throws Exception {
    boolean filterOn = HiveConf.getBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS);
    conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, false);
    // Need to use a bigger row than DummyRow for the writer to flush the stripes
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    Properties properties = new Properties();
    OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(properties, conf);
    writerOptions.inspector(bigOriginalRowInspector).stripeSize(1).batchSize(1);
    String originalFile = "000000_0";
    Path originalFilePath = new Path(root, originalFile);
    byte[] data = new byte[1000];
    Writer writer = OrcFile.createWriter(originalFilePath, writerOptions);
    writer.addRow(new BigOriginalRow(data));
    writer.addRow(new BigOriginalRow(data));
    writer.addRow(new BigOriginalRow(data));
    writer.close();
    Reader reader = OrcFile.createReader(originalFilePath, OrcFile.readerOptions(conf));
    List<StripeInformation> stripes = reader.getStripes();
    // Make sure 3 stripes are created
    assertEquals(3, stripes.size());
    FileStatus fileStatus = fs.getFileStatus(originalFilePath);
    long fileLength = fileStatus.getLen();
    // Set vector mode to true in the map work so that we can generate the syntheticProps
    MapWork mapWork = new MapWork();
    mapWork.setVectorMode(true);
    VectorizedRowBatchCtx vrbContext = new VectorizedRowBatchCtx();
    mapWork.setVectorizedRowBatchCtx(vrbContext);
    HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
    Utilities.setMapWork(conf, mapWork);
    OrcSplit.OffsetAndBucketProperty syntheticProps = VectorizedOrcAcidRowBatchReader.computeOffsetAndBucket(fileStatus, root, true, true, conf);
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(0);
    int bucketProperty = BucketCodec.V1.encode(options);
    // 1. Splits within a stripe
    // A split that's completely within the 2nd stripe
    StripeInformation stripe = stripes.get(1);
    OrcSplit split = new OrcSplit(originalFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100, new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 2), new RecordIdentifier(0, bucketProperty, 1), filterOn);
    // A split that's completely within the last stripe
    stripe = stripes.get(2);
    split = new OrcSplit(originalFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100, new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 3), new RecordIdentifier(0, bucketProperty, 2), filterOn);
    // 2. Splits starting at a stripe boundary
    // A split that starts where the 1st stripe starts and ends before the 1st stripe ends
    stripe = stripes.get(0);
    split = new OrcSplit(originalFilePath, null, stripe.getOffset(), stripe.getLength() - 50, new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    // The key interval for the 1st stripe
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 0), new RecordIdentifier(0, bucketProperty, 0), filterOn);
    // A split that starts where the 2nd stripe starts and ends after the 2nd stripe ends
    stripe = stripes.get(1);
    split = new OrcSplit(originalFilePath, null, stripe.getOffset(), stripe.getLength() + 50, new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    // The key interval for the last 2 stripes
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 1), new RecordIdentifier(0, bucketProperty, 2), filterOn);
    // 3. Splits ending at a stripe boundary
    // A split that starts before the last stripe starts and ends at the last stripe boundary
    stripe = stripes.get(2);
    split = new OrcSplit(originalFilePath, null, stripe.getOffset() - 50, stripe.getLength() + 50, new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    // The key interval for the last stripe
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 2), new RecordIdentifier(0, bucketProperty, 2), filterOn);
    // A split that starts after the 1st stripe starts and ends where the last stripe ends
    split = new OrcSplit(originalFilePath, null, stripes.get(0).getOffset() + 50, reader.getContentLength() - 50, new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    // The key interval for the last 2 stripes
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 1), new RecordIdentifier(0, bucketProperty, 2), filterOn);
    // A split that starts where the 1st stripe starts and ends where the last stripe ends
    split = new OrcSplit(originalFilePath, null, stripes.get(0).getOffset(), reader.getContentLength(), new String[] { "localhost" }, null, true, true, getDeltaMetaDataWithBucketFile(0), fileLength, fileLength, root, syntheticProps);
    // The key interval for all 3 stripes
    validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 0), new RecordIdentifier(0, bucketProperty, 2), filterOn);
}
Also used : Path(org.apache.hadoop.fs.Path), FileStatus(org.apache.hadoop.fs.FileStatus), Properties(java.util.Properties), AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat), RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier), VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx), MapWork(org.apache.hadoop.hive.ql.plan.MapWork), StripeInformation(org.apache.orc.StripeInformation)
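
All of the expected intervals above follow one rule: a split is charged with exactly those stripes whose start offset falls inside the split's byte range, and the synthetic rowIds then run over those stripes (one row per stripe here because of batchSize(1)). A hedged sketch of that rule, with a made-up helper name, not the reader's actual method:

import org.apache.orc.StripeInformation;

public class StripeCoverageSketch {

    // A stripe belongs to the split if its first byte is inside
    // [splitStart, splitStart + splitLength). Splits that start mid-stripe skip
    // that stripe, which is why some cases above expect an empty (inverted)
    // key interval.
    static boolean covers(long splitStart, long splitLength, StripeInformation stripe) {
        long splitEnd = splitStart + splitLength;
        return stripe.getOffset() >= splitStart && stripe.getOffset() < splitEnd;
    }
}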

Example 34 with RecordIdentifier

use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.

the class TestVectorizedOrcAcidRowBatchReader method testDeleteEventFiltering2.

private void testDeleteEventFiltering2() throws Exception {
    boolean filterOn = HiveConf.getBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS);
    boolean skipKeyIdx = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVETESTMODEACIDKEYIDXSKIP);
    int bucket = 1;
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).filesystem(fs).bucket(bucket).writingBase(true).minimumWriteId(10000002).maximumWriteId(10000002).inspector(inspector).reporter(Reporter.NULL).recordIdColumn(1).finalDestination(root);
    int bucketProperty = BucketCodec.V1.encode(options);
    // create data that looks like a compacted base that includes some data
    // from 'original' files and some from native Acid write
    RecordUpdater updater = new OrcRecordUpdater(root, options);
    updater.insert(0, new DummyRow(1, 0, 0, bucket));
    updater.insert(0, new DummyRow(1, 1, 0, bucket));
    updater.insert(0, new DummyRow(2, 2, 0, bucket));
    updater.insert(10000001, new DummyRow(3, 0, 10000001, bucket));
    updater.close(false);
    // delete 3rd row
    options.writingBase(false).minimumWriteId(10000004).maximumWriteId(10000004);
    updater = new OrcRecordUpdater(root, options);
    updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 0, 0, bucket));
    // hypothetically this matches something in delta_10000003_10000003
    // (which does not exist here)
    updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 5, 10000003, bucket));
    updater.close(false);
    conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
    // HWM is not important - just make sure deltas created above are read as
    // if committed
    conf.set(ValidWriteIdList.VALID_WRITEIDS_KEY, "tbl:10000005:" + Long.MAX_VALUE + "::");
    List<OrcInputFormat.SplitStrategy<?>> splitStrategies = getSplitStrategies();
    assertEquals(1, splitStrategies.size());
    List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
    assertEquals(1, splits.size());
    assertEquals(root.toUri().toString() + File.separator + "base_10000002/bucket_00001", splits.get(0).getPath().toUri().toString());
    assertFalse(splits.get(0).isOriginal());
    VectorizedOrcAcidRowBatchReader vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(0), conf, Reporter.NULL, new VectorizedRowBatchCtx());
    ColumnizedDeleteEventRegistry deleteEventRegistry = (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry();
    assertEquals("number of delete events for stripe 1", filterOn ? 1 : 2, deleteEventRegistry.size());
    OrcRawRecordMerger.KeyInterval keyInterval = vectorizedReader.getKeyInterval();
    SearchArgument sarg = vectorizedReader.getDeleteEventSarg();
    if (filterOn) {
        if (skipKeyIdx) {
            // If the key index is not present, the min/max key interval is derived from stripe stats instead
            assertEquals(new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(0, bucketProperty, 0), new RecordIdentifier(10000001, bucketProperty, 2)), keyInterval);
        } else {
            assertEquals(new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(0, bucketProperty, 0), new RecordIdentifier(10000001, bucketProperty, 0)), keyInterval);
        }
        // The key point is that leaf-5 is (rowId <= 2) even though maxKey has
        // rowId 0; see VectorizedOrcAcidRowBatchReader.findMinMaxKeys for details.
        assertEquals("leaf-0 = (LESS_THAN originalTransaction 0)," + " leaf-1 = (LESS_THAN bucket 536936448)," + " leaf-2 = (LESS_THAN rowId 0)," + " leaf-3 = (LESS_THAN_EQUALS originalTransaction 10000001)," + " leaf-4 = (LESS_THAN_EQUALS bucket 536936448)," + " leaf-5 = (LESS_THAN_EQUALS rowId 2)," + " expr = (and (not leaf-0) (not leaf-1) " + "(not leaf-2) leaf-3 leaf-4 leaf-5)", ((SearchArgumentImpl) sarg).toOldString());
    } else {
        assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval);
        assertNull(sarg);
    }
}
Also used : BitSet(java.util.BitSet), SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument), ValidReadTxnList(org.apache.hadoop.hive.common.ValidReadTxnList), AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat), RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier), VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx), RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater), ColumnizedDeleteEventRegistry(org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry)
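
The expected SARG string corresponds to a plain builder expression over the three ACID key columns: reject keys below the minimum and keep keys at or below the maximum. A hedged sketch of how an equivalent predicate could be assembled with the public SearchArgumentFactory API (illustrative only, with a made-up helper name; it is not the reader's internal code path):

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class DeleteEventSargSketch {

    // Builds (key >= minKey) AND (key <= maxKey), matching the
    // leaf-0 .. leaf-5 structure asserted in the test above.
    static SearchArgument keyIntervalSarg(long minTxn, long maxTxn,
                                          long bucketProperty,
                                          long minRow, long maxRow) {
        return SearchArgumentFactory.newBuilder()
                .startAnd()
                .startNot().lessThan("originalTransaction", PredicateLeaf.Type.LONG, minTxn).end()
                .startNot().lessThan("bucket", PredicateLeaf.Type.LONG, bucketProperty).end()
                .startNot().lessThan("rowId", PredicateLeaf.Type.LONG, minRow).end()
                .lessThanEquals("originalTransaction", PredicateLeaf.Type.LONG, maxTxn)
                .lessThanEquals("bucket", PredicateLeaf.Type.LONG, bucketProperty)
                .lessThanEquals("rowId", PredicateLeaf.Type.LONG, maxRow)
                .end()
                .build();
    }
}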

Example 35 with RecordIdentifier

use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.

the class TestOrcRawRecordMerger method testEmpty.

@Test
public void testEmpty() throws Exception {
    final int BUCKET = 0;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testEmpty").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the empty base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).inspector(inspector).bucket(BUCKET).writingBase(true).maximumWriteId(100).finalDestination(root);
    of.getRecordUpdater(root, options).close(false);
    conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
    ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testEmpty:200:" + Long.MAX_VALUE);
    AcidDirectory directory = AcidUtils.getAcidState(fs, root, conf, writeIdList, null, false);
    Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
    Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(false));
    RecordIdentifier key = merger.createKey();
    OrcStruct value = merger.createValue();
    assertEquals(false, merger.next(key, value));
}
Also used : Path(org.apache.hadoop.fs.Path), ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Configuration(org.apache.hadoop.conf.Configuration), BitSet(java.util.BitSet), ValidReadTxnList(org.apache.hadoop.hive.common.ValidReadTxnList), AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat), RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier), ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList), FileSystem(org.apache.hadoop.fs.FileSystem), AcidDirectory(org.apache.hadoop.hive.ql.io.AcidDirectory), ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList), Test(org.junit.Test)
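
For contrast with the empty-base case, the usual way to drain a merger is the createKey/createValue/next loop the test starts. A minimal sketch, assuming a merger built as above and a hypothetical helper placed in the same package as the test (some of these ORC classes have package-private visibility):

package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;

import org.apache.hadoop.hive.ql.io.RecordIdentifier;

class MergerReadLoopSketch {

    // Drains a merger with the same createKey/createValue/next calls the test
    // uses; for the empty base above the loop body never executes.
    static long countRows(OrcRawRecordMerger merger) throws IOException {
        RecordIdentifier key = merger.createKey();
        OrcStruct value = merger.createValue();
        long rows = 0;
        while (merger.next(key, value)) {
            rows++;
        }
        merger.close();
        return rows;
    }
}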

Aggregations

RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier): 40 usages
Test (org.junit.Test): 13
Path (org.apache.hadoop.fs.Path): 9
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat): 9
StripeInformation (org.apache.orc.StripeInformation): 9
Configuration (org.apache.hadoop.conf.Configuration): 7
BitSet (java.util.BitSet): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 5
ValidReadTxnList (org.apache.hadoop.hive.common.ValidReadTxnList): 5
ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList): 5
RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater): 5
ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList): 4
Table (org.apache.hadoop.hive.metastore.api.Table): 4
VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 4
OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat): 4
ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey): 4
OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct): 4
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 4
ArrayList (java.util.ArrayList): 3
AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat): 3