Search in sources :

Example 1 with SortMergedDeleteEventRegistry

Use of org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry in the Apache Hive project.

From the class TestVectorizedOrcAcidRowBatchReader, method testVectorizedOrcAcidRowBatchReader.

/**
 * Reads the first split through a {@link VectorizedOrcAcidRowBatchReader} and verifies the
 * rows it surfaces.
 *
 * <p>The checks are: the reader uses the expected delete event registry implementation, every
 * batch has {@code selectedInUse} set, no selected row has a row id divisible by 2 or 3, no
 * selected row belongs to the excluded transaction 5, and payloads arrive in strictly
 * increasing order across all batches.
 *
 * @param deleteEventRegistry fully-qualified class name of the delete event registry the reader
 *     is expected to use; the registry type is asserted only when this equals the name of
 *     {@code ColumnizedDeleteEventRegistry} or {@code SortMergedDeleteEventRegistry}
 * @throws Exception if creating the splits, constructing, reading from, or closing the reader
 *     fails
 */
private void testVectorizedOrcAcidRowBatchReader(String deleteEventRegistry) throws Exception {
    List<OrcSplit> splits = getSplits();
    // Mark one of the transactions as an exception to test that invalid transactions
    // are being handled properly.
    // Exclude transaction 5
    conf.set(ValidTxnList.VALID_TXNS_KEY, "14:1:1:5");
    VectorizedOrcAcidRowBatchReader vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(0), conf, Reporter.NULL);
    // Release the reader even when an assertion below fails; previously it was never closed.
    try {
        if (deleteEventRegistry.equals(ColumnizedDeleteEventRegistry.class.getName())) {
            assertTrue(vectorizedReader.getDeleteEventRegistry() instanceof ColumnizedDeleteEventRegistry);
        }
        if (deleteEventRegistry.equals(SortMergedDeleteEventRegistry.class.getName())) {
            assertTrue(vectorizedReader.getDeleteEventRegistry() instanceof SortMergedDeleteEventRegistry);
        }
        TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE);
        VectorizedRowBatch vectorizedRowBatch = schema.createRowBatch();
        // set data column count as 1.
        vectorizedRowBatch.setPartitionInfo(1, 0);
        // Tracks the last payload seen across batches so ordering is checked globally.
        long previousPayload = Long.MIN_VALUE;
        while (vectorizedReader.next(null, vectorizedRowBatch)) {
            assertTrue(vectorizedRowBatch.selectedInUse);
            LongColumnVector col = (LongColumnVector) vectorizedRowBatch.cols[0];
            for (int i = 0; i < vectorizedRowBatch.size; ++i) {
                // Only rows listed in the selection vector are inspected.
                int idx = vectorizedRowBatch.selected[i];
                long payload = col.vector[idx];
                // Decode the payload into its originating transaction id and row id.
                long otid = (payload / NUM_ROWID_PER_OTID) + 1;
                long rowId = payload % NUM_ROWID_PER_OTID;
                // NOTE(review): presumably the fixture deletes row ids divisible by 2 or 3;
                // confirm against the test setup.
                assertFalse(rowId % 2 == 0 || rowId % 3 == 0);
                // Check that txn#5 has been excluded.
                assertTrue(otid != 5);
                // Check that the data is in sorted order.
                assertTrue(payload > previousPayload);
                previousPayload = payload;
            }
        }
    } finally {
        vectorizedReader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TypeDescription(org.apache.orc.TypeDescription) ColumnizedDeleteEventRegistry(org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) SortMergedDeleteEventRegistry(org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry)

Aggregations

LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)1 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)1 ColumnizedDeleteEventRegistry (org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.ColumnizedDeleteEventRegistry)1 SortMergedDeleteEventRegistry (org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry)1 TypeDescription (org.apache.orc.TypeDescription)1