
Example 6 with ValidTxnList

Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.

From the class OrcInputFormat, method getReader.

@Override
public RowReader<OrcStruct> getReader(InputSplit inputSplit, Options options) throws IOException {
    final OrcSplit split = (OrcSplit) inputSplit;
    final Path path = split.getPath();
    Path root;
    if (split.hasBase()) {
        if (split.isOriginal()) {
            root = path.getParent();
        } else {
            root = path.getParent().getParent();
        }
    } else {
        root = path;
    }
    // Retrieve the acidOperationalProperties for the table, initialized in HiveInputFormat.
    AcidUtils.AcidOperationalProperties acidOperationalProperties = AcidUtils.getAcidOperationalProperties(options.getConfiguration());
    // The deltas are decided based on whether split-update has been turned on for the table or not.
    // When split-update is turned off, everything in the delta_x_y/ directory should be treated
    // as delta. However if split-update is turned on, only the files in delete_delta_x_y/ directory
    // need to be considered as delta, because files in delta_x_y/ will be processed as base files
    // since they only have insert events in them.
    final Path[] deltas = acidOperationalProperties.isSplitUpdate() ? AcidUtils.deserializeDeleteDeltas(root, split.getDeltas()) : AcidUtils.deserializeDeltas(root, split.getDeltas());
    final Configuration conf = options.getConfiguration();
    final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, split);
    final int bucket = OrcInputFormat.getBucketForSplit(conf, split);
    final Reader.Options readOptions = OrcInputFormat.createOptionsForReader(conf);
    readOptions.range(split.getStart(), split.getLength());
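    // Recover the transaction snapshot serialized into the conf; when the key is absent, fall back to a default ValidReadTxnList.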
    String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
    ValidTxnList validTxnList = txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString);
    final OrcRawRecordMerger records = new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket, validTxnList, readOptions, deltas);
    return new RowReader<OrcStruct>() {

        OrcStruct innerRecord = records.createValue();

        @Override
        public ObjectInspector getObjectInspector() {
            return OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(readOptions.getSchema()));
        }

        @Override
        public boolean next(RecordIdentifier recordIdentifier, OrcStruct orcStruct) throws IOException {
            boolean result;
            // filter out the deleted records
            do {
                result = records.next(recordIdentifier, innerRecord);
            } while (result && OrcRecordUpdater.getOperation(innerRecord) == OrcRecordUpdater.DELETE_OPERATION);
            if (result) {
                // swap the fields with the passed in orcStruct
                orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
            }
            return result;
        }

        @Override
        public RecordIdentifier createKey() {
            return records.createKey();
        }

        @Override
        public OrcStruct createValue() {
            return new OrcStruct(records.getColumns());
        }

        @Override
        public long getPos() throws IOException {
            return records.getPos();
        }

        @Override
        public void close() throws IOException {
            records.close();
        }

        @Override
        public float getProgress() throws IOException {
            return records.getProgress();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) BatchToRowReader(org.apache.hadoop.hive.ql.io.BatchToRowReader) Configuration(org.apache.hadoop.conf.Configuration) StatsProvidingRecordReader(org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader) ValidReadTxnList(org.apache.hadoop.hive.common.ValidReadTxnList) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) AcidOperationalProperties(org.apache.hadoop.hive.ql.io.AcidUtils.AcidOperationalProperties) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)
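The reader above only consumes the transaction snapshot; it expects it to be serialized under ValidTxnList.VALID_TXNS_KEY already. The sketch below shows how a caller might populate that key and how getReader recovers it. It is illustrative only: the default ValidReadTxnList used here stands in for the real snapshot that the query driver obtains from the transaction manager.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;

public class ValidTxnsConfSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Illustrative stand-in: a default ValidReadTxnList (no invalid transactions) rather than a real snapshot.
        ValidTxnList snapshot = new ValidReadTxnList();
        // Serialize it where OrcInputFormat.getReader() will look for it.
        conf.set(ValidTxnList.VALID_TXNS_KEY, snapshot.writeToString());

        // This mirrors the lookup in getReader(): rebuild the list, falling back to a default when absent.
        String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
        ValidTxnList validTxnList = txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString);
        System.out.println("valid txns: " + validTxnList.writeToString());
    }
}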

Example 7 with ValidTxnList

Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.

From the class TestCompactor, method checkExpectedTxnsPresent.

private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty, String columnTypesProperty, int bucket, long min, long max) throws IOException {
    ValidTxnList txnList = new ValidTxnList() {

        @Override
        public boolean isTxnValid(long txnid) {
            return true;
        }

        @Override
        public RangeResponse isTxnRangeValid(long minTxnId, long maxTxnId) {
            return RangeResponse.ALL;
        }

        @Override
        public String writeToString() {
            return "";
        }

        @Override
        public void readFromString(String src) {
        }

        @Override
        public long getHighWatermark() {
            return Long.MAX_VALUE;
        }

        @Override
        public long[] getInvalidTransactions() {
            return new long[0];
        }

        @Override
        public boolean isValidBase(long txnid) {
            return true;
        }
    };
    OrcInputFormat aif = new OrcInputFormat();
    Configuration conf = new Configuration();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    AcidInputFormat.RawReader<OrcStruct> reader = aif.getRawReader(conf, false, bucket, txnList, base, deltas);
    RecordIdentifier identifier = reader.createKey();
    OrcStruct value = reader.createValue();
    long currentTxn = min;
    boolean seenCurrentTxn = false;
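    // Walk the merged records and verify that the transaction ids seen run contiguously from min up to max.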
    while (reader.next(identifier, value)) {
        if (!seenCurrentTxn) {
            Assert.assertEquals(currentTxn, identifier.getTransactionId());
            seenCurrentTxn = true;
        }
        if (currentTxn != identifier.getTransactionId()) {
            Assert.assertEquals(currentTxn + 1, identifier.getTransactionId());
            currentTxn++;
        }
    }
    Assert.assertEquals(max, currentTxn);
}
Also used : AcidInputFormat(org.apache.hadoop.hive.ql.io.AcidInputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) Configuration(org.apache.hadoop.conf.Configuration) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList)
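A hedged sketch of how this private helper might be driven from a TestCompactor test. Every concrete value below (paths, schema, bucket, and transaction range) is a placeholder invented for illustration; the real tests derive them from the table that was just written and compacted.

@Test
public void verifyCompactedTxns() throws Exception {
    // Hypothetical layout: a base produced by major compaction plus one later delta.
    Path base = new Path("/warehouse/acid_tbl/base_0000004");
    Path[] deltas = new Path[] { new Path("/warehouse/acid_tbl/delta_0000005_0000006") };
    // Schema evolution properties: comma-separated column names, colon-separated types (assumed format).
    String columnNames = "a,b";
    String columnTypes = "int:string";
    // Expect to see every transaction from 1 through 6 in bucket 0.
    checkExpectedTxnsPresent(base, deltas, columnNames, columnTypes, 0, 1L, 6L);
}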

Example 8 with ValidTxnList

Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.

From the class TestValidCompactorTxnList, method maxTxnLow.

@Test
public void maxTxnLow() {
    ValidTxnList txns = new ValidCompactorTxnList(new long[] { 13, 14 }, 12);
    ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
    Assert.assertEquals(ValidTxnList.RangeResponse.ALL, rsp);
}
Also used : ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ValidCompactorTxnList(org.apache.hadoop.hive.common.ValidCompactorTxnList) Test(org.junit.Test)

Example 9 with ValidTxnList

Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.

From the class TestValidCompactorTxnList, method minTxnHigh.

@Test
public void minTxnHigh() {
    ValidTxnList txns = new ValidCompactorTxnList(new long[] { 3, 4 }, 2);
    ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
    Assert.assertEquals(ValidTxnList.RangeResponse.NONE, rsp);
}
Also used : ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ValidCompactorTxnList(org.apache.hadoop.hive.common.ValidCompactorTxnList) Test(org.junit.Test)
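Taken together, Examples 8 and 9 pin down the range semantics: a queried range that lies entirely at or below the compactor's high watermark (and contains no excepted transactions) is reported as ALL valid, while a range entirely above the high watermark is NONE. The sketch below simply restates those two assertions side by side as a hypothetical extra test; it adds no behavior beyond what the tests above already verify.

@Test
public void rangeAgainstHighWatermark() {
    // High watermark 12: everything in 7..9 is at or below it and not in the exception list, so the whole range is valid.
    ValidTxnList below = new ValidCompactorTxnList(new long[] { 13, 14 }, 12);
    Assert.assertEquals(ValidTxnList.RangeResponse.ALL, below.isTxnRangeValid(7, 9));

    // High watermark 2: everything in 7..9 is above it, so nothing in the range is valid.
    ValidTxnList above = new ValidCompactorTxnList(new long[] { 3, 4 }, 2);
    Assert.assertEquals(ValidTxnList.RangeResponse.NONE, above.isTxnRangeValid(7, 9));
}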

Example 10 with ValidTxnList

Use of org.apache.hadoop.hive.common.ValidTxnList in project hive by apache.

From the class TestValidCompactorTxnList, method writeToString.

@Test
public void writeToString() {
    ValidTxnList txns = new ValidCompactorTxnList(new long[] { 9, 7, 10, Long.MAX_VALUE }, 8);
    Assert.assertEquals("8:" + Long.MAX_VALUE + ":7", txns.writeToString());
    txns = new ValidCompactorTxnList();
    Assert.assertEquals(Long.toString(Long.MAX_VALUE) + ":" + Long.MAX_VALUE + ":", txns.writeToString());
    txns = new ValidCompactorTxnList(new long[0], 23);
    Assert.assertEquals("23:" + Long.MAX_VALUE + ":", txns.writeToString());
}
Also used : ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ValidCompactorTxnList(org.apache.hadoop.hive.common.ValidCompactorTxnList) Test(org.junit.Test)
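Since writeToString and readFromString are declared as a serialization pair on ValidTxnList, the strings asserted above should survive a round trip. A small sketch of that check, under the assumption that readFromString restores all of the state that writeToString captures:

@Test
public void writeReadRoundTrip() {
    ValidTxnList original = new ValidCompactorTxnList(new long[] { 9, 7, 10, Long.MAX_VALUE }, 8);
    // Per the test above, this is "8:" + Long.MAX_VALUE + ":7".
    String serialized = original.writeToString();

    ValidTxnList restored = new ValidCompactorTxnList();
    restored.readFromString(serialized);

    // Assumption: the string form is lossless for the retained state, so re-serializing reproduces it.
    Assert.assertEquals(serialized, restored.writeToString());
}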

Aggregations

ValidTxnList (org.apache.hadoop.hive.common.ValidTxnList): 22 usages
Test (org.junit.Test): 13 usages
ValidCompactorTxnList (org.apache.hadoop.hive.common.ValidCompactorTxnList): 8 usages
ValidReadTxnList (org.apache.hadoop.hive.common.ValidReadTxnList): 6 usages
AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils): 5 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages
RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier): 4 usages
Path (org.apache.hadoop.fs.Path): 3 usages
Partition (org.apache.hadoop.hive.metastore.api.Partition): 3 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 3 usages
IOException (java.io.IOException): 2 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 2 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 2 usages
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo): 2 usages
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat): 2 usages
OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat): 2 usages
OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct): 2 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 2 usages