Search in sources:

Example 1 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class TestStreaming method checkDataWritten2.

/**
 * @param validationQuery query to read from the table to compare data against {@code records}
 * @param records expected data; each row is a CSV list of values
 */
private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles, String validationQuery, boolean vectorize, String... records) throws Exception {
    ValidWriteIdList txns = msClient.getValidWriteIds(AcidUtils.getFullTableName(dbName, tblName));
    AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, txns);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta pd : current) {
        System.out.println(pd.getPath().toString());
    }
    Assert.assertEquals(numExpectedFiles, current.size());
    // find the absolute minimum and maximum write IDs across the current deltas
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta pd : current) {
        if (pd.getMaxWriteId() > max) {
            max = pd.getMaxWriteId();
        }
        if (pd.getMinWriteId() < min) {
            min = pd.getMinWriteId();
        }
    }
    Assert.assertEquals(minTxn, min);
    Assert.assertEquals(maxTxn, max);
    boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
    if (vectorize) {
        conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    }
    String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
    for (String strategy : ((Validator.StringSet) HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
        // run the query with each ORC split strategy and make sure the results don't differ
        conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
        List<String> actualResult = queryTable(driver, validationQuery);
        for (int i = 0; i < actualResult.size(); i++) {
            Assert.assertEquals("diff at [" + i + "].  actual=" + actualResult + " expected=" + Arrays.toString(records), records[i], actualResult.get(i));
        }
    }
    conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
}
Also used : ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) OrcAcidUtils(org.apache.orc.impl.OrcAcidUtils) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)
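
A ValidWriteIdList also round-trips through a compact string form via writeToString/readFromString, which is how a snapshot like the one fetched above travels between processes. Below is a minimal sketch using the ValidReaderWriteIdList implementation with a made-up table name; the colon-separated layout described in the comment (table, high watermark, minimum open write ID, open IDs, aborted IDs) is an assumption based on the writeToString format, so treat it as illustrative rather than authoritative.

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class WriteIdListParseSketch {
    public static void main(String[] args) {
        // Assumed serialized layout: tableName:highWatermark:minOpenWriteId:openIds:abortedIds
        // (minOpenWriteId is Long.MAX_VALUE when nothing is open). Here write ID 4 is open.
        String serialized = "mydb.mytable:5:4:4:";
        ValidWriteIdList writeIds = new ValidReaderWriteIdList(serialized);
        System.out.println(writeIds.getHighWatermark()); // 5
        System.out.println(writeIds.isWriteIdValid(3));  // true: at or below HWM, not an exception
        System.out.println(writeIds.isWriteIdValid(4));  // false: open at snapshot time
        System.out.println(writeIds.isWriteIdValid(6));  // false: above the high watermark
    }
}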

Example 2 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class AcidUtils method setValidWriteIdList.

/**
 * Set the valid write id list for the current table scan.
 */
public static void setValidWriteIdList(Configuration conf, TableScanDesc tsDesc) {
    if (tsDesc.isTranscationalTable()) {
        String dbName = tsDesc.getDatabaseName();
        String tableName = tsDesc.getTableName();
        ValidWriteIdList validWriteIdList = getTableValidWriteIdList(conf, AcidUtils.getFullTableName(dbName, tableName));
        if (validWriteIdList != null) {
            setValidWriteIdList(conf, validWriteIdList);
        } else {
            // Log error if the acid table is missing from the ValidWriteIdList conf
            LOG.error("setValidWriteIdList on table: " + AcidUtils.getFullTableName(dbName, tableName) + " isAcidTable: " + true + " acidProperty: " + getAcidOperationalProperties(conf) + " couldn't find the ValidWriteId list from ValidTxnWriteIdList: " + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
            throw new IllegalStateException("ACID table: " + AcidUtils.getFullTableName(dbName, tableName) + " is missing from the ValidWriteIdList config: " + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
        }
    }
}
Also used : ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList)
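
AcidUtils.setValidWriteIdList(conf, validWriteIdList) publishes the snapshot to downstream readers through the Configuration. A rough sketch of that round trip, under the assumption (suggested but not confirmed by the code above) that the overload stores writeToString() under ValidWriteIdList.VALID_WRITEIDS_KEY:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class ConfRoundTripSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        ValidWriteIdList writeIds = new ValidReaderWriteIdList("mydb.mytable:7:" + Long.MAX_VALUE + "::");

        // Publish the snapshot; assumed to mirror what AcidUtils.setValidWriteIdList does.
        conf.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.writeToString());

        // A reader on the other side reconstructs the same snapshot.
        ValidWriteIdList restored = new ValidReaderWriteIdList(conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY));
        System.out.println(restored.getHighWatermark()); // 7
    }
}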

Example 3 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class HiveInputFormat method addSplitsForGroup.

/*
   * addSplitsForGroup collects separate calls to setInputPaths into one where possible.
   * The reason for this is that it is faster on some InputFormats. E.g., ORC will start
   * a thread pool to do the work, and calling it multiple times would needlessly create
   * many thread pools.
   */
private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf, InputFormat inputFormat, Class<? extends InputFormat> inputFormatClass, int splits, TableDesc table, List<InputSplit> result) throws IOException {
    ValidWriteIdList validWriteIdList = AcidUtils.getTableValidWriteIdList(conf, table.getTableName());
    ValidWriteIdList validMmWriteIdList;
    if (AcidUtils.isInsertOnlyTable(table.getProperties())) {
        if (validWriteIdList == null) {
            throw new IOException("Insert-Only table: " + table.getTableName() + " is missing from the ValidWriteIdList config: " + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
        }
        validMmWriteIdList = validWriteIdList;
    } else {
        // for non-MM case
        validMmWriteIdList = null;
    }
    try {
        Utilities.copyTablePropertiesToConf(table, conf);
        if (tableScan != null) {
            AcidUtils.setAcidOperationalProperties(conf, tableScan.getConf().isTranscationalTable(), tableScan.getConf().getAcidOperationalProperties());
            if (tableScan.getConf().isTranscationalTable() && (validWriteIdList == null)) {
                throw new IOException("Acid table: " + table.getTableName() + " is missing from the ValidWriteIdList config: " + conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
            }
            if (validWriteIdList != null) {
                AcidUtils.setValidWriteIdList(conf, validWriteIdList);
            }
        }
    } catch (HiveException e) {
        throw new IOException(e);
    }
    if (tableScan != null) {
        pushFilters(conf, tableScan, this.mrwork);
    }
    Path[] finalDirs = processPathsForMmRead(dirs, conf, validMmWriteIdList);
    if (finalDirs == null) {
        // No valid inputs.
        return;
    }
    FileInputFormat.setInputPaths(conf, finalDirs);
    conf.setInputFormat(inputFormat.getClass());
    int headerCount = 0;
    int footerCount = 0;
    if (table != null) {
        headerCount = Utilities.getHeaderCount(table);
        footerCount = Utilities.getFooterCount(table, conf);
        if (headerCount != 0 || footerCount != 0) {
            // Input file has a header or footer, so it cannot be split.
            HiveConf.setLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, Long.MAX_VALUE);
        }
    }
    InputSplit[] iss = inputFormat.getSplits(conf, splits);
    for (InputSplit is : iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
    }
    if (iss.length == 0 && finalDirs.length > 0 && conf.getBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, false)) {
        // If there are no inputs, the execution engine skips the operator tree.
        // To prevent that, an opaque ZeroRows input is added here when needed.
        result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(finalDirs[0].toString()), ZeroRowsInputFormat.class.getName()));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) IOException(java.io.IOException) InputSplit(org.apache.hadoop.mapred.InputSplit)
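
Both IOException paths above fire when a table's entry is absent from the ValidTxnWriteIdList carried in the conf, which maps full table names to per-table snapshots for one query transaction. A small sketch of that lookup, assuming ValidTxnWriteIdList exposes a txn-ID constructor plus addTableValidWriteIdList/getTableValidWriteIdList as used elsewhere in these examples:

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class TxnWriteIdListSketch {
    public static void main(String[] args) {
        // One ValidTxnWriteIdList carries the per-table snapshots for a single query txn.
        ValidTxnWriteIdList txnWriteIds = new ValidTxnWriteIdList(42L);
        txnWriteIds.addTableValidWriteIdList(
                new ValidReaderWriteIdList("mydb.orders:9:" + Long.MAX_VALUE + "::"));

        // This is the lookup whose null result triggers the errors above.
        ValidWriteIdList forOrders = txnWriteIds.getTableValidWriteIdList("mydb.orders");
        System.out.println(forOrders.getHighWatermark());                         // 9
        System.out.println(txnWriteIds.getTableValidWriteIdList("mydb.missing")); // null
    }
}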

Example 4 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class TestDbTxnManager2 method testValidWriteIdListSnapshot.

@Test
public void testValidWriteIdListSnapshot() throws Exception {
    // Create a transactional table
    dropTable(new String[] { "temp.T7" });
    CommandProcessorResponse cpr = driver.run("create database if not exists temp");
    checkCmdOnDriver(cpr);
    cpr = driver.run("create table if not exists temp.T7(a int, b int) clustered by(b) into 2 buckets stored as orc " + "TBLPROPERTIES ('transactional'='true')");
    checkCmdOnDriver(cpr);
    // Open a base txn which allocates a write ID and is then committed.
    long baseTxnId = txnMgr.openTxn(ctx, "u0");
    long baseWriteId = txnMgr.getTableWriteId("temp", "T7");
    Assert.assertEquals(1, baseWriteId);
    // commit baseTxnId
    txnMgr.commitTxn();
    // Open a txn with no writes.
    HiveTxnManager txnMgr1 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
    long underHwmOpenTxnId = txnMgr1.openTxn(ctx, "u1");
    Assert.assertTrue("Invalid txn ID", underHwmOpenTxnId > baseTxnId);
    // Open the txn under test for ValidWriteIdList and capture the ValidTxnList snapshot at open time.
    // Verify the ValidWriteIdList with no open/aborted write txns on this table.
    // The write ID of the committed txn should be valid.
    HiveTxnManager txnMgr2 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
    long testTxnId = txnMgr2.openTxn(ctx, "u2");
    Assert.assertTrue("Invalid txn ID", testTxnId > underHwmOpenTxnId);
    String testValidTxns = txnMgr2.getValidTxns().toString();
    ValidWriteIdList testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns).getTableValidWriteIdList("temp.t7");
    Assert.assertEquals(baseWriteId, testValidWriteIds.getHighWatermark());
    Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
    // Open a txn which allocates a write ID and remains open.
    HiveTxnManager txnMgr3 = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
    long aboveHwmOpenTxnId = txnMgr3.openTxn(ctx, "u3");
    Assert.assertTrue("Invalid txn ID", aboveHwmOpenTxnId > testTxnId);
    long aboveHwmOpenWriteId = txnMgr3.getTableWriteId("temp", "T7");
    Assert.assertEquals(2, aboveHwmOpenWriteId);
    // Allocate a writeId to the txn under the HWM. It gets an ID greater than that of the txn opened above the HWM.
    long underHwmOpenWriteId = txnMgr1.getTableWriteId("temp", "T7");
    Assert.assertEquals(3, underHwmOpenWriteId);
    // Verify the ValidWriteIdList with one open txn on this table. The write ID of the open txn should be invalid.
    testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns).getTableValidWriteIdList("temp.t7");
    Assert.assertEquals(underHwmOpenWriteId, testValidWriteIds.getHighWatermark());
    Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
    Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(underHwmOpenWriteId));
    Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(aboveHwmOpenWriteId));
    // Commit the txn under the HWM.
    // Verify that the writeId of this committed txn is still invalid for the test txn's snapshot.
    txnMgr1.commitTxn();
    testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns).getTableValidWriteIdList("temp.t7");
    Assert.assertEquals(underHwmOpenWriteId, testValidWriteIds.getHighWatermark());
    Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
    Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(underHwmOpenWriteId));
    Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(aboveHwmOpenWriteId));
    // Allocate a writeId from the test txn and then verify the ValidWriteIdList.
    // Write IDs of the committed txn and of the test txn itself should be valid, but the writeId of the open txn should be invalid.
    // The writeId of the recently committed txn, which was still open when the ValidTxnList snapshot was taken, should be invalid as well.
    long testWriteId = txnMgr2.getTableWriteId("temp", "T7");
    Assert.assertEquals(4, testWriteId);
    testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns).getTableValidWriteIdList("temp.t7");
    Assert.assertEquals(testWriteId, testValidWriteIds.getHighWatermark());
    Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(baseWriteId));
    Assert.assertTrue("Invalid write ID list", testValidWriteIds.isWriteIdValid(testWriteId));
    Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(underHwmOpenWriteId));
    Assert.assertFalse("Invalid write ID list", testValidWriteIds.isWriteIdValid(aboveHwmOpenWriteId));
    txnMgr2.commitTxn();
    txnMgr3.commitTxn();
    cpr = driver.run("drop database if exists temp cascade");
    checkCmdOnDriver(cpr);
}
Also used : ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) Test(org.junit.Test)
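
The assertions above all reduce to one snapshot rule: a write ID is valid for a reader iff it is at or below the snapshot's high watermark and is not among the open/aborted exceptions captured when the snapshot was taken. A toy model of that rule (plain Java, not Hive code) using the values from the test:

import java.util.Arrays;

public class SnapshotRuleSketch {
    // Exceptions must be sorted for binarySearch.
    static boolean isWriteIdValid(long writeId, long highWatermark, long[] exceptions) {
        return writeId <= highWatermark && Arrays.binarySearch(exceptions, writeId) < 0;
    }

    public static void main(String[] args) {
        long hwm = 3;               // underHwmOpenWriteId, after txnMgr1 commits
        long[] exceptions = {2, 3}; // aboveHwmOpenWriteId and underHwmOpenWriteId
        System.out.println(isWriteIdValid(1, hwm, exceptions)); // true: committed base write ID
        System.out.println(isWriteIdValid(2, hwm, exceptions)); // false: txn still open
        System.out.println(isWriteIdValid(3, hwm, exceptions)); // false: committed after the snapshot was taken
        System.out.println(isWriteIdValid(4, hwm, exceptions)); // false: above the high watermark
    }
}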

Example 5 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class TestCompactor method checkExpectedTxnsPresent.

private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty, String columnTypesProperty, int bucket, long min, long max, int numBuckets) throws IOException {
    ValidWriteIdList writeIdList = new ValidWriteIdList() {

        @Override
        public String getTableName() {
            return "AcidTable";
        }

        @Override
        public boolean isWriteIdValid(long writeid) {
            return true;
        }

        @Override
        public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
            return RangeResponse.ALL;
        }

        @Override
        public String writeToString() {
            return "";
        }

        @Override
        public void readFromString(String src) {
        }

        @Override
        public Long getMinOpenWriteId() {
            return null;
        }

        @Override
        public long getHighWatermark() {
            return Long.MAX_VALUE;
        }

        @Override
        public long[] getInvalidWriteIds() {
            return new long[0];
        }

        @Override
        public boolean isValidBase(long writeid) {
            return true;
        }

        @Override
        public boolean isWriteIdAborted(long writeid) {
            return true;
        }

        @Override
        public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) {
            return RangeResponse.ALL;
        }
    };
    OrcInputFormat aif = new OrcInputFormat();
    Configuration conf = new Configuration();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
    conf.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(numBuckets));
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    AcidInputFormat.RawReader<OrcStruct> reader = aif.getRawReader(conf, true, bucket, writeIdList, base, deltas);
    RecordIdentifier identifier = reader.createKey();
    OrcStruct value = reader.createValue();
    long currentTxn = min;
    boolean seenCurrentTxn = false;
    while (reader.next(identifier, value)) {
        if (!seenCurrentTxn) {
            Assert.assertEquals(currentTxn, identifier.getWriteId());
            seenCurrentTxn = true;
        }
        if (currentTxn != identifier.getWriteId()) {
            Assert.assertEquals(currentTxn + 1, identifier.getWriteId());
            currentTxn++;
        }
    }
    Assert.assertEquals(max, currentTxn);
}
Also used : AcidInputFormat(org.apache.hadoop.hive.ql.io.AcidInputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) Configuration(org.apache.hadoop.conf.Configuration) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList)
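
A test that only needs an "everything is visible" snapshot does not have to stub every method as above; ValidReaderWriteIdList can often be built directly from a serialized string. A minimal sketch, again assuming the colon-separated writeToString layout (table, high watermark, minimum open write ID, open IDs, aborted IDs):

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class PermissiveSnapshotSketch {
    public static void main(String[] args) {
        // High watermark at Long.MAX_VALUE with no exceptions: every write ID reads as valid,
        // roughly what the anonymous stub above fakes (minus its always-true aborted answers).
        ValidWriteIdList everything = new ValidReaderWriteIdList(
                "AcidTable:" + Long.MAX_VALUE + ":" + Long.MAX_VALUE + "::");
        System.out.println(everything.isWriteIdValid(123));               // true
        System.out.println(everything.isWriteIdRangeValid(1, 1_000_000)); // ALL
    }
}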

Aggregations

ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList) 16
AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils) 6
Path (org.apache.hadoop.fs.Path) 5
OrcAcidUtils (org.apache.orc.impl.OrcAcidUtils) 5
Configuration (org.apache.hadoop.conf.Configuration) 4
ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList) 4
RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier) 4
IOException (java.io.IOException) 3
Partition (org.apache.hadoop.hive.metastore.api.Partition) 3
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 3
Table (org.apache.hadoop.hive.metastore.api.Table) 3
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat) 3
FileSystem (org.apache.hadoop.fs.FileSystem) 2
GetValidWriteIdsRequest (org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest) 2
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 2
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo) 2
OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) 2
OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct) 2
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 2
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 2