Search in sources:

Example 11 with AcidDirectory

Use of org.apache.hadoop.hive.ql.io.AcidDirectory in the Apache Hive project.

The method checkDataWritten2 from the class TestStreaming.

/**
 * Validates data written to a partition: checks the ACID directory layout (no obsolete or
 * original files, expected number of current deltas, expected write-id range), then runs
 * {@code validationQuery} under every ORC split strategy and compares the results against
 * {@code records}.
 *
 * @param partitionPath    partition directory whose ACID state is inspected
 * @param minTxn           expected minimum write id (before the +1 DDL adjustment)
 * @param maxTxn           expected maximum write id (before the +1 DDL adjustment)
 * @param numExpectedFiles expected number of current delta directories
 * @param validationQuery  query to read from table to compare data against {@code records}
 * @param vectorize        whether to enable vectorized execution for the validation query
 * @param records          expected data.  each row is a CSV list of values
 * @throws Exception if reading the ACID state or running the validation query fails
 */
private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles, String validationQuery, boolean vectorize, String... records) throws Exception {
    AcidDirectory dir = AcidUtils.getAcidState(null, partitionPath, conf, getTransactionContext(conf), null, false);
    Assert.assertEquals(0, dir.getObsolete().size());
    Assert.assertEquals(0, dir.getOriginalFiles().size());
    List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories();
    System.out.println("Files found: ");
    for (AcidUtils.ParsedDelta pd : current) {
        System.out.println(pd.getPath().toString());
    }
    Assert.assertEquals(numExpectedFiles, current.size());
    // find the overall [min, max] write-id range across all current delta directories
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta pd : current) {
        if (pd.getMaxWriteId() > max) {
            max = pd.getMaxWriteId();
        }
        if (pd.getMinWriteId() < min) {
            min = pd.getMinWriteId();
        }
    }
    // We are doing +1, as DDL operation will also advance the write Id now.
    Assert.assertEquals(minTxn + 1, min);
    Assert.assertEquals(maxTxn + 1, max);
    // Snapshot the config BEFORE mutating it, and restore it in a finally block:
    // without the finally, any assertion failure below would leak the modified
    // vectorization/split-strategy settings into subsequent tests.
    boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
    String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY);
    try {
        if (vectorize) {
            conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
        }
        for (String strategy : ((Validator.StringSet) HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()).getExpected()) {
            // run the query with each split strategy - results must be identical across strategies
            conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase());
            List<String> actualResult = queryTable(driver, validationQuery);
            // assert row counts match first: iterating only over actualResult would silently
            // miss absent rows and throw ArrayIndexOutOfBounds on extra rows
            Assert.assertEquals("row count mismatch.  actual=" + actualResult + " expected=" + Arrays.toString(records), records.length, actualResult.size());
            for (int i = 0; i < actualResult.size(); i++) {
                Assert.assertEquals("diff at [" + i + "].  actual=" + actualResult + " expected=" + Arrays.toString(records), records[i], actualResult.get(i));
            }
        }
    } finally {
        conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy);
        conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled);
    }
}
Also used : AcidDirectory(org.apache.hadoop.hive.ql.io.AcidDirectory) OrcAcidUtils(org.apache.orc.impl.OrcAcidUtils) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)

Aggregations

AcidDirectory (org.apache.hadoop.hive.ql.io.AcidDirectory)11 FileSystem (org.apache.hadoop.fs.FileSystem)5 Path (org.apache.hadoop.fs.Path)5 ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList)5 AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils)4 ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList)3 Table (org.apache.hadoop.hive.metastore.api.Table)3 AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat)3 OrcAcidUtils (org.apache.orc.impl.OrcAcidUtils)3 Test (org.junit.Test)3 IOException (java.io.IOException)2 PrivilegedExceptionAction (java.security.PrivilegedExceptionAction)2 BitSet (java.util.BitSet)2 Configuration (org.apache.hadoop.conf.Configuration)2 ValidReadTxnList (org.apache.hadoop.hive.common.ValidReadTxnList)2 HiveConf (org.apache.hadoop.hive.conf.HiveConf)2 CompactionType (org.apache.hadoop.hive.metastore.api.CompactionType)2 Partition (org.apache.hadoop.hive.metastore.api.Partition)2 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)2 CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo)2