use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.
the class TestReplicationScenariosAcidTables method testTargetDbReplIncompatible.
private void testTargetDbReplIncompatible(boolean setReplIncompProp) throws Throwable {
HiveConf primaryConf = primary.getConf();
TxnStore txnHandler = TxnUtils.getTxnStore(primaryConf);
primary.run("use " + primaryDbName).run("CREATE TABLE t1(a string) STORED AS TEXTFILE").dump(primaryDbName);
replica.load(replicatedDbName, primaryDbName);
if (setReplIncompProp) {
replica.run("ALTER DATABASE " + replicatedDbName + " SET DBPROPERTIES('" + ReplConst.REPL_INCOMPATIBLE + "'='false')");
assert "false".equals(replica.getDatabase(replicatedDbName).getParameters().get(ReplConst.REPL_INCOMPATIBLE));
}
assertFalse(MetaStoreUtils.isDbReplIncompatible(replica.getDatabase(replicatedDbName)));
Long sourceTxnId = openTxns(1, txnHandler, primaryConf).get(0);
txnHandler.abortTxn(new AbortTxnRequest(sourceTxnId));
try {
sourceTxnId = openTxns(1, txnHandler, primaryConf).get(0);
primary.dump(primaryDbName);
replica.load(replicatedDbName, primaryDbName);
assertFalse(MetaStoreUtils.isDbReplIncompatible(replica.getDatabase(replicatedDbName)));
Long targetTxnId = txnHandler.getTargetTxnId(HiveUtils.getReplPolicy(replicatedDbName), sourceTxnId);
txnHandler.abortTxn(new AbortTxnRequest(targetTxnId));
assertTrue(MetaStoreUtils.isDbReplIncompatible(replica.getDatabase(replicatedDbName)));
WarehouseInstance.Tuple dumpData = primary.dump(primaryDbName);
assertFalse(ReplUtils.failedWithNonRecoverableError(new Path(dumpData.dumpLocation), conf));
replica.loadFailure(replicatedDbName, primaryDbName, null, ErrorMsg.REPL_INCOMPATIBLE_EXCEPTION.getErrorCode());
assertTrue(ReplUtils.failedWithNonRecoverableError(new Path(dumpData.dumpLocation), conf));
primary.dumpFailure(primaryDbName);
assertTrue(ReplUtils.failedWithNonRecoverableError(new Path(dumpData.dumpLocation), conf));
} finally {
txnHandler.abortTxn(new AbortTxnRequest(sourceTxnId));
}
}
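The MetaStoreUtils.isDbReplIncompatible check that this test exercises is not shown on this page. As a rough illustration only, here is a minimal sketch of such a check, assuming it reduces to reading the ReplConst.REPL_INCOMPATIBLE database parameter and treating the value "true" as incompatible; the parameter key shown is an assumption and the real implementation may differ.
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.Database;

// Hypothetical stand-in for MetaStoreUtils.isDbReplIncompatible: a database is
// treated as repl-incompatible once its REPL_INCOMPATIBLE parameter is "true".
public final class ReplIncompatibilityCheckSketch {
    // Assumed parameter key; the real constant lives in ReplConst.REPL_INCOMPATIBLE.
    private static final String REPL_INCOMPATIBLE = "repl.incompatible";

    public static boolean isDbReplIncompatible(Database db) {
        if (db == null || db.getParameters() == null) {
            return false;
        }
        Map<String, String> params = db.getParameters();
        return "true".equalsIgnoreCase(params.get(REPL_INCOMPATIBLE));
    }
}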
use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.
the class TestReplicationScenariosAcidTables method testCompleteFailoverWithReverseBootstrap.
@Test
public void testCompleteFailoverWithReverseBootstrap() throws Throwable {
HiveConf primaryConf = primary.getConf();
TxnStore txnHandler = TxnUtils.getTxnStore(primaryConf);
List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName)
    .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc "
        + "tblproperties (\"transactional\"=\"true\")")
    .run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", "
        + "\"transactional_properties\"=\"insert_only\")")
    .dump(primaryDbName, failoverConfigs);
// This dump is not failover ready, as the target db can be used for replication only after the first incremental load.
FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
Path dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
replica.load(replicatedDbName, primaryDbName, failoverConfigs)
    .run("use " + replicatedDbName)
    .run("show tables")
    .verifyResults(new String[] { "t1", "t2" })
    .run("repl status " + replicatedDbName)
    .verifyResult(dumpData.lastReplicationId);
Database db = replica.getDatabase(replicatedDbName);
assertTrue(MetaStoreUtils.isTargetOfReplication(db));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(db));
primary.run("use " + primaryDbName).run("insert into t1 values(1)").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)");
/**
 * Open transactions can be of two types:
 * Case 1) Txns that have not acquired HIVE LOCKS, or that belong to a different db: these txns are captured in
 * the _failovermetadata file inside the dump directory.
 * Case 2) Txns that have acquired HIVE LOCKS and belong to the db under replication: these txns are aborted by
 * Hive as part of the dump operation.
 */
// Open 3 txns for a database that is not under replication
int numTxnsForSecDb = 3;
List<Long> txnsForSecDb = openTxns(numTxnsForSecDb, txnHandler, primaryConf);
// Allocate write ids for both tables of the secondary db for the 3 txns
// t1=3 and t2=3 (one write id per txn)
Map<String, Long> tablesInSecDb = new HashMap<>();
tablesInSecDb.put("t1", (long) numTxnsForSecDb);
tablesInSecDb.put("t2", (long) numTxnsForSecDb);
List<Long> lockIdsForSecDb = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName + "_extra", tablesInSecDb, txnHandler, txnsForSecDb, primaryConf);
// Open 2 txns for the primary db
int numTxnsForPrimaryDb = 2;
List<Long> txnsForPrimaryDb = openTxns(numTxnsForPrimaryDb, txnHandler, primaryConf);
// Allocate write ids for both tables of the primary db for the 2 txns
// t1=2+1(earlier insert) and t2=2+2(earlier inserts)
Map<String, Long> tablesInPrimaryDb = new HashMap<>();
tablesInPrimaryDb.put("t1", (long) numTxnsForPrimaryDb + 1);
tablesInPrimaryDb.put("t2", (long) numTxnsForPrimaryDb + 2);
List<Long> lockIdsForPrimaryDb = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tablesInPrimaryDb, txnHandler, txnsForPrimaryDb, primaryConf);
// Open 1 txn with no hive locks acquired
List<Long> txnsWithNoLocks = openTxns(1, txnHandler, primaryConf);
dumpData = primary.dump(primaryDbName, failoverConfigs);
dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
assertTrue(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
FailoverMetaData failoverMD = new FailoverMetaData(dumpPath, conf);
List<Long> openTxns = failoverMD.getOpenTxns();
List<Long> txnsAborted = failoverMD.getAbortedTxns();
Assert.assertEquals(2, txnsAborted.size());
assertTrue(txnsAborted.containsAll(txnsForPrimaryDb));
Assert.assertEquals(4, openTxns.size());
assertTrue(openTxns.containsAll(txnsForSecDb));
assertTrue(openTxns.containsAll(txnsWithNoLocks));
Assert.assertEquals(txnsWithNoLocks, failoverMD.getTxnsWithoutLock());
// Only txnsForPrimaryDb should have been aborted by the dump operation; txnsForSecDb and txnsWithNoLocks stay open.
verifyAllOpenTxnsAborted(txnsForPrimaryDb, primaryConf);
verifyAllOpenTxnsNotAborted(txnsForSecDb, primaryConf);
verifyAllOpenTxnsNotAborted(txnsWithNoLocks, primaryConf);
// Abort the txns that are still open
txnHandler.abortTxns(new AbortTxnsRequest(txnsForSecDb));
txnHandler.abortTxns(new AbortTxnsRequest(txnsWithNoLocks));
verifyAllOpenTxnsAborted(txnsForSecDb, primaryConf);
verifyAllOpenTxnsAborted(txnsWithNoLocks, primaryConf);
releaseLocks(txnHandler, lockIdsForSecDb);
replica.load(replicatedDbName, primaryDbName, failoverConfigs)
    .run("use " + replicatedDbName)
    .run("show tables")
    .verifyResults(new String[] { "t1", "t2" })
    .run("repl status " + replicatedDbName)
    .verifyResult(dumpData.lastReplicationId)
    .run("select id from t1")
    .verifyResults(new String[] { "1" })
    .run("select rank from t2 order by rank")
    .verifyResults(new String[] { "10", "11" });
db = replica.getDatabase(replicatedDbName);
assertTrue(MetaStoreUtils.isTargetOfReplication(db));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
Path dbRootDir = new Path(dumpData.dumpLocation).getParent();
long prevDumpDirModifTime = getLatestDumpDirModifTime(dbRootDir);
primary.run("REPL DUMP " + primaryDbName + " with ('" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "' = 'true')");
Assert.assertEquals(dumpData.dumpLocation, ReplUtils.getLatestDumpPath(dbRootDir, conf).toString());
Assert.assertEquals(prevDumpDirModifTime, getLatestDumpDirModifTime(dbRootDir));
dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
primary.run("drop database if exists " + primaryDbName + " cascade");
Assert.assertNull(primary.getDatabase(primaryDbName));
assertFalse(ReplChangeManager.isSourceOfReplication(replica.getDatabase(replicatedDbName)));
WarehouseInstance.Tuple reverseDumpData = replica.run("create table t3 (id int)")
    .run("insert into t2 partition(name='Bob') values(20)")
    .run("insert into t3 values (2)")
    .dump(replicatedDbName);
assertNotEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
Assert.assertEquals(DumpType.BOOTSTRAP, new DumpMetaData(dumpPath, conf).getDumpType());
assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
db = replica.getDatabase(replicatedDbName);
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
assertFalse(MetaStoreUtils.isTargetOfReplication(db));
primary.load(primaryDbName, replicatedDbName).run("use " + primaryDbName).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" }).run("repl status " + primaryDbName).verifyResult(reverseDumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11", "20" }).run("select id from t3").verifyResults(new String[] { "2" });
Database primaryDb = primary.getDatabase(primaryDbName);
Assert.assertNotNull(primaryDb);
assertTrue(ReplUtils.isFirstIncPending(primaryDb.getParameters()));
assertTrue(MetaStoreUtils.isTargetOfReplication(primaryDb));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(primaryDb));
assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
assertFalse(ReplChangeManager.isSourceOfReplication(primaryDb));
assertTrue(ReplChangeManager.isSourceOfReplication(replica.getDatabase(replicatedDbName)));
reverseDumpData = replica.run("insert into t3 values (3)")
    .run("insert into t2 partition(name='Bob') values(30)")
    .dump(replicatedDbName);
assertFalse(MetaStoreUtils.isDbBeingFailedOver(replica.getDatabase(replicatedDbName)));
primary.load(primaryDbName, replicatedDbName).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11", "20", "30" }).run("select id from t3").verifyResults(new String[] { "2", "3" }).run("repl status " + primaryDbName).verifyResult(reverseDumpData.lastReplicationId);
assertFalse(ReplUtils.isFirstIncPending(primary.getDatabase(primaryDbName).getParameters()));
}
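Several helpers used above (openTxns, allocateWriteIdsForTablesAndAcquireLocks, releaseLocks) are defined elsewhere in the test class. As a hedged sketch of the first one, assuming it maps directly onto the public TxnStore API (OpenTxnRequest/OpenTxnsResponse); the user and host values are illustrative assumptions:
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Sketch: open numTxns transactions through the TxnStore and return their ids.
// The conf parameter mirrors the call sites above; this sketch does not use it.
private static List<Long> openTxns(int numTxns, TxnStore txnHandler, HiveConf conf) throws Exception {
    OpenTxnRequest rqst = new OpenTxnRequest(numTxns, "hive", "localhost");
    OpenTxnsResponse resp = txnHandler.openTxns(rqst);
    return resp.getTxn_ids();
}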
use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.
the class TestReplicationScenariosAcidTables method testAcidTablesBootstrapWithOpenTxnsTimeout.
@Test
public void testAcidTablesBootstrapWithOpenTxnsTimeout() throws Throwable {
int numTxns = 5;
HiveConf primaryConf = primary.getConf();
TxnStore txnHandler = TxnUtils.getTxnStore(primaryConf);
// Open 5 txns
List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
// Create two tables, one partitioned and one not; also cover both table types, full ACID and MM (insert-only).
primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("insert into t1 values(1)").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)");
// Allocate write ids for both tables t1 and t2 for all txns
// t1=5+1(insert) and t2=5+2(insert)
Map<String, Long> tables = new HashMap<>();
tables.put("t1", numTxns + 1L);
tables.put("t2", numTxns + 2L);
List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tables, txnHandler, txns, primaryConf);
// Bootstrap dump with an open txn timeout of 1s.
List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='1s'");
WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
// After bootstrap dump, all the opened txns should be aborted. Verify it.
verifyAllOpenTxnsAborted(txns, primaryConf);
// Release the locks
releaseLocks(txnHandler, lockIds);
verifyNextId(tables, primaryDbName, primaryConf);
// Bootstrap load which should also replicate the aborted write ids on both tables.
HiveConf replicaConf = replica.getConf();
replica.load(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(bootstrapDump.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11" });
// Verify if HWM is properly set after REPL LOAD
verifyNextId(tables, replicatedDbName, replicaConf);
// Verify if all the aborted write ids are replicated to the replicated DB
for (Map.Entry<String, Long> entry : tables.entrySet()) {
entry.setValue((long) numTxns);
}
verifyWriteIdsForTables(tables, replicaConf, replicatedDbName);
// Verify if entries added in COMPACTION_QUEUE for each table/partition
// t1-> 1 entry and t2-> 2 entries (1 per partition)
tables.clear();
tables.put("t1", 1L);
tables.put("t2", 2L);
verifyCompactionQueue(tables, replicatedDbName, replicaConf);
}
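releaseLocks is likewise defined elsewhere in the test class. A plausible sketch, under the assumption that it simply unlocks each lock id through TxnStore.unlock:
import java.util.List;
import org.apache.hadoop.hive.metastore.api.UnlockRequest;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Sketch: release every lock acquired by allocateWriteIdsForTablesAndAcquireLocks.
private static void releaseLocks(TxnStore txnHandler, List<Long> lockIds) throws Exception {
    for (Long lockId : lockIds) {
        txnHandler.unlock(new UnlockRequest(lockId));
    }
}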
use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.
the class TestReplicationScenariosAcidTablesBootstrap method testAcidTablesBootstrapDuringIncrementalWithOpenTxnsTimeout.
@Test
public void testAcidTablesBootstrapDuringIncrementalWithOpenTxnsTimeout() throws Throwable {
// Take a dump without ACID tables
WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, dumpWithoutAcidClause);
LOG.info(testName.getMethodName() + ": loading dump without acid tables.");
replica.load(replicatedDbName, primaryDbName);
// Open concurrent transactions, create data for incremental and take an incremental dump
// with ACID table bootstrap.
int numTxns = 5;
HiveConf primaryConf = primary.getConf();
TxnStore txnHandler = TxnUtils.getTxnStore(primaryConf);
// Open 5 txns
List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
prepareIncNonAcidData(primaryDbName);
prepareIncAcidData(primaryDbName);
// Allocate write ids for tables t1 and t2 for all txns
// t1=5+2 (inserts) and t2=5+6 (inserts plus an alter add column; an alter now opens a transaction too)
Map<String, Long> tables = new HashMap<>();
tables.put("t1", numTxns + 2L);
tables.put("t2", numTxns + 6L);
List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tables, txnHandler, txns, primaryConf);
// Incremental dump with ACID table bootstrap, with the open txn timeout set to 1s.
List<String> withConfigs = new LinkedList<>(dumpWithAcidBootstrapClause);
withConfigs.add("'hive.repl.bootstrap.dump.open.txn.timeout'='1s'");
WarehouseInstance.Tuple incDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
// After bootstrap dump, all the opened txns should be aborted. Verify it.
verifyAllOpenTxnsAborted(txns, primaryConf);
releaseLocks(txnHandler, lockIds);
verifyNextId(tables, primaryDbName, primaryConf);
// Incremental load with ACID bootstrap should also replicate the aborted write ids on
// tables t1 and t2
HiveConf replicaConf = replica.getConf();
LOG.info(testName.getMethodName() + ": loading incremental dump with ACID bootstrap.");
replica.load(replicatedDbName, primaryDbName);
verifyIncLoad(replicatedDbName, incDump.lastReplicationId);
// Verify if HWM is properly set after REPL LOAD
verifyNextId(tables, replicatedDbName, replicaConf);
// Verify if all the aborted write ids are replicated to the replicated DB
for (Map.Entry<String, Long> entry : tables.entrySet()) {
entry.setValue((long) numTxns);
}
verifyWriteIdsForTables(tables, replicaConf, replicatedDbName);
// Verify the entries added in COMPACTION_QUEUE for each table/partition:
// t1 -> 1 entry and t2 -> 4 entries (1 per partition)
tables.clear();
tables.put("t1", 1L);
tables.put("t2", 4L);
verifyCompactionQueue(tables, replicatedDbName, replicaConf);
}
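The write-id allocation half of allocateWriteIdsForTablesAndAcquireLocks can be sketched against the public TxnStore API as follows; the lock-acquisition half (which produces the returned lock ids) is omitted, and the real helper may differ:
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Sketch: allocate write ids on every table for all of the given open txns.
private static void allocateWriteIds(String dbName, Map<String, Long> tables,
        TxnStore txnHandler, List<Long> txns) throws Exception {
    for (String tableName : tables.keySet()) {
        AllocateTableWriteIdsRequest rqst = new AllocateTableWriteIdsRequest(dbName, tableName);
        rqst.setTxnIds(txns);
        txnHandler.allocateTableWriteIds(rqst);
    }
}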
use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache.
the class TestCrudCompactorOnTez method testMinorCompactionShouldBeRefusedOnTablesWithOriginalFiles.
@Test
public void testMinorCompactionShouldBeRefusedOnTablesWithOriginalFiles() throws Exception {
conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true);
// Set the delta number threshold to 2 so compaction is not skipped because of too few deltas
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 2);
// Set the delta percentage threshold to a high value to prevent a major compaction from being selected based on it
conf.setFloatVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD, 1000f);
final String dbName = "default";
final String tableName = "compaction_test";
executeStatementOnDriver("drop table if exists " + tableName, driver);
executeStatementOnDriver("CREATE TABLE " + tableName + "(id string, value string) CLUSTERED BY(id) " + "INTO 10 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='false')", driver);
executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('1','one'),('2','two'),('3','three')," + "('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten')," + "('11','eleven'),('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen')," + "('17','seventeen'),('18','eighteen'),('19','nineteen'),('20','twenty')", driver);
executeStatementOnDriver("alter table " + tableName + " set TBLPROPERTIES('transactional'='true')", driver);
executeStatementOnDriver("insert into " + tableName + " values ('21', 'value21'),('84', 'value84')," + "('66', 'value66'),('54', 'value54')", driver);
executeStatementOnDriver("insert into " + tableName + " values ('22', 'value22'),('34', 'value34')," + "('35', 'value35')", driver);
executeStatementOnDriver("insert into " + tableName + " values ('75', 'value75'),('99', 'value99')", driver);
execSelectAndDumpData("select * from " + tableName, driver, "Dumping data for " + tableName + " after load:");
TxnStore txnHandler = TxnUtils.getTxnStore(conf);
// Prevent initiator from submitting the compaction requests
TxnStore mockedHandler = spy(txnHandler);
doThrow(new RuntimeException("")).when(mockedHandler).compact(nullable(CompactionRequest.class));
Initiator initiator = new Initiator();
initiator.setConf(conf);
initiator.init(new AtomicBoolean(true));
FieldSetter.setField(initiator, MetaStoreCompactorThread.class.getDeclaredField("txnHandler"), mockedHandler);
// Run initiator and capture compaction requests
initiator.run();
// Check the captured compaction request: the type selected for the table should be MAJOR
ArgumentCaptor<CompactionRequest> requests = ArgumentCaptor.forClass(CompactionRequest.class);
verify(mockedHandler).compact(requests.capture());
Assert.assertTrue(requests.getAllValues().stream()
    .anyMatch(r -> r.getTablename().equals(tableName) && r.getType().equals(CompactionType.MAJOR)));
// Try to do a minor compaction directly
CompactionRequest rqst = new CompactionRequest(dbName, tableName, CompactionType.MINOR);
txnHandler.compact(rqst);
runWorker(conf);
// Check that both compactions failed with the expected error messages
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
if (2 != compacts.size()) {
Assert.fail("Expecting 2 compaction entries, found " + compacts.size() + ": " + compacts);
}
Assert.assertEquals("did not initiate", compacts.get(0).getState());
Assert.assertTrue(compacts.get(0).getErrorMessage().startsWith("Caught exception while trying to determine if we should compact"));
Assert.assertEquals("refused", compacts.get(1).getState());
Assert.assertTrue(compacts.get(1).getErrorMessage().startsWith("Query based Minor compaction is not possible for full acid tables having raw format (non-acid) data in them."));
}
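Worker-driven compactions are asynchronous in general, so tests built on this pattern often poll the compaction queue rather than assert immediately. A hypothetical polling helper reusing the same showCompact API as above (the "initiated"/"working" state strings are assumptions about Hive's state markers):
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Sketch: poll showCompact until a compaction on the given table leaves the
// "initiated"/"working" states, returning the terminal state or null on timeout.
private static String waitForCompactionState(TxnStore txnHandler, String tableName, long timeoutMs)
        throws Exception {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
        ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
        for (ShowCompactResponseElement e : rsp.getCompacts()) {
            if (tableName.equals(e.getTablename())
                    && !"initiated".equals(e.getState()) && !"working".equals(e.getState())) {
                return e.getState();
            }
        }
        Thread.sleep(100);
    }
    return null;
}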