
Example 31 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache: class TestReplicationScenariosAcidTables, method testAcidTablesBootstrapWithOpenTxnsAbortDisabled.

@Test
public void testAcidTablesBootstrapWithOpenTxnsAbortDisabled() throws Throwable {
    int numTxns = 5;
    HiveConf primaryConf = primary.getConf();
    TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
    // Open 5 txns
    List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
    // Create 2 tables: one partitioned, one not. Also cover both ACID variants: full ACID and MM (insert-only).
    primary.run("use " + primaryDbName)
            .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc "
                    + "tblproperties (\"transactional\"=\"true\")")
            .run("insert into t1 values(1)")
            .run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", "
                    + "\"transactional_properties\"=\"insert_only\")")
            .run("insert into t2 partition(name='Bob') values(11)")
            .run("insert into t2 partition(name='Carl') values(10)");
    // Allocate write ids for both tables t1 and t2 for all txns
    // t1=5+1(insert) and t2=5+2(insert)
    Map<String, Long> tables = new HashMap<>();
    tables.put("t1", numTxns + 1L);
    tables.put("t2", numTxns + 2L);
    List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tables, txnHandler, txns, primaryConf);
    // Bootstrap dump with open txn timeout of 1s and force-abort of open txns disabled.
    List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='1s'", "'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_ABORT_WRITE_TXN_AFTER_TIMEOUT + "'='false'");
    try {
        WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
    } catch (Exception e) {
        Assert.assertEquals("REPL DUMP cannot proceed. Force abort all the open txns is disabled. Enable " + "hive.repl.bootstrap.dump.abort.write.txn.after.timeout to proceed.", e.getMessage());
    }
    // After bootstrap dump, none of the opened txns should be aborted since force-abort is disabled. Verify it.
    verifyAllOpenTxnsNotAborted(txns, primaryConf);
    // Abort the txns
    txnHandler.abortTxns(new AbortTxnsRequest(txns));
    verifyAllOpenTxnsAborted(txns, primaryConf);
    // Release the locks
    releaseLocks(txnHandler, lockIds);
}
Also used : HashMap(java.util.HashMap) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) AbortTxnsRequest(org.apache.hadoop.hive.metastore.api.AbortTxnsRequest) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)
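
The openTxns(...) helper called above is not shown in this listing. A minimal sketch of what such a helper could look like against the TxnStore API follows; the method body and the "u1" user and "localhost" hostname values are illustrative assumptions, not the Hive test's actual implementation.

import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Hypothetical helper (inside the test class): opens numTxns transactions through the
// TxnStore and returns their ids so the test can later allocate write ids or abort them.
private List<Long> openTxns(int numTxns, TxnStore txnHandler, HiveConf conf) throws Exception {
    // OpenTxnRequest carries the number of txns to open plus a user and hostname for bookkeeping.
    OpenTxnsResponse resp = txnHandler.openTxns(new OpenTxnRequest(numTxns, "u1", "localhost"));
    // conf is unused in this minimal sketch; the real helper may use it to verify metastore state.
    return resp.getTxn_ids();
}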

Example 32 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache: class TestReplicationScenariosAcidTables, method testAcidTablesBootstrapWithOpenTxnsDiffDb.

@Test
public void testAcidTablesBootstrapWithOpenTxnsDiffDb() throws Throwable {
    int numTxns = 5;
    HiveConf primaryConf = primary.getConf();
    TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
    // Open 5 txns
    List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
    // Create 2 tables: one partitioned, one not. Also cover both ACID variants: full ACID and MM (insert-only).
    primary.run("use " + primaryDbName)
            .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc "
                    + "tblproperties (\"transactional\"=\"true\")")
            .run("insert into t1 values(1)")
            .run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", "
                    + "\"transactional_properties\"=\"insert_only\")")
            .run("insert into t2 partition(name='Bob') values(11)")
            .run("insert into t2 partition(name='Carl') values(10)");
    // Allocate write ids for both tables of secondary db for all txns
    // t1=5 and t2=5
    Map<String, Long> tablesInSecDb = new HashMap<>();
    tablesInSecDb.put("t1", (long) numTxns);
    tablesInSecDb.put("t2", (long) numTxns);
    List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName + "_extra", tablesInSecDb, txnHandler, txns, primaryConf);
    // Bootstrap dump with open txn timeout of 300s.
    // Since the transactions belong to a different db, the dump won't wait for them.
    List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='300s'");
    long timeStarted = System.currentTimeMillis();
    WarehouseInstance.Tuple bootstrapDump = null;
    try {
        bootstrapDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
    } finally {
        // Dump shouldn't wait the full 300s; it should determine within about 30 seconds that those txns belong to a different db.
        Assert.assertTrue(System.currentTimeMillis() - timeStarted < 300000);
    }
    // After bootstrap dump, none of the opened txns should be aborted as they belong to a different db. Verify it.
    verifyAllOpenTxnsNotAborted(txns, primaryConf);
    Map<String, Long> tablesInPrimary = new HashMap<>();
    tablesInPrimary.put("t1", 1L);
    tablesInPrimary.put("t2", 2L);
    verifyNextId(tablesInPrimary, primaryDbName, primaryConf);
    // Bootstrap load should not replicate the write ids for either table as they belong to a different db.
    HiveConf replicaConf = replica.getConf();
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2" })
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("select id from t1")
            .verifyResults(new String[] { "1" })
            .run("select rank from t2 order by rank")
            .verifyResults(new String[] { "10", "11" });
    // Verify if HWM is properly set after REPL LOAD
    verifyNextId(tablesInPrimary, replicatedDbName, replicaConf);
    // Verify that none of the write ids are replicated to the replica DB as they belong to a different db.
    for (Map.Entry<String, Long> entry : tablesInPrimary.entrySet()) {
        entry.setValue((long) 0);
    }
    verifyWriteIdsForTables(tablesInPrimary, replicaConf, replicatedDbName);
    // Abort the txns
    txnHandler.abortTxns(new AbortTxnsRequest(txns));
    verifyAllOpenTxnsAborted(txns, primaryConf);
    // Release the locks
    releaseLocks(txnHandler, lockIds);
}
Also used : HashMap(java.util.HashMap) AbortTxnsRequest(org.apache.hadoop.hive.metastore.api.AbortTxnsRequest) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)
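
The releaseLocks(...) helper is likewise not shown. A minimal sketch, assuming each entry in lockIds is a lock id obtained from an earlier TxnStore.lock(...) call (the method body is illustrative, not the Hive test's actual code):

import java.util.List;

import org.apache.hadoop.hive.metastore.api.UnlockRequest;
import org.apache.hadoop.hive.metastore.txn.TxnStore;

// Hypothetical helper (inside the test class): releases every lock that was acquired
// while allocating write ids, so the test leaves no dangling locks behind.
private void releaseLocks(TxnStore txnHandler, List<Long> lockIds) throws Exception {
    for (Long lockId : lockIds) {
        // UnlockRequest wraps a single lock id; unlock removes it from the lock manager.
        txnHandler.unlock(new UnlockRequest(lockId));
    }
}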

Example 33 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache: class TestReplicationScenariosAcidTables, method testAcidTablesBootstrapWithOpenTxnsPrimaryAndSecondaryDb.

@Test
public void testAcidTablesBootstrapWithOpenTxnsPrimaryAndSecondaryDb() throws Throwable {
    int numTxns = 5;
    HiveConf primaryConf = primary.getConf();
    TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
    // Open 5 txns for secondary db
    List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
    // Open 5 txns for primary db
    List<Long> txnsSameDb = openTxns(numTxns, txnHandler, primaryConf);
    // Create 2 tables: one partitioned, one not. Also cover both ACID variants: full ACID and MM (insert-only).
    primary.run("use " + primaryDbName)
            .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc "
                    + "tblproperties (\"transactional\"=\"true\")")
            .run("insert into t1 values(1)")
            .run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", "
                    + "\"transactional_properties\"=\"insert_only\")")
            .run("insert into t2 partition(name='Bob') values(11)")
            .run("insert into t2 partition(name='Carl') values(10)");
    // Allocate write ids for both tables of secondary db for all txns
    // t1=5 and t2=5
    Map<String, Long> tablesInSecDb = new HashMap<>();
    tablesInSecDb.put("t1", (long) numTxns);
    tablesInSecDb.put("t2", (long) numTxns);
    List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName + "_extra", tablesInSecDb, txnHandler, txns, primaryConf);
    // Allocate write ids for both tables of primary db for all txns
    // t1=5+1L and t2=5+2L inserts
    Map<String, Long> tablesInPrimDb = new HashMap<>();
    tablesInPrimDb.put("t1", (long) numTxns + 1L);
    tablesInPrimDb.put("t2", (long) numTxns + 2L);
    lockIds.addAll(allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tablesInPrimDb, txnHandler, txnsSameDb, primaryConf));
    // Bootstrap dump with open txn timeout of 1s.
    List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='1s'");
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
    // After bootstrap dump, the txns opened against the secondary db should not be aborted as they belong to a different db. Verify it.
    verifyAllOpenTxnsNotAborted(txns, primaryConf);
    // After bootstrap dump, the txns opened against the primary db should be aborted as they belong to the db under replication. Verify it.
    verifyAllOpenTxnsAborted(txnsSameDb, primaryConf);
    verifyNextId(tablesInPrimDb, primaryDbName, primaryConf);
    // Bootstrap load should replicate the write ids on both tables as they belong to the db under replication.
    HiveConf replicaConf = replica.getConf();
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2" })
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("select id from t1")
            .verifyResults(new String[] { "1" })
            .run("select rank from t2 order by rank")
            .verifyResults(new String[] { "10", "11" });
    // Verify if HWM is properly set after REPL LOAD
    verifyNextId(tablesInPrimDb, replicatedDbName, replicaConf);
    // Verify if only the write ids belonging to primary db are replicated to the replicated DB.
    for (Map.Entry<String, Long> entry : tablesInPrimDb.entrySet()) {
        entry.setValue((long) numTxns);
    }
    verifyWriteIdsForTables(tablesInPrimDb, replicaConf, replicatedDbName);
    // Abort the txns for secondary db
    txnHandler.abortTxns(new AbortTxnsRequest(txns));
    verifyAllOpenTxnsAborted(txns, primaryConf);
    // Release the locks
    releaseLocks(txnHandler, lockIds);
}
Also used : HashMap(java.util.HashMap) AbortTxnsRequest(org.apache.hadoop.hive.metastore.api.AbortTxnsRequest) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 34 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache: class TestTxnNoBuckets, method testNonAcidToAcidVectorzied.

/**
 * Tests to check that we are able to use vectorized acid reader,
 * VectorizedOrcAcidRowBatchReader, when reading "original" files,
 * i.e. those that were written before the table was converted to acid.
 * See also acid_vectorization_original*.q
 */
@Test
public void testNonAcidToAcidVectorzied() throws Exception {
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
    // this enables vectorization of ROW__ID
    // HIVE-12631
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='false')");
    int[][] values = { { 1, 2 }, { 2, 4 }, { 5, 6 }, { 6, 8 }, { 9, 10 } };
    runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values));
    // , 'transactional_properties'='default'
    runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional'='true')");
    // Execution mode: vectorized
    // this uses VectorizedOrcAcidRowBatchReader
    String query = "select a from T where b > 6 order by a";
    List<String> rs = runStatementOnDriver(query);
    String[][] expected = { { "6", "" }, { "9", "" } };
    checkExpected(rs, expected, "After conversion");
    Assert.assertEquals(Integer.toString(6), rs.get(0));
    Assert.assertEquals(Integer.toString(9), rs.get(1));
    assertVectorized(shouldVectorize(), query);
    // PPD note: predicate pushdown is working, but the storage layer filters only at row-group level, not at row level
    // this uses VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, a from T where b > 6 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected1 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
    checkExpected(rs, expected1, "After conversion with VC1");
    assertVectorized(shouldVectorize(), query);
    // this uses VectorizedOrcAcidRowBatchReader
    query = "select ROW__ID, a from T where b > 0 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected2 = {
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}", "1" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "2" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "5" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
    checkExpected(rs, expected2, "After conversion with VC2");
    assertVectorized(shouldVectorize(), query);
    // doesn't vectorize (uses neither of the vectorized ACID readers)
    query = "select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a";
    rs = runStatementOnDriver(query);
    Assert.assertEquals("", 2, rs.size());
    String[][] expected3 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0" } };
    checkExpected(rs, expected3, "After non-vectorized read");
    Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
    // not vectorized because INPUT__FILE__NAME is selected
    assertVectorized(false, query);
    runStatementOnDriver("update T set b = 17 where a = 1");
    // this should use VectorizedOrcAcidRowReader
    query = "select ROW__ID, b from T where b > 0 order by a";
    rs = runStatementOnDriver(query);
    String[][] expected4 = {
            { "{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}", "17" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "4" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "6" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "8" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "10" } };
    checkExpected(rs, expected4, "After conversion with VC4");
    assertVectorized(shouldVectorize(), query);
    runStatementOnDriver("alter table T compact 'major'");
    TestTxnCommands2.runWorker(hiveConf);
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // this should not vectorize at all
    query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
    rs = runStatementOnDriver(query);
    String[][] expected5 = {
            // the ROW__IDs are the same after compaction
            { "{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_10000001_v0000030/bucket_00000" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_10000001_v0000030/bucket_00000" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_10000001_v0000030/bucket_00000" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_10000001_v0000030/bucket_00000" },
            { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_10000001_v0000030/bucket_00000" } };
    checkExpected(rs, expected5, "After major compaction");
    // not vectorized because INPUT__FILE__NAME is selected
    assertVectorized(false, query);
}
Also used : ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)
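
Examples 34 and 35 verify compaction progress the same way: trigger a compaction, run the worker, then read the compaction history through TxnStore.showCompact. A minimal sketch of that recurring check, pulled out into a helper (the helper name and its parameters are illustrative assumptions, not part of the Hive tests):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.junit.Assert;

// Hypothetical helper: asserts that the compaction at the given index in the history
// has reached the expected state, e.g. TxnStore.CLEANING_RESPONSE after the worker ran.
private static void assertCompactionState(HiveConf conf, int index, String expectedState) throws Exception {
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertTrue("Too few compactions in history", resp.getCompactsSize() > index);
    Assert.assertEquals("Unexpected compaction state", expectedState, resp.getCompacts().get(index).getState());
}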

Example 35 with TxnStore

Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache: class TestTxnCommands, method acidVersionTest.

private void acidVersionTest(boolean enableVersionFile) throws Exception {
    boolean originalEnableVersionFile = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, enableVersionFile);
    hiveConf.set(MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID.getVarname(), "true");
    // Need to close the thread local Hive object so that configuration change is reflected to HMS.
    Hive.closeCurrent();
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T (a int, b int) stored as orc");
    int[][] data = { { 1, 2 } };
    // create 1 delta file bucket_00000
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    runStatementOnDriver("update T set a=3 where b=2");
    FileSystem fs = FileSystem.get(hiveConf);
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(getWarehouseDir(), "t"), true);
    CompactorTestUtilities.checkAcidVersion(files, fs, enableVersionFile, new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
    runStatementOnDriver("alter table T compact 'minor'");
    runWorker(hiveConf);
    // Check status of compaction job
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    Assert.assertTrue(resp.getCompacts().get(0).getType().equals(CompactionType.MINOR));
    // Check the files after minor compaction
    files = fs.listFiles(new Path(getWarehouseDir(), "t"), true);
    CompactorTestUtilities.checkAcidVersion(files, fs, enableVersionFile, new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    runStatementOnDriver("alter table T compact 'major'");
    runWorker(hiveConf);
    // Check status of compaction job
    txnHandler = TxnUtils.getTxnStore(hiveConf);
    resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 1 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(1).getState());
    Assert.assertTrue(resp.getCompacts().get(1).getHadoopJobId().startsWith("job_local"));
    // Check the files after major compaction
    files = fs.listFiles(new Path(getWarehouseDir(), "t"), true);
    CompactorTestUtilities.checkAcidVersion(files, fs, enableVersionFile, new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX, AcidUtils.BASE_PREFIX });
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, originalEnableVersionFile);
}
Also used : Path(org.apache.hadoop.fs.Path) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore)

Aggregations

TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore): 61
Test (org.junit.Test): 52
ShowCompactRequest (org.apache.hadoop.hive.metastore.api.ShowCompactRequest): 36
ShowCompactResponse (org.apache.hadoop.hive.metastore.api.ShowCompactResponse): 36
Path (org.apache.hadoop.fs.Path): 26
FileSystem (org.apache.hadoop.fs.FileSystem): 24
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 22
FileStatus (org.apache.hadoop.fs.FileStatus): 16
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 16
Table (org.apache.hadoop.hive.metastore.api.Table): 16
ArrayList (java.util.ArrayList): 15
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 15
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest): 15
ShowCompactResponseElement (org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement): 14
HashMap (java.util.HashMap): 13
IOException (java.io.IOException): 12
List (java.util.List): 11
Map (java.util.Map): 11
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 10
TxnUtils (org.apache.hadoop.hive.metastore.txn.TxnUtils): 10