Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache:
class TestReplicationScenariosAcidTables, method testAcidTablesBootstrapWithOpenTxnsAbortDisabled.
/**
 * Verifies that a bootstrap REPL DUMP fails when there are open transactions
 * with write ids on the db under replication and force-abort of open txns
 * ({@code hive.repl.bootstrap.dump.abort.write.txn.after.timeout}) is disabled.
 * The open txns must survive the failed dump.
 */
@Test
public void testAcidTablesBootstrapWithOpenTxnsAbortDisabled() throws Throwable {
    int numTxns = 5;
    HiveConf primaryConf = primary.getConf();
    TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
    // Open 5 txns
    List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
    // Create 2 tables, one partitioned and other not. Also, have both types of full ACID and MM tables.
    primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("insert into t1 values(1)").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)");
    // Allocate write ids for both tables t1 and t2 for all txns
    // t1=5+1(insert) and t2=5+2(insert)
    Map<String, Long> tables = new HashMap<>();
    tables.put("t1", numTxns + 1L);
    tables.put("t2", numTxns + 2L);
    List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tables, txnHandler, txns, primaryConf);
    // Bootstrap dump with open txn timeout as 1s and force-abort disabled: the dump is expected to fail.
    List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='1s'", "'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_ABORT_WRITE_TXN_AFTER_TIMEOUT + "'='false'");
    boolean dumpFailed = false;
    try {
        primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
    } catch (Exception e) {
        dumpFailed = true;
        Assert.assertEquals("REPL DUMP cannot proceed. Force abort all the open txns is disabled. Enable " + "hive.repl.bootstrap.dump.abort.write.txn.after.timeout to proceed.", e.getMessage());
    }
    // Previously a successful dump would silently pass this test; the dump MUST have failed here.
    Assert.assertTrue("REPL DUMP was expected to fail when force-abort of open txns is disabled", dumpFailed);
    // The open txns must not be aborted, since force-abort is disabled. Verify it.
    verifyAllOpenTxnsNotAborted(txns, primaryConf);
    // Abort the txns
    txnHandler.abortTxns(new AbortTxnsRequest(txns));
    verifyAllOpenTxnsAborted(txns, primaryConf);
    // Release the locks
    releaseLocks(txnHandler, lockIds);
}
Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache:
class TestReplicationScenariosAcidTables, method testAcidTablesBootstrapWithOpenTxnsDiffDb.
/**
 * Verifies that a bootstrap REPL DUMP neither waits for nor aborts open
 * transactions whose write ids belong to a different database, and that
 * those write ids are not replicated to the target database.
 */
@Test
public void testAcidTablesBootstrapWithOpenTxnsDiffDb() throws Throwable {
int numTxns = 5;
HiveConf primaryConf = primary.getConf();
TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
// Open 5 txns
List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
// Create 2 tables, one partitioned and the other not. Also cover both full ACID and MM (insert_only) tables.
primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("insert into t1 values(1)").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)");
// Allocate write ids for both tables of the secondary ("_extra") db for all txns:
// t1=5 and t2=5
Map<String, Long> tablesInSecDb = new HashMap<>();
tablesInSecDb.put("t1", (long) numTxns);
tablesInSecDb.put("t2", (long) numTxns);
List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName + "_extra", tablesInSecDb, txnHandler, txns, primaryConf);
// Bootstrap dump with open txn timeout as 300s.
// Since the transactions belong to a different db, the dump should not wait for them.
List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='300s'");
long timeStarted = System.currentTimeMillis();
WarehouseInstance.Tuple bootstrapDump = null;
try {
bootstrapDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
} finally {
// The dump shouldn't wait for the 300s timeout; it should detect early that those txns belong to a
// different db, so well under 300s must have elapsed.
Assert.assertTrue(System.currentTimeMillis() - timeStarted < 300000);
}
// After the bootstrap dump, none of the opened txns should be aborted, as they belong to a different db. Verify it.
verifyAllOpenTxnsNotAborted(txns, primaryConf);
Map<String, Long> tablesInPrimary = new HashMap<>();
tablesInPrimary.put("t1", 1L);
tablesInPrimary.put("t2", 2L);
verifyNextId(tablesInPrimary, primaryDbName, primaryConf);
// Bootstrap load, which should not replicate the write ids on either table since they are on a different db.
HiveConf replicaConf = replica.getConf();
replica.load(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(bootstrapDump.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11" });
// Verify that the HWM is properly set after REPL LOAD
verifyNextId(tablesInPrimary, replicatedDbName, replicaConf);
// Verify that none of the write ids are replicated to the replicated DB, as they belong to a different db
for (Map.Entry<String, Long> entry : tablesInPrimary.entrySet()) {
entry.setValue((long) 0);
}
verifyWriteIdsForTables(tablesInPrimary, replicaConf, replicatedDbName);
// Abort the txns
txnHandler.abortTxns(new AbortTxnsRequest(txns));
verifyAllOpenTxnsAborted(txns, primaryConf);
// Release the locks
releaseLocks(txnHandler, lockIds);
}
Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache:
class TestReplicationScenariosAcidTables, method testAcidTablesBootstrapWithOpenTxnsPrimaryAndSecondaryDb.
/**
 * Verifies bootstrap REPL DUMP behavior when open transactions exist on both the
 * db under replication and an unrelated db: only the txns on the replicated db
 * are force-aborted, and only their write ids are replicated to the target.
 */
@Test
public void testAcidTablesBootstrapWithOpenTxnsPrimaryAndSecondaryDb() throws Throwable {
int numTxns = 5;
HiveConf primaryConf = primary.getConf();
TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
// Open 5 txns for the secondary db
List<Long> txns = openTxns(numTxns, txnHandler, primaryConf);
// Open 5 txns for the primary db
List<Long> txnsSameDb = openTxns(numTxns, txnHandler, primaryConf);
// Create 2 tables, one partitioned and the other not. Also cover both full ACID and MM (insert_only) tables.
primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("insert into t1 values(1)").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)");
// Allocate write ids for both tables of the secondary ("_extra") db for all txns:
// t1=5 and t2=5
Map<String, Long> tablesInSecDb = new HashMap<>();
tablesInSecDb.put("t1", (long) numTxns);
tablesInSecDb.put("t2", (long) numTxns);
List<Long> lockIds = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName + "_extra", tablesInSecDb, txnHandler, txns, primaryConf);
// Allocate write ids for both tables of the primary db for all txns:
// t1=5+1(insert) and t2=5+2(inserts)
Map<String, Long> tablesInPrimDb = new HashMap<>();
tablesInPrimDb.put("t1", (long) numTxns + 1L);
tablesInPrimDb.put("t2", (long) numTxns + 2L);
lockIds.addAll(allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tablesInPrimDb, txnHandler, txnsSameDb, primaryConf));
// Bootstrap dump with open txn timeout as 1s.
List<String> withConfigs = Arrays.asList("'" + HiveConf.ConfVars.REPL_BOOTSTRAP_DUMP_OPEN_TXN_TIMEOUT + "'='1s'");
WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName).dump(primaryDbName, withConfigs);
// After the bootstrap dump, the secondary-db txns should not be aborted, as they belong to a different db. Verify it.
verifyAllOpenTxnsNotAborted(txns, primaryConf);
// After the bootstrap dump, the primary-db txns should be aborted, as they belong to the db under replication. Verify it.
verifyAllOpenTxnsAborted(txnsSameDb, primaryConf);
verifyNextId(tablesInPrimDb, primaryDbName, primaryConf);
// Bootstrap load, which should replicate the write ids on both tables since they are on the
// replicated db, not on the different db.
HiveConf replicaConf = replica.getConf();
replica.load(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(bootstrapDump.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11" });
// Verify that the HWM is properly set after REPL LOAD
verifyNextId(tablesInPrimDb, replicatedDbName, replicaConf);
// Verify that only the write ids belonging to the primary db are replicated to the replicated DB.
for (Map.Entry<String, Long> entry : tablesInPrimDb.entrySet()) {
entry.setValue((long) numTxns);
}
verifyWriteIdsForTables(tablesInPrimDb, replicaConf, replicatedDbName);
// Abort the txns for the secondary db
txnHandler.abortTxns(new AbortTxnsRequest(txns));
verifyAllOpenTxnsAborted(txns, primaryConf);
// Release the locks
releaseLocks(txnHandler, lockIds);
}
Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache:
class TestTxnNoBuckets, method testNonAcidToAcidVectorzied.
/**
 * Tests to check that we are able to use vectorized acid reader,
 * VectorizedOrcAcidRowBatchReader, when reading "original" files,
 * i.e. those that were written before the table was converted to acid.
 * See also acid_vectorization_original*.q
 */
@Test
public void testNonAcidToAcidVectorzied() throws Exception {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
// this enables vectorization of ROW__ID
// HIVE-12631
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);
runStatementOnDriver("drop table if exists T");
runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='false')");
int[][] values = { { 1, 2 }, { 2, 4 }, { 5, 6 }, { 6, 8 }, { 9, 10 } };
runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values));
// , 'transactional_properties'='default'
runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional'='true')");
// Execution mode: vectorized
// this uses VectorizedOrcAcidRowBatchReader
String query = "select a from T where b > 6 order by a";
List<String> rs = runStatementOnDriver(query);
String[][] expected = { { "6", "" }, { "9", "" } };
checkExpected(rs, expected, "After conversion");
Assert.assertEquals(Integer.toString(6), rs.get(0));
Assert.assertEquals(Integer.toString(9), rs.get(1));
assertVectorized(shouldVectorize(), query);
// why isn't PPD working.... - it is working but storage layer doesn't do row level filtering; only row group level
// this uses VectorizedOrcAcidRowBatchReader
query = "select ROW__ID, a from T where b > 6 order by a";
rs = runStatementOnDriver(query);
String[][] expected1 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
checkExpected(rs, expected1, "After conversion with VC1");
assertVectorized(shouldVectorize(), query);
// this uses VectorizedOrcAcidRowBatchReader
query = "select ROW__ID, a from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected2 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}", "1" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "2" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "5" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9" } };
checkExpected(rs, expected2, "After conversion with VC2");
assertVectorized(shouldVectorize(), query);
// doesn't vectorize (uses neither of the Vectorized Acid readers)
query = "select ROW__ID, a, INPUT__FILE__NAME from T where b > 6 order by a";
rs = runStatementOnDriver(query);
Assert.assertEquals("", 2, rs.size());
String[][] expected3 = { { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0" } };
checkExpected(rs, expected3, "After non-vectorized read");
Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
// NOT vectorized because INPUT__FILE__NAME is in the select list
assertVectorized(false, query);
runStatementOnDriver("update T set b = 17 where a = 1");
// this should use VectorizedOrcAcidRowReader
query = "select ROW__ID, b from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected4 = { { "{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}", "17" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "4" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "6" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "8" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "10" } };
checkExpected(rs, expected4, "After conversion with VC4");
assertVectorized(shouldVectorize(), query);
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
// this should not vectorize at all
query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
rs = runStatementOnDriver(query);
String[][] expected5 = { // the row__ids are the same after compaction
{ "{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_10000001_v0000030/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_10000001_v0000030/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_10000001_v0000030/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_10000001_v0000030/bucket_00000" }, { "{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_10000001_v0000030/bucket_00000" } };
checkExpected(rs, expected5, "After major compaction");
// NOT vectorized because INPUT__FILE__NAME is in the select list
assertVectorized(false, query);
}
Use of org.apache.hadoop.hive.metastore.txn.TxnStore in project hive by apache:
class TestTxnCommands, method acidVersionTest.
/**
 * Exercises writes, minor and major compaction on an ACID table with the acid
 * version-file marker either enabled or disabled, and checks that the delta/base
 * directories carry (or omit) the version file accordingly.
 *
 * @param enableVersionFile whether HIVE_WRITE_ACID_VERSION_FILE is turned on for the run;
 *                          the original setting is restored before returning
 */
private void acidVersionTest(boolean enableVersionFile) throws Exception {
    boolean originalEnableVersionFile = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, enableVersionFile);
    hiveConf.set(MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID.getVarname(), "true");
    // Need to close the thread local Hive object so that configuration change is reflected to HMS.
    Hive.closeCurrent();
    runStatementOnDriver("drop table if exists T");
    runStatementOnDriver("create table T (a int, b int) stored as orc");
    int[][] data = { { 1, 2 } };
    // create 1 delta file bucket_00000
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    // the update additionally produces a delete delta
    runStatementOnDriver("update T set a=3 where b=2");
    FileSystem fs = FileSystem.get(hiveConf);
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(getWarehouseDir(), "t"), true);
    CompactorTestUtilities.checkAcidVersion(files, fs, enableVersionFile, new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
    runStatementOnDriver("alter table T compact 'minor'");
    runWorker(hiveConf);
    // Check status of compaction job
    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
    // assertEquals gives a useful expected/actual message on failure, unlike assertTrue(equals)
    Assert.assertEquals(CompactionType.MINOR, resp.getCompacts().get(0).getType());
    // Check the files after minor compaction
    files = fs.listFiles(new Path(getWarehouseDir(), "t"), true);
    CompactorTestUtilities.checkAcidVersion(files, fs, enableVersionFile, new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX });
    runStatementOnDriver("insert into T" + makeValuesClause(data));
    runStatementOnDriver("alter table T compact 'major'");
    runWorker(hiveConf);
    // Check status of compaction job; reuse the already-acquired TxnStore handle
    // (the original code redundantly re-fetched it via TxnUtils.getTxnStore).
    resp = txnHandler.showCompact(new ShowCompactRequest());
    Assert.assertEquals("Unexpected number of compactions in history", 2, resp.getCompactsSize());
    Assert.assertEquals("Unexpected 1 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(1).getState());
    Assert.assertTrue(resp.getCompacts().get(1).getHadoopJobId().startsWith("job_local"));
    // Check the files after major compaction (a base dir now exists as well)
    files = fs.listFiles(new Path(getWarehouseDir(), "t"), true);
    CompactorTestUtilities.checkAcidVersion(files, fs, enableVersionFile, new String[] { AcidUtils.DELTA_PREFIX, AcidUtils.DELETE_DELTA_PREFIX, AcidUtils.BASE_PREFIX });
    // restore the caller's original setting
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_WRITE_ACID_VERSION_FILE, originalEnableVersionFile);
}
Aggregations