Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testMoveOptimizationIncrementalFailureAfterCopyReplace.
@Test
public void testMoveOptimizationIncrementalFailureAfterCopyReplace() throws Throwable {
    String cmDbName = replicatedDbName + "_CM";
    // Bootstrap: create two partitioned tables on the source and dump them.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
        .run("create table t2 (place string) partitioned by (country string)")
        .run("insert into table t2 partition(country='india') values ('bangalore')")
        .run("create table t1 (place string) partitioned by (country string)")
        .dump(primaryDbName);
    replica.load(replicatedDbName, primaryDbName);
    // delete load ack to reuse the dump
    Path dumpRoot = new Path(bootstrapDump.dumpLocation);
    Path loadAck = new Path(bootstrapDump.dumpLocation + Path.SEPARATOR
        + ReplUtils.REPL_HIVE_BASE_DIR + Path.SEPARATOR + LOAD_ACKNOWLEDGEMENT.toString());
    dumpRoot.getFileSystem(conf).delete(loadAck, true);
    // Load the same bootstrap dump into the second (change-management) database.
    replica.load(cmDbName, primaryDbName);
    // Mark both replica databases as replication targets.
    replica.run("alter database " + replicatedDbName
            + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
        .run("alter database " + cmDbName
            + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION + "' = '1,2,3')");
    // Incremental: overwrite t1 from t2 on the source and dump again.
    WarehouseInstance.Tuple incrementalDump = primary.run("use " + primaryDbName)
        .run("insert overwrite table t1 select * from t2")
        .dump(primaryDbName, Collections.emptyList());
    testMoveOptimization(primaryDbName, replicatedDbName, cmDbName, "t1", "ADD_PARTITION", incrementalDump);
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testBootstrapReplLoadRetryAfterFailureForFunctions.
@Test
public void testBootstrapReplLoadRetryAfterFailureForFunctions() throws Throwable {
    String firstFunc = "f1";
    String secondFunc = "f2";
    // Bootstrap dump with two permanent functions defined on the source database.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
        .run("CREATE FUNCTION " + primaryDbName + "." + firstFunc
            + " as 'hivemall.tools.string.StopwordUDF' "
            + "using jar 'ivy://io.github.myui:hivemall:0.4.0-2'")
        .run("CREATE FUNCTION " + primaryDbName + "." + secondFunc
            + " as 'hivemall.tools.string.SplitWordsUDF' "
            + "using jar 'ivy://io.github.myui:hivemall:0.4.0-1'")
        .dump(primaryDbName);
    // Allow create function only on f1. Create should fail for the second function.
    BehaviourInjection<CallerArguments, Boolean> verifier = new BehaviourInjection<CallerArguments, Boolean>() {
        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            if (!args.dbName.equalsIgnoreCase(replicatedDbName)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName));
                return false;
            }
            if (args.funcName == null) {
                return true;
            }
            LOG.debug("Verifier - Function: " + String.valueOf(args.funcName));
            return args.funcName.equals(firstFunc);
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(verifier);
    // Trigger a bootstrap load that creates function f1 and then fails before f2.
    List<String> loadConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'",
        "'hive.in.repl.test.files.sorted'='true'");
    try {
        replica.loadFailure(replicatedDbName, primaryDbName, loadConfigs);
        verifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Verify that only f1 got loaded
    replica.run("use " + replicatedDbName)
        .run("repl status " + replicatedDbName)
        .verifyResult("null")
        .run("show functions like '" + replicatedDbName + "%'")
        .verifyResult(replicatedDbName + "." + firstFunc);
    // Verify no calls to load f1 only f2.
    verifier = new BehaviourInjection<CallerArguments, Boolean>() {
        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            if (!args.dbName.equalsIgnoreCase(replicatedDbName)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName));
                return false;
            }
            if (args.funcName == null) {
                return true;
            }
            LOG.debug("Verifier - Function: " + String.valueOf(args.funcName));
            return args.funcName.equals(secondFunc);
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(verifier);
    try {
        // Retry with same dump with which it was already loaded should resume the bootstrap load.
        // This time, it completes by adding just the function f2
        replica.load(replicatedDbName, primaryDbName);
        verifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Verify that both the functions are available.
    replica.run("use " + replicatedDbName)
        .run("repl status " + replicatedDbName)
        .verifyResult(bootstrapDump.lastReplicationId)
        .run("show functions like '" + replicatedDbName + "%'")
        .verifyResults(new String[] { replicatedDbName + "." + firstFunc,
            replicatedDbName + "." + secondFunc });
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testReplWithRetryDisabledIterators.
@Test
public void testReplWithRetryDisabledIterators() throws Throwable {
    List<String> confList = new ArrayList<>();
    // NS replacement parameters has no effect when data is also copied to staging
    confList.add("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET + "'='false'");
    confList.add("'" + HiveConf.ConfVars.REPL_COPY_FILE_LIST_ITERATOR_RETRY + "'='false'");
    // Bootstrap: one ACID table plus one managed table with two rows.
    WarehouseInstance.Tuple dump = primary.run("use " + primaryDbName)
        .run("create table acid_table (key int, value int) partitioned by (load_date date) "
            + "clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')")
        .run("create table table1 (i String)")
        .run("insert into table1 values (1)")
        .run("insert into table1 values (2)")
        .dump(primaryDbName, confList);
    ReplicationTestUtils.assertFalseExternalFileList(primary, dump.dumpLocation);
    replica.load(replicatedDbName, primaryDbName, confList)
        .run("use " + replicatedDbName)
        .run("show tables")
        .verifyResults(new String[] { "acid_table", "table1" })
        .run("select * from table1")
        .verifyResults(new String[] { "1", "2" });
    // Incremental cycle: one more row in table1.
    dump = primary.run("use " + primaryDbName)
        .run("insert into table1 values (3)")
        .dump(primaryDbName, confList);
    ReplicationTestUtils.assertFalseExternalFileList(primary, dump.dumpLocation);
    replica.load(replicatedDbName, primaryDbName, confList)
        .run("use " + replicatedDbName)
        .run("show tables")
        .verifyResults(new String[] { "acid_table", "table1" })
        .run("select * from table1")
        .verifyResults(new String[] { "1", "2", "3" });
    // Now include external-table data in the dump as well.
    confList.add("'" + HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE.varname + "'='false'");
    dump = primary.run("use " + primaryDbName)
        .run("create external table ext_table1 (id int)")
        .run("insert into ext_table1 values (3)")
        .run("insert into ext_table1 values (4)")
        .run("create external table ext_table2 (key int, value int) partitioned by (load_time timestamp)")
        .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.123') values(1,2)")
        .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.124') values(1,3)")
        .run("show partitions ext_table2")
        .verifyResults(new String[] { "load_time=2012-02-21 07%3A08%3A09.123",
            "load_time=2012-02-21 07%3A08%3A09.124" })
        .dump(primaryDbName, confList);
    ReplicationTestUtils.assertExternalFileList(Arrays.asList("ext_table1", "ext_table2"),
        dump.dumpLocation, primary);
    replica.load(replicatedDbName, primaryDbName, confList)
        .run("use " + replicatedDbName)
        .run("show tables")
        .verifyResults(new String[] { "acid_table", "table1", "ext_table1", "ext_table2" })
        .run("select * from ext_table1")
        .verifyResults(new String[] { "3", "4" })
        .run("select value from ext_table2")
        .verifyResults(new String[] { "2", "3" });
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testMoveOptimizationBootstrapReplLoadRetryAfterFailure.
@Test
public void testMoveOptimizationBootstrapReplLoadRetryAfterFailure() throws Throwable {
    String cmDbName = replicatedDbName + "_CM";
    // Bootstrap dump of a single partitioned table; the shared helper then
    // drives the failure/retry move-optimization scenario for ADD_PARTITION.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
        .run("create table t2 (place string) partitioned by (country string)")
        .run("insert into table t2 partition(country='india') values ('bangalore')")
        .dump(primaryDbName);
    testMoveOptimization(primaryDbName, replicatedDbName, cmDbName, "t2", "ADD_PARTITION", bootstrapDump);
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testBootstrapReplLoadRetryAfterFailureForTablesAndConstraints.
// Bootstrap-load retry test for tables and constraints: one bootstrap dump is
// loaded in three attempts. Attempt 1 creates only table t1 and then fails;
// attempt 2 creates t2/t3 and all constraints except the foreign key and fails;
// attempt 3 completes by adding the remaining foreign key on t2.
@Test
public void testBootstrapReplLoadRetryAfterFailureForTablesAndConstraints() throws Throwable {
// Source tables carry a primary key (t1), a foreign key referencing t1 (t2),
// and not-null + unique constraints (t3), so constraint replication is exercised.
WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName).run("create table t1(a string, b string, primary key (a, b) disable novalidate rely)").run("create table t2(a string, b string, foreign key (a, b) references t1(a, b) disable novalidate)").run("create table t3(a string, b string not null disable, unique (a) disable)").dump(primaryDbName);
// Need to drop the primary DB as metastore is shared by both primary/replica. So, constraints
// conflict when loaded. Some issue with framework which needs to be relook into later.
primary.run("drop database if exists " + primaryDbName + " cascade");
// Allow create table only on t1. Create should fail for rest of the tables and hence constraints
// also not loaded.
BehaviourInjection<CallerArguments, Boolean> callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {
@Override
public Boolean apply(CallerArguments args) {
// Record that the injection point was actually reached.
injectionPathCalled = true;
// Reject any call outside the replica DB or any constraint creation in this phase.
if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.constraintTblName != null)) {
LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Constraint Table: " + String.valueOf(args.constraintTblName));
return false;
}
// Table creation succeeds only for t1; t2/t3 are made to fail.
if (args.tblName != null) {
LOG.warn("Verifier - Table: " + String.valueOf(args.tblName));
return args.tblName.equals("t1");
}
return true;
}
};
InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
// Trigger bootstrap load which creates only table t1; the other tables (t2, t3)
// and the constraints are not loaded. One task per cycle forces the early failure.
List<String> withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'");
try {
replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
callerVerifier.assertInjectionsPerformed(true, false);
} finally {
// reset the behaviour
InjectableBehaviourObjectStore.resetCallerVerifier();
}
// Incomplete load: repl status is still "null" and no constraints exist yet.
replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult("null");
assertEquals(0, replica.getPrimaryKeyList(replicatedDbName, "t1").size());
assertEquals(0, replica.getUniqueConstraintList(replicatedDbName, "t3").size());
assertEquals(0, replica.getNotNullConstraintList(replicatedDbName, "t3").size());
assertEquals(0, replica.getForeignKeyList(replicatedDbName, "t2").size());
// Second attempt: allow table creation to proceed (t1 already exists, so only t2 and t3
// are created) and allow constraint creation only on t1 and t3, so the foreign key
// creation on t2 fails and the load stops there.
callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {
@Override
public Boolean apply(CallerArguments args) {
injectionPathCalled = true;
// No function creations are expected in this dump; fail if one appears.
if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.funcName != null)) {
LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Func: " + String.valueOf(args.funcName));
return false;
}
// Constraints succeed only on t1 and t3; the foreign key on t2 is made to fail.
if (args.constraintTblName != null) {
LOG.warn("Verifier - Constraint Table: " + String.valueOf(args.constraintTblName));
return (args.constraintTblName.equals("t1") || args.constraintTblName.equals("t3"));
}
return true;
}
};
InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
try {
// Retry with same dump with which it was already loaded should resume the bootstrap load.
// This time, it fails when try to load the foreign key constraints. All other constraints are loaded.
replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
callerVerifier.assertInjectionsPerformed(true, false);
} finally {
// reset the behaviour
InjectableBehaviourObjectStore.resetCallerVerifier();
}
// All tables and all constraints except the t2 foreign key are now present;
// repl status stays "null" because the load is still incomplete.
replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult("null").run("show tables").verifyResults(new String[] { "t1", "t2", "t3" });
assertEquals(2, replica.getPrimaryKeyList(replicatedDbName, "t1").size());
assertEquals(1, replica.getUniqueConstraintList(replicatedDbName, "t3").size());
assertEquals(1, replica.getNotNullConstraintList(replicatedDbName, "t3").size());
assertEquals(0, replica.getForeignKeyList(replicatedDbName, "t2").size());
// Verify if no create table/function calls. Only add foreign key constraints on table t2.
callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {
@Override
public Boolean apply(CallerArguments args) {
injectionPathCalled = true;
// Everything is already loaded except the t2 foreign key, so any table
// creation attempt in this phase is an error.
if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.tblName != null)) {
LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Table: " + String.valueOf(args.tblName));
return false;
}
// Only the pending foreign key on t2 may be created.
if (args.constraintTblName != null) {
LOG.warn("Verifier - Constraint Table: " + String.valueOf(args.constraintTblName));
return args.constraintTblName.equals("t2");
}
return true;
}
};
InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
try {
// Retry with same dump with which it was already loaded should resume the bootstrap load.
// This time, it completes by adding just foreign key constraints for table t2.
replica.load(replicatedDbName, primaryDbName);
callerVerifier.assertInjectionsPerformed(true, false);
} finally {
// reset the behaviour
InjectableBehaviourObjectStore.resetCallerVerifier();
}
// Fully loaded: repl status reflects the dump's last replication id, and the
// composite (a, b) foreign key on t2 yields two foreign key entries.
replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult(tuple.lastReplicationId).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" });
assertEquals(2, replica.getPrimaryKeyList(replicatedDbName, "t1").size());
assertEquals(1, replica.getUniqueConstraintList(replicatedDbName, "t3").size());
assertEquals(1, replica.getNotNullConstraintList(replicatedDbName, "t3").size());
assertEquals(2, replica.getForeignKeyList(replicatedDbName, "t2").size());
}
Aggregations