Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testMultipleStagesOfReplicationLoadTask.
/*
From the Hive logs (hive.log) we can also check for the info statement:
fgrep "Total Tasks" [location of hive.log]
Each line indicates one run of the load task.
*/
@Test
public void testMultipleStagesOfReplicationLoadTask() throws Throwable {
  WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
      .run("create table t1 (id int)")
      .run("insert into t1 values (1), (2)")
      .run("create table t2 (place string) partitioned by (country string)")
      .run("insert into table t2 partition(country='india') values ('bangalore')")
      .run("insert into table t2 partition(country='us') values ('austin')")
      .run("insert into table t2 partition(country='france') values ('paris')")
      .run("create table t3 (rank int)")
      .dump(primaryDbName);
  // Each table creation needs more than one task, and we cap the load at 1 task per
  // run, so the replication load is forced through multiple runs.
  List<String> withClause =
      Collections.singletonList("'" + HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS.varname + "'='1'");
  replica.load(replicatedDbName, primaryDbName, withClause)
      .run("use " + replicatedDbName)
      .run("show tables")
      .verifyResults(new String[] { "t1", "t2", "t3" })
      .run("repl status " + replicatedDbName)
      .verifyResult(bootstrapDump.lastReplicationId)
      .run("select country from t2 order by country")
      .verifyResults(new String[] { "france", "india", "us" });
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testIfBootstrapReplLoadFailWhenRetryAfterBootstrapComplete.
@Test
public void testIfBootstrapReplLoadFailWhenRetryAfterBootstrapComplete() throws Throwable {
  // Bootstrap dump of two tables, one of them partitioned.
  WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName)
      .run("create table t1 (id int)")
      .run("insert into table t1 values (10)")
      .run("create table t2 (place string) partitioned by (country string)")
      .run("insert into table t2 partition(country='india') values ('bangalore')")
      .run("insert into table t2 partition(country='uk') values ('london')")
      .run("insert into table t2 partition(country='us') values ('sfo')")
      .dump(primaryDbName);
  // First load succeeds and the replica matches the source.
  replica.load(replicatedDbName, primaryDbName)
      .run("use " + replicatedDbName)
      .run("repl status " + replicatedDbName)
      .verifyResult(tuple.lastReplicationId)
      .run("show tables")
      .verifyResults(new String[] { "t1", "t2" })
      .run("select id from t1")
      .verifyResults(Collections.singletonList("10"))
      .run("select country from t2 order by country")
      .verifyResults(Arrays.asList("india", "uk", "us"));
  String hiveDumpLocation = tuple.dumpLocation + File.separator + ReplUtils.REPL_HIVE_BASE_DIR;
  replica.verifyIfCkptSet(replicatedDbName, hiveDumpLocation);
  // The load acknowledgement marks the dump as consumed; remove it so the same dump
  // can be offered for loading again.
  Path loadAckFile = new Path(hiveDumpLocation, LOAD_ACKNOWLEDGEMENT.toString());
  new Path(tuple.dumpLocation).getFileSystem(conf).delete(loadAckFile, true);
  // Retrying with a dump that was already loaded must fail.
  replica.loadFailure(replicatedDbName, primaryDbName);
  // Remove the load acknowledgement once more to allow another retry attempt.
  new Path(tuple.dumpLocation).getFileSystem(conf).delete(loadAckFile, true);
  // Retrying from the same dump is also not allowed after the database is emptied.
  replica.run("drop table t1")
      .run("drop table t2")
      .loadFailure(replicatedDbName, primaryDbName);
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testBootstrapFunctionOnHDFSLazyCopy.
@Test
public void testBootstrapFunctionOnHDFSLazyCopy() throws Throwable {
  Dependencies dependencies = dependencies("ivy://io.github.myui:hivemall:0.4.0-2", primary);
  String jarSubString = dependencies.toJarSubSql();
  // Lazy copy: the data copy tasks run on the target cluster during load.
  // Collections.singletonList keeps the single-entry with-clause consistent with the
  // other tests in this class.
  List<String> withClause =
      Collections.singletonList("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'");
  primary.run("CREATE FUNCTION " + primaryDbName + ".anotherFunction as 'hivemall.tools.string.StopwordUDF' "
      + "using " + jarSubString);
  // The dump result tuple was never read; run the dump for its side effect only.
  primary.dump(primaryDbName, withClause);
  replica.load(replicatedDbName, primaryDbName, withClause)
      .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'")
      .verifyResult(replicatedDbName + ".anotherFunction");
  assertFunctionJarsOnTarget("anotherFunction", dependencies.jarNames());
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testHdfsNSLazyCopyIncrExtTbls.
@Test
public void testHdfsNSLazyCopyIncrExtTbls() throws Throwable {
  List<String> withClause = getHdfsNameserviceClause();
  withClause.add("'" + HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE.varname + "'='false'");
  // Bootstrap cycle with managed tables only.
  primary.run("use " + primaryDbName)
      .run("create table acid_table (key int, value int) partitioned by (load_time timestamp) "
          + "clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')")
      .run("create table table1 (i String)")
      .run("insert into table1 values (1)")
      .run("insert into table1 values (2)")
      .dump(primaryDbName);
  replica.load(replicatedDbName, primaryDbName, withClause)
      .run("use " + replicatedDbName)
      .run("show tables")
      .verifyResults(new String[] { "acid_table", "table1" })
      .run("select * from table1")
      .verifyResults(new String[] { "1", "2" });
  // Incremental cycle that adds external tables, including a partitioned one.
  Tuple incrementalDump = primary.run("use " + primaryDbName)
      .run("create external table ext_table1 (id int)")
      .run("insert into ext_table1 values (3)")
      .run("insert into ext_table1 values (4)")
      .run("create external table ext_table2 (key int, value int) partitioned by (load_time timestamp)")
      .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.123') values(1,2)")
      .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.124') values(1,3)")
      .run("show partitions ext_table2")
      .verifyResults(new String[] { "load_time=2012-02-21 07%3A08%3A09.123",
          "load_time=2012-02-21 07%3A08%3A09.124" })
      .dump(primaryDbName, withClause);
  ReplicationTestUtils.assertExternalFileList(Arrays.asList("ext_table1", "ext_table2"),
      incrementalDump.dumpLocation, primary);
  // SecurityException expected from DirCopyTask
  try {
    replica.load(replicatedDbName, primaryDbName, withClause);
    Assert.fail("Expected the UnknownHostException to be thrown.");
  } catch (SecurityException ex) {
    assertTrue(ex.getMessage().contains("java.net.UnknownHostException: nsRemote"));
  }
}
Usage of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in the Apache Hive project.
From class TestReplicationScenariosAcrossInstances, method testMultipleStagesOfReplicationLoadTaskWithPartitionBatching.
@Test
public void testMultipleStagesOfReplicationLoadTaskWithPartitionBatching() throws Throwable {
  WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
      .run("create table t1 (id int)")
      .run("insert into t1 values (1), (2)")
      .run("create table t2 (place string) partitioned by (country string)")
      .run("insert into table t2 partition(country='india') values ('bangalore')")
      .run("insert into table t2 partition(country='us') values ('austin')")
      .run("insert into table t2 partition(country='france') values ('paris')")
      .run("create table t3 (rank int)")
      .dump(primaryDbName);
  // Each table creation needs more than one task, and we cap the load at 1 task per
  // run, so the replication load is forced through multiple runs. Partition data copy
  // is also batched one partition at a time.
  List<String> withClause = new ArrayList<>(Arrays.asList(
      "'" + HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS.varname + "'='1'",
      "'" + HiveConf.ConfVars.REPL_LOAD_PARTITIONS_WITH_DATA_COPY_BATCH_SIZE.varname + "'='1'"));
  replica.load(replicatedDbName, primaryDbName, withClause)
      .run("use " + replicatedDbName)
      .run("show tables")
      .verifyResults(new String[] { "t1", "t2", "t3" })
      .run("repl status " + replicatedDbName)
      .verifyResult(bootstrapDump.lastReplicationId)
      .run("select country from t2 order by country")
      .verifyResults(new String[] { "france", "india", "us" });
}
Aggregations