Search in sources :

Example 11 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

The class TestReplicationScenariosAcrossInstances defines the method testMultipleStagesOfReplicationLoadTask.

/*
 * The number of load-task runs can be cross-checked in the hive logs (hive.log):
 *   fgrep "Total Tasks" [location of hive.log]
 * Every matching line corresponds to one run of the load task.
 */
@Test
public void testMultipleStagesOfReplicationLoadTask() throws Throwable {
    // Bootstrap dump of a db with two flat tables and one table partitioned three ways.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
            .run("create table t1 (id int)")
            .run("insert into t1 values (1), (2)")
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("insert into table t2 partition(country='us') values ('austin')")
            .run("insert into table t2 partition(country='france') values ('paris')")
            .run("create table t3 (rank int)")
            .dump(primaryDbName);
    // A single table creation already needs more than one task, so capping the
    // approximate task count at 1 forces the load to complete over multiple runs.
    List<String> withClause = Collections.singletonList(
            "'" + HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS.varname + "'='1'");
    replica.load(replicatedDbName, primaryDbName, withClause)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2", "t3" })
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("select country from t2 order by country")
            .verifyResults(new String[] { "france", "india", "us" });
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 12 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

The class TestReplicationScenariosAcrossInstances defines the method testIfBootstrapReplLoadFailWhenRetryAfterBootstrapComplete.

@Test
public void testIfBootstrapReplLoadFailWhenRetryAfterBootstrapComplete() throws Throwable {
    // Bootstrap dump: one flat table plus one table partitioned three ways.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
            .run("create table t1 (id int)")
            .run("insert into table t1 values (10)")
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("insert into table t2 partition(country='uk') values ('london')")
            .run("insert into table t2 partition(country='us') values ('sfo')")
            .dump(primaryDbName);
    // The first load succeeds and replicates everything.
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2" })
            .run("select id from t1")
            .verifyResults(Collections.singletonList("10"))
            .run("select country from t2 order by country")
            .verifyResults(Arrays.asList("india", "uk", "us"));
    String hiveDumpLocation = bootstrapDump.dumpLocation + File.separator + ReplUtils.REPL_HIVE_BASE_DIR;
    replica.verifyIfCkptSet(replicatedDbName, hiveDumpLocation);
    // Remove the load ack so the same dump becomes eligible for a retry.
    new Path(bootstrapDump.dumpLocation).getFileSystem(conf)
            .delete(new Path(hiveDumpLocation, LOAD_ACKNOWLEDGEMENT.toString()), true);
    // Re-loading a dump that was already applied must fail.
    replica.loadFailure(replicatedDbName, primaryDbName);
    // Remove the load ack once more for the next retry attempt.
    new Path(bootstrapDump.dumpLocation).getFileSystem(conf)
            .delete(new Path(hiveDumpLocation, LOAD_ACKNOWLEDGEMENT.toString()), true);
    // Retrying from the same dump after the database has been emptied is rejected too.
    replica.run("drop table t1").run("drop table t2").loadFailure(replicatedDbName, primaryDbName);
}
Also used : Path(org.apache.hadoop.fs.Path) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 13 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

The class TestReplicationScenariosAcrossInstances defines the method testBootstrapFunctionOnHDFSLazyCopy.

@Test
public void testBootstrapFunctionOnHDFSLazyCopy() throws Throwable {
    // Register a UDF whose jar is resolved via ivy, then bootstrap with lazy
    // (run-data-copy-tasks-on-target) copy enabled and check the function and
    // its jars land on the replica.
    Dependencies dependencies = dependencies("ivy://io.github.myui:hivemall:0.4.0-2", primary);
    String jarSubString = dependencies.toJarSubSql();
    List<String> withClause = Arrays.asList(
            "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'");
    primary.run("CREATE FUNCTION " + primaryDbName + ".anotherFunction as 'hivemall.tools.string.StopwordUDF' " + "using " + jarSubString);
    // The Tuple returned by dump() was never used; keep the call for its side effect only.
    primary.dump(primaryDbName, withClause);
    replica.load(replicatedDbName, primaryDbName, withClause)
            .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'")
            .verifyResult(replicatedDbName + ".anotherFunction");
    assertFunctionJarsOnTarget("anotherFunction", dependencies.jarNames());
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 14 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

The class TestReplicationScenariosAcrossInstances defines the method testHdfsNSLazyCopyIncrExtTbls.

@Test
public void testHdfsNSLazyCopyIncrExtTbls() throws Throwable {
    List<String> clause = getHdfsNameserviceClause();
    clause.add("'" + HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE.varname + "'='false'");
    // Bootstrap cycle: managed tables only.
    primary.run("use " + primaryDbName)
            .run("create table  acid_table (key int, value int) partitioned by (load_time timestamp) " + "clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')")
            .run("create table table1 (i String)")
            .run("insert into table1 values (1)")
            .run("insert into table1 values (2)")
            .dump(primaryDbName);
    replica.load(replicatedDbName, primaryDbName, clause)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "acid_table", "table1" })
            .run("select * from table1")
            .verifyResults(new String[] { "1", "2" });
    // Incremental cycle: add two external tables, one partitioned by a timestamp column.
    Tuple tuple = primary.run("use " + primaryDbName)
            .run("create external table ext_table1 (id int)")
            .run("insert into ext_table1 values (3)")
            .run("insert into ext_table1 values (4)")
            .run("create external table  ext_table2 (key int, value int) partitioned by (load_time timestamp)")
            .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.123') values(1,2)")
            .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.124') values(1,3)")
            .run("show partitions ext_table2")
            .verifyResults(new String[] { "load_time=2012-02-21 07%3A08%3A09.123", "load_time=2012-02-21 07%3A08%3A09.124" })
            .dump(primaryDbName, clause);
    ReplicationTestUtils.assertExternalFileList(Arrays.asList("ext_table1", "ext_table2"), tuple.dumpLocation, primary);
    // SecurityException (wrapping an UnknownHostException for the remote
    // nameservice) is expected from DirCopyTask.
    try {
        replica.load(replicatedDbName, primaryDbName, clause);
        // Fixed: the original message named UnknownHostException, but the catch
        // clause below only accepts SecurityException.
        Assert.fail("Expected the SecurityException to be thrown.");
    } catch (SecurityException ex) {
        assertTrue(ex.getMessage().contains("java.net.UnknownHostException: nsRemote"));
    }
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 15 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

The class TestReplicationScenariosAcrossInstances defines the method testMultipleStagesOfReplicationLoadTaskWithPartitionBatching.

@Test
public void testMultipleStagesOfReplicationLoadTaskWithPartitionBatching() throws Throwable {
    // Bootstrap dump of a db with two flat tables and one table partitioned three ways.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
            .run("create table t1 (id int)")
            .run("insert into t1 values (1), (2)")
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("insert into table t2 partition(country='us') values ('austin')")
            .run("insert into table t2 partition(country='france') values ('paris')")
            .run("create table t3 (rank int)")
            .dump(primaryDbName);
    // A single table creation already needs more than one task, so capping the
    // approximate task count at 1 forces the load over multiple runs; batching
    // partitions one at a time exercises partition-level checkpointing as well.
    List<String> withClause = new ArrayList<>();
    withClause.add("'" + HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS.varname + "'='1'");
    withClause.add("'" + HiveConf.ConfVars.REPL_LOAD_PARTITIONS_WITH_DATA_COPY_BATCH_SIZE.varname + "'='1'");
    replica.load(replicatedDbName, primaryDbName, withClause)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2", "t3" })
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("select country from t2 order by country")
            .verifyResults(new String[] { "france", "india", "us" });
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

Tuple (org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple)21 Test (org.junit.Test)21 Path (org.apache.hadoop.fs.Path)5 BehaviourInjection (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection)4 CallerArguments (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments)4 ArrayList (java.util.ArrayList)3 Nullable (javax.annotation.Nullable)2 List (java.util.List)1 Partition (org.apache.hadoop.hive.metastore.api.Partition)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException)1