Search in sources :

Example 6 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testHdfsNSLazyCopyBootStrapExtTbls.

@Test
public void testHdfsNSLazyCopyBootStrapExtTbls() throws Throwable {
    // Dump config: the HDFS nameservice clause plus full data copy for external
    // tables (metadata-only external-table dump explicitly disabled).
    List<String> dumpClause = getHdfsNameserviceClause();
    dumpClause.add("'" + HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE.varname + "'='false'");
    // Create two external tables on the primary (one partitioned by timestamp),
    // verify the partition listing, then take a bootstrap dump.
    Tuple bootstrapDump = primary.run("use " + primaryDbName)
            .run("create external table ext_table1 (id int)")
            .run("insert into ext_table1 values (3)")
            .run("insert into ext_table1 values (4)")
            .run("create external table  ext_table2 (key int, value int) partitioned by (load_time timestamp)")
            .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.123') values(1,2)")
            .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.124') values(1,3)")
            .run("show partitions ext_table2")
            .verifyResults(new String[] { "load_time=2012-02-21 07%3A08%3A09.123", "load_time=2012-02-21 07%3A08%3A09.124" })
            .dump(primaryDbName, dumpClause);
    // Both external tables must appear in the dump's external file list.
    ReplicationTestUtils.assertExternalFileList(Arrays.asList("ext_table1", "ext_table2"), bootstrapDump.dumpLocation, primary);
    // Load is expected to fail in DirCopyTask: the remote nameservice host
    // ("nsRemote") cannot be resolved, surfacing as a SecurityException.
    try {
        replica.load(replicatedDbName, primaryDbName, dumpClause);
        Assert.fail("Expected the UnknownHostException to be thrown.");
    } catch (SecurityException ex) {
        assertTrue(ex.getMessage().contains("java.net.UnknownHostException: nsRemote"));
    }
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 7 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testBootstrapReplLoadRetryAfterFailureForPartitions.

@Test
public void testBootstrapReplLoadRetryAfterFailureForPartitions() throws Throwable {
    // Bootstrap dump: one partitioned table t2 (partitions india/uk/us) plus a
    // permanent function, so the retried load below has to resume both the
    // remaining partitions and the function creation.
    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName).run("create table t2 (place string) partitioned by (country string)").run("insert into table t2 partition(country='india') values ('bangalore')").run("insert into table t2 partition(country='uk') values ('london')").run("insert into table t2 partition(country='us') values ('sfo')").run("CREATE FUNCTION " + primaryDbName + ".testFunctionOne as 'hivemall.tools.string.StopwordUDF' " + "using jar  'ivy://io.github.myui:hivemall:0.4.0-2'").dump(primaryDbName);
    // Inject a behavior where REPL LOAD failed when try to load table "t2" and partition "uk".
    // So, table "t2" will exist and partition "india" will exist, rest failed as operation failed.
    BehaviourInjection<List<Partition>, Boolean> addPartitionStub = new BehaviourInjection<List<Partition>, Boolean>() {

        // Returning false vetoes the add-partitions call; the first batch that
        // carries the "uk" partition value triggers the injected failure.
        @Override
        public Boolean apply(List<Partition> ptns) {
            for (Partition ptn : ptns) {
                if (ptn.getValues().get(0).equals("uk")) {
                    injectionPathCalled = true;
                    LOG.warn("####getPartition Stub called");
                    return false;
                }
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setAddPartitionsBehaviour(addPartitionStub);
    // Make sure that there's some order in which the objects are loaded; batch
    // size 1 keeps each partition in its own add-partitions call so the "uk"
    // failure lands after "india" has already been added.
    List<String> withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'", "'hive.in.repl.test.files.sorted'='true'", "'" + HiveConf.ConfVars.REPL_LOAD_PARTITIONS_WITH_DATA_COPY_BATCH_SIZE + "' = '1'");
    replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
    // reset the behaviour
    InjectableBehaviourObjectStore.resetAddPartitionModifier();
    // Expect the injection to have fired (true) with no unexpected extra calls (false).
    addPartitionStub.assertInjectionsPerformed(true, false);
    // After the failed load: repl status is still "null", t2 exists but only the
    // "india" partition made it in.
    replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult("null").run("show tables").verifyResults(new String[] { "t2" }).run("select country from t2 order by country").verifyResults(Collections.singletonList("india"));
    // Verify if no create table calls. Add partitions and create function calls expected.
    BehaviourInjection<CallerArguments, Boolean> callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Nullable
        @Override
        public Boolean apply(@Nullable CallerArguments args) {
            // Any table-level call (tblName != null) or a call against a
            // different DB means the resume did more than it should.
            if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.tblName != null)) {
                injectionPathCalled = true;
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Table: " + String.valueOf(args.tblName));
                return false;
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
    try {
        // Retry with same dump with which it was already loaded should resume the bootstrap load.
        // This time, it completes by adding remaining partitions and function.
        replica.load(replicatedDbName, primaryDbName);
        callerVerifier.assertInjectionsPerformed(false, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Resumed load must land everything: final repl id, all three partitions and the function.
    replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult(tuple.lastReplicationId).run("show tables").verifyResults(new String[] { "t2" }).run("select country from t2 order by country").verifyResults(Arrays.asList("india", "uk", "us")).run("show functions like '" + replicatedDbName + "%'").verifyResult(replicatedDbName + ".testFunctionOne");
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) List(java.util.List) ArrayList(java.util.ArrayList) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 8 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testBootStrapDumpOfWarehouse.

@Test
public void testBootStrapDumpOfWarehouse() throws Throwable {
    // Start from a clean slate: delete every existing dump under the repl base dir.
    Path dumpRoot = new Path(primary.hiveConf.get(MetastoreConf.ConfVars.REPLDIR.getHiveName()), "*");
    dumpRoot.getFileSystem(primary.hiveConf).delete(dumpRoot, true);
    // Two extra databases with random alphabetic suffixes, both marked as replication sources.
    String suffixOne = RandomStringUtils.random(10, true, false);
    String suffixTwo = RandomStringUtils.random(10, true, false);
    String dbOne = primaryDbName + suffixOne;
    String dbTwo = primaryDbName + suffixTwo;
    primary.run("alter database default set dbproperties ('repl.source.for' = '1, 2, 3')");
    // Metadata-only dump of the whole warehouse (`*`).
    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName)
            .run("create table t1 (i int, j int)")
            .run("create database " + dbOne + " WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
            .run("use " + dbOne)
            .run("create table t1 (i int, j int) partitioned by (load_date date) " + "clustered by(i) into 2 buckets stored as orc tblproperties ('transactional'='true') ")
            .run("create database " + dbTwo + " WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
            .run("use " + dbTwo)
            .run("create table t1 (i int, j int)")
            .dump("`*`", Collections.singletonList("'hive.repl.dump.metadata.only'='true'"));
    /*
      Due to the limitation that we can only have one instance of Persistence Manager Factory in a JVM
      we are not able to create multiple embedded derby instances for two different MetaStore instances,
      so the source databases are dropped before attempting the load.
    */
    primary.run("drop database " + primaryDbName + " cascade");
    primary.run("drop database " + dbOne + " cascade");
    primary.run("drop database " + dbTwo + " cascade");
    // Reset ckpt and last repl ID keys to empty so a bootstrap load would be allowed.
    replica.run("show databases")
            .verifyFailure(new String[] { primaryDbName, dbOne, dbTwo })
            .run("alter database default set dbproperties ('hive.repl.ckpt.key'='', 'repl.last.id'='')");
    // Loading with an empty target db name must be rejected by the metastore.
    try {
        replica.load("", "`*`");
        Assert.fail();
    } catch (HiveException e) {
        assertEquals("MetaException(message:Database name cannot be null.)", e.getMessage());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 9 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testParallelExecutionOfReplicationBootStrapLoad.

@Test
public void testParallelExecutionOfReplicationBootStrapLoad() throws Throwable {
    // Bootstrap dump of three tables; t2 carries eight partitions so the
    // parallel executor has multiple independent load tasks to schedule.
    WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
            .run("create table t1 (id int)")
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("insert into table t2 partition(country='australia') values ('sydney')")
            .run("insert into table t2 partition(country='russia') values ('moscow')")
            .run("insert into table t2 partition(country='uk') values ('london')")
            .run("insert into table t2 partition(country='us') values ('sfo')")
            .run("insert into table t2 partition(country='france') values ('paris')")
            .run("insert into table t2 partition(country='japan') values ('tokyo')")
            .run("insert into table t2 partition(country='china') values ('hkg')")
            .run("create table t3 (rank int)")
            .dump(primaryDbName);
    // Load with parallel task execution enabled, then verify every table and partition arrived.
    replica.hiveConf.setBoolVar(HiveConf.ConfVars.EXECPARALLEL, true);
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2", "t3" })
            .run("select country from t2")
            .verifyResults(Arrays.asList("india", "australia", "russia", "uk", "us", "france", "japan", "china"));
    // Restore the default so subsequent tests are unaffected.
    replica.hiveConf.setBoolVar(HiveConf.ConfVars.EXECPARALLEL, false);
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 10 with Tuple

use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testBootstrapLoadRetryAfterFailureForAlterTable.

// This requires the tables are loaded in a fixed sorted order.
@Test
public void testBootstrapLoadRetryAfterFailureForAlterTable() throws Throwable {
    // Bootstrap dump: unpartitioned table t1 plus partitioned table t2 (china/india).
    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName).run("create table t1 (place string)").run("insert into table t1 values ('testCheck')").run("create table t2 (place string) partitioned by (country string)").run("insert into table t2 partition(country='china') values ('shenzhen')").run("insert into table t2 partition(country='india') values ('banaglore')").dump(primaryDbName);
    // Fail setting the ckpt directory property for table t1 only; t2's alter goes through.
    BehaviourInjection<CallerArguments, Boolean> callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Nullable
        @Override
        public Boolean apply(@Nullable CallerArguments args) {
            // Veto (return false) only the alter-table call for t1 in the replica DB.
            if (args.tblName.equalsIgnoreCase("t1") && args.dbName.equalsIgnoreCase(replicatedDbName)) {
                injectionPathCalled = true;
                LOG.warn("Verifier - DB : " + args.dbName + " TABLE : " + args.tblName);
                return false;
            }
            return true;
        }
    };
    // Fail repl load before the ckpt property is set for t1 and after it is set for t2. So in the next run, for
    // t2 it goes directly to partition load with no task for table tracker and for t1 it loads the table
    // again from start.
    InjectableBehaviourObjectStore.setAlterTableModifier(callerVerifier);
    try {
        replica.loadFailure(replicatedDbName, primaryDbName);
        // Injection must have fired (true) with no unexpected extra calls (false).
        callerVerifier.assertInjectionsPerformed(true, false);
    } finally {
        InjectableBehaviourObjectStore.resetAlterTableModifier();
    }
    // Retry with same dump with which it was already loaded should resume the bootstrap load. Make sure that table t1,
    // is loaded before t2. So that scope is set to table in first iteration for table t1. In the next iteration, it
    // loads only remaining partitions of t2, so that the table tracker has no tasks.
    // First clear the non-recoverable marker left by the injected failure, otherwise the retry is refused.
    Path baseDumpDir = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR));
    Path nonRecoverablePath = getNonRecoverablePath(baseDumpDir, primaryDbName);
    if (nonRecoverablePath != null) {
        baseDumpDir.getFileSystem(primary.hiveConf).delete(nonRecoverablePath, true);
    }
    List<String> withConfigs = Arrays.asList("'hive.in.repl.test.files.sorted'='true'");
    replica.load(replicatedDbName, primaryDbName, withConfigs);
    // Resumed load must complete: final repl id is set and both t2 partitions are present.
    replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult(tuple.lastReplicationId).run("select country from t2 order by country").verifyResults(Arrays.asList("china", "india"));
}
Also used : Path(org.apache.hadoop.fs.Path) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Aggregations

Tuple (org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple)21 Test (org.junit.Test)21 Path (org.apache.hadoop.fs.Path)5 BehaviourInjection (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection)4 CallerArguments (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments)4 ArrayList (java.util.ArrayList)3 Nullable (javax.annotation.Nullable)2 List (java.util.List)1 Partition (org.apache.hadoop.hive.metastore.api.Partition)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException)1