Search in sources:

Example 16 with Tuple

Use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testMoveOptimizationIncrementalFailureAfterCopyReplace:

@Test
public void testMoveOptimizationIncrementalFailureAfterCopyReplace() throws Throwable {
    // Second replica DB used to replay the same dump through the CM (change-management) path.
    String cmReplicaDb = replicatedDbName + "_CM";
    // Bootstrap dump: two partitioned tables, one of them populated.
    WarehouseInstance.Tuple bootstrapDump = primary
            .run("use " + primaryDbName)
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("create table t1 (place string) partitioned by (country string)")
            .dump(primaryDbName);
    replica.load(replicatedDbName, primaryDbName);
    // Delete the load acknowledgement so the same dump can be reused for the CM database.
    Path loadAck = new Path(bootstrapDump.dumpLocation + Path.SEPARATOR + ReplUtils.REPL_HIVE_BASE_DIR
            + Path.SEPARATOR + LOAD_ACKNOWLEDGEMENT.toString());
    new Path(bootstrapDump.dumpLocation).getFileSystem(conf).delete(loadAck, true);
    replica.load(cmReplicaDb, primaryDbName);
    // Mark both replica databases as replication targets.
    replica.run("alter database " + replicatedDbName + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION
                    + "' = '1,2,3')")
            .run("alter database " + cmReplicaDb + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION
                    + "' = '1,2,3')");
    // Incremental dump carrying an insert-overwrite, then exercise move optimization on it.
    WarehouseInstance.Tuple incrementalDump = primary
            .run("use " + primaryDbName)
            .run("insert overwrite table t1 select * from t2")
            .dump(primaryDbName, Collections.emptyList());
    testMoveOptimization(primaryDbName, replicatedDbName, cmReplicaDb, "t1", "ADD_PARTITION", incrementalDump);
}
Also used : Path(org.apache.hadoop.fs.Path) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 17 with Tuple

Use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testBootstrapReplLoadRetryAfterFailureForFunctions:

@Test
public void testBootstrapReplLoadRetryAfterFailureForFunctions() throws Throwable {
    String firstFunc = "f1";
    String secondFunc = "f2";
    // Bootstrap dump containing two UDFs resolved from ivy jar URIs.
    WarehouseInstance.Tuple bootstrapDump = primary
            .run("use " + primaryDbName)
            .run("CREATE FUNCTION " + primaryDbName + "." + firstFunc
                    + " as 'hivemall.tools.string.StopwordUDF' "
                    + "using jar  'ivy://io.github.myui:hivemall:0.4.0-2'")
            .run("CREATE FUNCTION " + primaryDbName + "." + secondFunc
                    + " as 'hivemall.tools.string.SplitWordsUDF' "
                    + "using jar  'ivy://io.github.myui:hivemall:0.4.0-1'")
            .dump(primaryDbName);
    // Permit create-function only for f1 so the load fails while creating f2.
    BehaviourInjection<CallerArguments, Boolean> verifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            if (!args.dbName.equalsIgnoreCase(replicatedDbName)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName));
                return false;
            }
            if (args.funcName != null) {
                LOG.debug("Verifier - Function: " + String.valueOf(args.funcName));
                return args.funcName.equals(firstFunc);
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(verifier);
    // One load task per cycle plus sorted file listing makes the failure point deterministic.
    List<String> withConfigs =
            Arrays.asList("'hive.repl.approx.max.load.tasks'='1'", "'hive.in.repl.test.files.sorted'='true'");
    try {
        // First load attempt creates only f1 and then fails.
        replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
        verifier.assertInjectionsPerformed(true, false);
    } finally {
        // Reset the injected behaviour.
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Bootstrap is incomplete: repl status is still null and only f1 is visible.
    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult("null")
            .run("show functions like '" + replicatedDbName + "%'")
            .verifyResult(replicatedDbName + "." + firstFunc);
    // On retry, f1 must be skipped; only f2 is allowed through.
    verifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            if (!args.dbName.equalsIgnoreCase(replicatedDbName)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName));
                return false;
            }
            if (args.funcName != null) {
                LOG.debug("Verifier - Function: " + String.valueOf(args.funcName));
                return args.funcName.equals(secondFunc);
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(verifier);
    try {
        // Retrying with the same dump resumes the bootstrap load and completes it by adding f2.
        replica.load(replicatedDbName, primaryDbName);
        verifier.assertInjectionsPerformed(true, false);
    } finally {
        // Reset the injected behaviour.
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Both functions are now present and repl status reflects the dump's last replication id.
    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("show functions like '" + replicatedDbName + "%'")
            .verifyResults(new String[] {
                    replicatedDbName + "." + firstFunc, replicatedDbName + "." + secondFunc });
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) Test(org.junit.Test)

Example 18 with Tuple

Use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testReplWithRetryDisabledIterators:

@Test
public void testReplWithRetryDisabledIterators() throws Throwable {
    List<String> dumpClauses = new ArrayList<>();
    // NS replacement parameters have no effect when data is also copied to staging.
    dumpClauses.add("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET + "'='false'");
    dumpClauses.add("'" + HiveConf.ConfVars.REPL_COPY_FILE_LIST_ITERATOR_RETRY + "'='false'");
    // Bootstrap dump: one ACID table and one plain managed table with two rows.
    WarehouseInstance.Tuple dump = primary
            .run("use " + primaryDbName)
            .run("create table  acid_table (key int, value int) partitioned by (load_date date) "
                    + "clustered by(key) into 2 buckets stored as orc tblproperties ('transactional'='true')")
            .run("create table table1 (i String)")
            .run("insert into table1 values (1)")
            .run("insert into table1 values (2)")
            .dump(primaryDbName, dumpClauses);
    ReplicationTestUtils.assertFalseExternalFileList(primary, dump.dumpLocation);
    replica.load(replicatedDbName, primaryDbName, dumpClauses)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "acid_table", "table1" })
            .run("select * from table1")
            .verifyResults(new String[] { "1", "2" });
    // Incremental dump with one more row.
    dump = primary
            .run("use " + primaryDbName)
            .run("insert into table1 values (3)")
            .dump(primaryDbName, dumpClauses);
    ReplicationTestUtils.assertFalseExternalFileList(primary, dump.dumpLocation);
    replica.load(replicatedDbName, primaryDbName, dumpClauses)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "acid_table", "table1" })
            .run("select * from table1")
            .verifyResults(new String[] { "1", "2", "3" });
    // Now include external-table data in the dump and add two external tables.
    dumpClauses.add("'" + HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE.varname + "'='false'");
    dump = primary
            .run("use " + primaryDbName)
            .run("create external table ext_table1 (id int)")
            .run("insert into ext_table1 values (3)")
            .run("insert into ext_table1 values (4)")
            .run("create external table  ext_table2 (key int, value int) partitioned by (load_time timestamp)")
            .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.123') values(1,2)")
            .run("insert into ext_table2 partition(load_time = '2012-02-21 07:08:09.124') values(1,3)")
            .run("show partitions ext_table2")
            .verifyResults(new String[] {
                    "load_time=2012-02-21 07%3A08%3A09.123", "load_time=2012-02-21 07%3A08%3A09.124" })
            .dump(primaryDbName, dumpClauses);
    ReplicationTestUtils.assertExternalFileList(Arrays.asList("ext_table1", "ext_table2"), dump.dumpLocation, primary);
    replica.load(replicatedDbName, primaryDbName, dumpClauses)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "acid_table", "table1", "ext_table1", "ext_table2" })
            .run("select * from ext_table1")
            .verifyResults(new String[] { "3", "4" })
            .run("select value from ext_table2")
            .verifyResults(new String[] { "2", "3" });
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 19 with Tuple

Use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testMoveOptimizationBootstrapReplLoadRetryAfterFailure:

@Test
public void testMoveOptimizationBootstrapReplLoadRetryAfterFailure() throws Throwable {
    // Second replica DB name used for the change-management load path.
    String cmReplicaDb = replicatedDbName + "_CM";
    // Bootstrap dump with a single partitioned table containing one partition.
    WarehouseInstance.Tuple bootstrapDump = primary
            .run("use " + primaryDbName)
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .dump(primaryDbName);
    testMoveOptimization(primaryDbName, replicatedDbName, cmReplicaDb, "t2", "ADD_PARTITION", bootstrapDump);
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) Test(org.junit.Test)

Example 20 with Tuple

Use of org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple in project hive by apache.

From the class TestReplicationScenariosAcrossInstances, the method testBootstrapReplLoadRetryAfterFailureForTablesAndConstraints:

/**
 * Verifies that a failed bootstrap load can be retried with the same dump and resumes
 * idempotently, in three phases:
 * 1) first attempt creates only table t1 (injected failure blocks t2/t3 and all constraints);
 * 2) second attempt creates t2/t3 and all constraints except the foreign key (injected failure);
 * 3) final attempt adds only the remaining foreign key constraints on t2 and completes the load.
 */
@Test
public void testBootstrapReplLoadRetryAfterFailureForTablesAndConstraints() throws Throwable {
    // Bootstrap dump: t1 with a composite primary key, t2 with a foreign key referencing t1,
    // t3 with a not-null column and a unique constraint.
    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName).run("create table t1(a string, b string, primary key (a, b) disable novalidate rely)").run("create table t2(a string, b string, foreign key (a, b) references t1(a, b) disable novalidate)").run("create table t3(a string, b string not null disable, unique (a) disable)").dump(primaryDbName);
    // Need to drop the primary DB as metastore is shared by both primary/replica. So, constraints
    // conflict when loaded. Some issue with framework which needs to be relook into later.
    primary.run("drop database if exists " + primaryDbName + " cascade");
    // Allow create table only on t1. Create should fail for rest of the tables and hence constraints
    // also not loaded.
    BehaviourInjection<CallerArguments, Boolean> callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            // No constraint creation is expected at all during the first attempt.
            if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.constraintTblName != null)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Constraint Table: " + String.valueOf(args.constraintTblName));
                return false;
            }
            if (args.tblName != null) {
                LOG.warn("Verifier - Table: " + String.valueOf(args.tblName));
                return args.tblName.equals("t1");
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
    // Trigger bootstrap dump which just creates table t1 and other tables (t2, t3) and constraints not loaded.
    // Single load task per cycle makes the injected failure point deterministic.
    List<String> withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'");
    try {
        replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
        callerVerifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Phase 1 result: bootstrap incomplete (repl status null) and no constraints loaded.
    replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult("null");
    assertEquals(0, replica.getPrimaryKeyList(replicatedDbName, "t1").size());
    assertEquals(0, replica.getUniqueConstraintList(replicatedDbName, "t3").size());
    assertEquals(0, replica.getNotNullConstraintList(replicatedDbName, "t3").size());
    assertEquals(0, replica.getForeignKeyList(replicatedDbName, "t2").size());
    // Verify if create table is not called on table t1 but called for t2 and t3.
    // Also, allow constraint creation only on t1 and t3. Foreign key creation on t2 fails.
    callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            // No function creation is expected in this scenario.
            if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.funcName != null)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Func: " + String.valueOf(args.funcName));
                return false;
            }
            if (args.constraintTblName != null) {
                LOG.warn("Verifier - Constraint Table: " + String.valueOf(args.constraintTblName));
                return (args.constraintTblName.equals("t1") || args.constraintTblName.equals("t3"));
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
    try {
        // Retry with same dump with which it was already loaded should resume the bootstrap load.
        // This time, it fails when try to load the foreign key constraints. All other constraints are loaded.
        replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
        callerVerifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Phase 2 result: all three tables exist; PK (2 columns), unique and not-null constraints
    // are loaded; the foreign key on t2 is still missing; repl status still null.
    replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult("null").run("show tables").verifyResults(new String[] { "t1", "t2", "t3" });
    assertEquals(2, replica.getPrimaryKeyList(replicatedDbName, "t1").size());
    assertEquals(1, replica.getUniqueConstraintList(replicatedDbName, "t3").size());
    assertEquals(1, replica.getNotNullConstraintList(replicatedDbName, "t3").size());
    assertEquals(0, replica.getForeignKeyList(replicatedDbName, "t2").size());
    // Verify if no create table/function calls. Only add foreign key constraints on table t2.
    callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            // No table creation is expected in the final attempt.
            if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.tblName != null)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName) + " Table: " + String.valueOf(args.tblName));
                return false;
            }
            if (args.constraintTblName != null) {
                LOG.warn("Verifier - Constraint Table: " + String.valueOf(args.constraintTblName));
                return args.constraintTblName.equals("t2");
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
    try {
        // Retry with same dump with which it was already loaded should resume the bootstrap load.
        // This time, it completes by adding just foreign key constraints for table t2.
        replica.load(replicatedDbName, primaryDbName);
        callerVerifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }
    // Final result: bootstrap complete (repl status matches the dump's last replication id),
    // all tables present, and the two-column foreign key on t2 is now loaded.
    replica.run("use " + replicatedDbName).run("repl status " + replicatedDbName).verifyResult(tuple.lastReplicationId).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" });
    assertEquals(2, replica.getPrimaryKeyList(replicatedDbName, "t1").size());
    assertEquals(1, replica.getUniqueConstraintList(replicatedDbName, "t3").size());
    assertEquals(1, replica.getNotNullConstraintList(replicatedDbName, "t3").size());
    assertEquals(2, replica.getForeignKeyList(replicatedDbName, "t2").size());
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) Test(org.junit.Test)

Aggregations

Tuple (org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple)21 Test (org.junit.Test)21 Path (org.apache.hadoop.fs.Path)5 BehaviourInjection (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection)4 CallerArguments (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments)4 ArrayList (java.util.ArrayList)3 Nullable (javax.annotation.Nullable)2 List (java.util.List)1 Partition (org.apache.hadoop.hive.metastore.api.Partition)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException)1