Search in sources :

Example 1 with CallerArguments

use of org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments in project hive by apache.

the class TestReplicationScenariosExternalTables method retryBootstrapExternalTablesFromDifferentDump.

/**
 * Verifies that a bootstrap load of external tables which fails part-way can be retried
 * successfully, and that a subsequent bootstrap dump of external tables also loads cleanly.
 *
 * <p>Flow:
 * <ol>
 *   <li>Dump and load once; only the managed table t3 is expected on the replica.</li>
 *   <li>Dump again with a different external-table clause and force the load to fail by
 *       rejecting the alter-table call for t4 on the replica database.</li>
 *   <li>Retry the load and check t2, t4 and t5 are present while t1 is not.</li>
 *   <li>Drop t2, re-create it as a managed table, dump and load again, and re-verify.</li>
 * </ol>
 */
@Test
public void retryBootstrapExternalTablesFromDifferentDump() throws Throwable {
    final List<String> loadWithClause = ReplicationTestUtils.includeExternalTableClause(true);
    final List<String> firstDumpClause = ReplicationTestUtils.includeExternalTableClause(false);

    // Step 1: first bootstrap. After the load only t3 is expected to be listed on the replica.
    final WarehouseInstance.Tuple firstBootstrap = primary
            .run("use " + primaryDbName)
            .run("create external table t1 (id int)")
            .run("insert into table t1 values (1)")
            .run("create external table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("insert into table t2 partition(country='us') values ('austin')")
            .run("create table t3 as select * from t1")
            .dump(primaryDbName, firstDumpClause);
    replica.load(replicatedDbName, primaryDbName, loadWithClause)
            .status(replicatedDbName)
            .verifyResult(firstBootstrap.lastReplicationId)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResult("t3")
            .run("select id from t3")
            .verifyResult("1")
            .verifyReplTargetProperty(replicatedDbName);

    // Step 2: change the source (drop t1, add t4 and t5) and take a second dump.
    final List<String> secondDumpClause =
            ReplicationTestUtils.externalTableWithClause(new ArrayList<>(), true, true);
    primary.run("use " + primaryDbName)
            .run("drop table t1")
            .run("create external table t4 (id int)")
            .run("insert into table t4 values (10)")
            .run("create table t5 as select * from t4")
            .dump(primaryDbName, secondDumpClause);

    // Rejects the alter-table call for t4 in the replica DB (the ckpt property update),
    // while letting every other table (e.g. t2) through.
    final BehaviourInjection<CallerArguments, Boolean> failAlterOnT4 =
            new BehaviourInjection<CallerArguments, Boolean>() {

        @Nullable
        @Override
        public Boolean apply(@Nullable CallerArguments args) {
            final boolean isT4OnReplica = args.tblName.equalsIgnoreCase("t4")
                    && args.dbName.equalsIgnoreCase(replicatedDbName);
            if (!isT4OnReplica) {
                return true;
            }
            injectionPathCalled = true;
            LOG.warn("Verifier - DB : " + args.dbName + " TABLE : " + args.tblName);
            return false;
        }
    };

    // The load must fail before the ckpt property is set for t4 and after it is set for t2.
    // On retry the half-baked tables should be dropped and the bootstrap should succeed.
    InjectableBehaviourObjectStore.setAlterTableModifier(failAlterOnT4);
    try {
        replica.loadFailure(replicatedDbName, primaryDbName, loadWithClause);
        failAlterOnT4.assertInjectionsPerformed(true, false);
    } finally {
        InjectableBehaviourObjectStore.resetAlterTableModifier();
    }

    // Remove the non-recoverable path (if one was found under the dump root) before retrying.
    final Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR));
    final Path nonRecoverable =
            TestReplicationScenarios.getNonRecoverablePath(dumpRoot, primaryDbName, primary.hiveConf);
    if (nonRecoverable != null) {
        dumpRoot.getFileSystem(primary.hiveConf).delete(nonRecoverable, true);
    }

    // Step 3: the retry succeeds; t1 is gone, t2/t4/t5 carry the expected data.
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("show tables like 't1'")
            .verifyFailure(new String[] { "t1" })
            .run("show tables like 't2'")
            .verifyResult("t2")
            .run("select country from t2 order by country")
            .verifyResults(new String[] { "india", "us" })
            .run("select id from t4")
            .verifyResults(Arrays.asList("10"))
            .run("select id from t5")
            .verifyResult("10")
            .verifyReplTargetProperty(replicatedDbName);

    // Step 4: insert into the existing external table t2, drop it, add a managed table with
    // the same name, and take another bootstrap dump for external tables.
    final List<String> thirdDumpClause = ReplicationTestUtils.includeExternalTableClause(true);
    primary.run("use " + primaryDbName)
            .run("insert into table t2 partition(country='india') values ('chennai')")
            .run("drop table t2")
            .run("create table t2 as select * from t4")
            .run("insert into table t4 values (20)")
            .dump(primaryDbName, thirdDumpClause);
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("show tables like 't1'")
            .verifyFailure(new String[] { "t1" })
            .run("select id from t2")
            .verifyResult("10")
            .run("select id from t4")
            .verifyResults(Arrays.asList("10", "20"))
            .run("select id from t5")
            .verifyResult("10")
            .verifyReplTargetProperty(replicatedDbName);
}
Also used : Path(org.apache.hadoop.fs.Path) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) ArrayList(java.util.ArrayList) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 2 with CallerArguments

use of org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments in project hive by apache.

the class TestReplicationScenariosAcrossInstances method testBootstrapReplLoadRetryAfterFailureForPartitions.

/**
 * Verifies that a bootstrap REPL LOAD which fails while adding partitions can be resumed
 * from the same dump, and that the retry does not issue any table-level calls.
 *
 * <p>The first load is forced to fail when the batch containing partition "uk" of t2 is
 * added. The retry is monitored by a caller verifier that rejects any call carrying a
 * table name or targeting a different database.
 */
@Test
public void testBootstrapReplLoadRetryAfterFailureForPartitions() throws Throwable {
    WarehouseInstance.Tuple tuple = primary
            .run("use " + primaryDbName)
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='india') values ('bangalore')")
            .run("insert into table t2 partition(country='uk') values ('london')")
            .run("insert into table t2 partition(country='us') values ('sfo')")
            .run("CREATE FUNCTION " + primaryDbName + ".testFunctionOne as 'hivemall.tools.string.StopwordUDF' " + "using jar  'ivy://io.github.myui:hivemall:0.4.0-2'")
            .dump(primaryDbName);

    // Inject a behavior where REPL LOAD fails when it tries to load table "t2", partition "uk".
    // So, table "t2" will exist and partition "india" will exist; the rest is missing because
    // the operation failed.
    BehaviourInjection<List<Partition>, Boolean> addPartitionStub = new BehaviourInjection<List<Partition>, Boolean>() {

        @Override
        public Boolean apply(List<Partition> ptns) {
            for (Partition ptn : ptns) {
                // Constant-first comparison so an unexpected null value cannot throw here.
                if ("uk".equals(ptn.getValues().get(0))) {
                    injectionPathCalled = true;
                    LOG.warn("####getPartition Stub called");
                    return false;
                }
            }
            return true;
        }
    };

    // Make sure that there's some order in which the objects are loaded.
    List<String> withConfigs = Arrays.asList(
            "'hive.repl.approx.max.load.tasks'='1'",
            "'hive.in.repl.test.files.sorted'='true'",
            "'" + HiveConf.ConfVars.REPL_LOAD_PARTITIONS_WITH_DATA_COPY_BATCH_SIZE + "' = '1'");

    InjectableBehaviourObjectStore.setAddPartitionsBehaviour(addPartitionStub);
    try {
        replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
    } finally {
        // Always reset the behaviour, even when the expected failure does not occur; otherwise
        // the stub would stay installed and affect whatever runs next.
        InjectableBehaviourObjectStore.resetAddPartitionModifier();
    }
    addPartitionStub.assertInjectionsPerformed(true, false);

    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult("null")
            .run("show tables")
            .verifyResults(new String[] { "t2" })
            .run("select country from t2 order by country")
            .verifyResults(Collections.singletonList("india"));

    // Verify there are no create table calls. Add partitions and create function calls are expected.
    BehaviourInjection<CallerArguments, Boolean> callerVerifier = new BehaviourInjection<CallerArguments, Boolean>() {

        @Nullable
        @Override
        public Boolean apply(@Nullable CallerArguments args) {
            if (!args.dbName.equalsIgnoreCase(replicatedDbName) || (args.tblName != null)) {
                injectionPathCalled = true;
                LOG.warn("Verifier - DB: " + args.dbName + " Table: " + args.tblName);
                return false;
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
    try {
        // Retry with same dump with which it was already loaded should resume the bootstrap load.
        // This time, it completes by adding remaining partitions and function.
        replica.load(replicatedDbName, primaryDbName);
        callerVerifier.assertInjectionsPerformed(false, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }

    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(tuple.lastReplicationId)
            .run("show tables")
            .verifyResults(new String[] { "t2" })
            .run("select country from t2 order by country")
            .verifyResults(Arrays.asList("india", "uk", "us"))
            .run("show functions like '" + replicatedDbName + "%'")
            .verifyResult(replicatedDbName + ".testFunctionOne");
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) List(java.util.List) ArrayList(java.util.ArrayList) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 3 with CallerArguments

use of org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments in project hive by apache.

the class TestReplicationScenariosAcidTables method testAcidTablesBootstrapWithConcurrentWrites.

/**
 * Verifies bootstrap replication of an ACID table when another transaction writes to it
 * while the bootstrap dump is in progress.
 *
 * <p>The row written concurrently ("2") must not appear after the bootstrap load, and must
 * appear after the following incremental dump and load.
 */
@Test
public void testAcidTablesBootstrapWithConcurrentWrites() throws Throwable {
    HiveConf primaryConf = primary.getConf();
    primary.run("use " + primaryDbName)
            .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")")
            .run("insert into t1 values(1)");

    // Perform concurrent write on the acid table t1 when bootstrap dump in progress. Bootstrap
    // won't see the written data but the subsequent incremental repl should see it.
    BehaviourInjection<CallerArguments, Boolean> callerInjectedBehavior = new BehaviourInjection<CallerArguments, Boolean>() {

        @Nullable
        @Override
        public Boolean apply(@Nullable CallerArguments args) {
            if (injectionPathCalled) {
                // Every call after the first one only records that it happened.
                nonInjectedPathCalled = true;
            } else {
                // Insert another row to t1 from another txn when bootstrap dump in progress.
                injectionPathCalled = true;
                Thread t = new Thread(new Runnable() {

                    @Override
                    public void run() {
                        LOG.info("Entered new thread");
                        IDriver driver = DriverFactory.newDriver(primaryConf);
                        SessionState.start(new CliSessionState(primaryConf));
                        try {
                            driver.run("insert into " + primaryDbName + ".t1 values(2)");
                        } catch (CommandProcessorException e) {
                            throw new RuntimeException(e);
                        }
                        LOG.info("Exit new thread success");
                    }
                });
                t.start();
                LOG.info("Created new thread {}", t.getName());
                try {
                    // Block until the concurrent insert has finished so it is ordered
                    // relative to the rest of the dump.
                    t.join();
                } catch (InterruptedException e) {
                    // Restore the interrupt status before converting to an unchecked exception,
                    // so code further up the stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
            }
            return true;
        }
    };
    InjectableBehaviourObjectStore.setCallerVerifier(callerInjectedBehavior);
    WarehouseInstance.Tuple bootstrapDump = null;
    try {
        bootstrapDump = primary.dump(primaryDbName);
        // Both the injected path (first call) and the pass-through path (later calls) must have run.
        callerInjectedBehavior.assertInjectionsPerformed(true, true);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }

    // Bootstrap dump has taken snapshot before concurrent thread performed write. So, it won't see data "2".
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapDump.lastReplicationId)
            .run("select id from t1 order by id")
            .verifyResults(new String[] { "1" });

    // Incremental should include the concurrent write of data "2" from another txn.
    WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName);
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(incrementalDump.lastReplicationId)
            .run("select id from t1 order by id")
            .verifyResults(new String[] { "1", "2" });
}
Also used : CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) IDriver(org.apache.hadoop.hive.ql.IDriver) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 4 with CallerArguments

use of org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments in project hive by apache.

the class TestReplicationScenariosAcrossInstances method testBootstrapLoadRetryAfterFailureForAlterTable.

/**
 * Verifies that a bootstrap load which fails while altering table t1 can be resumed from
 * the same dump.
 *
 * <p>This requires the tables to be loaded in a fixed sorted order.
 */
@Test
public void testBootstrapLoadRetryAfterFailureForAlterTable() throws Throwable {
    final WarehouseInstance.Tuple bootstrapTuple = primary
            .run("use " + primaryDbName)
            .run("create table t1 (place string)")
            .run("insert into table t1 values ('testCheck')")
            .run("create table t2 (place string) partitioned by (country string)")
            .run("insert into table t2 partition(country='china') values ('shenzhen')")
            .run("insert into table t2 partition(country='india') values ('banaglore')")
            .dump(primaryDbName);

    // Rejects the alter-table call for t1 in the replica DB, i.e. fails setting the
    // ckpt directory property for table t1; every other table is let through.
    final BehaviourInjection<CallerArguments, Boolean> failAlterOnT1 =
            new BehaviourInjection<CallerArguments, Boolean>() {

        @Nullable
        @Override
        public Boolean apply(@Nullable CallerArguments args) {
            final boolean isT1OnReplica = args.tblName.equalsIgnoreCase("t1")
                    && args.dbName.equalsIgnoreCase(replicatedDbName);
            if (!isT1OnReplica) {
                return true;
            }
            injectionPathCalled = true;
            LOG.warn("Verifier - DB : " + args.dbName + " TABLE : " + args.tblName);
            return false;
        }
    };

    // Fail repl load before the ckpt property is set for t1 and after it is set for t2. So in
    // the next run, for t2 it goes directly to partition load with no task for the table
    // tracker, and for t1 it loads the table again from the start.
    InjectableBehaviourObjectStore.setAlterTableModifier(failAlterOnT1);
    try {
        replica.loadFailure(replicatedDbName, primaryDbName);
        failAlterOnT1.assertInjectionsPerformed(true, false);
    } finally {
        InjectableBehaviourObjectStore.resetAlterTableModifier();
    }

    // Retrying with the same dump should resume the bootstrap load. Make sure that table t1
    // is loaded before t2, so that scope is set to table in the first iteration for t1. In the
    // next iteration it loads only the remaining partitions of t2, so the table tracker has
    // no tasks.
    final Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR));
    final Path nonRecoverable = getNonRecoverablePath(dumpRoot, primaryDbName);
    if (nonRecoverable != null) {
        // Remove the non-recoverable path found under the dump root before retrying.
        dumpRoot.getFileSystem(primary.hiveConf).delete(nonRecoverable, true);
    }

    final List<String> sortedLoadConfigs = Arrays.asList("'hive.in.repl.test.files.sorted'='true'");
    replica.load(replicatedDbName, primaryDbName, sortedLoadConfigs);

    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(bootstrapTuple.lastReplicationId)
            .run("select country from t2 order by country")
            .verifyResults(Arrays.asList("china", "india"));
}
Also used : Path(org.apache.hadoop.fs.Path) Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 5 with CallerArguments

use of org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments in project hive by apache.

the class TestReplicationScenariosAcrossInstances method testBootstrapReplLoadRetryAfterFailureForFunctions.

/**
 * Verifies that a bootstrap REPL LOAD which fails while creating functions can be resumed
 * from the same dump.
 *
 * <p>The first load only permits function f1, so it fails on f2. The retry only permits
 * f2, which proves f1 is not loaded a second time, and must complete with both functions
 * available.
 */
@Test
public void testBootstrapReplLoadRetryAfterFailureForFunctions() throws Throwable {
    String funcName1 = "f1";
    String funcName2 = "f2";
    WarehouseInstance.Tuple tuple = primary
            .run("use " + primaryDbName)
            .run("CREATE FUNCTION " + primaryDbName + "." + funcName1 + " as 'hivemall.tools.string.StopwordUDF' " + "using jar  'ivy://io.github.myui:hivemall:0.4.0-2'")
            .run("CREATE FUNCTION " + primaryDbName + "." + funcName2 + " as 'hivemall.tools.string.SplitWordsUDF' " + "using jar  'ivy://io.github.myui:hivemall:0.4.0-1'")
            .dump(primaryDbName);

    // Allow create function only on f1. Create should fail for the second function.
    BehaviourInjection<CallerArguments, Boolean> callerVerifier = allowOnlyFunction(funcName1);
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);

    // Trigger bootstrap load which just creates function f1 but not f2
    List<String> withConfigs = Arrays.asList(
            "'hive.repl.approx.max.load.tasks'='1'",
            "'hive.in.repl.test.files.sorted'='true'");
    try {
        replica.loadFailure(replicatedDbName, primaryDbName, withConfigs);
        callerVerifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }

    // Verify that only f1 got loaded
    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult("null")
            .run("show functions like '" + replicatedDbName + "%'")
            .verifyResult(replicatedDbName + "." + funcName1);

    // On retry, allow only f2: any attempt to load f1 again would be rejected.
    callerVerifier = allowOnlyFunction(funcName2);
    InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier);
    try {
        // Retry with same dump with which it was already loaded should resume the bootstrap load.
        // This time, it completes by adding just the function f2
        replica.load(replicatedDbName, primaryDbName);
        callerVerifier.assertInjectionsPerformed(true, false);
    } finally {
        // reset the behaviour
        InjectableBehaviourObjectStore.resetCallerVerifier();
    }

    // Verify that both the functions are available.
    replica.run("use " + replicatedDbName)
            .run("repl status " + replicatedDbName)
            .verifyResult(tuple.lastReplicationId)
            .run("show functions like '" + replicatedDbName + "%'")
            .verifyResults(new String[] { replicatedDbName + "." + funcName1, replicatedDbName + "." + funcName2 });
}

/**
 * Builds a caller verifier that permits function calls only for one function name.
 *
 * <p>Every invocation marks the injection path as called. The verifier then returns
 * {@code false} when the call targets a database other than {@code replicatedDbName}
 * (compared ignoring case); otherwise, when a function name is present, it returns whether
 * that name equals {@code allowedFuncName}; otherwise it returns {@code true}.
 *
 * @param allowedFuncName the only function name for which the verifier returns {@code true}
 * @return a new verifier instance with its own injection-tracking state
 */
private BehaviourInjection<CallerArguments, Boolean> allowOnlyFunction(final String allowedFuncName) {
    return new BehaviourInjection<CallerArguments, Boolean>() {

        @Override
        public Boolean apply(CallerArguments args) {
            injectionPathCalled = true;
            if (!args.dbName.equalsIgnoreCase(replicatedDbName)) {
                LOG.warn("Verifier - DB: " + String.valueOf(args.dbName));
                return false;
            }
            if (args.funcName != null) {
                LOG.debug("Verifier - Function: " + String.valueOf(args.funcName));
                return args.funcName.equals(allowedFuncName);
            }
            return true;
        }
    };
}
Also used : Tuple(org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple) BehaviourInjection(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection) CallerArguments(org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments) Test(org.junit.Test)

Aggregations

BehaviourInjection (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection)8 CallerArguments (org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments)8 Test (org.junit.Test)8 Nullable (javax.annotation.Nullable)6 Tuple (org.apache.hadoop.hive.ql.parse.WarehouseInstance.Tuple)4 Path (org.apache.hadoop.fs.Path)3 ArrayList (java.util.ArrayList)2 CliSessionState (org.apache.hadoop.hive.cli.CliSessionState)2 HiveConf (org.apache.hadoop.hive.conf.HiveConf)2 IDriver (org.apache.hadoop.hive.ql.IDriver)2 CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException)2 List (java.util.List)1 Partition (org.apache.hadoop.hive.metastore.api.Partition)1