Use of org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService in project hive by apache.
From the class TestScheduledReplicationScenarios, method testSetPolicyId.
@Test
@Ignore("HIVE-25720")
public void testSetPolicyId() throws Throwable {
String withClause = " WITH('" + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true'"
    + ",'" + HiveConf.ConfVars.REPL_SOURCE_CLUSTER_NAME + "' = 'cluster0'"
    + ",'" + HiveConf.ConfVars.REPL_TARGET_CLUSTER_NAME + "' = 'cluster1')";
// Create a table with some data in the source DB.
primary.run("use " + primaryDbName)
    .run("create table t2 (id int)")
    .run("insert into t2 values(1)")
    .run("insert into t2 values(2)");
// Remove the SOURCE_OF_REPLICATION property from the database.
primary.run("ALTER DATABASE " + primaryDbName + " Set DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '')");
assertFalse(ReplChangeManager.isSourceOfReplication(primary.getDatabase(primaryDbName)));
// Schedule Dump & Load and verify the data is replicated properly.
try (ScheduledQueryExecutionService schqS = ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
int next = -1;
ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
primary.run("create scheduled query s1_t2 every 5 seconds as repl dump " + primaryDbName + withClause);
replica.run("create scheduled query s2_t2 every 5 seconds as repl load " + primaryDbName + " INTO " + replicatedDbName + withClause);
Path dumpRoot = ReplUtils.getEncodedDumpRootPath(primary.hiveConf, primaryDbName.toLowerCase());
FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2" });
// Check the database got the SOURCE_OF_REPLICATION property set.
assertTrue(ReplChangeManager.getReplPolicyIdString(primary.getDatabase(primaryDbName)).equals("s1_t2"));
// Remove the SOURCE_OF_REPLICATION property from the database.
primary.run("ALTER DATABASE " + primaryDbName + " Set DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '')");
assertFalse(ReplChangeManager.isSourceOfReplication(primary.getDatabase(primaryDbName)));
// Ensure that repl.source.for is also added during the incremental iteration of replication.
GenericTestUtils.waitFor(() -> {
    try {
        return ReplChangeManager.getReplPolicyIdString(primary.getDatabase(primaryDbName)).equals("s1_t2");
    } catch (Throwable e) {
        return false;
    }
}, 100, 10000);
// Verify that the new policy id is appended.
primary.run("drop scheduled query s1_t2");
fs.delete(dumpRoot, true);
primary.run("create scheduled query s1_t2_new every 5 seconds as repl " + "dump " + primaryDbName + withClause);
GenericTestUtils.waitFor(() -> {
    try {
        return ReplChangeManager.getReplPolicyIdString(primary.getDatabase(primaryDbName)).equals("s1_t2, s1_t2_new");
    } catch (Throwable e) {
        return false;
    }
}, 100, 10000);
} finally {
primary.run("drop scheduled query s1_t2_new");
replica.run("drop scheduled query s2_t2");
}
}
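The waitForAck helper and DEFAULT_PROBE_TIMEOUT constant used above are defined elsewhere in TestScheduledReplicationScenarios and are not part of this excerpt. A minimal sketch of what such a polling helper could look like, assuming it simply waits for the acknowledgement file to appear on the filesystem:
// Hypothetical implementation sketch; the real helper lives in the test class and may differ.
private static void waitForAck(FileSystem fs, Path ackPath, long timeoutMillis) throws Exception {
    // Poll until the ack file exists or the timeout elapses.
    long deadline = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < deadline) {
        if (fs.exists(ackPath)) {
            return;
        }
        Thread.sleep(1000);
    }
    throw new AssertionError("Timed out waiting for ack file: " + ackPath);
}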
Use of org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService in project hive by apache.
From the class TestScheduledReplicationScenarios, method testCompleteFailoverWithReverseBootstrap.
@Test
@Ignore("HIVE-25720")
public void testCompleteFailoverWithReverseBootstrap() throws Throwable {
String withClause = "'" + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true','" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'";
String sourceDbName = "sourceDbName";
String replicaDbName = "replicaDbName";
// Create a table with some data in the source DB.
primary.run("create database " + sourceDbName + " with dbproperties('repl.source.for'='a')")
    .run("use " + sourceDbName)
    .run("create table t2 (id int)")
    .run("insert into t2 values(1)")
    .run("insert into t2 values(2)");
// Schedule Dump & Load and verify the data is replicated properly.
try (ScheduledQueryExecutionService schqS = ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
int next = -1;
ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
primary.run("create scheduled query repl_dump_p1 every 5 seconds as repl dump " + sourceDbName + " WITH(" + withClause + ')');
replica.run("create scheduled query repl_load_p1 every 5 seconds as repl load " + sourceDbName + " INTO " + replicaDbName + " WITH(" + withClause + ')');
Path dumpRoot = ReplUtils.getEncodedDumpRootPath(primary.hiveConf, sourceDbName.toLowerCase());
FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicaDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2" });
// Start failover from here.
String startFailoverClause = withClause.concat(",'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
primary.run("alter scheduled query repl_dump_p1 defined as repl dump " + sourceDbName + " WITH(" + startFailoverClause + ')');
replica.run("alter scheduled query repl_load_p1 defined as repl load " + sourceDbName + " INTO " + replicaDbName + " WITH(" + startFailoverClause + ')');
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
Path failoverReadyMarker = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.FAILOVER_READY_MARKER.toString());
assertTrue(fs.exists(failoverReadyMarker));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(sourceDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicaDbName), MetaStoreUtils.FailoverEndpoint.TARGET));
primary.run("alter scheduled query repl_dump_p1 disabled").run("alter scheduled query repl_dump_p1 defined as repl dump " + sourceDbName + " WITH(" + withClause + ')').run("alter database " + sourceDbName + " set dbproperties('" + SOURCE_OF_REPLICATION + "'='')").run("drop database " + sourceDbName + " cascade");
assertTrue(primary.getDatabase(sourceDbName) == null);
replica.run("alter scheduled query repl_load_p1 disabled").run("alter scheduled query repl_load_p1 defined as repl load " + sourceDbName + " INTO " + replicaDbName + " WITH(" + withClause + ')').run("create scheduled query repl_dump_p2 every 5 seconds as repl dump " + replicaDbName + " WITH(" + withClause + ')');
primary.run("create scheduled query repl_load_p2 every 5 seconds as repl load " + replicaDbName + " INTO " + sourceDbName + " WITH(" + withClause + ')');
dumpRoot = ReplUtils.getEncodedDumpRootPath(replica.hiveConf, replicaDbName.toLowerCase());
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
assertFalse(MetaStoreUtils.isTargetOfReplication(replica.getDatabase(replicaDbName)));
Database primaryDb = primary.getDatabase(sourceDbName);
assertFalse(primaryDb == null);
assertTrue(MetaStoreUtils.isTargetOfReplication(primaryDb));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(primaryDb));
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
assertFalse(ReplUtils.isFirstIncPending(primary.getDatabase(sourceDbName).getParameters()));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(replica.getDatabase(replicaDbName)));
// Start failback from here.
replica.run("alter scheduled query repl_dump_p2 defined as repl dump " + replicaDbName + " WITH(" + startFailoverClause + ')');
primary.run("alter scheduled query repl_load_p2 defined as repl load " + replicaDbName + " INTO " + sourceDbName + " WITH(" + startFailoverClause + ')');
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
failoverReadyMarker = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.FAILOVER_READY_MARKER.toString());
assertTrue(fs.exists(failoverReadyMarker));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicaDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(sourceDbName), MetaStoreUtils.FailoverEndpoint.TARGET));
replica.run("alter scheduled query repl_dump_p2 disabled").run("alter scheduled query repl_dump_p2 defined as repl dump " + replicaDbName + " WITH(" + withClause + ')').run("alter database " + replicaDbName + " set dbproperties('" + SOURCE_OF_REPLICATION + "'='')").run("drop database " + replicaDbName + " cascade").run("alter scheduled query repl_load_p1 enabled");
assertTrue(replica.getDatabase(replicaDbName) == null);
primary.run("alter scheduled query repl_load_p2 disabled").run("alter scheduled query repl_load_p2 defined as repl load " + replicaDbName + " INTO " + sourceDbName + " WITH(" + withClause + ')').run("alter scheduled query repl_dump_p1 enabled");
dumpRoot = ReplUtils.getEncodedDumpRootPath(primary.hiveConf, sourceDbName.toLowerCase());
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
assertFalse(MetaStoreUtils.isTargetOfReplication(primary.getDatabase(sourceDbName)));
Database replicaDb = replica.getDatabase(replicaDbName);
assertFalse(replicaDb == null);
assertTrue(MetaStoreUtils.isTargetOfReplication(replicaDb));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(replicaDb));
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicaDbName).getParameters()));
assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(sourceDbName)));
} finally {
primary.run("drop database if exists " + sourceDbName + " cascade").run("drop scheduled query repl_dump_p1");
replica.run("drop database if exists " + replicaDbName + " cascade").run("drop scheduled query repl_load_p1");
primary.run("drop scheduled query repl_load_p2");
replica.run("drop scheduled query repl_dump_p2");
}
}
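The per-dump acknowledgement and failover-marker paths above are built by repeating the same concatenation of the dump id, ReplUtils.REPL_HIVE_BASE_DIR, and the ack file name. A helper along these lines (hypothetical, not part of the original test) could factor that pattern out:
// Sketch: builds <dumpRoot>/<dumpId>/<REPL_HIVE_BASE_DIR>/<ack file>, mirroring the inline
// concatenations used in the test.
private static Path dumpAckPath(Path dumpRoot, int dumpId, ReplAck ack) {
    return new Path(dumpRoot, dumpId + File.separator
        + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ack.toString());
}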
Use of org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService in project hive by apache.
From the class TestScheduledReplicationScenarios, method testAcidTablesReplLoadBootstrapIncr.
@Test
@Ignore("HIVE-23395")
public void testAcidTablesReplLoadBootstrapIncr() throws Throwable {
// Bootstrap
primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("insert into t1 values(1)").run("insert into t1 values(2)");
try (ScheduledQueryExecutionService schqS = ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
int next = -1;
ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
primary.run("create scheduled query s1_t1 every 5 seconds as repl dump " + primaryDbName);
replica.run("create scheduled query s2_t1 every 5 seconds as repl load " + primaryDbName + " INTO " + replicatedDbName);
Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR),
    Base64.getEncoder().encodeToString(primaryDbName.toLowerCase().getBytes(StandardCharsets.UTF_8.name())));
FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicatedDbName).run("show tables like 't1'").verifyResult("t1").run("select id from t1 order by id").verifyResults(new String[] { "1", "2" });
// First incremental, after bootstrap
primary.run("use " + primaryDbName).run("insert into t1 values(3)").run("insert into t1 values(4)");
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicatedDbName).run("show tables like 't1'").verifyResult("t1").run("select id from t1 order by id").verifyResults(new String[] { "1", "2", "3", "4" });
// Second incremental
primary.run("use " + primaryDbName).run("insert into t1 values(5)").run("insert into t1 values(6)");
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicatedDbName).run("show tables like 't1'").verifyResult("t1").run("select id from t1 order by id").verifyResults(new String[] { "1", "2", "3", "4", "5", "6" }).run("drop table t1");
} finally {
primary.run("drop scheduled query s1_t1");
replica.run("drop scheduled query s2_t1");
}
}
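Here the dump root is built inline from the Base64-encoded, lower-cased database name under REPLDIR, whereas the first two tests obtained it via ReplUtils.getEncodedDumpRootPath. Assuming both forms produce the same layout, the inline construction is equivalent to a small helper like this (sketch only, not part of the original test):
// Sketch, assuming the dump root layout is <hive.repl.rootdir>/<base64(dbName.toLowerCase())>.
private static Path encodedDumpRoot(HiveConf conf, String dbName) {
    String encoded = Base64.getEncoder()
        .encodeToString(dbName.toLowerCase().getBytes(StandardCharsets.UTF_8));
    return new Path(conf.getVar(HiveConf.ConfVars.REPLDIR), encoded);
}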
Use of org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService in project hive by apache.
From the class TestScheduledReplicationScenarios, method testExternalTablesReplLoadBootstrapIncr.
@Test
@Ignore("HIVE-23395")
public void testExternalTablesReplLoadBootstrapIncr() throws Throwable {
// Bootstrap
String withClause = " WITH('" + HiveConf.ConfVars.REPL_INCLUDE_AUTHORIZATION_METADATA + "' = 'true' ,'"
    + HiveConf.ConfVars.REPL_INCLUDE_ATLAS_METADATA + "' = 'true' , '"
    + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true'"
    + ",'" + HiveConf.ConfVars.REPL_ATLAS_ENDPOINT + "' = 'http://localhost:21000/atlas'"
    + ",'" + HiveConf.ConfVars.REPL_ATLAS_REPLICATED_TO_DB + "' = 'tgt'"
    + ",'" + HiveConf.ConfVars.REPL_SOURCE_CLUSTER_NAME + "' = 'cluster0'"
    + ",'" + HiveConf.ConfVars.REPL_TARGET_CLUSTER_NAME + "' = 'cluster1')";
primary.run("use " + primaryDbName).run("create external table t2 (id int)").run("insert into t2 values(1)").run("insert into t2 values(2)");
try (ScheduledQueryExecutionService schqS = ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
int next = -1;
ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
primary.run("create scheduled query s1_t2 every 5 seconds as repl dump " + primaryDbName + withClause);
replica.run("create scheduled query s2_t2 every 5 seconds as repl load " + primaryDbName + " INTO " + replicatedDbName + withClause);
Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR),
    Base64.getEncoder().encodeToString(primaryDbName.toLowerCase().getBytes(StandardCharsets.UTF_8.name())));
FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2" });
long lastReplId = Long.parseLong(primary.status(replicatedDbName).getOutput().get(0));
DumpMetaData dumpMetaData = new DumpMetaData(ackPath.getParent(), primary.hiveConf);
List<ReplicationMetric> replicationMetrics = MetricCollector.getInstance().getMetrics();
Assert.assertEquals(2, replicationMetrics.size());
// Generate expected metrics
List<ReplicationMetric> expectedReplicationMetrics = new ArrayList<>();
expectedReplicationMetrics.add(generateExpectedMetric("s1_t2", 0, primaryDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS, generateDumpStages(true)));
expectedReplicationMetrics.add(generateExpectedMetric("s2_t2", dumpMetaData.getDumpExecutionId(), replicatedDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS, generateLoadStages(true)));
checkMetrics(expectedReplicationMetrics, replicationMetrics);
// First incremental, after bootstrap
primary.run("use " + primaryDbName).run("insert into t2 values(3)").run("insert into t2 values(4)");
next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2", "3", "4" });
} finally {
primary.run("drop scheduled query s1_t2");
replica.run("drop scheduled query s2_t2");
}
}
Use of org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService in project hive by apache.
From the class TestScheduledQueryService, method testScheduledQueryExecution.
@Test
public void testScheduledQueryExecution() throws ParseException, Exception {
IDriver driver = createDriver();
ExecutorService executor = Executors.newCachedThreadPool(
    new ThreadFactoryBuilder().setDaemon(true).setNameFormat("SchQ %d").build());
HiveConf conf = env_setup.getTestCtx().hiveConf;
MockScheduledQueryService qService = new MockScheduledQueryService("insert into tu values(1),(2),(3),(4),(5)");
ScheduledQueryExecutionContext ctx = new ScheduledQueryExecutionContext(executor, conf, qService);
try (ScheduledQueryExecutionService sQ = ScheduledQueryExecutionService.startScheduledQueryExecutorService(ctx)) {
// Wait for the scheduled query to finish. Hopefully 30 seconds should be more than enough.
SessionState.getConsole().logInfo("Waiting for query execution to finish ...");
synchronized (qService.notifier) {
qService.notifier.wait(30000);
}
SessionState.getConsole().logInfo("Done waiting for query execution!");
}
assertThat(qService.lastProgressInfo.isSetExecutorQueryId(), is(true));
assertThat(qService.lastProgressInfo.getExecutorQueryId(), Matchers.containsString(ctx.executorHostName + "/"));
int nr = getNumRowsReturned(driver, "select 1 from tu");
assertThat(nr, Matchers.equalTo(5));
}
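This test drives the service through a ScheduledQueryExecutionContext so that a mock maintenance service and a dedicated executor can be supplied. For comparison, the replication tests above use the simpler overload that starts the service straight from a HiveConf; a minimal sketch of that form, assuming a configured conf:
// Minimal usage sketch (assumes `conf` is a configured HiveConf); while the service is open,
// scheduled queries registered with the metastore are polled and executed, and closing it
// shuts the executor down.
try (ScheduledQueryExecutionService service =
        ScheduledQueryExecutionService.startScheduledQueryExecutorService(conf)) {
    // trigger work or wait for scheduled executions here
}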