use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.
the class TestReplicationScenarios method testIncrementalStatisticsMetrics.
/**
 * Checks the replication statistics reported after incremental loads.
 *
 * <p>After a bootstrap dump and load, two incremental cycles are run:
 * <ol>
 *   <li>10 partitioned tables are created with 5 partitions each, so 10 create-table and
 *       50 add-partition events are expected;</li>
 *   <li>3 tables are dropped, 3 renamed, one partition is dropped from each of 4 tables and
 *       2 tables are created.</li>
 * </ol>
 * After each cycle the expected event strings are looked up in the stage statistics of every
 * metric returned by the {@link MetricCollector} and are also handed to
 * {@code verifyMBeanStatistics}.
 *
 * @throws Throwable if any replication command or assertion fails
 */
@Test
public void testIncrementalStatisticsMetrics() throws Throwable {
  isMetricsEnabledForTests(true);
  ReplLoadWork.setMbeansParamsForTesting(true, false);
  // Stays null until the MBean name is known, so cleanup can tell whether there is anything to unregister.
  String nameStri = null;
  try {
    MetricCollector collector = MetricCollector.getInstance();
    String testName = "testIncrementalStatisticsMetrics";
    String dbName = createDB(testName, driver);
    String replDbName = dbName + "_dupe";
    nameStri = "Hadoop:service=HiveServer2,name=Database-" + replDbName + " Policy-pol";

    // Do a bootstrap dump & load
    bootstrapLoadAndVerify(dbName, replDbName);
    ReplLoadWork.setMbeansParamsForTesting(true, true);

    // Create 10 partitioned tables.
    for (int i = 0; i < 10; i++) {
      run("CREATE TABLE " + dbName + ".ptned" + i + "(a string) partitioned by (b int) STORED AS TEXTFILE", driver);
      for (int j = 0; j < 5; j++) {
        // Create 5 partitions per table.
        run("ALTER TABLE " + dbName + ".ptned" + i + " ADD PARTITION(b=" + j + ")", driver);
      }
    }
    verifyRun("SHOW PARTITIONS " + dbName + ".ptned1", new String[] { "b=0", "b=1", "b=2", "b=3", "b=4" }, driver);

    // Do an incremental load & verify the metrics.
    Tuple incrementalDump = incrementalLoadAndVerify(dbName, replDbName);
    String[] events = new String[] {
        "[[Event Name: EVENT_CREATE_TABLE; Total Number: 10;",
        "[[Event Name: EVENT_ADD_PARTITION; Total Number: 50;" };
    assertIncrementalStagesReportEvents(collector, events);
    verifyMBeanStatistics(testName, replDbName, nameStri, events, incrementalDump);

    // Do some drop table/drop partition & rename table operations.
    for (int i = 0; i < 3; i++) {
      // Drop 3 tables
      run("DROP TABLE " + dbName + ".ptned" + i, driver);
    }
    for (int i = 3; i < 6; i++) {
      // Rename 3 tables
      run("ALTER TABLE " + dbName + ".ptned" + i + " RENAME TO " + dbName + ".ptned" + i + "_renamed", driver);
    }
    for (int i = 6; i < 10; i++) {
      // Drop partitions from 4 tables
      run("ALTER TABLE " + dbName + ".ptned" + i + " DROP PARTITION(b=1)", driver);
    }
    for (int i = 10; i < 12; i++) {
      // Create 2 tables
      run("CREATE TABLE " + dbName + ".ptned" + i + "(a string) partitioned by (b int) STORED AS TEXTFILE", driver);
    }

    incrementalDump = incrementalLoadAndVerify(dbName, replDbName);
    events = new String[] {
        "[[Event Name: EVENT_CREATE_TABLE; Total Number: 2;",
        "[[Event Name: EVENT_DROP_TABLE; Total Number: 3;",
        "[[Event Name: EVENT_RENAME_TABLE; Total Number: 3;",
        "[[Event Name: EVENT_DROP_PARTITION; Total Number: 4;" };
    assertIncrementalStagesReportEvents(collector, events);
    verifyMBeanStatistics(testName, replDbName, nameStri, events, incrementalDump);
  } finally {
    // Clean up the test setup. Done in finally so that a failed assertion does not leave the
    // testing flags switched on or the MBean registered for tests that run afterwards.
    ReplLoadWork.setMbeansParamsForTesting(false, false);
    if (nameStri != null) {
      MBeans.unregister(ObjectName.getInstance(nameStri));
    }
  }
}

/**
 * Asserts that every metric currently returned by the collector is of type INCREMENTAL, has at
 * least one stage, and that the replication statistics of each stage contain all given events.
 *
 * @param collector source of the metrics to inspect
 * @param events    substrings that must each appear in every stage's replication statistics
 */
private void assertIncrementalStagesReportEvents(MetricCollector collector, String[] events) {
  for (ReplicationMetric metric : collector.getMetrics()) {
    assertEquals(Metadata.ReplicationType.INCREMENTAL, metric.getMetadata().getReplicationType());
    List<Stage> stages = metric.getProgress().getStages();
    assertTrue(!stages.isEmpty());
    for (Stage stage : stages) {
      for (String event : events) {
        // The statistics string doubles as the failure message so a mismatch shows what was reported.
        assertTrue(stage.getReplStats(), stage.getReplStats().contains(event));
      }
    }
  }
}
use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.
the class TestReplicationScenariosUsingSnapshots method assertIncrementalMetricsValues.
/**
 * Walks every metric currently returned by the collector and asserts that it carries the given
 * replication type and that each of its stages reports the given snapshot counts.
 *
 * @param replicationType replication type every metric is expected to have
 * @param collector       source of the metrics to inspect
 * @param numCreated      expected number of created snapshots per stage
 * @param numDeleted      expected number of deleted snapshots per stage
 */
private void assertIncrementalMetricsValues(Metadata.ReplicationType replicationType, MetricCollector collector, int numCreated, int numDeleted) {
  for (ReplicationMetric metric : collector.getMetrics()) {
    assertEquals(replicationType, metric.getMetadata().getReplicationType());
    for (Stage currentStage : metric.getProgress().getStages()) {
      SnapshotUtils.ReplSnapshotCount snapshotCount = currentStage.getReplSnapshotCount();
      assertEquals(numCreated, snapshotCount.getNumCreated());
      assertEquals(numDeleted, snapshotCount.getNumDeleted());
    }
  }
}
use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.
the class TestReplicationScenariosUsingSnapshots method testSnapshotMetrics.
/**
 * Checks the snapshot created/deleted counts reported in the replication metrics across a
 * bootstrap dump, an incremental dump with the same configuration, and an incremental dump
 * issued without the with-clause.
 *
 * @throws Throwable if any replication command or assertion fails
 */
@Test
public void testSnapshotMetrics() throws Throwable {
  conf.set(Constants.SCHEDULED_QUERY_SCHEDULENAME, "metrics_test");
  List<String> withClause = ReplicationTestUtils.includeExternalTableClause(true);
  MetricCollector collector = MetricCollector.getInstance();

  // Create the directories on the primary cluster's file system.
  DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
  Path externalDatabaseLocation = new Path("/" + testName.getMethodName() + "/externalDatabase/");
  fs.mkdirs(externalDatabaseLocation, new FsPermission("777"));
  Path externalTableLocation1 = new Path("/" + testName.getMethodName() + "/t1/");
  fs.mkdirs(externalTableLocation1, new FsPermission("777"));

  withClause.add("'hive.repl.external.warehouse.single.copy.task.paths'='"
      + externalTableLocation1.makeQualified(fs.getUri(), fs.getWorkingDirectory()) + "'");

  primary.run("use " + primaryDbName)
      .run("create external table emp1 (id int)")
      .run("insert into emp1 values(1),(2)")
      .run("create external table exttab (place string) row format delimited fields terminated by ','"
          + " location '" + externalTableLocation1 + "'")
      .run("insert into exttab values('lucknow')")
      .dump(primaryDbName, withClause);

  // The bootstrap stage, 2 directories for which snapshot is enabled, the database directory and the one table
  // as part of the config. This would be initial copy stage, so only 1 snapshot per directory and none to be deleted.
  assertIncrementalMetricsValues(BOOTSTRAP, collector, 2, 0);

  // Load and check if the data and table are there.
  replica.load(replicatedDbName, primaryDbName, withClause)
      .run("use " + replicatedDbName)
      .run("show tables like 'emp1'")
      .verifyResults(new String[] { "emp1" })
      .run("select id from emp1")
      .verifyResults(new String[] { "1", "2" })
      .run("show tables like 'exttab'")
      .verifyResults(new String[] { "exttab" })
      .run("select place from exttab")
      .verifyResults(new String[] { "lucknow" })
      .verifyReplTargetProperty(replicatedDbName);

  // Add some data and try incremental dump.
  primary.run("use " + primaryDbName)
      .run("insert into emp1 values(3),(4)")
      .run("insert into exttab values('agra')")
      .dump(primaryDbName, withClause);

  // This is from the diff stage: for each of the 2 directories where snapshots were enabled, 1 old snapshot
  // got deleted and 1 got created, so 2 created and 2 deleted.
  assertIncrementalMetricsValues(INCREMENTAL, collector, 2, 2);

  // Do a load
  replica.load(replicatedDbName, primaryDbName, withClause);

  // Remove the with clause, hence the external table specified as part of the config.
  primary.run("use " + primaryDbName)
      .run("insert into exttab values('lucknow')")
      .dump(primaryDbName, null);

  // Only one directory, i.e the database directory is going through snapshot based replication, so only 1 created
  // for it and 1 old deleted for it, 2 deleted for the table removed from the snapshot based replication scope.
  assertIncrementalMetricsValues(INCREMENTAL, collector, 1, 3);
}
use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.
the class TestScheduledReplicationScenarios method testExternalTablesReplLoadBootstrapIncr.
/**
 * Runs a bootstrap and a first incremental replication cycle driven by scheduled queries
 * (a "repl dump" on the primary and a "repl load" on the replica, each every 5 seconds).
 *
 * <p>For each cycle the test waits for the load acknowledgement file under the expected dump
 * directory and then checks the rows of table {@code t2} on the replica. After the bootstrap
 * cycle it also compares the metrics returned by the {@link MetricCollector} with the
 * expected dump and load metrics.
 *
 * <p>Currently disabled, see HIVE-23395.
 *
 * @throws Throwable if any replication command or assertion fails
 */
@Test
@Ignore("HIVE-23395")
public void testExternalTablesReplLoadBootstrapIncr() throws Throwable {
  // Bootstrap
  String withClause = " WITH('" + HiveConf.ConfVars.REPL_INCLUDE_AUTHORIZATION_METADATA + "' = 'true' ,'"
      + HiveConf.ConfVars.REPL_INCLUDE_ATLAS_METADATA + "' = 'true' , '"
      + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true'"
      + ",'" + HiveConf.ConfVars.REPL_ATLAS_ENDPOINT + "' = 'http://localhost:21000/atlas'"
      + ",'" + HiveConf.ConfVars.REPL_ATLAS_REPLICATED_TO_DB + "' = 'tgt'"
      + ",'" + HiveConf.ConfVars.REPL_SOURCE_CLUSTER_NAME + "' = 'cluster0'"
      + ",'" + HiveConf.ConfVars.REPL_TARGET_CLUSTER_NAME + "' = 'cluster1')";
  primary.run("use " + primaryDbName)
      .run("create external table t2 (id int)")
      .run("insert into t2 values(1)")
      .run("insert into t2 values(2)");
  try (ScheduledQueryExecutionService schqS =
      ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
    int next = -1;
    ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
    primary.run("create scheduled query s1_t2 every 5 seconds as repl dump " + primaryDbName + withClause);
    replica.run("create scheduled query s2_t2 every 5 seconds as repl load " + primaryDbName
        + " INTO " + replicatedDbName + withClause);

    // The dump root is the Base64 encoding of the lower-cased primary database name under REPLDIR.
    // The Charset overload of getBytes yields the same bytes without a charset lookup by name.
    Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR),
        Base64.getEncoder().encodeToString(primaryDbName.toLowerCase().getBytes(StandardCharsets.UTF_8)));
    FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);

    // Wait until the load of the next dump directory has been acknowledged.
    next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
    Path ackPath = new Path(dumpRoot, next + File.separator + ReplUtils.REPL_HIVE_BASE_DIR
        + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
    waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
    replica.run("use " + replicatedDbName)
        .run("show tables like 't2'")
        .verifyResult("t2")
        .run("select id from t2 order by id")
        .verifyResults(new String[] { "1", "2" });

    // NOTE(review): the status is read from `primary` using the replicated database name —
    // confirm this is intended rather than `replica.status(...)`.
    long lastReplId = Long.parseLong(primary.status(replicatedDbName).getOutput().get(0));
    DumpMetaData dumpMetaData = new DumpMetaData(ackPath.getParent(), primary.hiveConf);
    List<ReplicationMetric> replicationMetrics = MetricCollector.getInstance().getMetrics();
    Assert.assertEquals(2, replicationMetrics.size());

    // Generate expected metrics: one for the dump policy and one for the load policy.
    List<ReplicationMetric> expectedReplicationMetrics = new ArrayList<>();
    expectedReplicationMetrics.add(generateExpectedMetric("s1_t2", 0, primaryDbName,
        Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS,
        generateDumpStages(true)));
    expectedReplicationMetrics.add(generateExpectedMetric("s2_t2", dumpMetaData.getDumpExecutionId(),
        replicatedDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId,
        Status.SUCCESS, generateLoadStages(true)));
    checkMetrics(expectedReplicationMetrics, replicationMetrics);

    // First incremental, after bootstrap
    primary.run("use " + primaryDbName)
        .run("insert into t2 values(3)")
        .run("insert into t2 values(4)");
    next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
    ackPath = new Path(dumpRoot, next + File.separator + ReplUtils.REPL_HIVE_BASE_DIR
        + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
    waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
    replica.run("use " + replicatedDbName)
        .run("show tables like 't2'")
        .verifyResult("t2")
        .run("select id from t2 order by id")
        .verifyResults(new String[] { "1", "2", "3", "4" });
  } finally {
    // Always remove the scheduled queries, whether or not the checks above passed.
    primary.run("drop scheduled query s1_t2");
    replica.run("drop scheduled query s2_t2");
  }
}
use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.
the class TestScheduledReplicationScenarios method generateExpectedMetric.
/**
 * Assembles the {@link ReplicationMetric} a test expects to find for one policy run.
 *
 * @param policy          policy name passed to the metric
 * @param dumpExecId      dump execution id passed to the metric
 * @param dbName          database name stored in the metadata
 * @param replicationType replication type stored in the metadata
 * @param staging         staging location stored in the metadata
 * @param lastReplId      last replication id set on the metadata
 * @param status          status set on the progress
 * @param stages          stages added to the progress, in list order
 * @return the metric carrying the metadata and progress built from the arguments
 */
private ReplicationMetric generateExpectedMetric(String policy, long dumpExecId, String dbName, Metadata.ReplicationType replicationType, String staging, long lastReplId, Status status, List<Stage> stages) {
  // Progress: the status plus every supplied stage.
  Progress expectedProgress = new Progress();
  expectedProgress.setStatus(status);
  stages.forEach(expectedProgress::addStage);

  // Metadata: database, replication type, staging location and last replication id.
  Metadata expectedMetadata = new Metadata(dbName, replicationType, staging);
  expectedMetadata.setLastReplId(lastReplId);

  ReplicationMetric expectedMetric = new ReplicationMetric(0, policy, dumpExecId, expectedMetadata);
  expectedMetric.setProgress(expectedProgress);
  return expectedMetric;
}
Aggregations