
Example 11 with ReplicationMetric

use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.

the class TestReplicationScenarios method testIncrementalStatisticsMetrics.

@Test
public void testIncrementalStatisticsMetrics() throws Throwable {
    isMetricsEnabledForTests(true);
    ReplLoadWork.setMbeansParamsForTesting(true, false);
    MetricCollector collector = MetricCollector.getInstance();
    String testName = "testIncrementalStatisticsMetrics";
    String dbName = createDB(testName, driver);
    String replDbName = dbName + "_dupe";
    String nameStri = "Hadoop:service=HiveServer2,name=Database-" + replDbName + " Policy-pol";
    // Do a bootstrap dump & load
    Tuple bootstrapDump = bootstrapLoadAndVerify(dbName, replDbName);
    ReplLoadWork.setMbeansParamsForTesting(true, true);
    // Create 10 partitioned tables.
    for (int i = 0; i < 10; i++) {
        run("CREATE TABLE " + dbName + ".ptned" + i + "(a string) partitioned by (b int) STORED AS TEXTFILE", driver);
        for (int j = 0; j < 5; j++) {
            // Create 5 partitions per table.
            run("ALTER TABLE " + dbName + ".ptned" + i + " ADD PARTITION(b=" + j + ")", driver);
        }
    }
    verifyRun("SHOW PARTITIONS " + dbName + ".ptned1", new String[] { "b=0", "b=1", "b=2", "b=3", "b=4" }, driver);
    // Do an incremental load & verify the metrics.
    Tuple incrementalDump = incrementalLoadAndVerify(dbName, replDbName);
    String[] events = new String[] { "[[Event Name: EVENT_CREATE_TABLE; " + "Total Number: 10;", "[[Event Name: EVENT_ADD_PARTITION; Total Number: 50;" };
    Iterator<ReplicationMetric> itr = collector.getMetrics().iterator();
    while (itr.hasNext()) {
        ReplicationMetric elem = itr.next();
        assertEquals(Metadata.ReplicationType.INCREMENTAL, elem.getMetadata().getReplicationType());
        List<Stage> stages = elem.getProgress().getStages();
        assertTrue(stages.size() != 0);
        for (Stage stage : stages) {
            for (String event : events) {
                assertTrue(stage.getReplStats(), stage.getReplStats().contains(event));
            }
        }
    }
    verifyMBeanStatistics(testName, replDbName, nameStri, events, incrementalDump);
    // Do some drop table/drop partition & rename table operations.
    for (int i = 0; i < 3; i++) {
        // Drop 3 tables
        run("DROP TABLE " + dbName + ".ptned" + i, driver);
    }
    for (int i = 3; i < 6; i++) {
        // Rename 3 tables
        run("ALTER TABLE " + dbName + ".ptned" + i + " RENAME TO " + dbName + ".ptned" + i + "_renamed", driver);
    }
    for (int i = 6; i < 10; i++) {
        // Drop one partition from each of 4 tables.
        run("ALTER TABLE " + dbName + ".ptned" + i + " DROP PARTITION(b=1)", driver);
    }
    for (int i = 10; i < 12; i++) {
        // Create 2 tables
        run("CREATE TABLE " + dbName + ".ptned" + i + "(a string) partitioned by (b int) STORED AS TEXTFILE", driver);
    }
    incrementalDump = incrementalLoadAndVerify(dbName, replDbName);
    events = new String[] { "[[Event Name: EVENT_CREATE_TABLE; " + "Total Number: 2;", "[[Event Name: EVENT_DROP_TABLE; " + "Total Number: 3;", "[[Event Name: EVENT_RENAME_TABLE; " + "Total Number: 3;", "[[Event Name: EVENT_DROP_PARTITION; Total Number: 4;" };
    itr = collector.getMetrics().iterator();
    while (itr.hasNext()) {
        ReplicationMetric elem = itr.next();
        assertEquals(Metadata.ReplicationType.INCREMENTAL, elem.getMetadata().getReplicationType());
        List<Stage> stages = elem.getProgress().getStages();
        assertTrue(stages.size() != 0);
        for (Stage stage : stages) {
            for (String event : events) {
                assertTrue(stage.getReplStats(), stage.getReplStats().contains(event));
            }
        }
    }
    verifyMBeanStatistics(testName, replDbName, nameStri, events, incrementalDump);
    // Clean up the test setup.
    ReplLoadWork.setMbeansParamsForTesting(false, false);
    MBeans.unregister(ObjectName.getInstance(nameStri));
}
Also used : BootstrapLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.BootstrapLoadMetricCollector) ReplicationMetricCollector(org.apache.hadoop.hive.ql.parse.repl.metric.ReplicationMetricCollector) MetricCollector(org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector) IncrementalLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector) Stage(org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage) ReplicationMetric(org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) Test(org.junit.Test)
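
The MBean name built above follows the standard Hadoop JMX naming pattern, so the statistics that verifyMBeanStatistics checks can also be inspected directly through the platform MBean server. Below is a minimal sketch using only the javax.management API; the class name and the ObjectName value are placeholders following the nameStri pattern from the test, and the attribute names printed are simply whatever the replication MBean actually exposes:

import java.lang.management.ManagementFactory;
import javax.management.MBeanAttributeInfo;
import javax.management.MBeanServer;
import javax.management.ObjectName;

public class ReplMBeanInspector {
    public static void main(String[] args) throws Exception {
        MBeanServer server = ManagementFactory.getPlatformMBeanServer();
        // Placeholder name following the nameStri pattern from the test above.
        ObjectName name = ObjectName.getInstance(
                "Hadoop:service=HiveServer2,name=Database-testdb_dupe Policy-pol");
        // Enumerate every attribute the MBean exposes and print its current value.
        for (MBeanAttributeInfo attr : server.getMBeanInfo(name).getAttributes()) {
            System.out.println(attr.getName() + " = " + server.getAttribute(name, attr.getName()));
        }
    }
}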

Example 12 with ReplicationMetric

use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.

the class TestReplicationScenariosUsingSnapshots method assertIncrementalMetricsValues.

private void assertIncrementalMetricsValues(Metadata.ReplicationType replicationType, MetricCollector collector, int numCreated, int numDeleted) {
    Iterator<ReplicationMetric> itr;
    itr = collector.getMetrics().iterator();
    while (itr.hasNext()) {
        ReplicationMetric elem = itr.next();
        assertEquals(replicationType, elem.getMetadata().getReplicationType());
        List<Stage> stages = elem.getProgress().getStages();
        for (Stage stage : stages) {
            SnapshotUtils.ReplSnapshotCount count = stage.getReplSnapshotCount();
            assertEquals(numCreated, count.getNumCreated());
            assertEquals(numDeleted, count.getNumDeleted());
        }
    }
}
Also used : SnapshotUtils(org.apache.hadoop.hive.ql.exec.repl.util.SnapshotUtils) Stage(org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage) ReplicationMetric(org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric)
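
The same walk over the collector can be written more compactly with forEach and lambdas; a behavior-equivalent sketch of the helper, using only the getters already visible in these examples (the int parameters are effectively final, so the lambdas may capture them):

private void assertIncrementalMetricsValues(Metadata.ReplicationType replicationType, MetricCollector collector, int numCreated, int numDeleted) {
    collector.getMetrics().forEach(metric -> {
        assertEquals(replicationType, metric.getMetadata().getReplicationType());
        metric.getProgress().getStages().forEach(stage -> {
            SnapshotUtils.ReplSnapshotCount count = stage.getReplSnapshotCount();
            assertEquals(numCreated, count.getNumCreated());
            assertEquals(numDeleted, count.getNumDeleted());
        });
    });
}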

Example 13 with ReplicationMetric

use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.

the class TestReplicationScenariosUsingSnapshots method testSnapshotMetrics.

@Test
public void testSnapshotMetrics() throws Throwable {
    conf.set(Constants.SCHEDULED_QUERY_SCHEDULENAME, "metrics_test");
    List<String> withClause = ReplicationTestUtils.includeExternalTableClause(true);
    MetricCollector collector = MetricCollector.getInstance();
    Path externalDatabaseLocation = new Path("/" + testName.getMethodName() + "/externalDatabase/");
    DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
    fs.mkdirs(externalDatabaseLocation, new FsPermission("777"));
    Path externalTableLocation1 = new Path("/" + testName.getMethodName() + "/t1/");
    fs = primary.miniDFSCluster.getFileSystem();
    fs.mkdirs(externalTableLocation1, new FsPermission("777"));
    withClause.add("'hive.repl.external.warehouse.single.copy.task.paths'='" + externalTableLocation1.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString() + "'");
    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName).run("create external table emp1 (id int)").run("insert into emp1 values(1),(2)").run("create external table exttab (place string) row format delimited fields terminated by ','" + " location '" + externalTableLocation1.toString() + "'").run("insert into exttab values('lucknow')").dump(primaryDbName, withClause);
    // The bootstrap stage: 2 directories have snapshots enabled, the database directory and the one
    // table added via the config. This is the initial copy, so 1 snapshot is created per directory
    // and none are deleted.
    assertIncrementalMetricsValues(BOOTSTRAP, collector, 2, 0);
    Iterator<ReplicationMetric> itr = collector.getMetrics().iterator();
    while (itr.hasNext()) {
        ReplicationMetric elem = itr.next();
        assertEquals(BOOTSTRAP, elem.getMetadata().getReplicationType());
        List<Stage> stages = elem.getProgress().getStages();
        for (Stage stage : stages) {
            SnapshotUtils.ReplSnapshotCount counts = stage.getReplSnapshotCount();
            assertEquals(2, counts.getNumCreated());
            assertEquals(0, counts.getNumDeleted());
        }
    }
    // Load and check that the data and tables are there.
    replica.load(replicatedDbName, primaryDbName, withClause).run("use " + replicatedDbName).run("show tables like 'emp1'").verifyResults(new String[] { "emp1" }).run("select id from emp1").verifyResults(new String[] { "1", "2" }).run("show tables like 'exttab'").verifyResults(new String[] { "exttab" }).run("select place from exttab").verifyResults(new String[] { "lucknow" }).verifyReplTargetProperty(replicatedDbName);
    // Add some data and try incremental dump.
    tuple = primary.run("use " + primaryDbName).run("insert into emp1 values(3),(4)").run("insert into exttab values('agra')").dump(primaryDbName, withClause);
    // This is the diff stage: for each of the 2 directories with snapshots enabled, 1 old snapshot is
    // deleted and 1 new one is created, so 2 created and 2 deleted in total.
    assertIncrementalMetricsValues(INCREMENTAL, collector, 2, 2);
    // Do a load
    replica.load(replicatedDbName, primaryDbName, withClause);
    // Remove the with clause, and with it the external table path specified in the config.
    tuple = primary.run("use " + primaryDbName).run("insert into exttab values('lucknow')").dump(primaryDbName, null);
    // Only one directory (the database directory) still goes through snapshot-based replication, so 1
    // snapshot is created and 1 old one deleted for it, plus 2 deleted for the table that left the
    // snapshot-based replication scope.
    assertIncrementalMetricsValues(INCREMENTAL, collector, 1, 3);
}
Also used : Path(org.apache.hadoop.fs.Path) ReplExternalTables.externalTableDataPath(org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables.externalTableDataPath) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) ReplicationMetric(org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric) SnapshotUtils(org.apache.hadoop.hive.ql.exec.repl.util.SnapshotUtils) MetricCollector(org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector) Stage(org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Test(org.junit.Test)
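
Summing up the snapshot counts asserted in this test: the bootstrap creates one snapshot per eligible directory (2 created, 0 deleted); the first incremental replaces each of those (2 created, 2 deleted); and once the table drops out of the snapshot scope, only the database directory cycles its snapshot (1 created, 1 deleted) while the dropped table's 2 remaining snapshots are also removed, giving 1 created and 3 deleted.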

Example 14 with ReplicationMetric

use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.

the class TestScheduledReplicationScenarios method testExternalTablesReplLoadBootstrapIncr.

@Test
@Ignore("HIVE-23395")
public void testExternalTablesReplLoadBootstrapIncr() throws Throwable {
    // Bootstrap
    String withClause = " WITH('" + HiveConf.ConfVars.REPL_INCLUDE_AUTHORIZATION_METADATA + "' = 'true' ,'" + HiveConf.ConfVars.REPL_INCLUDE_ATLAS_METADATA + "' = 'true' , '" + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true'" + ",'" + HiveConf.ConfVars.REPL_ATLAS_ENDPOINT + "' = 'http://localhost:21000/atlas'" + ",'" + HiveConf.ConfVars.REPL_ATLAS_REPLICATED_TO_DB + "' = 'tgt'" + ",'" + HiveConf.ConfVars.REPL_SOURCE_CLUSTER_NAME + "' = 'cluster0'" + ",'" + HiveConf.ConfVars.REPL_TARGET_CLUSTER_NAME + "' = 'cluster1')";
    primary.run("use " + primaryDbName).run("create external table t2 (id int)").run("insert into t2 values(1)").run("insert into t2 values(2)");
    try (ScheduledQueryExecutionService schqS = ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
        int next = -1;
        ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
        primary.run("create scheduled query s1_t2 every 5 seconds as repl dump " + primaryDbName + withClause);
        replica.run("create scheduled query s2_t2 every 5 seconds as repl load " + primaryDbName + " INTO " + replicatedDbName + withClause);
        Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR), Base64.getEncoder().encodeToString(primaryDbName.toLowerCase().getBytes(StandardCharsets.UTF_8.name())));
        FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
        next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
        Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
        waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
        replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2" });
        long lastReplId = Long.parseLong(primary.status(replicatedDbName).getOutput().get(0));
        DumpMetaData dumpMetaData = new DumpMetaData(ackPath.getParent(), primary.hiveConf);
        List<ReplicationMetric> replicationMetrics = MetricCollector.getInstance().getMetrics();
        Assert.assertEquals(2, replicationMetrics.size());
        // Generate expected metrics
        List<ReplicationMetric> expectedReplicationMetrics = new ArrayList<>();
        expectedReplicationMetrics.add(generateExpectedMetric("s1_t2", 0, primaryDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS, generateDumpStages(true)));
        expectedReplicationMetrics.add(generateExpectedMetric("s2_t2", dumpMetaData.getDumpExecutionId(), replicatedDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS, generateLoadStages(true)));
        checkMetrics(expectedReplicationMetrics, replicationMetrics);
        // First incremental, after bootstrap
        primary.run("use " + primaryDbName).run("insert into t2 values(3)").run("insert into t2 values(4)");
        next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
        ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
        waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
        replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2", "3", "4" });
    } finally {
        primary.run("drop scheduled query s1_t2");
        replica.run("drop scheduled query s2_t2");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ScheduledQueryExecutionService(org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService) FileSystem(org.apache.hadoop.fs.FileSystem) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) ArrayList(java.util.ArrayList) ReplicationMetric(org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric) Ignore(org.junit.Ignore) Test(org.junit.Test)
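
waitForAck here is a helper from the Hive test utilities, and its implementation is not shown. A minimal stand-in, assuming only the public Hadoop FileSystem API (org.apache.hadoop.fs.FileSystem and Path) and a hypothetical 500 ms poll interval, would poll for the acknowledgement file until a timeout:

// Hypothetical stand-in for the test helper: poll until ackPath exists
// or timeoutMs milliseconds elapse, then fail with an IOException.
private static void waitForAck(FileSystem fs, Path ackPath, long timeoutMs)
        throws IOException, InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!fs.exists(ackPath)) {
        if (System.currentTimeMillis() > deadline) {
            throw new IOException("Timed out waiting for ack file: " + ackPath);
        }
        Thread.sleep(500);
    }
}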

Example 15 with ReplicationMetric

use of org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric in project hive by apache.

the class TestScheduledReplicationScenarios method generateExpectedMetric.

private ReplicationMetric generateExpectedMetric(String policy, long dumpExecId, String dbName, Metadata.ReplicationType replicationType, String staging, long lastReplId, Status status, List<Stage> stages) {
    Metadata metadata = new Metadata(dbName, replicationType, staging);
    metadata.setLastReplId(lastReplId);
    ReplicationMetric replicationMetric = new ReplicationMetric(0, policy, dumpExecId, metadata);
    Progress progress = new Progress();
    progress.setStatus(status);
    for (Stage stage : stages) {
        progress.addStage(stage);
    }
    replicationMetric.setProgress(progress);
    return replicationMetric;
}
Also used : Progress(org.apache.hadoop.hive.ql.parse.repl.metric.event.Progress) Metadata(org.apache.hadoop.hive.ql.parse.repl.metric.event.Metadata) Stage(org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage) ReplicationMetric(org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric)
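
checkMetrics, the counterpart helper invoked in Example 14, is likewise not shown. A naive hedged sketch, comparing only fields reachable through the getters that appear in these examples, might look like the following; the method name is hypothetical, and the real helper in Hive presumably compares more fields, such as individual stage contents:

// Naive comparison sketch using only getters that appear in the examples above.
private void checkMetricsSketch(List<ReplicationMetric> expected, List<ReplicationMetric> actual) {
    assertEquals(expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        ReplicationMetric e = expected.get(i);
        ReplicationMetric a = actual.get(i);
        assertEquals(e.getMetadata().getReplicationType(), a.getMetadata().getReplicationType());
        assertEquals(e.getProgress().getStages().size(), a.getProgress().getStages().size());
    }
}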

Aggregations

ReplicationMetric (org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric): 21
Stage (org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage): 14
Progress (org.apache.hadoop.hive.ql.parse.repl.metric.event.Progress): 12
Test (org.junit.Test): 12
Metric (org.apache.hadoop.hive.ql.parse.repl.metric.event.Metric): 10
HashMap (java.util.HashMap): 8
SnapshotUtils (org.apache.hadoop.hive.ql.exec.repl.util.SnapshotUtils): 8
Metadata (org.apache.hadoop.hive.ql.parse.repl.metric.event.Metadata): 8
ReplStatsTracker (org.apache.hadoop.hive.ql.exec.repl.ReplStatsTracker): 6
BootstrapDumpMetricCollector (org.apache.hadoop.hive.ql.parse.repl.dump.metric.BootstrapDumpMetricCollector): 5
IncrementalDumpMetricCollector (org.apache.hadoop.hive.ql.parse.repl.dump.metric.IncrementalDumpMetricCollector): 4
Path (org.apache.hadoop.fs.Path): 3
BootstrapLoadMetricCollector (org.apache.hadoop.hive.ql.parse.repl.load.metric.BootstrapLoadMetricCollector): 3
IncrementalLoadMetricCollector (org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector): 3
Map (java.util.Map): 2
MetricCollector (org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector): 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
ArrayList (java.util.ArrayList): 1
LinkedList (java.util.LinkedList): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1