
Example 16 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class UpdatePartColStatHandler method handle.

@Override
public void handle(Context withinContext) throws Exception {
    LOG.info("Processing#{} UpdatePartitionTableColumnStat message : {}", fromEventId(), eventMessageAsJSON);
    org.apache.hadoop.hive.metastore.api.Table tableObj = eventMessage.getTableObject();
    if (tableObj == null) {
        LOG.debug("Event#{} was an event of type {} with no table listed", fromEventId(), event.getEventType());
        return;
    }
    // Statistics without any data do not make sense.
    if (withinContext.replicationSpec.isMetadataOnly() || Utils.shouldDumpMetaDataOnlyForExternalTables(new Table(tableObj), withinContext.hiveConf)) {
        return;
    }
    if (!Utils.shouldReplicate(withinContext.replicationSpec, new Table(tableObj), true, withinContext.getTablesForBootstrap(), withinContext.oldReplScope, withinContext.hiveConf)) {
        return;
    }
    DumpMetaData dmd = withinContext.createDmd(this);
    dmd.setPayload(eventMessageAsJSON);
    dmd.write();
}
Also used : Table(org.apache.hadoop.hive.ql.metadata.Table) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData)
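
The guard clauses are what vary between these dump-side event handlers; the DumpMetaData interaction itself is always the same three calls. A minimal sketch of that shared tail, using only the Context#createDmd, DumpMetaData#setPayload and DumpMetaData#write calls visible in Examples 16 and 17 (the helper name and its placement inside a handler are hypothetical):

// Hypothetical helper illustrating the common tail of a dump-side event handler:
// serialize the event's JSON message into its per-event dump directory.
private void writeEventDumpMetadata(Context withinContext, String eventJson) throws Exception {
    // Context#createDmd(this) builds a DumpMetaData instance bound to this event's dump directory.
    DumpMetaData dmd = withinContext.createDmd(this);
    // The already-serialized event message becomes the payload of the dump metadata file.
    dmd.setPayload(eventJson);
    // Persist it so the load side can replay the event.
    dmd.write();
}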

Example 17 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class DropFunctionHandler method handle.

@Override
public void handle(Context withinContext) throws Exception {
    LOG.info("Processing#{} DROP_TABLE message : {}", fromEventId(), eventMessageAsJSON);
    DumpMetaData dmd = withinContext.createDmd(this);
    dmd.setPayload(eventMessageAsJSON);
    dmd.write();
}
Also used : DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData)
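
On the load side, the same class (note its org.apache.hadoop.hive.ql.parse.repl.load package) is typically reconstructed from the event directory to recover what the handler wrote. A rough sketch; the helper name and directory are illustrative, getPayload() is an assumed accessor mirroring setPayload(...), and getDumpType() is the call used in Examples 18 and 19:

// Sketch: read back, on the load side, the per-event dump metadata written by a handler like the one above.
private String readEventPayload(Path eventDumpDir, HiveConf hiveConf) throws Exception {
    DumpMetaData dmd = new DumpMetaData(eventDumpDir, hiveConf);  // same two-arg constructor as in Example 18
    LOG.debug("Dump type for this event directory: {}", dmd.getDumpType());
    return dmd.getPayload();  // assumption: accessor mirroring setPayload(...) above
}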

Example 18 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class ReplDumpTask method execute.

@Override
protected int execute(DriverContext driverContext) {
    try {
        Path dumpRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR), getNextDumpDir());
        DumpMetaData dmd = new DumpMetaData(dumpRoot, conf);
        Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR));
        Long lastReplId;
        if (work.isBootStrapDump()) {
            lastReplId = bootStrapDump(dumpRoot, dmd, cmRoot);
        } else {
            lastReplId = incrementalDump(dumpRoot, dmd, cmRoot);
        }
        prepareReturnValues(Arrays.asList(dumpRoot.toUri().toString(), String.valueOf(lastReplId)), dumpSchema);
    } catch (Exception e) {
        LOG.error("failed", e);
        setException(e);
        return 1;
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
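
The two values packed by prepareReturnValues above are what a REPL DUMP statement ultimately returns to the client: the dump directory and the last replicated event id. A hedged, plain-JDBC sketch of consuming them; `connection` and `dbName` are assumed to exist, java.sql.Statement and java.sql.ResultSet are the only APIs used, and the column order follows the code above:

// Sketch: reading the result of REPL DUMP from a client, matching the
// dumpRoot URI / lastReplId ordering used in prepareReturnValues(...).
try (Statement stmt = connection.createStatement();
     ResultSet rs = stmt.executeQuery("REPL DUMP " + dbName)) {
    if (rs.next()) {
        String dumpLocation = rs.getString(1);              // dumpRoot.toUri().toString()
        long lastReplId = Long.parseLong(rs.getString(2));  // String.valueOf(lastReplId)
        // These two values are then handed to REPL LOAD on the target cluster.
    }
}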

Example 19 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class TestReplicationScenariosAcidTables method testFailoverFailureInReverseReplication.

@Test
public void testFailoverFailureInReverseReplication() throws Throwable {
    List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'", "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'", "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
    List<String> retainPrevDumpDir = Arrays.asList("'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'", "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
    WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").dump(primaryDbName, failoverConfigs);
    // This dump is not failover-ready, since the target db can be used for replication only after the first incremental load.
    FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
    Path dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertFalse(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
    replica.load(replicatedDbName, primaryDbName)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2" })
            .run("repl status " + replicatedDbName)
            .verifyResult(dumpData.lastReplicationId);
    assertTrue(MetaStoreUtils.isTargetOfReplication(replica.getDatabase(replicatedDbName)));
    dumpData = primary.run("use " + primaryDbName)
            .run("insert into t1 values(1)")
            .run("insert into t2 partition(name='Bob') values(11)")
            .dump(primaryDbName, failoverConfigs);
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
    assertTrue(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
    replica.load(replicatedDbName, primaryDbName, failoverConfigs)
            .run("use " + replicatedDbName)
            .run("show tables")
            .verifyResults(new String[] { "t1", "t2" })
            .run("repl status " + replicatedDbName)
            .verifyResult(dumpData.lastReplicationId)
            .run("select id from t1")
            .verifyResults(new String[] { "1" })
            .run("select rank from t2 order by rank")
            .verifyResults(new String[] { "11" });
    Database db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
    primary.run("drop database if exists " + primaryDbName + " cascade");
    WarehouseInstance.Tuple reverseDumpData = replica.run("use " + replicatedDbName)
            .run("insert into t2 partition(name='Bob') values(20)")
            .run("create table t3 (id int)")
            .run("insert into t3 values (10)")
            .dump(replicatedDbName, retainPrevDumpDir);
    assertNotEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
    assertTrue(fs.exists(dumpPath));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    Path dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
    assertTrue(fs.exists(dumpAckFile));
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertFalse(MetaStoreUtils.isTargetOfReplication(db));
    fs.delete(dumpAckFile, false);
    assertFalse(fs.exists(dumpAckFile));
    WarehouseInstance.Tuple preFailoverDumpData = dumpData;
    dumpData = replica.dump(replicatedDbName, retainPrevDumpDir);
    assertNotEquals(dumpData.dumpLocation, preFailoverDumpData.dumpLocation);
    assertTrue(fs.exists(new Path(preFailoverDumpData.dumpLocation)));
    assertEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
    assertTrue(fs.exists(dumpAckFile));
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertFalse(MetaStoreUtils.isTargetOfReplication(db));
    primary.load(primaryDbName, replicatedDbName).run("use " + primaryDbName).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" }).run("repl status " + primaryDbName).verifyResult(dumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "11", "20" }).run("select id from t3").verifyResults(new String[] { "10" });
    assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
    reverseDumpData = replica.run("insert into t3 values (3)")
            .run("insert into t2 partition(name='Bob') values(30)")
            .dump(replicatedDbName, retainPrevDumpDir);
    dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
    assertFalse(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName), MetaStoreUtils.FailoverEndpoint.TARGET));
    fs.delete(dumpAckFile);
    replica.run("ALTER DATABASE " + replicatedDbName + " SET DBPROPERTIES('" + ReplConst.REPL_FAILOVER_ENDPOINT + "'='" + MetaStoreUtils.FailoverEndpoint.TARGET + "')");
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName), MetaStoreUtils.FailoverEndpoint.TARGET));
    assertFalse(fs.exists(dumpAckFile));
    dumpData = replica.dump(replicatedDbName, retainPrevDumpDir);
    assertEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
    assertTrue(fs.exists(dumpAckFile));
    assertFalse(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName), MetaStoreUtils.FailoverEndpoint.TARGET));
    primary.load(primaryDbName, replicatedDbName).run("select rank from t2 order by rank").verifyResults(new String[] { "11", "20", "30" }).run("select id from t3").verifyResults(new String[] { "10", "3" });
    ;
    assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) Database(org.apache.hadoop.hive.metastore.api.Database) Test(org.junit.Test)
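
The test repeats the same pair of checks several times: the dump acknowledgement marker exists and the DumpMetaData at the dump path reports the expected dump type. A small hypothetical helper condensing that pattern; ReplAck.DUMP_ACKNOWLEDGEMENT is assumed to be the constant behind the statically imported DUMP_ACKNOWLEDGEMENT used above:

// Hypothetical test helper: verify a dump directory is acknowledged and of the expected type.
private void assertDumpOfType(FileSystem fs, Path hiveDumpDir, DumpType expected, HiveConf conf)
        throws Exception {
    // Same checks as in the test body, just factored out.
    assertTrue(fs.exists(new Path(hiveDumpDir, ReplAck.DUMP_ACKNOWLEDGEMENT.toString())));
    assertEquals(expected, new DumpMetaData(hiveDumpDir, conf).getDumpType());
}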

Example 20 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class TestScheduledReplicationScenarios method testExternalTablesReplLoadBootstrapIncr.

@Test
@Ignore("HIVE-23395")
public void testExternalTablesReplLoadBootstrapIncr() throws Throwable {
    // Bootstrap
    String withClause = " WITH('" + HiveConf.ConfVars.REPL_INCLUDE_AUTHORIZATION_METADATA + "' = 'true' ,'"
            + HiveConf.ConfVars.REPL_INCLUDE_ATLAS_METADATA + "' = 'true' , '"
            + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true'"
            + ",'" + HiveConf.ConfVars.REPL_ATLAS_ENDPOINT + "' = 'http://localhost:21000/atlas'"
            + ",'" + HiveConf.ConfVars.REPL_ATLAS_REPLICATED_TO_DB + "' = 'tgt'"
            + ",'" + HiveConf.ConfVars.REPL_SOURCE_CLUSTER_NAME + "' = 'cluster0'"
            + ",'" + HiveConf.ConfVars.REPL_TARGET_CLUSTER_NAME + "' = 'cluster1')";
    primary.run("use " + primaryDbName).run("create external table t2 (id int)").run("insert into t2 values(1)").run("insert into t2 values(2)");
    try (ScheduledQueryExecutionService schqS = ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
        int next = -1;
        ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
        primary.run("create scheduled query s1_t2 every 5 seconds as repl dump " + primaryDbName + withClause);
        replica.run("create scheduled query s2_t2 every 5 seconds as repl load " + primaryDbName + " INTO " + replicatedDbName + withClause);
        Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR), Base64.getEncoder().encodeToString(primaryDbName.toLowerCase().getBytes(StandardCharsets.UTF_8.name())));
        FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
        next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
        Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
        waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
        replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2" });
        long lastReplId = Long.parseLong(primary.status(replicatedDbName).getOutput().get(0));
        DumpMetaData dumpMetaData = new DumpMetaData(ackPath.getParent(), primary.hiveConf);
        List<ReplicationMetric> replicationMetrics = MetricCollector.getInstance().getMetrics();
        Assert.assertEquals(2, replicationMetrics.size());
        // Generate expected metrics
        List<ReplicationMetric> expectedReplicationMetrics = new ArrayList<>();
        expectedReplicationMetrics.add(generateExpectedMetric("s1_t2", 0, primaryDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS, generateDumpStages(true)));
        expectedReplicationMetrics.add(generateExpectedMetric("s2_t2", dumpMetaData.getDumpExecutionId(), replicatedDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS, generateLoadStages(true)));
        checkMetrics(expectedReplicationMetrics, replicationMetrics);
        // First incremental, after bootstrap
        primary.run("use " + primaryDbName).run("insert into t2 values(3)").run("insert into t2 values(4)");
        next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
        ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
        waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
        replica.run("use " + replicatedDbName).run("show tables like 't2'").verifyResult("t2").run("select id from t2 order by id").verifyResults(new String[] { "1", "2", "3", "4" });
    } finally {
        primary.run("drop scheduled query s1_t2");
        replica.run("drop scheduled query s2_t2");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ScheduledQueryExecutionService(org.apache.hadoop.hive.ql.scheduled.ScheduledQueryExecutionService) FileSystem(org.apache.hadoop.fs.FileSystem) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) ArrayList(java.util.ArrayList) ReplicationMetric(org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric) Ignore(org.junit.Ignore) Test(org.junit.Test)
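
The dumpRoot computation in this test encodes the lowercased database name in Base64 under the REPLDIR root before appending the numbered dump directory. A small sketch of just that path derivation, mirroring the expression used above; the helper name is illustrative:

// Sketch: derive a database's dump root the same way the test above does:
// <REPLDIR>/<base64(dbName.toLowerCase())>.
static Path dumpRootFor(HiveConf hiveConf, String dbName) throws Exception {
    String encodedDbName = Base64.getEncoder()
            .encodeToString(dbName.toLowerCase().getBytes(StandardCharsets.UTF_8.name()));
    return new Path(hiveConf.getVar(HiveConf.ConfVars.REPLDIR), encodedDbName);
}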

Aggregations

DumpMetaData (org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData): 39
Path (org.apache.hadoop.fs.Path): 17
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Test (org.junit.Test): 6
ArrayList (java.util.ArrayList): 5
Table (org.apache.hadoop.hive.ql.metadata.Table): 5
Database (org.apache.hadoop.hive.metastore.api.Database): 4
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 4
IOException (java.io.IOException): 3
ReplScope (org.apache.hadoop.hive.common.repl.ReplScope): 3
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 3
HashMap (java.util.HashMap): 2
List (java.util.List): 2
Task (org.apache.hadoop.hive.ql.exec.Task): 2
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 2
FailoverMetaData (org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData): 2
FileNotFoundException (java.io.FileNotFoundException): 1
URI (java.net.URI): 1