
Example 1 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class TestReplicationScenarios method deleteNewMetadataFields.

private void deleteNewMetadataFields(Tuple dump) throws SemanticException {
    Path dumpHiveDir = new Path(dump.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    DumpMetaData dmd = new DumpMetaData(dumpHiveDir, hconf);
    Path dumpMetaPath = new Path(dumpHiveDir, DUMP_METADATA);
    List<List<String>> listValues = new ArrayList<>();
    DumpType dumpType = dmd.getDumpType();
    Long eventFrom = dmd.getEventFrom();
    Long eventTo = dmd.getEventTo();
    String cmRoot = "testCmRoot";
    String payload = dmd.getPayload();
    Long dumpExecutionId = dmd.getDumpExecutionId();
    ReplScope replScope = dmd.getReplScope();
    listValues.add(Arrays.asList(dumpType.toString(), eventFrom.toString(), eventTo.toString(), cmRoot, dumpExecutionId.toString(), payload));
    if (replScope != null) {
        listValues.add(dmd.prepareReplScopeValues());
    }
    org.apache.hadoop.hive.ql.parse.repl.dump.Utils.writeOutput(listValues, dumpMetaPath, hconf, true);
}
Also used : Path(org.apache.hadoop.fs.Path) ReplScope(org.apache.hadoop.hive.common.repl.ReplScope) DumpType(org.apache.hadoop.hive.ql.parse.repl.DumpType) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) ArrayList(java.util.ArrayList) List(java.util.List)
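
For quick reference, a minimal, self-contained sketch that relies only on the DumpMetaData getters exercised above; the inspector class and its describe method are hypothetical names, not part of Hive:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData;

// Hypothetical helper: reads an existing dump's metadata and prints the same
// fields the test above copies (dump type, event range, execution id, scope, payload).
public final class DumpMetaDataInspector {

    public static void describe(Path dumpHiveDir, HiveConf conf) throws SemanticException {
        DumpMetaData dmd = new DumpMetaData(dumpHiveDir, conf);
        System.out.println("type=" + dmd.getDumpType()
                + " events=[" + dmd.getEventFrom() + ", " + dmd.getEventTo() + "]"
                + " executionId=" + dmd.getDumpExecutionId()
                + " replScope=" + dmd.getReplScope()
                + " payload=" + dmd.getPayload());
    }
}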

Example 2 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class TestReplicationScenariosAcidTables method testFailoverFailureBeforeReverseReplication.

@Test
public void testFailoverFailureBeforeReverseReplication() throws Throwable {
    List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'", "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'", "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
    List<String> retainPrevDumpDir = Arrays.asList("'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'", "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
    WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").dump(primaryDbName, failoverConfigs);
    // This dump is not failover ready as target db can be used for replication only after first incremental load.
    FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
    Path dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertFalse(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
    replica.load(replicatedDbName, primaryDbName).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId);
    assertTrue(MetaStoreUtils.isTargetOfReplication(replica.getDatabase(replicatedDbName)));
    dumpData = primary.run("use " + primaryDbName).run("insert into t1 values(1)").run("insert into t2 partition(name='Bob') values(11)").dump(primaryDbName, failoverConfigs);
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    Path dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
    Path failoverMdFile = new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA);
    assertTrue(fs.exists(dumpAckFile));
    assertTrue(fs.exists(failoverMdFile));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
    FailoverMetaData previousFmd = new FailoverMetaData(dumpPath, conf);
    Long failoverEventId = previousFmd.getFailoverEventId();
    assertTrue(failoverEventId >= Long.parseLong(dumpData.lastReplicationId));
    Long failoverMdModifTime = fs.getFileStatus(failoverMdFile).getModificationTime();
    fs.delete(dumpAckFile, false);
    dumpData = primary.run("use " + primaryDbName).run("insert into t2 partition(name='Carl') values(10)").run("create table t3 (id int)").run("insert into t3 values (2)").dump(primaryDbName, failoverConfigs);
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    failoverMdFile = new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA);
    assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
    assertTrue(fs.exists(failoverMdFile));
    Assert.assertEquals(failoverMdModifTime, (Long) fs.getFileStatus(failoverMdFile).getModificationTime());
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
    assertTrue(failoverEventId >= Long.parseLong(dumpData.lastReplicationId));
    FailoverMetaData currentFmd = new FailoverMetaData(dumpPath, conf);
    assertTrue(currentFmd.equals(previousFmd));
    replica.load(replicatedDbName, primaryDbName, failoverConfigs).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "11" });
    Database db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
    primary.run("drop database if exists " + primaryDbName + " cascade");
    WarehouseInstance.Tuple reverseDumpData = replica.run("use " + replicatedDbName).run("insert into t2 partition(name='Carl') values(12)").run("create table t3 (id int)").run("insert into t3 values (10)").dump(replicatedDbName, retainPrevDumpDir);
    assertNotEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
    assertTrue(fs.exists(dumpPath));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
    assertTrue(fs.exists(dumpAckFile));
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertFalse(MetaStoreUtils.isTargetOfReplication(db));
    primary.load(primaryDbName, replicatedDbName).run("use " + primaryDbName).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" }).run("repl status " + primaryDbName).verifyResult(reverseDumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "11", "12" }).run("select id from t3").verifyResults(new String[] { "10" });
    assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
    reverseDumpData = replica.run("insert into t3 values (15)").dump(replicatedDbName, retainPrevDumpDir);
    dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(replica.getDatabase(replicatedDbName)));
    primary.load(primaryDbName, replicatedDbName).run("select id from t3").verifyResults(new String[] { "10", "15" });
    assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
}
Also used : Path(org.apache.hadoop.fs.Path) FailoverMetaData(org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData) FileSystem(org.apache.hadoop.fs.FileSystem) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) Database(org.apache.hadoop.hive.metastore.api.Database) Test(org.junit.Test)

Example 3 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class TestReplicationScenariosAcidTables method testFailoverRollback.

@Test
public void testFailoverRollback() throws Throwable {
    List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
    WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").dump(primaryDbName, failoverConfigs);
    FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
    Path dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
    replica.load(replicatedDbName, primaryDbName, failoverConfigs).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId);
    Database db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(db));
    dumpData = primary.run("use " + primaryDbName).run("insert into t1 values(1)").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)").dump(primaryDbName, failoverConfigs);
    primary.run("insert into t2 partition(name='Marie') values(40)");
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
    assertTrue(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
    replica.load(replicatedDbName, primaryDbName, failoverConfigs).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11" }).run("show partitions t2").verifyResults(new String[] { "name=Bob", "name=Carl" });
    assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    dumpData = primary.run("create table t3(id int)").run("insert into t3 values (3)").dump(primaryDbName);
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertEquals(new DumpMetaData(dumpPath, conf).getDumpType(), DumpType.INCREMENTAL);
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertFalse(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
    replica.load(replicatedDbName, primaryDbName).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" }).run("select id from t3").verifyResults(new String[] { "3" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11", "40" }).run("show partitions t2").verifyResults(new String[] { "name=Bob", "name=Carl", "name=Marie" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId);
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(db));
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) Database(org.apache.hadoop.hive.metastore.api.Database) Test(org.junit.Test)

Example 4 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class TestReplicationScenariosAcidTables method testCompleteFailoverWithReverseBootstrap.

@Test
public void testCompleteFailoverWithReverseBootstrap() throws Throwable {
    HiveConf primaryConf = primary.getConf();
    TxnStore txnHandler = TxnUtils.getTxnStore(primary.getConf());
    List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
    WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName).run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + "tblproperties (\"transactional\"=\"true\")").run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", " + "\"transactional_properties\"=\"insert_only\")").dump(primaryDbName, failoverConfigs);
    // This dump is not failover ready as target db can be used for replication only after first incremental load.
    FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
    Path dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
    replica.load(replicatedDbName, primaryDbName, failoverConfigs).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId);
    Database db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(db));
    primary.run("use " + primaryDbName).run("insert into t1 values(1)").run("insert into t2 partition(name='Bob') values(11)").run("insert into t2 partition(name='Carl') values(10)");
    /**
     * Open transactions can be of two types:
     *     Case 1) Txns that have not acquired HIVE LOCKS, or that belong to a different db: these txns are captured in
     *     the _failovermetadata file inside the dump directory.
     *     Case 2) Txns that have acquired HIVE LOCKS and belong to the db under replication: these txns are aborted by Hive
     *     as part of the dump operation.
     */
    // Open 3 txns for Database which is not under replication
    int numTxnsForSecDb = 3;
    List<Long> txnsForSecDb = openTxns(numTxnsForSecDb, txnHandler, primaryConf);
    // Allocate write ids for both tables of the secondary db for the 3 txns
    // t1=3 and t2=3
    Map<String, Long> tablesInSecDb = new HashMap<>();
    tablesInSecDb.put("t1", (long) numTxnsForSecDb);
    tablesInSecDb.put("t2", (long) numTxnsForSecDb);
    List<Long> lockIdsForSecDb = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName + "_extra", tablesInSecDb, txnHandler, txnsForSecDb, primaryConf);
    // Open 2 txns for Primary Db
    int numTxnsForPrimaryDb = 2;
    List<Long> txnsForPrimaryDb = openTxns(numTxnsForPrimaryDb, txnHandler, primaryConf);
    // Allocate write ids for both tables of the primary db for the 2 txns
    // t1=3 and t2=4
    Map<String, Long> tablesInPrimaryDb = new HashMap<>();
    tablesInPrimaryDb.put("t1", (long) numTxnsForPrimaryDb + 1);
    tablesInPrimaryDb.put("t2", (long) numTxnsForPrimaryDb + 2);
    List<Long> lockIdsForPrimaryDb = allocateWriteIdsForTablesAndAcquireLocks(primaryDbName, tablesInPrimaryDb, txnHandler, txnsForPrimaryDb, primaryConf);
    // Open 1 txn with no hive locks acquired
    List<Long> txnsWithNoLocks = openTxns(1, txnHandler, primaryConf);
    dumpData = primary.dump(primaryDbName, failoverConfigs);
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
    assertTrue(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
    FailoverMetaData failoverMD = new FailoverMetaData(dumpPath, conf);
    List<Long> openTxns = failoverMD.getOpenTxns();
    List<Long> txnsAborted = failoverMD.getAbortedTxns();
    assertTrue(txnsAborted.size() == 2);
    assertTrue(txnsAborted.containsAll(txnsForPrimaryDb));
    assertTrue(openTxns.size() == 4);
    assertTrue(openTxns.containsAll(txnsForSecDb));
    assertTrue(openTxns.containsAll(txnsWithNoLocks));
    assertTrue(failoverMD.getTxnsWithoutLock().equals(txnsWithNoLocks));
    // Only txnsForPrimaryDb would have been aborted by the dump operation; txnsForSecDb and txnsWithNoLocks remain open.
    verifyAllOpenTxnsAborted(txnsForPrimaryDb, primaryConf);
    verifyAllOpenTxnsNotAborted(txnsForSecDb, primaryConf);
    verifyAllOpenTxnsNotAborted(txnsWithNoLocks, primaryConf);
    // Abort the txns
    txnHandler.abortTxns(new AbortTxnsRequest(txnsForSecDb));
    txnHandler.abortTxns(new AbortTxnsRequest(txnsWithNoLocks));
    verifyAllOpenTxnsAborted(txnsForSecDb, primaryConf);
    verifyAllOpenTxnsAborted(txnsWithNoLocks, primaryConf);
    releaseLocks(txnHandler, lockIdsForSecDb);
    replica.load(replicatedDbName, primaryDbName, failoverConfigs).run("use " + replicatedDbName).run("show tables").verifyResults(new String[] { "t1", "t2" }).run("repl status " + replicatedDbName).verifyResult(dumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11" });
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isTargetOfReplication(db));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
    Path dbRootDir = new Path(dumpData.dumpLocation).getParent();
    long prevDumpDirModifTime = getLatestDumpDirModifTime(dbRootDir);
    primary.run("REPL DUMP " + primaryDbName + " with ('" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "' = 'true')");
    Assert.assertEquals(dumpData.dumpLocation, ReplUtils.getLatestDumpPath(dbRootDir, conf).toString());
    Assert.assertEquals(prevDumpDirModifTime, getLatestDumpDirModifTime(dbRootDir));
    dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName), MetaStoreUtils.FailoverEndpoint.SOURCE));
    primary.run("drop database if exists " + primaryDbName + " cascade");
    assertTrue(primary.getDatabase(primaryDbName) == null);
    assertFalse(ReplChangeManager.isSourceOfReplication(replica.getDatabase(replicatedDbName)));
    WarehouseInstance.Tuple reverseDumpData = replica.run("create table t3 (id int)").run("insert into t2 partition(name='Bob') values(20)").run("insert into t3 values (2)").dump(replicatedDbName);
    assertNotEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
    assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
    assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
    assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
    db = replica.getDatabase(replicatedDbName);
    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
    assertFalse(MetaStoreUtils.isTargetOfReplication(db));
    primary.load(primaryDbName, replicatedDbName).run("use " + primaryDbName).run("show tables").verifyResults(new String[] { "t1", "t2", "t3" }).run("repl status " + primaryDbName).verifyResult(reverseDumpData.lastReplicationId).run("select id from t1").verifyResults(new String[] { "1" }).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11", "20" }).run("select id from t3").verifyResults(new String[] { "2" });
    Database primaryDb = primary.getDatabase(primaryDbName);
    assertFalse(primaryDb == null);
    assertTrue(ReplUtils.isFirstIncPending(primaryDb.getParameters()));
    assertTrue(MetaStoreUtils.isTargetOfReplication(primaryDb));
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(primaryDb));
    assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
    assertFalse(ReplChangeManager.isSourceOfReplication(primaryDb));
    assertTrue(ReplChangeManager.isSourceOfReplication(replica.getDatabase(replicatedDbName)));
    reverseDumpData = replica.run("insert into t3 values (3)").run("insert into t2 partition(name='Bob') values(30)").dump(replicatedDbName);
    assertFalse(MetaStoreUtils.isDbBeingFailedOver(replica.getDatabase(replicatedDbName)));
    primary.load(primaryDbName, replicatedDbName).run("select rank from t2 order by rank").verifyResults(new String[] { "10", "11", "20", "30" }).run("select id from t3").verifyResults(new String[] { "2", "3" }).run("repl status " + primaryDbName).verifyResult(reverseDumpData.lastReplicationId);
    assertFalse(ReplUtils.isFirstIncPending(primary.getDatabase(primaryDbName).getParameters()));
}
Also used : Path(org.apache.hadoop.fs.Path) FailoverMetaData(org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData) HashMap(java.util.HashMap) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) AbortTxnsRequest(org.apache.hadoop.hive.metastore.api.AbortTxnsRequest) FileSystem(org.apache.hadoop.fs.FileSystem) Database(org.apache.hadoop.hive.metastore.api.Database) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) Test(org.junit.Test)
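
The failover-related assertions above read the _failovermetadata file through FailoverMetaData. Below is a hedged sketch of inspecting such a file, using only the getters these tests exercise (getFailoverEventId, getOpenTxns, getAbortedTxns, getTxnsWithoutLock); the summarize helper itself is hypothetical:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData;

// Hypothetical helper: prints the failover metadata written under a dump directory,
// using only the getters exercised by the tests above.
public final class FailoverMetaDataInspector {

    public static void summarize(Path dumpPath, HiveConf conf) throws Exception {
        FailoverMetaData fmd = new FailoverMetaData(dumpPath, conf);
        System.out.println("failoverEventId=" + fmd.getFailoverEventId()
                + " openTxns=" + fmd.getOpenTxns()
                + " abortedTxns=" + fmd.getAbortedTxns()
                + " txnsWithoutLock=" + fmd.getTxnsWithoutLock());
    }
}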

Example 5 with DumpMetaData

use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.

the class ReplDumpTask method execute.

@Override
public int execute() {
    try {
        SecurityUtils.reloginExpiringKeytabUser();
        if (work.dataCopyIteratorsInitialized()) {
            initiateDataCopyTasks();
        } else {
            Path dumpRoot = ReplUtils.getEncodedDumpRootPath(conf, work.dbNameOrPattern.toLowerCase());
            if (ReplUtils.failedWithNonRecoverableError(ReplUtils.getLatestDumpPath(dumpRoot, conf), conf)) {
                LOG.error("Previous dump failed with non recoverable error. Needs manual intervention. ");
                setException(new SemanticException(ErrorMsg.REPL_FAILED_WITH_NON_RECOVERABLE_ERROR.format()));
                return ErrorMsg.REPL_FAILED_WITH_NON_RECOVERABLE_ERROR.getErrorCode();
            }
            Path previousValidHiveDumpPath = getPreviousValidDumpMetadataPath(dumpRoot);
            boolean isFailoverMarkerPresent = false;
            boolean isFailover = isFailover(work.dbNameOrPattern, getHive());
            LOG.debug("Database is {} going through failover", isFailover ? "" : "not");
            if (previousValidHiveDumpPath == null && !isFailover) {
                work.setBootstrap(true);
            } else {
                work.setOldReplScope(isFailover ? null : new DumpMetaData(previousValidHiveDumpPath, conf).getReplScope());
                isFailoverMarkerPresent = !isFailover && isDumpFailoverReady(previousValidHiveDumpPath);
            }
            // Proceed with the dump operation in the following cases:
            // 1. No previous dump is present.
            // 2. The previous dump is already loaded and it is not in failover ready status.
            if (shouldDump(previousValidHiveDumpPath, isFailoverMarkerPresent, isFailover)) {
                Path currentDumpPath = getCurrentDumpPath(dumpRoot, work.isBootstrap());
                Path hiveDumpRoot = new Path(currentDumpPath, ReplUtils.REPL_HIVE_BASE_DIR);
                if (!work.isBootstrap() && !isFailover) {
                    preProcessFailoverIfRequired(previousValidHiveDumpPath, isFailoverMarkerPresent);
                }
                // Set distCp custom name corresponding to the replication policy.
                String mapRedCustomName = ReplUtils.getDistCpCustomName(conf, work.dbNameOrPattern);
                conf.set(JobContext.JOB_NAME, mapRedCustomName);
                work.setCurrentDumpPath(currentDumpPath);
                work.setMetricCollector(initMetricCollection(work.isBootstrap(), hiveDumpRoot));
                if (shouldDumpAtlasMetadata()) {
                    addAtlasDumpTask(work.isBootstrap(), previousValidHiveDumpPath);
                    LOG.info("Added task to dump atlas metadata.");
                }
                if (shouldDumpAuthorizationMetadata()) {
                    initiateAuthorizationDumpTask();
                }
                DumpMetaData dmd = new DumpMetaData(hiveDumpRoot, conf);
                // Initialize ReplChangeManager instance since we will require it to encode file URI.
                ReplChangeManager.getInstance(conf);
                Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR));
                Long lastReplId;
                LOG.info("Data copy at load enabled : {}", conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET));
                if (isFailover) {
                    if (createEventMarker) {
                        LOG.info("Optimised Bootstrap Dump triggered for {}.", work.dbNameOrPattern);
                        // Before starting optimised bootstrap, check that the first incremental is done to ensure the
                        // database is in a consistent state.
                        isFirstIncrementalPending(work.dbNameOrPattern, getHive());
                        // Get the last replicated event id from the database.
                        String dbEventId = getReplEventIdFromDatabase(work.dbNameOrPattern, getHive());
                        // Get the last replicated event id from the database with respect to target.
                        String targetDbEventId = getTargetEventId(work.dbNameOrPattern, getHive());
                        // Check if the tableDiff directory is present or not.
                        boolean isTableDiffDirectoryPresent = checkFileExists(currentDumpPath, conf, TABLE_DIFF_COMPLETE_DIRECTORY);
                        LOG.info("Creating event_ack file for database {} with event id {}.", work.dbNameOrPattern, dbEventId);
                        lastReplId = createAndGetEventAckFile(currentDumpPath, dmd, cmRoot, dbEventId, targetDbEventId, conf, work);
                        finishRemainingTasks();
                    } else {
                        // We should reach here only if the table diff is present.
                        boolean isTableDiffDirectoryPresent = checkFileExists(previousValidHiveDumpPath.getParent(), conf, TABLE_DIFF_COMPLETE_DIRECTORY);
                        assert isTableDiffDirectoryPresent;
                        // Set the flag so that the db properties set for optimised bootstrap are unset once the dump is complete.
                        unsetDbPropertiesForOptimisedBootstrap = true;
                        long fromEventId = Long.parseLong(getEventIdFromFile(previousValidHiveDumpPath.getParent(), conf)[1]);
                        LOG.info("Starting optimised bootstrap from event id {} for database {}", fromEventId, work.dbNameOrPattern);
                        work.setEventFrom(fromEventId);
                        // Get the tables to be bootstrapped from the table diff
                        tablesForBootstrap = getTablesFromTableDiffFile(previousValidHiveDumpPath.getParent(), conf);
                        // Generate the bootstrapped table list and put it in the new dump directory for the load to consume.
                        createBootstrapTableList(currentDumpPath, tablesForBootstrap, conf);
                        // Call the normal dump with the tablesForBootstrap set.
                        lastReplId = incrementalDump(hiveDumpRoot, dmd, cmRoot, getHive());
                    }
                } else if (work.isBootstrap()) {
                    lastReplId = bootStrapDump(hiveDumpRoot, dmd, cmRoot, getHive());
                } else {
                    work.setEventFrom(getEventFromPreviousDumpMetadata(previousValidHiveDumpPath));
                    lastReplId = incrementalDump(hiveDumpRoot, dmd, cmRoot, getHive());
                }
                // The data copy doesn't need to be initialised for the first dump of an optimised bootstrap.
                if (lastReplId >= 0) {
                    work.setResultValues(Arrays.asList(currentDumpPath.toUri().toString(), String.valueOf(lastReplId)));
                    initiateDataCopyTasks();
                }
            } else {
                if (isFailoverMarkerPresent) {
                    LOG.info("Previous Dump is failover ready. Skipping this iteration.");
                } else {
                    LOG.info("Previous Dump is not yet loaded. Skipping this iteration.");
                }
            }
        }
    } catch (RuntimeException e) {
        LOG.error("replication failed with run time exception", e);
        setException(e);
        try {
            ReplUtils.handleException(true, e, work.getCurrentDumpPath().toString(), work.getMetricCollector(), getName(), conf);
        } catch (Exception ex) {
            LOG.error("Failed to collect replication metrics: ", ex);
        }
        throw e;
    } catch (Exception e) {
        setException(e);
        int errorCode;
        if (e instanceof SnapshotException) {
            errorCode = ErrorMsg.getErrorMsg("SNAPSHOT_ERROR").getErrorCode();
        } else {
            errorCode = ErrorMsg.getErrorMsg(e.getMessage()).getErrorCode();
        }
        try {
            return ReplUtils.handleException(true, e, work.getCurrentDumpPath().toString(), work.getMetricCollector(), getName(), conf);
        } catch (Exception ex) {
            LOG.error("Failed to collect replication metrics: ", ex);
            return errorCode;
        }
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) ReplChangeManager.getReplPolicyIdString(org.apache.hadoop.hive.metastore.ReplChangeManager.getReplPolicyIdString) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) TException(org.apache.thrift.TException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) SnapshotException(org.apache.hadoop.hdfs.protocol.SnapshotException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException)
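
In the task above, an incremental dump resumes from where the previous dump ended (work.setEventFrom(getEventFromPreviousDumpMetadata(previousValidHiveDumpPath))). A hedged sketch of what such a lookup can look like, built only on the DumpMetaData getters shown in Example 1; the helper name and the bootstrap sentinel value are assumptions, not Hive's exact implementation:

// Hypothetical helper mirroring the getEventFromPreviousDumpMetadata(...) call above:
// an incremental dump should resume from the event id the previous dump ended at.
private long eventFromPreviousDump(Path previousValidHiveDumpPath, HiveConf conf) throws SemanticException {
    if (previousValidHiveDumpPath == null) {
        // No previous dump metadata: the caller treats this as a bootstrap dump (sentinel 0).
        return 0L;
    }
    DumpMetaData previous = new DumpMetaData(previousValidHiveDumpPath, conf);
    // getEventTo() is the last event id covered by the previous dump (see Example 1).
    return previous.getEventTo();
}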

Aggregations

DumpMetaData (org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData): 39
Path (org.apache.hadoop.fs.Path): 17
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Test (org.junit.Test): 6
ArrayList (java.util.ArrayList): 5
Table (org.apache.hadoop.hive.ql.metadata.Table): 5
Database (org.apache.hadoop.hive.metastore.api.Database): 4
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 4
IOException (java.io.IOException): 3
ReplScope (org.apache.hadoop.hive.common.repl.ReplScope): 3
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 3
HashMap (java.util.HashMap): 2
List (java.util.List): 2
Task (org.apache.hadoop.hive.ql.exec.Task): 2
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 2
FailoverMetaData (org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData): 2
FileNotFoundException (java.io.FileNotFoundException): 1
URI (java.net.URI): 1