use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.
the class UpdatePartColStatHandler method handle.
@Override
public void handle(Context withinContext) throws Exception {
  LOG.info("Processing#{} UpdatePartitionTableColumnStat message : {}", fromEventId(), eventMessageAsJSON);
  org.apache.hadoop.hive.metastore.api.Table tableObj = eventMessage.getTableObject();
  if (tableObj == null) {
    LOG.debug("Event#{} was an event of type {} with no table listed", fromEventId(), event.getEventType());
    return;
  }
  // Statistics without any data do not make sense.
  if (withinContext.replicationSpec.isMetadataOnly()
      || Utils.shouldDumpMetaDataOnlyForExternalTables(new Table(tableObj), withinContext.hiveConf)) {
    return;
  }
  if (!Utils.shouldReplicate(withinContext.replicationSpec, new Table(tableObj), true,
      withinContext.getTablesForBootstrap(), withinContext.oldReplScope, withinContext.hiveConf)) {
    return;
  }
  DumpMetaData dmd = withinContext.createDmd(this);
  dmd.setPayload(eventMessageAsJSON);
  dmd.write();
}
use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.
the class DropFunctionHandler method handle.
@Override
public void handle(Context withinContext) throws Exception {
  LOG.info("Processing#{} DROP_FUNCTION message : {}", fromEventId(), eventMessageAsJSON);
  DumpMetaData dmd = withinContext.createDmd(this);
  dmd.setPayload(eventMessageAsJSON);
  dmd.write();
}
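Both handlers above persist the serialized event the same way: create a DumpMetaData for the event via createDmd, attach the JSON payload with setPayload, and flush it with write. The sketch below is a minimal, hedged illustration of reading that metadata back from an event dump directory; it is not Hive's load path. Only the (Path, HiveConf) constructor and getDumpType() are confirmed by the snippets on this page; getPayload() is an assumed accessor mirroring setPayload().
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData;

// Hedged read-back sketch, not taken from the Hive sources above.
public class EventDumpReadSketch {
  public static void inspect(Path eventDumpDir, HiveConf conf) throws Exception {
    DumpMetaData dmd = new DumpMetaData(eventDumpDir, conf);          // same (Path, HiveConf) constructor as above
    System.out.println("dump type = " + dmd.getDumpType());           // the event's dump type
    System.out.println("payload   = " + dmd.getPayload());            // ASSUMED accessor for the JSON set via setPayload()
  }
}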
use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.
the class ReplDumpTask method execute.
@Override
protected int execute(DriverContext driverContext) {
  try {
    Path dumpRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR), getNextDumpDir());
    DumpMetaData dmd = new DumpMetaData(dumpRoot, conf);
    Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR));
    Long lastReplId;
    if (work.isBootStrapDump()) {
      lastReplId = bootStrapDump(dumpRoot, dmd, cmRoot);
    } else {
      lastReplId = incrementalDump(dumpRoot, dmd, cmRoot);
    }
    prepareReturnValues(Arrays.asList(dumpRoot.toUri().toString(), String.valueOf(lastReplId)), dumpSchema);
  } catch (Exception e) {
    LOG.error("failed", e);
    setException(e);
    return 1;
  }
  return 0;
}
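ReplDumpTask creates the dump-level DumpMetaData at the dump root and lets bootStrapDump or incrementalDump fill it in. The tests further down reconstruct a DumpMetaData over <dumpLocation>/ReplUtils.REPL_HIVE_BASE_DIR to check what kind of dump was produced. The helper below is a hedged sketch of that inspection pattern; the class and method names are hypothetical, and the import paths for ReplUtils and DumpType are my assumption.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;     // assumed package for REPL_HIVE_BASE_DIR
import org.apache.hadoop.hive.ql.parse.repl.DumpType;          // assumed package for DumpType
import org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData;

// Illustrative helper (hypothetical name): classify a finished dump by its metadata.
public final class DumpInspector {
  private DumpInspector() {}

  // The tests on this page read the dump-level metadata from <dumpLocation>/REPL_HIVE_BASE_DIR.
  public static boolean isBootstrapDump(String dumpLocation, HiveConf conf) throws Exception {
    Path hiveDumpPath = new Path(dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    return new DumpMetaData(hiveDumpPath, conf).getDumpType() == DumpType.BOOTSTRAP;
  }
}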
use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.
the class TestReplicationScenariosAcidTables method testFailoverFailureInReverseReplication.
@Test
public void testFailoverFailureInReverseReplication() throws Throwable {
  List<String> failoverConfigs = Arrays.asList(
      "'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'",
      "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'",
      "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
  List<String> retainPrevDumpDir = Arrays.asList(
      "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'",
      "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
  WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName)
      .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc "
          + "tblproperties (\"transactional\"=\"true\")")
      .run("create table t2 (rank int) partitioned by (name string) tblproperties(\"transactional\"=\"true\", "
          + "\"transactional_properties\"=\"insert_only\")")
      .dump(primaryDbName, failoverConfigs);
  // This dump is not failover ready, as the target db can be used for replication only after the first incremental load.
  FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
  Path dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
  assertFalse(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
  assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
  assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
  replica.load(replicatedDbName, primaryDbName)
      .run("use " + replicatedDbName)
      .run("show tables")
      .verifyResults(new String[] { "t1", "t2" })
      .run("repl status " + replicatedDbName)
      .verifyResult(dumpData.lastReplicationId);
  assertTrue(MetaStoreUtils.isTargetOfReplication(replica.getDatabase(replicatedDbName)));
  // Second (incremental) dump: this one is failover ready.
  dumpData = primary.run("use " + primaryDbName)
      .run("insert into t1 values(1)")
      .run("insert into t2 partition(name='Bob') values(11)")
      .dump(primaryDbName, failoverConfigs);
  dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
  assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
  assertTrue(fs.exists(new Path(dumpPath, FailoverMetaData.FAILOVER_METADATA)));
  assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
  assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName),
      MetaStoreUtils.FailoverEndpoint.SOURCE));
  replica.load(replicatedDbName, primaryDbName, failoverConfigs)
      .run("use " + replicatedDbName)
      .run("show tables")
      .verifyResults(new String[] { "t1", "t2" })
      .run("repl status " + replicatedDbName)
      .verifyResult(dumpData.lastReplicationId)
      .run("select id from t1")
      .verifyResults(new String[] { "1" })
      .run("select rank from t2 order by rank")
      .verifyResults(new String[] { "11" });
  Database db = replica.getDatabase(replicatedDbName);
  assertTrue(MetaStoreUtils.isTargetOfReplication(db));
  assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
  assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
  // Reverse replication: the old primary is dropped and the replica becomes the new source.
  primary.run("drop database if exists " + primaryDbName + " cascade");
  WarehouseInstance.Tuple reverseDumpData = replica.run("use " + replicatedDbName)
      .run("insert into t2 partition(name='Bob') values(20)")
      .run("create table t3 (id int)")
      .run("insert into t3 values (10)")
      .dump(replicatedDbName, retainPrevDumpDir);
  assertNotEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
  assertTrue(fs.exists(dumpPath));
  assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
  dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
  Path dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
  assertTrue(fs.exists(dumpAckFile));
  assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
  assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
  db = replica.getDatabase(replicatedDbName);
  assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
  assertFalse(MetaStoreUtils.isTargetOfReplication(db));
  // Simulate a failed reverse dump by removing its acknowledgement, then retry.
  fs.delete(dumpAckFile, false);
  assertFalse(fs.exists(dumpAckFile));
  WarehouseInstance.Tuple preFailoverDumpData = dumpData;
  dumpData = replica.dump(replicatedDbName, retainPrevDumpDir);
  assertNotEquals(dumpData.dumpLocation, preFailoverDumpData.dumpLocation);
  assertTrue(fs.exists(new Path(preFailoverDumpData.dumpLocation)));
  // The retried dump reuses the previous (failed) reverse dump directory.
  assertEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
  assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
  assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
  assertTrue(fs.exists(dumpAckFile));
  db = replica.getDatabase(replicatedDbName);
  assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET));
  assertFalse(MetaStoreUtils.isTargetOfReplication(db));
  primary.load(primaryDbName, replicatedDbName)
      .run("use " + primaryDbName)
      .run("show tables")
      .verifyResults(new String[] { "t1", "t2", "t3" })
      .run("repl status " + primaryDbName)
      .verifyResult(dumpData.lastReplicationId)
      .run("select id from t1")
      .verifyResults(new String[] { "1" })
      .run("select rank from t2 order by rank")
      .verifyResults(new String[] { "11", "20" })
      .run("select id from t3")
      .verifyResults(new String[] { "10" });
  assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
  reverseDumpData = replica.run("insert into t3 values (3)")
      .run("insert into t2 partition(name='Bob') values(30)")
      .dump(replicatedDbName, retainPrevDumpDir);
  dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
  dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
  assertFalse(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName),
      MetaStoreUtils.FailoverEndpoint.TARGET));
  // Simulate another failure: delete the dump ack and re-set the failover endpoint property before retrying.
  fs.delete(dumpAckFile);
  replica.run("ALTER DATABASE " + replicatedDbName + " SET DBPROPERTIES('" + ReplConst.REPL_FAILOVER_ENDPOINT
      + "'='" + MetaStoreUtils.FailoverEndpoint.TARGET + "')");
  assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName),
      MetaStoreUtils.FailoverEndpoint.TARGET));
  assertFalse(fs.exists(dumpAckFile));
  dumpData = replica.dump(replicatedDbName, retainPrevDumpDir);
  assertEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
  assertTrue(fs.exists(dumpAckFile));
  assertFalse(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName),
      MetaStoreUtils.FailoverEndpoint.TARGET));
  primary.load(primaryDbName, replicatedDbName)
      .run("select rank from t2 order by rank")
      .verifyResults(new String[] { "11", "20", "30" })
      .run("select id from t3")
      .verifyResults(new String[] { "10", "3" });
  assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
}
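Each time the failover test above inspects a dump directory it repeats the same pair of checks: whether the failover-ready marker exists and whether the DumpMetaData reports the expected dump type. Below is a hedged sketch of bundling those checks into a JUnit helper; the class and method names are hypothetical, and the import paths for ReplAck and DumpType are my assumption.
import static org.junit.Assert.assertEquals;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.repl.ReplAck;            // assumed package
import org.apache.hadoop.hive.ql.parse.repl.DumpType;          // assumed package
import org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData;

// Hypothetical test helper: the checks the test repeats for each dump directory.
final class DumpStateAssert {
  private DumpStateAssert() {}

  static void assertDumpState(FileSystem fs, Path hiveDumpPath, HiveConf conf,
                              DumpType expectedType, boolean failoverReady) throws Exception {
    assertEquals(expectedType, new DumpMetaData(hiveDumpPath, conf).getDumpType());
    assertEquals(failoverReady,
        fs.exists(new Path(hiveDumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
  }
}
With such a helper, a call like assertDumpState(fs, dumpPath, conf, DumpType.BOOTSTRAP, false) would stand in for the marker and dump-type assertions made after each reverse dump.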
use of org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData in project hive by apache.
the class TestScheduledReplicationScenarios method testExternalTablesReplLoadBootstrapIncr.
@Test
@Ignore("HIVE-23395")
public void testExternalTablesReplLoadBootstrapIncr() throws Throwable {
  // Bootstrap
  String withClause = " WITH('" + HiveConf.ConfVars.REPL_INCLUDE_AUTHORIZATION_METADATA + "' = 'true' ,'"
      + HiveConf.ConfVars.REPL_INCLUDE_ATLAS_METADATA + "' = 'true' , '"
      + HiveConf.ConfVars.HIVE_IN_TEST + "' = 'true'"
      + ",'" + HiveConf.ConfVars.REPL_ATLAS_ENDPOINT + "' = 'http://localhost:21000/atlas'"
      + ",'" + HiveConf.ConfVars.REPL_ATLAS_REPLICATED_TO_DB + "' = 'tgt'"
      + ",'" + HiveConf.ConfVars.REPL_SOURCE_CLUSTER_NAME + "' = 'cluster0'"
      + ",'" + HiveConf.ConfVars.REPL_TARGET_CLUSTER_NAME + "' = 'cluster1')";
  primary.run("use " + primaryDbName)
      .run("create external table t2 (id int)")
      .run("insert into t2 values(1)")
      .run("insert into t2 values(2)");
  try (ScheduledQueryExecutionService schqS =
      ScheduledQueryExecutionService.startScheduledQueryExecutorService(primary.hiveConf)) {
    int next = -1;
    ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next), true);
    primary.run("create scheduled query s1_t2 every 5 seconds as repl dump " + primaryDbName + withClause);
    replica.run("create scheduled query s2_t2 every 5 seconds as repl load " + primaryDbName + " INTO "
        + replicatedDbName + withClause);
    Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR),
        Base64.getEncoder().encodeToString(primaryDbName.toLowerCase().getBytes(StandardCharsets.UTF_8.name())));
    FileSystem fs = FileSystem.get(dumpRoot.toUri(), primary.hiveConf);
    next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
    Path ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR
        + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
    waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
    replica.run("use " + replicatedDbName)
        .run("show tables like 't2'")
        .verifyResult("t2")
        .run("select id from t2 order by id")
        .verifyResults(new String[] { "1", "2" });
    long lastReplId = Long.parseLong(primary.status(replicatedDbName).getOutput().get(0));
    DumpMetaData dumpMetaData = new DumpMetaData(ackPath.getParent(), primary.hiveConf);
    List<ReplicationMetric> replicationMetrics = MetricCollector.getInstance().getMetrics();
    Assert.assertEquals(2, replicationMetrics.size());
    // Generate expected metrics
    List<ReplicationMetric> expectedReplicationMetrics = new ArrayList<>();
    expectedReplicationMetrics.add(generateExpectedMetric("s1_t2", 0, primaryDbName,
        Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId, Status.SUCCESS,
        generateDumpStages(true)));
    expectedReplicationMetrics.add(generateExpectedMetric("s2_t2", dumpMetaData.getDumpExecutionId(),
        replicatedDbName, Metadata.ReplicationType.BOOTSTRAP, ackPath.getParent().toString(), lastReplId,
        Status.SUCCESS, generateLoadStages(true)));
    checkMetrics(expectedReplicationMetrics, replicationMetrics);
    // First incremental, after bootstrap
    primary.run("use " + primaryDbName)
        .run("insert into t2 values(3)")
        .run("insert into t2 values(4)");
    next = Integer.parseInt(ReplDumpWork.getTestInjectDumpDir()) + 1;
    ackPath = new Path(dumpRoot, String.valueOf(next) + File.separator + ReplUtils.REPL_HIVE_BASE_DIR
        + File.separator + ReplAck.LOAD_ACKNOWLEDGEMENT.toString());
    waitForAck(fs, ackPath, DEFAULT_PROBE_TIMEOUT);
    replica.run("use " + replicatedDbName)
        .run("show tables like 't2'")
        .verifyResult("t2")
        .run("select id from t2 order by id")
        .verifyResults(new String[] { "1", "2", "3", "4" });
  } finally {
    primary.run("drop scheduled query s1_t2");
    replica.run("drop scheduled query s2_t2");
  }
}
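In the scheduled-replication test, DumpMetaData is used only to recover the dump execution id that the load-side replication metric is expected to carry. A minimal hedged sketch of that lookup follows; the helper name is hypothetical, while the (Path, HiveConf) constructor and getDumpExecutionId() are the calls used in the test above.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData;

// Hypothetical helper: fetch the execution id recorded in a dump's metadata so it
// can be compared against the id reported in replication metrics.
final class DumpExecutionIds {
  private DumpExecutionIds() {}

  static long executionIdOf(Path hiveDumpDir, HiveConf conf) throws Exception {
    // Same directory the test passes (ackPath.getParent(), i.e. the hive dump dir).
    return new DumpMetaData(hiveDumpDir, conf).getDumpExecutionId();
  }
}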