Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.
The class ReplDumpWork, method managedTableCopyTasks.
public List<Task<?>> managedTableCopyTasks(TaskTracker tracker, HiveConf conf) throws IOException {
  if (conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_SKIP_IMMUTABLE_DATA_COPY)) {
    return Collections.emptyList();
  }
  List<Task<?>> tasks = new ArrayList<>();
  Retryable retryable = Retryable.builder().withHiveConf(conf).withRetryOnException(UncheckedIOException.class).build();
  try {
    retryable.executeCallable((Callable<Void>) () -> {
      try {
        int numEntriesToSkip = tasks == null ? 0 : tasks.size();
        while (managedTblCopyPathIterator.hasNext() && tracker.canAddMoreTasks()) {
          if (numEntriesToSkip > 0) {
            // skip tasks added in previous attempts of this retryable block
            managedTblCopyPathIterator.next();
            numEntriesToSkip--;
            continue;
          }
          ReplicationSpec replSpec = new ReplicationSpec();
          replSpec.setIsReplace(true);
          replSpec.setInReplicationScope(true);
          EximUtil.DataCopyPath managedTableCopyPath = new EximUtil.DataCopyPath(replSpec);
          managedTableCopyPath.loadFromString(managedTblCopyPathIterator.next());
          // If it's incremental, in the checkpointing case the dump dir may exist; we will delete the event dir.
          // In case of bootstrap checkpointing we will not delete the entire dir and just do a sync.
          Task<?> copyTask = ReplCopyTask.getDumpCopyTask(managedTableCopyPath.getReplicationSpec(), managedTableCopyPath.getSrcPath(), managedTableCopyPath.getTargetPath(), conf, false, shouldOverwrite, !isBootstrap(), getCurrentDumpPath().toString(), getMetricCollector());
          tasks.add(copyTask);
          tracker.addTask(copyTask);
          LOG.debug("added task for {}", managedTableCopyPath);
        }
      } catch (UncheckedIOException e) {
        LOG.error("Reading entry for data copy failed for managed tables, attempting retry.", e);
        throw e;
      }
      return null;
    });
  } catch (Exception e) {
    throw new IOException(ErrorMsg.REPL_RETRY_EXHAUSTED.format(e.getMessage()));
  }
  return tasks;
}
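Here the ReplicationSpec is created with setIsReplace(true) and setInReplicationScope(true) so each DataCopyPath it wraps is treated as a replace-style copy inside replication scope, and the whole loop runs inside Hive's Retryable helper: entries that already produced copy tasks in earlier attempts are skipped rather than redone. Below is a minimal, self-contained sketch of just that skip-on-retry bookkeeping, not code from the project: it assumes Retryable lives in org.apache.hadoop.hive.ql.exec.util, that the entry source can be re-walked from the start on each attempt (unlike the FileList-backed iterator above), and that retry delays come from HiveConf's repl retry settings; the class name, entry values and the simulated failure are hypothetical.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.util.Retryable;

public class RetrySkipSketch {

  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    List<String> allEntries = Arrays.asList("copy-entry-1", "copy-entry-2", "copy-entry-3");
    List<String> processed = new ArrayList<>();
    AtomicBoolean failedOnce = new AtomicBoolean(false);

    Retryable retryable = Retryable.builder()
        .withHiveConf(conf)
        .withRetryOnException(UncheckedIOException.class)
        .build();

    retryable.executeCallable((Callable<Void>) () -> {
      // Each attempt walks the entry source from the start; entries that already
      // produced work in earlier attempts are skipped instead of being redone.
      Iterator<String> entries = allEntries.iterator();
      int numEntriesToSkip = processed.size();
      while (entries.hasNext()) {
        String entry = entries.next();
        if (numEntriesToSkip > 0) {
          numEntriesToSkip--;
          continue;
        }
        processed.add(entry);
        // Simulate a transient read failure after the first entry of the first attempt.
        if (failedOnce.compareAndSet(false, true)) {
          throw new UncheckedIOException(new IOException("transient read failure"));
        }
      }
      return null;
    });

    System.out.println(processed); // [copy-entry-1, copy-entry-2, copy-entry-3]
  }
}

The same bookkeeping is what lets managedTableCopyTasks resume mid-iteration after a transient read failure without generating duplicate copy tasks.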
Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.
The class ReplLoadTask, method executeIncrementalLoad.
private int executeIncrementalLoad(long loadStartTime) throws Exception {
  // If the replication scope was modified, drop the tables that are excluded in the new replication policy.
  if (work.replScopeModified) {
    dropTablesExcludedInReplScope(work.currentReplScope);
  }
  Database targetDb = getHive().getDatabase(work.dbNameToLoadIn);
  Map<String, String> props = new HashMap<>();
  // Check if it is an optimised bootstrap failover.
  if (work.isFirstFailover) {
    // The database must not already be marked as a target of replication, and it must currently be a source of replication.
    if (MetaStoreUtils.isTargetOfReplication(targetDb)) {
      LOG.error("The database {} is already marked as target for replication", targetDb.getName());
      throw new Exception("Failover target is already marked as target");
    }
    if (!ReplChangeManager.isSourceOfReplication(targetDb)) {
      LOG.error("The database {} is not a source of replication.", targetDb.getName());
      throw new Exception("Failover target was not source of replication");
    }
    boolean isTableDiffPresent = checkFileExists(new Path(work.dumpDirectory).getParent(), conf, TABLE_DIFF_COMPLETE_DIRECTORY);
    Long eventId = Long.parseLong(getEventIdFromFile(new Path(work.dumpDirectory).getParent(), conf)[0]);
    if (!isTableDiffPresent) {
      prepareTableDiffFile(eventId, getHive(), work, conf);
      if (this.childTasks == null) {
        this.childTasks = new ArrayList<>();
      }
      createReplLoadCompleteAckTask();
      return 0;
    }
  } else if (work.isSecondFailover) {
    // Drop the tables to be bootstrapped.
    Hive db = getHive();
    for (String table : work.tablesToBootstrap) {
      db.dropTable(work.dbNameToLoadIn + "." + table, true);
    }
  }
  if (!MetaStoreUtils.isTargetOfReplication(targetDb)) {
    props.put(ReplConst.TARGET_OF_REPLICATION, ReplConst.TRUE);
  }
  if (!work.shouldFailover() && MetaStoreUtils.isDbBeingFailedOver(targetDb)) {
    props.put(ReplConst.REPL_FAILOVER_ENDPOINT, "");
  }
  if (!props.isEmpty()) {
    AlterDatabaseSetPropertiesDesc setTargetDesc = new AlterDatabaseSetPropertiesDesc(work.dbNameToLoadIn, props, null);
    Task<?> addReplTargetPropTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), setTargetDesc, true, work.dumpDirectory, work.getMetricCollector()), conf);
    if (this.childTasks == null) {
      this.childTasks = new ArrayList<>();
    }
    this.childTasks.add(addReplTargetPropTask);
  }
  IncrementalLoadTasksBuilder builder = work.incrementalLoadTasksBuilder();
  // If the incremental events are already applied, check whether any tables still need to be bootstrapped.
  if (!builder.hasMoreWork() && work.isLastReplIDUpdated()) {
    if (work.hasBootstrapLoadTasks()) {
      LOG.debug("Current incremental dump has tables to be bootstrapped. Switching to bootstrap mode after applying all events.");
      return executeBootStrapLoad();
    }
  }
  List<Task<?>> childTasks = new ArrayList<>();
  int maxTasks = conf.getIntVar(HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS);
  TaskTracker tracker = new TaskTracker(maxTasks);
  addLazyDataCopyTask(tracker, builder.getReplLogger());
  childTasks.add(builder.build(context, getHive(), LOG, tracker));
  // Update the last repl id of the database so the next incremental cycle won't consider the events in this dump again if it starts from this id.
  if (!builder.hasMoreWork()) {
    // The name of the database to be loaded into is either specified directly in the REPL LOAD
    // command, i.e. when dbNameToLoadIn has a valid dbname, or is available through dump
    // metadata during table level replication.
    String dbName = work.dbNameToLoadIn;
    if (dbName == null || StringUtils.isBlank(dbName)) {
      if (work.currentReplScope != null) {
        String replScopeDbName = work.currentReplScope.getDbName();
        if (replScopeDbName != null && !"*".equals(replScopeDbName)) {
          dbName = replScopeDbName;
        }
      }
    }
    // Update the repl id in that database.
    if (StringUtils.isNotBlank(dbName)) {
      String lastEventid = builder.eventTo().toString();
      Map<String, String> mapProp = new HashMap<>();
      mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID_SOURCE.toString(), lastEventid);
      AlterDatabaseSetPropertiesDesc alterDbDesc = new AlterDatabaseSetPropertiesDesc(dbName, mapProp, new ReplicationSpec(lastEventid, lastEventid));
      Task<?> updateReplIdTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), alterDbDesc, true, (new Path(work.dumpDirectory).getParent()).toString(), work.getMetricCollector()), conf);
      DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(updateReplIdTask));
      work.setLastReplIDUpdated(true);
      LOG.debug("Added task to set last repl id of db " + dbName + " to " + lastEventid);
    }
  }
  // Once all the incremental events are applied, enable bootstrap of tables if any exist.
  if (builder.hasMoreWork() || work.hasBootstrapLoadTasks()) {
    DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(TaskFactory.get(work, conf)));
  }
  if (this.childTasks == null) {
    this.childTasks = new ArrayList<>();
  }
  this.childTasks.addAll(childTasks);
  createReplLoadCompleteAckTask();
  // Clean up snapshots.
  if (conf.getBoolVar(REPL_SNAPSHOT_DIFF_FOR_EXTERNAL_TABLE_COPY)) {
    cleanupSnapshots(new Path(work.getDumpDirectory()).getParent().getParent().getParent(), work.getSourceDbName().toLowerCase(), conf, null, true);
  }
  // Pass the current time at the end of the repl-load stage as the starting time of the first event.
  long currentTimestamp = System.currentTimeMillis();
  ((IncrementalLoadLogger) work.incrementalLoadTasksBuilder().getReplLogger()).initiateEventTimestamp(currentTimestamp);
  LOG.info("REPL_INCREMENTAL_LOAD stage duration : {} ms", currentTimestamp - loadStartTime);
  return 0;
}
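The database property written under ReplicationSpec.KEY.CURR_STATE_ID_SOURCE is what later cycles consult to see where the previous incremental load stopped. As a minimal sketch (not code from the project), assuming the value is stored as a plain database parameter exactly as the AlterDatabaseSetPropertiesDesc above records it, it can be read back like this:

import java.util.Map;

import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;

public class LastReplIdSketch {
  // Reads the last replicated event id that REPL LOAD recorded on the target
  // database via ReplicationSpec.KEY.CURR_STATE_ID_SOURCE (null if never set).
  public static String lastReplId(Hive hive, String dbName) throws Exception {
    Database db = hive.getDatabase(dbName);
    Map<String, String> params = db.getParameters();
    return params == null ? null : params.get(ReplicationSpec.KEY.CURR_STATE_ID_SOURCE.toString());
  }
}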
Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.
The class ReplDumpTask, method getNewEventOnlyReplicationSpec.
private ReplicationSpec getNewEventOnlyReplicationSpec(Long eventId) {
  ReplicationSpec rspec = getNewReplicationSpec(eventId.toString(), eventId.toString(), conf.getBoolean(REPL_DUMP_METADATA_ONLY.varname, false));
  rspec.setReplSpecType(ReplicationSpec.Type.INCREMENTAL_DUMP);
  return rspec;
}
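For comparison, a ReplicationSpec scoped to a single event can also be built directly with the two-argument constructor used elsewhere on this page. A minimal sketch, which unlike getNewEventOnlyReplicationSpec does not carry the metadata-only flag that getNewReplicationSpec propagates from REPL_DUMP_METADATA_ONLY:

import org.apache.hadoop.hive.ql.parse.ReplicationSpec;

public class EventReplSpecSketch {
  // Builds a ReplicationSpec that covers exactly one event id and is tagged as
  // part of an incremental dump, mirroring getNewEventOnlyReplicationSpec above
  // without going through ReplDumpTask's getNewReplicationSpec helper.
  public static ReplicationSpec forEvent(long eventId) {
    String id = Long.toString(eventId);
    ReplicationSpec spec = new ReplicationSpec(id, id);
    spec.setReplSpecType(ReplicationSpec.Type.INCREMENTAL_DUMP);
    return spec;
  }
}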
Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.
The class IncrementalLoadTasksBuilder, method dbUpdateReplStateTask.
private Task<?> dbUpdateReplStateTask(String dbName, String replState, Task<?> preCursor) {
  HashMap<String, String> mapProp = new HashMap<>();
  mapProp.put(ReplicationSpec.KEY.CURR_STATE_ID_SOURCE.toString(), replState);
  AlterDatabaseSetPropertiesDesc alterDbDesc = new AlterDatabaseSetPropertiesDesc(dbName, mapProp, new ReplicationSpec(replState, replState));
  Task<?> updateReplIdTask = TaskFactory.get(new DDLWork(inputs, outputs, alterDbDesc, true, dumpDirectory, metricCollector), conf);
  // Link the update repl state task with dependency collection task
  if (preCursor != null) {
    preCursor.addDependentTask(updateReplIdTask);
    log.debug("Added {}:{} as a precursor of {}:{}", preCursor.getClass(), preCursor.getId(), updateReplIdTask.getClass(), updateReplIdTask.getId());
  }
  return updateReplIdTask;
}
Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.
The class InsertHandler, method handle.
@Override
public List<Task<?>> handle(Context withinContext) throws SemanticException {
  try {
    FileSystem fs = FileSystem.get(new Path(withinContext.location).toUri(), withinContext.hiveConf);
    MetaData metaData = EximUtil.readMetaData(fs, new Path(withinContext.location, EximUtil.METADATA_NAME));
    ReplicationSpec replicationSpec = metaData.getReplicationSpec();
    if (replicationSpec.isNoop()) {
      return Collections.emptyList();
    }
  } catch (Exception e) {
    LOG.error("failed to load insert event", e);
    throw new SemanticException(e);
  }
  InsertMessage insertMessage = deserializer.getInsertMessage(withinContext.dmd.getPayload());
  String actualDbName = withinContext.isDbNameEmpty() ? insertMessage.getDB() : withinContext.dbName;
  Context currentContext = new Context(withinContext, actualDbName, withinContext.getDumpDirectory(), withinContext.getMetricCollector());
  // Piggybacking in Import logic for now
  TableHandler tableHandler = new TableHandler();
  List<Task<?>> tasks = tableHandler.handle(currentContext);
  readEntitySet.addAll(tableHandler.readEntities());
  writeEntitySet.addAll(tableHandler.writeEntities());
  getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
  return tasks;
}
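The try block above is essentially a no-op gate: the event's _metadata file is read and, if its ReplicationSpec marks the event as a no-op, the handler emits no tasks. A minimal sketch of that gate on its own, assuming MetaData is org.apache.hadoop.hive.ql.parse.repl.load.MetaData and that location points at an event dump directory containing an EximUtil.METADATA_NAME file:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class NoopGateSketch {
  // Returns true when the event dump under 'location' carries a ReplicationSpec
  // marked as a no-op, in which case a handler can skip task generation entirely.
  public static boolean isNoopEvent(String location, HiveConf hiveConf) throws Exception {
    FileSystem fs = FileSystem.get(new Path(location).toUri(), hiveConf);
    MetaData metaData = EximUtil.readMetaData(fs, new Path(location, EximUtil.METADATA_NAME));
    ReplicationSpec replicationSpec = metaData.getReplicationSpec();
    return replicationSpec != null && replicationSpec.isNoop();
  }
}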