Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.
The class LoadFunction, method createFunctionReplLogTask.
private void createFunctionReplLogTask(List<Task<?>> functionTasks, String functionName) {
  ReplStateLogWork replLogWork = new ReplStateLogWork(replLogger, functionName, dumpDirectory, metricCollector);
  Task<ReplStateLogWork> replLogTask = TaskFactory.get(replLogWork, context.hiveConf);
  DAGTraversal.traverse(functionTasks, new AddDependencyToLeaves(replLogTask));
}
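Each of these call sites follows the same pattern: build a graph of tasks, then run DAGTraversal.traverse with an AddDependencyToLeaves function so that one or more follow-up tasks become dependents of every leaf and therefore run only after the whole existing graph has completed. The following is a minimal, self-contained sketch of that idea; TaskNode, addDependencyToLeaves and LeafDependencyExample are hypothetical stand-ins for illustration, not Hive's Task, DAGTraversal or AddDependencyToLeaves classes.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Simplified stand-in for a task-graph node; not Hive's Task class.
final class TaskNode {
  final String name;
  final List<TaskNode> children = new ArrayList<>();
  TaskNode(String name) { this.name = name; }
}

public final class LeafDependencyExample {

  // Breadth-first walk that appends the given follow-up nodes to every leaf of the graph,
  // mirroring what DAGTraversal.traverse(tasks, new AddDependencyToLeaves(...)) achieves in Hive.
  static void addDependencyToLeaves(List<TaskNode> roots, List<TaskNode> followUps) {
    Deque<TaskNode> queue = new ArrayDeque<>(roots);
    Set<TaskNode> visited = new HashSet<>();
    while (!queue.isEmpty()) {
      TaskNode current = queue.poll();
      if (!visited.add(current) || followUps.contains(current)) {
        continue; // skip nodes already seen and the follow-up tasks themselves
      }
      if (current.children.isEmpty()) {
        current.children.addAll(followUps); // leaf: attach the follow-up tasks
      } else {
        queue.addAll(current.children);
      }
    }
  }

  public static void main(String[] args) {
    TaskNode root = new TaskNode("load-function");
    TaskNode leafA = new TaskNode("copy-jar");
    TaskNode leafB = new TaskNode("create-function");
    root.children.add(leafA);
    root.children.add(leafB);

    TaskNode replLog = new TaskNode("repl-state-log");
    addDependencyToLeaves(List.of(root), List.of(replLog));

    // Both leaves now depend on the appended task; prints "repl-state-log, repl-state-log".
    System.out.println(leafA.children.get(0).name + ", " + leafB.children.get(0).name);
  }
}

The visited set stands in for not re-processing shared children, and the followUps check approximates skipping the appended tasks themselves; the real Hive classes operate on Task objects and their dependent-task lists rather than on this toy node type.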
Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.
The class AddDependencyToLeavesTest, method shouldNotSkipIntermediateDependencyCollectionTasks.
@Test
public void shouldNotSkipIntermediateDependencyCollectionTasks() {
  Task<DependencyCollectionWork> collectionWorkTaskOne = TaskFactory.get(new DependencyCollectionWork());
  Task<DependencyCollectionWork> collectionWorkTaskTwo = TaskFactory.get(new DependencyCollectionWork());
  Task<DependencyCollectionWork> collectionWorkTaskThree = TaskFactory.get(new DependencyCollectionWork());
  @SuppressWarnings("unchecked") Task<?> rootTask = mock(Task.class);
  when(rootTask.getDependentTasks())
      .thenReturn(Arrays.asList(collectionWorkTaskOne, collectionWorkTaskTwo, collectionWorkTaskThree));
  @SuppressWarnings("unchecked") List<Task<?>> tasksPostCurrentGraph = Arrays.asList(mock(Task.class), mock(Task.class));
  DAGTraversal.traverse(Collections.singletonList(rootTask), new AddDependencyToLeaves(tasksPostCurrentGraph));
  List<Task<?>> dependentTasksForOne = collectionWorkTaskOne.getDependentTasks();
  List<Task<?>> dependentTasksForTwo = collectionWorkTaskTwo.getDependentTasks();
  List<Task<?>> dependentTasksForThree = collectionWorkTaskThree.getDependentTasks();
  assertEquals(2, dependentTasksForOne.size());
  assertEquals(2, dependentTasksForTwo.size());
  assertEquals(2, dependentTasksForThree.size());
  assertTrue(tasksPostCurrentGraph.containsAll(dependentTasksForOne));
  assertTrue(tasksPostCurrentGraph.containsAll(dependentTasksForTwo));
  assertTrue(tasksPostCurrentGraph.containsAll(dependentTasksForThree));
  // assertTrue(dependentTasksForOne.iterator().next() instanceof DependencyCollectionTask);
  // assertTrue(dependentTasksForTwo.iterator().next() instanceof DependencyCollectionTask);
  // assertTrue(dependentTasksForThree.iterator().next() instanceof DependencyCollectionTask);
}
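In this test the mocked root task reports the three DependencyCollectionWork tasks as its dependents, so each of them is a leaf of the traversed graph. After DAGTraversal runs with AddDependencyToLeaves, every leaf has picked up exactly the two mocked tasks from tasksPostCurrentGraph as dependents, which is what the size and containsAll assertions verify; as the test name says, the intermediate dependency-collection tasks are visited rather than skipped.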
Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.
The class CommitTxnHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
    context.log.error("Cannot load transaction events as acid is not enabled");
    throw new SemanticException("Cannot load transaction events as acid is not enabled");
  }
  CommitTxnMessage msg = deserializer.getCommitTxnMessage(context.dmd.getPayload());
  int numEntry = (msg.getTables() == null ? 0 : msg.getTables().size());
  List<Task<?>> tasks = new ArrayList<>();
  String dbName = context.dbName;
  String tableNamePrev = null;
  String tblName = null;
  ReplTxnWork work = new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null, msg.getTxnId(),
      ReplTxnWork.OperationType.REPL_COMMIT_TXN, context.eventOnlyReplicationSpec(),
      context.getDumpDirectory(), context.getMetricCollector());
  if (numEntry > 0) {
    context.log.debug("Commit txn handler for txnid " + msg.getTxnId() + " databases : " + msg.getDatabases()
        + " tables : " + msg.getTables() + " partitions : " + msg.getPartitions()
        + " files : " + msg.getFilesList() + " write ids : " + msg.getWriteIds());
  }
  for (int idx = 0; idx < numEntry; idx++) {
    String actualTblName = msg.getTables().get(idx);
    String actualDBName = msg.getDatabases().get(idx);
    String completeName = Table.getCompleteName(actualDBName, actualTblName);
    // Events for the same table are grouped together in the commit txn message.
    if (tableNamePrev == null || !(completeName.equals(tableNamePrev))) {
      // The data location is created by source, so the location should be formed based on the table name in msg.
      Path location = HiveUtils.getDumpPath(new Path(context.location), actualDBName, actualTblName);
      tblName = actualTblName;
      // For warehouse level dump, use db name from write event.
      dbName = (context.isDbNameEmpty() ? actualDBName : context.dbName);
      Context currentContext = new Context(context, dbName, context.getDumpDirectory(), context.getMetricCollector());
      currentContext.setLocation(location.toUri().toString());
      // Piggybacking in Import logic for now.
      TableHandler tableHandler = new TableHandler();
      tasks.addAll((tableHandler.handle(currentContext)));
      readEntitySet.addAll(tableHandler.readEntities());
      writeEntitySet.addAll(tableHandler.writeEntities());
      getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
      tableNamePrev = completeName;
    }
    try {
      WriteEventInfo writeEventInfo = new WriteEventInfo(msg.getWriteIds().get(idx), dbName, tblName, msg.getFiles(idx));
      if (msg.getPartitions().get(idx) != null && !msg.getPartitions().get(idx).isEmpty()) {
        writeEventInfo.setPartition(msg.getPartitions().get(idx));
      }
      work.addWriteEventInfo(writeEventInfo);
    } catch (Exception e) {
      throw new SemanticException("Failed to extract write event info from commit txn message : " + e.getMessage());
    }
  }
  Task<ReplTxnWork> commitTxnTask = TaskFactory.get(work, context.hiveConf);
  // Anyways, if this event gets executed again, it is taken care of.
  if (!context.isDbNameEmpty()) {
    updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
  }
  context.log.debug("Added Commit txn task : {}", commitTxnTask.getId());
  if (tasks.isEmpty()) {
    // Will be used for setting the last repl id.
    return Collections.singletonList(commitTxnTask);
  }
  DAGTraversal.traverse(tasks, new AddDependencyToLeaves(commitTxnTask));
  return tasks;
}
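The traversal at the end serves the same purpose as in the previous examples: the per-table import tasks produced by TableHandler must all complete before the REPL_COMMIT_TXN task commits the replicated transaction on the target, so the commit task is attached as a dependent of every leaf of the import task graph. When the message carries no table entries and no import tasks are generated, the commit task is returned on its own and is also what sets the last repl id.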
Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.
The class ReplLoadTask, method createReplLoadCompleteAckTask.
private void createReplLoadCompleteAckTask() {
  if (!work.hasBootstrapLoadTasks()
      && (work.isIncrementalLoad() ? !work.incrementalLoadTasksBuilder().hasMoreWork() : true)) {
    // All repl load tasks are executed and status is 0; create the task to add the acknowledgement.
    List<PreAckTask> listOfPreAckTasks = new LinkedList<>();
    listOfPreAckTasks.add(new PreAckTask() {
      @Override
      public void run() throws SemanticException {
        try {
          HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
          long currentNotificationID = metaStoreClient.getCurrentNotificationEventId().getEventId();
          Path loadMetadataFilePath = new Path(work.dumpDirectory, LOAD_METADATA.toString());
          Utils.writeOutput(String.valueOf(currentNotificationID), loadMetadataFilePath, conf);
          LOG.info("Created LOAD Metadata file : {} with NotificationID : {}", loadMetadataFilePath, currentNotificationID);
        } catch (TException ex) {
          throw new SemanticException(ex);
        }
      }
    });
    if (work.shouldFailover()) {
      listOfPreAckTasks.add(new PreAckTask() {
        @Override
        public void run() throws SemanticException {
          try {
            Database db = getHive().getDatabase(work.getTargetDatabase());
            if (MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET)) {
              return;
            }
            Map<String, String> params = db.getParameters();
            if (params == null) {
              params = new HashMap<>();
              db.setParameters(params);
            }
            LOG.info("Setting failover endpoint:{} to TARGET for database: {}",
                ReplConst.REPL_FAILOVER_ENDPOINT, db.getName());
            params.put(ReplConst.REPL_FAILOVER_ENDPOINT, MetaStoreUtils.FailoverEndpoint.TARGET.toString());
            getHive().alterDatabase(work.getTargetDatabase(), db);
          } catch (HiveException e) {
            throw new SemanticException(e);
          }
        }
      });
    }
    if (work.isSecondFailover) {
      // If this is the second load of optimised bootstrap, the cycle has ended; add tasks to sort
      // out the database properties.
      listOfPreAckTasks.add(new PreAckTask() {
        @Override
        public void run() throws SemanticException {
          try {
            Hive hiveDb = getHive();
            Database db = hiveDb.getDatabase(work.getTargetDatabase());
            LinkedHashMap<String, String> params = new LinkedHashMap<>(db.getParameters());
            LOG.debug("Database {} properties before removal {}", work.getTargetDatabase(), params);
            params.remove(SOURCE_OF_REPLICATION);
            db.setParameters(params);
            LOG.info("Removed {} property from database {} after successful optimised bootstrap load.",
                SOURCE_OF_REPLICATION, work.getTargetDatabase());
            hiveDb.alterDatabase(work.getTargetDatabase(), db);
            LOG.debug("Database {} properties after removal {}", work.getTargetDatabase(), params);
          } catch (HiveException e) {
            throw new SemanticException(e);
          }
        }
      });
    }
    AckWork replLoadAckWork = new AckWork(new Path(work.dumpDirectory, LOAD_ACKNOWLEDGEMENT.toString()),
        work.getMetricCollector(), listOfPreAckTasks);
    Task<AckWork> loadAckWorkTask = TaskFactory.get(replLoadAckWork, conf);
    if (childTasks.isEmpty()) {
      childTasks.add(loadAckWorkTask);
    } else {
      DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(Collections.singletonList(loadAckWorkTask)));
    }
  }
}
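Here AddDependencyToLeaves sequences the load-complete acknowledgement: the ack task, together with its pre-ack steps (recording the current notification event id, marking the failover endpoint on the target database, and stripping the replication-source property after the second optimised-bootstrap load), is attached to every leaf of the child-task graph, so the acknowledgement is written only once all replication load tasks have finished. If there are no child tasks, it is simply added as the only task.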
Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.
The class ReplLoadTask, method updateDatabaseLastReplID.
/**
 * There was a database update done before and we want to make sure we update the last repl
 * id on this database as we are now going to switch to processing a new database.
 * This has to be the last task in the graph: if it were a root-level task alongside intermediate
 * tasks, it would be executed first in the execution phase, and if any child task of the bootstrap
 * load then failed, the last repl status of the target database would still return a valid value
 * even though the bootstrap has failed, which would not represent the state of the database.
 */
private TaskTracker updateDatabaseLastReplID(int maxTasks, Context context, Scope scope) throws SemanticException {
  /*
    We don't want to put any limits on this task as this is essential before we start
    processing new database events.
   */
  TaskTracker taskTracker = new AlterDatabase(context, work.databaseEvent(context.hiveConf), work.dbNameToLoadIn,
      new TaskTracker(maxTasks), work.getMetricCollector()).tasks();
  AddDependencyToLeaves function = new AddDependencyToLeaves(taskTracker.tasks());
  DAGTraversal.traverse(scope.rootTasks, function);
  return taskTracker;
}
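Unlike the other call sites, this one passes AddDependencyToLeaves a list of tasks: the AlterDatabase tasks produced by the TaskTracker, which set the last repl id on the database just loaded. Attaching them to the leaves of scope.rootTasks guarantees they run only after every task generated for that database has succeeded, which is exactly the ordering requirement described in the Javadoc above.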