use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class GenMapRedUtils method createMRWorkForMergingFiles.
/**
* @param fsInput The FileSink operator.
* @param finalName the final destination path the merge job should output.
* @param dependencyTask
* @param mvTasks
* @param conf
* @param currTask
* @param lineageState
* @throws SemanticException
*
* create a Map-only merge job using CombineHiveInputFormat for all partitions with
* following operators:
* MR job J0:
* ...
* |
* v
* FileSinkOperator_1 (fsInput)
* |
* v
* Merge job J1:
* |
* v
* TableScan (using CombineHiveInputFormat) (tsMerge)
* |
* v
* FileSinkOperator (fsMerge)
*
* Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
* do
* not contain the dynamic partitions (their parent). So after the dynamic partitions are
* created (after the first job finished before the moveTask or ConditionalTask start),
* we need to change the pathToPartitionInfo & pathToAlias to include the dynamic
* partition
* directories.
*/
public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, Path finalName, DependencyCollectionTask dependencyTask, List<Task<MoveWork>> mvTasks, HiveConf conf, Task<? extends Serializable> currTask, LineageState lineageState) throws SemanticException {
//
// 1. create the operator tree
//
FileSinkDesc fsInputDesc = fsInput.getConf();
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Creating merge work from " + System.identityHashCode(fsInput) + " with write ID " + (fsInputDesc.isMmTable() ? fsInputDesc.getTableWriteId() : null) + " into " + finalName);
}
boolean isBlockMerge = (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) || (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class));
RowSchema inputRS = fsInput.getSchema();
Long srcMmWriteId = fsInputDesc.isMmTable() ? fsInputDesc.getTableWriteId() : null;
FileSinkDesc fsOutputDesc = null;
TableScanOperator tsMerge = null;
if (!isBlockMerge) {
// Create a TableScan operator
tsMerge = GenMapRedUtils.createTemporaryTableScanOperator(fsInput.getCompilationOpContext(), inputRS);
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
Path mergeDest = srcMmWriteId == null ? finalName : finalName.getParent();
fsOutputDesc = new FileSinkDesc(mergeDest, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
fsOutputDesc.setMmWriteId(srcMmWriteId);
fsOutputDesc.setIsMerge(true);
// Create and attach the filesink for the merge.
OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);
}
// If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema
// needs to include the partition column, and the fsOutput should have
// a DynamicPartitionCtx to indicate that it needs to dynamically partitioned.
DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// adding DP ColumnInfo to the RowSchema signature
ArrayList<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol, // all partition column type should be string
TypeInfoFactory.stringTypeInfo, tblAlias, // partition column is virtual column
true);
signature.add(colInfo);
}
inputRS.setSignature(signature);
if (!isBlockMerge) {
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
fsOutputDesc.setDynPartCtx(dpCtx2);
}
// update the FileSinkOperator to include partition columns
usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), dpCtx.getDPColNames());
} else {
// non-partitioned table
fsInputDesc.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
}
//
// 2. Constructing a conditional task consisting of a move task and a map reduce task
//
Path inputDirName = fsInputDesc.getMergeInputDirName();
MapWork cplan;
Serializable work;
if (isBlockMerge) {
cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, fsInput.getCompilationOpContext());
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
cplan.setName("File Merge");
((TezWork) work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Spark Merge File Work");
((SparkWork) work).add(cplan);
} else {
work = cplan;
}
} else {
cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
cplan.setName("File Merge");
((TezWork) work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Spark Merge File Work");
((SparkWork) work).add(cplan);
} else {
work = new MapredWork();
((MapredWork) work).setMapWork(cplan);
}
}
// use CombineHiveInputFormat for map-only merging
cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
// NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
// know if merge MR2 will be triggered at execution time
MoveWork dummyMv = null;
if (srcMmWriteId == null) {
// Only create the movework for non-MM table. No action needed for a MM table.
dummyMv = new MoveWork(null, null, null, new LoadFileDesc(inputDirName, finalName, true, null, null, false), false);
}
// Use the original fsOp path here in case of MM - while the new FSOP merges files inside the
// MM directory, the original MoveTask still commits based on the parent. Note that this path
// can only be triggered for a merge that's part of insert for now; MM tables do not support
// concatenate. Keeping the old logic for non-MM tables with temp directories and stuff.
Path fsopPath = srcMmWriteId != null ? fsInputDesc.getFinalDirName() : finalName;
Task<MoveWork> mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fsopPath, fsInputDesc.isMmTable());
ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, fsInputDesc.getMergeInputDirName(), finalName, mvTask, dependencyTask, lineageState);
// keep the dynamic partition context in conditional task resolver context
ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
mrCtx.setLbCtx(fsInputDesc.getLbCtx());
}
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class GenMapRedUtils method createCondTask.
/**
* Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
*
* @param conf
* HiveConf
* @param currTask
* current leaf task
* @param dummyMoveWork
* MoveWork for the move task
* @param mergeWork
* MapredWork for the merge task.
* @param condInputPath
* the input directory of the merge/move task
* @param condOutputPath
* the output directory of the merge/move task
* @param moveTaskToLink
* a MoveTask that may be linked to the conditional sub-tasks
* @param dependencyTask
* a dependency task that may be linked to the conditional sub-tasks
* @param lineageState
* to track activity
* @return The conditional task
*/
@SuppressWarnings("unchecked")
private static ConditionalTask createCondTask(HiveConf conf, Task<? extends Serializable> currTask, MoveWork mvWork, Serializable mergeWork, Path condInputPath, Path condOutputPath, Task<MoveWork> moveTaskToLink, DependencyCollectionTask dependencyTask, LineageState lineageState) {
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Creating conditional merge task for " + condInputPath);
}
// Create a dummy task if no move is needed.
Serializable moveWork = mvWork != null ? mvWork : new DependencyCollectionWork();
// Note: this should never happen for mm tables.
boolean shouldMergeMovePaths = (moveTaskToLink != null && dependencyTask == null && shouldMergeMovePaths(conf, condInputPath, condOutputPath, moveTaskToLink.getWork()));
Serializable workForMoveOnlyTask = moveWork;
if (shouldMergeMovePaths) {
workForMoveOnlyTask = mergeMovePaths(condInputPath, moveTaskToLink.getWork(), lineageState);
}
// There are 3 options for this ConditionalTask:
// 1) Merge the partitions
// 2) Move the partitions (i.e. don't merge the partitions)
// 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't
// merge others) in this case the merge is done first followed by the move to prevent
// conflicts.
// TODO: if we are not dealing with concatenate DDL, we should not create a merge+move path
// because it should be impossible to get incompatible outputs.
Task<? extends Serializable> mergeOnlyMergeTask = TaskFactory.get(mergeWork);
Task<? extends Serializable> moveOnlyMoveTask = TaskFactory.get(workForMoveOnlyTask);
Task<? extends Serializable> mergeAndMoveMergeTask = TaskFactory.get(mergeWork);
Task<? extends Serializable> mergeAndMoveMoveTask = TaskFactory.get(moveWork);
// NOTE! It is necessary merge task is the parent of the move task, and not
// the other way around, for the proper execution of the execute method of
// ConditionalTask
mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask);
List<Serializable> listWorks = new ArrayList<Serializable>();
listWorks.add(workForMoveOnlyTask);
listWorks.add(mergeWork);
ConditionalWork cndWork = new ConditionalWork(listWorks);
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
listTasks.add(moveOnlyMoveTask);
listTasks.add(mergeOnlyMergeTask);
listTasks.add(mergeAndMoveMergeTask);
ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
cndTsk.setListTasks(listTasks);
// create resolver
cndTsk.setResolver(new ConditionalResolverMergeFiles());
ConditionalResolverMergeFilesCtx mrCtx = new ConditionalResolverMergeFilesCtx(listTasks, condInputPath.toString());
cndTsk.setResolverCtx(mrCtx);
// make the conditional task as the child of the current leaf task
currTask.addDependentTask(cndTsk);
if (shouldMergeMovePaths) {
// If a new MoveWork was created, then we should link all dependent tasks from the MoveWork to link.
if (moveTaskToLink.getDependentTasks() != null) {
for (Task dependentTask : moveTaskToLink.getDependentTasks()) {
moveOnlyMoveTask.addDependentTask(dependentTask);
}
}
} else {
addDependentMoveTasks(moveTaskToLink, conf, moveOnlyMoveTask, dependencyTask);
}
addDependentMoveTasks(moveTaskToLink, conf, mergeOnlyMergeTask, dependencyTask);
addDependentMoveTasks(moveTaskToLink, conf, mergeAndMoveMoveTask, dependencyTask);
return cndTsk;
}
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class Executor method launchTask.
private TaskRunner launchTask(Task<?> task, boolean noName, String jobName, int jobCount) throws HiveException {
SessionState.get().getHiveHistory().startTask(driverContext.getQueryId(), task, task.getClass().getName());
if (task.isMapRedTask() && !(task instanceof ConditionalTask)) {
if (noName) {
driverContext.getConf().set(MRJobConfig.JOB_NAME, jobName + " (" + task.getId() + ")");
}
taskQueue.incCurJobNo(1);
CONSOLE.printInfo("Launching Job " + taskQueue.getCurJobNo() + " out of " + jobCount);
}
task.initialize(driverContext.getQueryState(), driverContext.getPlan(), taskQueue, context);
TaskRunner taskRun = new TaskRunner(task, taskQueue);
taskQueue.launching(taskRun);
if (HiveConf.getBoolVar(task.getConf(), HiveConf.ConfVars.EXECPARALLEL) && task.canExecuteInParallel()) {
LOG.info("Starting task [" + task + "] in parallel");
taskRun.start();
} else {
LOG.info("Starting task [" + task + "] in serial mode");
taskRun.runSequential();
}
return taskRun;
}
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class QueryPlan method populateQueryPlan.
/**
* Populate api.QueryPlan from exec structures. This includes constructing the
* dependency graphs of stages and operators.
*
* @throws IOException
*/
private void populateQueryPlan() throws IOException {
query.setStageGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
query.getStageGraph().setNodeType(NodeType.STAGE);
Queue<Task<?>> tasksToVisit = new LinkedList<Task<?>>();
Set<Task<?>> tasksVisited = new HashSet<Task<?>>();
tasksToVisit.addAll(rootTasks);
while (tasksToVisit.size() != 0) {
Task<?> task = tasksToVisit.remove();
tasksVisited.add(task);
// populate stage
org.apache.hadoop.hive.ql.plan.api.Stage stage = new org.apache.hadoop.hive.ql.plan.api.Stage();
stage.setStageId(task.getId());
stage.setStageType(task.getType());
query.addToStageList(stage);
if (task instanceof ExecDriver) {
// populate map task
ExecDriver mrTask = (ExecDriver) task;
org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task();
mapTask.setTaskId(stage.getStageId() + "_MAP");
mapTask.setTaskType(TaskType.MAP);
stage.addToTaskList(mapTask);
populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork().values());
// populate reduce task
if (mrTask.hasReduce()) {
org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task();
reduceTask.setTaskId(stage.getStageId() + "_REDUCE");
reduceTask.setTaskType(TaskType.REDUCE);
stage.addToTaskList(reduceTask);
Collection<Operator<? extends OperatorDesc>> reducerTopOps = new ArrayList<Operator<? extends OperatorDesc>>();
reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer());
populateOperatorGraph(reduceTask, reducerTopOps);
}
} else {
org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task();
otherTask.setTaskId(stage.getStageId() + "_OTHER");
otherTask.setTaskType(TaskType.OTHER);
stage.addToTaskList(otherTask);
}
if (task instanceof ConditionalTask) {
org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
listEntry.setNode(task.getId());
ConditionalTask t = (ConditionalTask) task;
for (Task<?> listTask : t.getListTasks()) {
if (t.getChildTasks() != null) {
org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
childEntry.setNode(listTask.getId());
// done processing the task
for (Task<?> childTask : t.getChildTasks()) {
childEntry.addToChildren(childTask.getId());
if (!tasksVisited.contains(childTask)) {
tasksToVisit.add(childTask);
}
}
query.getStageGraph().addToAdjacencyList(childEntry);
}
listEntry.addToChildren(listTask.getId());
if (!tasksVisited.contains(listTask)) {
tasksToVisit.add(listTask);
}
}
query.getStageGraph().addToAdjacencyList(listEntry);
} else if (task.getChildTasks() != null) {
org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
entry.setNode(task.getId());
// done processing the task
for (Task<?> childTask : task.getChildTasks()) {
entry.addToChildren(childTask.getId());
if (!tasksVisited.contains(childTask)) {
tasksToVisit.add(childTask);
}
}
query.getStageGraph().addToAdjacencyList(entry);
}
}
}
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class GenMapRedUtils method createMRWorkForMergingFiles.
/**
* @param fsInput The FileSink operator.
* @param finalName the final destination path the merge job should output.
* @param dependencyTask
* @param mvTasks
* @param conf
* @param currTask
* @param lineageState
* @throws SemanticException
*
* create a Map-only merge job using CombineHiveInputFormat for all partitions with
* following operators:
* MR job J0:
* ...
* |
* v
* FileSinkOperator_1 (fsInput)
* |
* v
* Merge job J1:
* |
* v
* TableScan (using CombineHiveInputFormat) (tsMerge)
* |
* v
* FileSinkOperator (fsMerge)
*
* Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths
* do
* not contain the dynamic partitions (their parent). So after the dynamic partitions are
* created (after the first job finished before the moveTask or ConditionalTask start),
* we need to change the pathToPartitionInfo & pathToAlias to include the dynamic
* partition
* directories.
*/
public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, Path finalName, DependencyCollectionTask dependencyTask, List<Task<MoveWork>> mvTasks, HiveConf conf, Task<?> currTask, LineageState lineageState) throws SemanticException {
//
// 1. create the operator tree
//
FileSinkDesc fsInputDesc = fsInput.getConf();
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Creating merge work from " + System.identityHashCode(fsInput) + " with write ID " + (fsInputDesc.isMmTable() ? fsInputDesc.getTableWriteId() : null) + " into " + finalName);
}
boolean isBlockMerge = (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) || (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class));
RowSchema inputRS = fsInput.getSchema();
Long srcMmWriteId = fsInputDesc.isMmTable() ? fsInputDesc.getTableWriteId() : null;
FileSinkDesc fsOutputDesc = null;
TableScanOperator tsMerge = null;
if (!isBlockMerge) {
// Create a TableScan operator
tsMerge = GenMapRedUtils.createTemporaryTableScanOperator(fsInput.getCompilationOpContext(), inputRS);
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
Path mergeDest = srcMmWriteId == null ? finalName : finalName.getParent();
fsOutputDesc = new FileSinkDesc(mergeDest, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
fsOutputDesc.setMmWriteId(srcMmWriteId);
fsOutputDesc.setIsMerge(true);
// Create and attach the filesink for the merge.
OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);
}
// If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema
// needs to include the partition column, and the fsOutput should have
// a DynamicPartitionCtx to indicate that it needs to dynamically partitioned.
DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// adding DP ColumnInfo to the RowSchema signature
List<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol, // all partition column type should be string
TypeInfoFactory.stringTypeInfo, tblAlias, // partition column is virtual column
true);
signature.add(colInfo);
}
inputRS.setSignature(signature);
if (!isBlockMerge) {
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
fsOutputDesc.setDynPartCtx(dpCtx2);
}
// update the FileSinkOperator to include partition columns
usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), fsInputDesc.getTable(), dpCtx.getDPColNames());
} else {
// non-partitioned table
fsInputDesc.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
}
//
// 2. Constructing a conditional task consisting of a move task and a map reduce task
//
Path inputDirName = fsInputDesc.getMergeInputDirName();
MapWork cplan;
Serializable work;
if (isBlockMerge) {
cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, fsInput.getCompilationOpContext());
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
cplan.setName("File Merge");
((TezWork) work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Spark Merge File Work");
((SparkWork) work).add(cplan);
} else {
work = cplan;
}
} else {
cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
cplan.setName("File Merge");
((TezWork) work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Spark Merge File Work");
((SparkWork) work).add(cplan);
} else {
work = new MapredWork();
((MapredWork) work).setMapWork(cplan);
}
}
// use CombineHiveInputFormat for map-only merging
cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
// NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
// know if merge MR2 will be triggered at execution time
MoveWork dummyMv = null;
if (srcMmWriteId == null) {
// Only create the movework for non-MM table. No action needed for a MM table.
dummyMv = new MoveWork(null, null, null, new LoadFileDesc(inputDirName, finalName, true, null, null, false), false);
}
// Use the original fsOp path here in case of MM - while the new FSOP merges files inside the
// MM directory, the original MoveTask still commits based on the parent. Note that this path
// can only be triggered for a merge that's part of insert for now; MM tables do not support
// concatenate. Keeping the old logic for non-MM tables with temp directories and stuff.
Path fsopPath = srcMmWriteId != null ? fsInputDesc.getFinalDirName() : finalName;
Task<MoveWork> mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fsopPath, fsInputDesc.isMmTable(), fsInputDesc.isDirectInsert(), fsInputDesc.getMoveTaskId(), fsInputDesc.getAcidOperation());
ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, fsInputDesc.getMergeInputDirName(), finalName, mvTask, dependencyTask, lineageState);
// keep the dynamic partition context in conditional task resolver context
ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
mrCtx.setLbCtx(fsInputDesc.getLbCtx());
}
Aggregations