Use of org.apache.hadoop.hive.ql.io.merge.MergeFileTask in project hive by apache.
The class AlterTableConcatenateOperation, method getTask.
private Task<?> getTask(MergeFileWork mergeWork) {
  if (context.getConf().getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
    TezWork tezWork = new TezWork(context.getQueryState().getQueryId(), context.getConf());
    mergeWork.setName("File Merge");
    tezWork.add(mergeWork);
    Task<?> task = new TezTask();
    ((TezTask) task).setWork(tezWork);
    return task;
  } else {
    Task<?> task = new MergeFileTask();
    ((MergeFileTask) task).setWork(mergeWork);
    return task;
  }
}
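Both usages on this page follow the same dispatch: the MergeFileWork is wrapped in a TezTask when Tez is the configured execution engine, and handed to a MergeFileTask otherwise. A minimal standalone sketch of that engine check, assuming only a HiveConf is available (the class name EngineDispatchSketch and the println placeholders are illustrative, not part of Hive):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class EngineDispatchSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // hive.execution.engine selects the runtime: "tez" gets a TezTask that
    // carries the merge work as a TezWork vertex; anything else (e.g. "mr")
    // falls through to a plain MergeFileTask.
    String engine = conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE);
    if (engine.equals("tez")) {
      System.out.println("would wrap MergeFileWork in TezWork + TezTask");
    } else {
      System.out.println("would hand MergeFileWork to a MergeFileTask");
    }
  }
}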
Use of org.apache.hadoop.hive.ql.io.merge.MergeFileTask in project hive by apache.
The class DDLTask, method mergeFiles.
/**
 * First, make sure the source table/partition is not archived, indexed,
 * or stored in a non-RCFile format. If any of these is true, throw an
 * exception.
 *
 * The merge is performed by creating a BlockMergeTask from the
 * mergeFilesDesc.
 *
 * @param db
 * @param mergeFilesDesc
 * @return
 * @throws HiveException
 */
private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc,
    DriverContext driverContext) throws HiveException {
  ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx();
  boolean lbatc = lbCtx == null ? false : lbCtx.isSkewedStoredAsDir();
  int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();
  // merge work only needs input and output.
  MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(),
      mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName(),
      mergeFilesDesc.getTableDesc());
  LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
  ArrayList<String> inputDirstr = new ArrayList<String>(1);
  inputDirstr.add(mergeFilesDesc.getInputDir().toString());
  pathToAliases.put(mergeFilesDesc.getInputDir().get(0), inputDirstr);
  mergeWork.setPathToAliases(pathToAliases);
  mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
  mergeWork.resolveConcatenateMerge(db.getConf());
  mergeWork.setMapperCannotSpanPartns(true);
  mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
  final FileMergeDesc fmd;
  if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
    fmd = new RCFileMergeDesc();
  } else {
    // safe to assume else is ORC, as the semantic analyzer will check for RC/ORC
    fmd = new OrcFileMergeDesc();
  }
  fmd.setDpCtx(null);
  fmd.setHasDynamicPartitions(false);
  fmd.setListBucketingAlterTableConcatenate(lbatc);
  fmd.setListBucketingDepth(lbd);
  fmd.setOutputPath(mergeFilesDesc.getOutputDir());
  CompilationOpContext opContext = driverContext.getCtx().getOpContext();
  Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(opContext, fmd);
  LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
      new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
  aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
  mergeWork.setAliasToWork(aliasToWork);
  DriverContext driverCxt = new DriverContext();
  Task<?> task;
  if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
    TezWork tezWork = new TezWork(queryState.getQueryId(), conf);
    mergeWork.setName("File Merge");
    tezWork.add(mergeWork);
    task = new TezTask();
    ((TezTask) task).setWork(tezWork);
  } else {
    task = new MergeFileTask();
    ((MergeFileTask) task).setWork(mergeWork);
  }
  // initialize the task and execute
  task.initialize(queryState, getQueryPlan(), driverCxt, opContext);
  subtask = task;
  int ret = task.execute(driverCxt);
  if (subtask.getException() != null) {
    setException(subtask.getException());
  }
  return ret;
}
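Note the wiring invariant in mergeFiles: the alias string registered in pathToAliases must match the key used in aliasToWork, otherwise the framework cannot bind the merge operator to its input directory. A minimal standalone sketch of that two-map wiring, assuming a hypothetical input path (AliasWiringSketch and the Object stand-in for the operator are illustrative):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import org.apache.hadoop.fs.Path;

public class AliasWiringSketch {
  public static void main(String[] args) {
    Path inputDir = new Path("/warehouse/db/tbl/part=1"); // hypothetical input
    String alias = inputDir.toString();                   // alias shared by both maps

    // path -> aliases: tells the map runner which aliases read this directory
    LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
    ArrayList<String> aliases = new ArrayList<>(1);
    aliases.add(alias);
    pathToAliases.put(inputDir, aliases);

    // alias -> work: in mergeFiles this maps the alias to the merge operator;
    // a plain Object stands in for Operator<? extends OperatorDesc> here
    LinkedHashMap<String, Object> aliasToWork = new LinkedHashMap<>();
    aliasToWork.put(alias, new Object());

    // the invariant: every alias registered for a path resolves to some work
    // (run with -ea to enable the assertion)
    assert aliasToWork.containsKey(pathToAliases.get(inputDir).get(0));
  }
}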
Use of org.apache.hadoop.hive.ql.io.merge.MergeFileTask in project hive by apache.
The class MoveTask, method inferTaskInformation.
private void inferTaskInformation(TaskInformation ti) {
  // Find the first ancestor of this MoveTask which is some form of map reduce task
  // (either standard, local, or a merge)
  while (ti.task.getParentTasks() != null && ti.task.getParentTasks().size() == 1) {
    ti.task = (Task) ti.task.getParentTasks().get(0);
    // If it was a merge task or a local map reduce task, nothing can be inferred
    if (ti.task instanceof MergeFileTask || ti.task instanceof MapredLocalTask) {
      break;
    }
    // If it's a standard map reduce task, check what, if anything, it inferred about
    // the directory this move task is moving
    if (ti.task instanceof MapRedTask) {
      MapredWork work = (MapredWork) ti.task.getWork();
      MapWork mapWork = work.getMapWork();
      ti.bucketCols = mapWork.getBucketedColsByDirectory().get(ti.path);
      ti.sortCols = mapWork.getSortedColsByDirectory().get(ti.path);
      if (work.getReduceWork() != null) {
        ti.numBuckets = work.getReduceWork().getNumReduceTasks();
      }
      if (ti.bucketCols != null || ti.sortCols != null) {
        // This must be a final map reduce task (the task containing the file sink
        // operator that writes the final output)
        assert work.isFinalMapRed();
      }
      break;
    }
    // If it's a move task, follow the path the files were moved from; no merge
    // happened here when the condition for merging is not met, see GenMRFileSink1.
    if (ti.task instanceof MoveTask) {
      MoveTask mt = (MoveTask) ti.task;
      if (mt.getWork().getLoadFileWork() != null) {
        ti.path = mt.getWork().getLoadFileWork().getSourcePath().toUri().toString();
      }
    }
  }
}
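The traversal above only climbs the task DAG while each task has exactly one parent, so any fan-in stops the bucket/sort inference. A minimal generic sketch of that single-parent walk, assuming a simple stand-in node type (Node, its parents list, and the example chain are illustrative, not Hive types):

import java.util.Arrays;
import java.util.List;

public class SingleParentWalkSketch {
  // Hypothetical stand-in for Hive's Task: a node with zero or more parents.
  static class Node {
    List<Node> parents;
    Node(List<Node> parents) { this.parents = parents; }
  }

  // Walk upward only while the chain is unambiguous (exactly one parent),
  // mirroring the loop condition in inferTaskInformation.
  static Node walkUp(Node start) {
    Node cur = start;
    while (cur.parents != null && cur.parents.size() == 1) {
      cur = cur.parents.get(0);
    }
    return cur;
  }

  public static void main(String[] args) {
    Node root = new Node(null);
    Node mid = new Node(Arrays.asList(root));
    Node leaf = new Node(Arrays.asList(mid));
    System.out.println(walkUp(leaf) == root); // true: the chain is linear
  }
}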