use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class MapReduceCompiler method setInputFormat.
/**
 * Recursively walks the task graph rooted at {@code task}.
 *
 * <p>For an {@link ExecDriver} task, {@code setInputFormat(MapWork, Operator)} is applied
 * to every operator registered in the alias-to-work map of the task's map work. For a
 * {@link ConditionalTask}, each task in its task list is visited instead. In every case
 * the child tasks of {@code task}, if any, are visited afterwards.
 *
 * @param task root of the (sub)graph to process
 */
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof ExecDriver) {
    MapWork mapWork = ((MapredWork) task.getWork()).getMapWork();
    // Iterating an empty map is a no-op, so no separate emptiness check is required.
    for (Operator<? extends OperatorDesc> rootOp : mapWork.getAliasToWork().values()) {
      setInputFormat(mapWork, rootOp);
    }
  } else if (task instanceof ConditionalTask) {
    for (Task<? extends Serializable> candidate : ((ConditionalTask) task).getListTasks()) {
      setInputFormat(candidate);
    }
  }

  // Descend into the children regardless of the kind of task handled above.
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> child : task.getChildTasks()) {
      setInputFormat(child);
    }
  }
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class SerializationUtilities method clonePlan.
/**
 * Clones a plan by serializing it into an in-memory buffer and deserializing the
 * result. Do not use unless necessary.
 *
 * <p>The compilation context of one operator of the original plan (or {@code null}
 * when the plan exposes no operator) is set on every operator of the clone. The whole
 * operation is bracketed by {@code PerfLogger.CLONE_PLAN} perf-log calls.
 *
 * @param plan The plan.
 * @return The clone.
 */
public static MapredWork clonePlan(MapredWork plan) {
  // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN);

  // Remember the compilation context of the original plan, if it has any operator.
  Operator<?> anyOp = plan.getAnyOperator();
  CompilationOpContext originalCtx = null;
  if (anyOp != null) {
    originalCtx = anyOp.getCompilationOpContext();
  }

  // Round-trip the plan through an in-memory byte buffer.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream(4096);
  serializePlan(plan, buffer, true);
  ByteArrayInputStream serialized = new ByteArrayInputStream(buffer.toByteArray());
  MapredWork copy = deserializePlan(serialized, MapredWork.class, true);

  // Re-attach the remembered context to each operator of the clone.
  for (Operator<?> clonedOp : copy.getAllOperators()) {
    clonedOp.setCompilationOpContext(originalCtx);
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN);
  return copy;
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMRFileSink1 method process.
/**
 * File Sink Operator encountered.
 *
 * <p>Marks the current task's work as the final map-reduce work. If the context already
 * holds a linked-file-sink task map, the task registered for this sink's descriptor is
 * handed to {@code processLinkedFileDesc} and processing stops. Otherwise the method
 * decides whether a merge job is required, processes the file sink, optionally creates
 * the merge work, records linked file sink descriptors, and flags the current task as a
 * fetch source when applicable.
 *
 * @param nd
 *          the file sink operator encountered
 * @param stack
 *          the node stack, forwarded to {@code processFS}
 * @param opProcCtx
 *          context
 * @param nodeOutputs
 *          not read by this method
 * @return always {@code true}
 * @throws SemanticException
 *           declared by the signature; this method contains no direct {@code throw}
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
  GenMRProcContext genCtx = (GenMRProcContext) opProcCtx;
  ParseContext parseContext = genCtx.getParseCtx();
  Task<? extends Serializable> task = genCtx.getCurrTask();
  genCtx.addRootIfPossible(task);

  FileSinkOperator sink = (FileSinkOperator) nd;
  boolean isInsertTable = GenMapRedUtils.isInsertInto(parseContext, sink);
  HiveConf conf = parseContext.getConf();

  // Mark this task as a final map reduce task (ignoring the optional merge task)
  ((MapredWork) task.getWork()).setFinalMapRed(true);

  // When a linked-file-sink task map is already present, reuse the task registered
  // for this sink's descriptor and stop here.
  Map<FileSinkDesc, Task<? extends Serializable>> knownLinks = genCtx.getLinkedFileDescTasks();
  if (knownLinks != null) {
    processLinkedFileDesc(genCtx, knownLinks.get(sink.getConf()));
    return true;
  }

  // A sink that has already been seen does not get another merge attempt.
  boolean alreadySeen = genCtx.getSeenFileSinkOps() != null && genCtx.getSeenFileSinkOps().contains(nd);
  boolean mergeRequired = !alreadySeen
      && GenMapRedUtils.isMergeRequired(genCtx.getMvTask(), conf, sink, task, isInsertTable);

  Path finalName = processFS(sink, stack, opProcCtx, mergeRequired);
  if (mergeRequired) {
    // Merge the files in the destination table/partitions by creating Map-only merge job
    // If underlying data is RCFile or OrcFile, RCFileBlockMerge task or
    // OrcFileStripeMerge task would be created.
    LOG.info("using CombineHiveInputformat for the merge job");
    GenMapRedUtils.createMRWorkForMergingFiles(sink, finalName, genCtx.getDependencyTaskForMultiInsert(), genCtx.getMvTask(), conf, task);
  }

  FileSinkDesc sinkDesc = sink.getConf();
  if (sinkDesc.isLinkedFileSink()) {
    // Lazily create the descriptor-to-task map on the context.
    Map<FileSinkDesc, Task<? extends Serializable>> linkTasks = genCtx.getLinkedFileDescTasks();
    if (linkTasks == null) {
      linkTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>();
      genCtx.setLinkedFileDescTasks(linkTasks);
    }
    // The child tasks may be null in case of a select
    boolean hasSingleChild = task.getChildTasks() != null && task.getChildTasks().size() == 1;
    if (hasSingleChild) {
      for (FileSinkDesc linkedDesc : sinkDesc.getLinkedFileSinkDesc()) {
        linkTasks.put(linkedDesc, task.getChildTasks().get(0));
      }
    }
  }

  // A childless task whose sink feeds the fetch task is flagged as the fetch source.
  FetchTask fetchTask = parseContext.getFetchTask();
  if (fetchTask != null && task.getNumChild() == 0 && fetchTask.isFetchFrom(sinkDesc)) {
    task.setFetchSource(true);
  }
  return true;
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMRRedSink1 method process.
/**
 * Reduce Sink encountered.
 *
 * <ul>
 * <li>If no task is registered yet for this reduce sink's single child (the reducer),
 * the plan is initialized when the current plan has no reduce work, and split
 * otherwise.</li>
 * <li>If a task is already registered for the reducer (this will happen in case of
 * joins), the current task is joined into that task, which then becomes the current
 * task.</li>
 * </ul>
 *
 * @param nd
 *          the reduce sink operator encountered
 * @param stack
 *          the node stack; the element just below its top is used to look up the
 *          current map-reduce context
 * @param opProcCtx
 *          context
 * @param nodeOutputs
 *          passed to {@code GenMapRedUtils.hasBranchFinished}
 * @return {@code false} when {@code GenMapRedUtils.hasBranchFinished} reports the branch
 *         as finished, {@code true} otherwise
 * @throws SemanticException
 *           declared by the signature; this method contains no direct {@code throw} of it
 * @throws IllegalStateException
 *           if the reduce sink does not have exactly one child
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
  ReduceSinkOperator reduceSink = (ReduceSinkOperator) nd;
  GenMRProcContext genCtx = (GenMRProcContext) opProcCtx;

  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> opCtxMap = genCtx.getMapCurrCtx();
  // Context recorded for the stack element directly beneath this reduce sink.
  GenMapRedCtx precedingCtx = opCtxMap.get(stack.get(stack.size() - 2));
  Task<? extends Serializable> task = precedingCtx.getCurrTask();
  MapredWork taskPlan = (MapredWork) task.getWork();
  String aliasId = precedingCtx.getCurrAliasId();

  if (reduceSink.getNumChild() != 1) {
    throw new IllegalStateException("Expecting operator " + reduceSink + " to have one child. " + "But found multiple children : " + reduceSink.getChildOperators());
  }

  Operator<? extends OperatorDesc> reducer = reduceSink.getChildOperators().get(0);
  Task<? extends Serializable> existingTask = genCtx.getOpTaskMap().get(reducer);
  genCtx.setCurrAliasId(aliasId);
  genCtx.setCurrTask(task);

  if (existingTask != null) {
    // This will happen in case of joins. The current plan can be thrown away
    // after being merged with the original plan
    GenMapRedUtils.joinPlan(task, existingTask, genCtx);
    task = existingTask;
    genCtx.setCurrTask(task);
  } else if (taskPlan.getReduceWork() == null) {
    // No plan exists for this reducer and the current plan has no reduce work yet.
    GenMapRedUtils.initPlan(reduceSink, genCtx);
  } else {
    // The current plan already has reduce work, so it is split.
    GenMapRedUtils.splitPlan(reduceSink, genCtx);
  }

  opCtxMap.put(reduceSink, new GenMapRedCtx(genCtx.getCurrTask(), genCtx.getCurrAliasId()));

  boolean branchFinished = GenMapRedUtils.hasBranchFinished(nodeOutputs);
  if (branchFinished) {
    genCtx.addRootIfPossible(task);
  }
  return !branchFinished;
}
use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.
the class GenMRRedSink3 method process.
/**
 * Reduce Sink encountered while a {@link UnionOperator} is present on the node stack.
 *
 * <p>The task associated with the union (or the context's current task when no context
 * is recorded for the union) becomes the current task. Depending on whether a task is
 * already registered for the reducer and whether the union task's plan already has reduce
 * work, the union plan is initialized, split, re-initialized, or joined with the
 * reducer's task. Finally the context entry for this reduce sink is recorded and the
 * current union operator is cleared.
 *
 * @param nd
 *          the reduce sink operator encountered
 * @param stack
 *          the node stack, searched for the enclosing union operator
 * @param opProcCtx
 *          context
 * @param nodeOutputs
 *          not read by this method
 * @return always {@code true}
 * @throws SemanticException
 *           declared by the signature; this method contains no direct {@code throw}
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
  ReduceSinkOperator reduceSink = (ReduceSinkOperator) nd;
  GenMRProcContext genCtx = (GenMRProcContext) opProcCtx;

  // The reducer is the first child of the reduce sink; the union is looked up on the stack.
  Operator<? extends OperatorDesc> reducer = reduceSink.getChildOperators().get(0);
  UnionOperator union = Utils.findNode(stack, UnionOperator.class);
  assert union != null;

  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> opCtxMap = genCtx.getMapCurrCtx();
  GenMapRedCtx unionCtx = opCtxMap.get(union);
  Task<? extends Serializable> unionTask;
  if (unionCtx == null) {
    // No context recorded for the union: fall back to the context's current task.
    unionTask = genCtx.getCurrTask();
  } else {
    unionTask = unionCtx.getCurrTask();
  }

  MapredWork unionPlan = (MapredWork) unionTask.getWork();
  Task<? extends Serializable> reducerTask = genCtx.getOpTaskMap().get(reducer);
  genCtx.setCurrTask(unionTask);

  if (reducerTask == null && unionPlan.getReduceWork() == null) {
    // When the reducer is encountered for the first time
    GenMapRedUtils.initUnionPlan(reduceSink, union, genCtx, unionTask);
  } else if (reducerTask == null) {
    // When union is followed by a multi-table insert
    GenMapRedUtils.splitPlan(reduceSink, genCtx);
  } else if (unionPlan.getReduceWork() != null && unionPlan.getReduceWork().getReducer() == reducer) {
    // The union is already initialized. However, the union is walked from
    // another input
    // initUnionPlan is idempotent
    GenMapRedUtils.initUnionPlan(reduceSink, union, genCtx, unionTask);
  } else {
    // A different task already owns the reducer: join into it and make it current.
    GenMapRedUtils.joinUnionPlan(genCtx, union, unionTask, reducerTask, false);
    genCtx.setCurrTask(reducerTask);
  }

  opCtxMap.put(reduceSink, new GenMapRedCtx(genCtx.getCurrTask(), genCtx.getCurrAliasId()));

  // the union operator has been processed
  genCtx.setCurrUnionOp(null);
  return true;
}
Aggregations