Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenMapRedUtils, method initPlan.
/**
 * Initialize the current plan by adding it to root tasks.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
  Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
  MapredWork plan = (MapredWork) currTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
  TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
  opTaskMap.put(reducer, currTask);
  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }
  assert currTopOp != null;
  String currAliasId = opProcCtx.getCurrAliasId();
  if (!opProcCtx.isSeenOp(currTask, currTopOp)) {
    setTaskPlan(currAliasId, currTopOp, currTask, false, opProcCtx);
  }
  currTopOp = null;
  currAliasId = null;
  opProcCtx.setCurrTask(currTask);
  opProcCtx.setCurrTopOp(currTopOp);
  opProcCtx.setCurrAliasId(currAliasId);
}
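Distilled, the method's use of ReduceWork is a small wiring pattern on the current MapredWork. A minimal sketch of just that pattern, reusing only the setters exercised above (reducer and numReducers stand in for the values pulled from the ReduceSinkOperator and its ReduceSinkDesc):

MapredWork plan = (MapredWork) currTask.getWork();   // plan of the task being built
ReduceWork reduceWork = new ReduceWork();
reduceWork.setReducer(reducer);                      // first child of the ReduceSinkOperator
reduceWork.setNumReduceTasks(numReducers);           // copied from ReduceSinkDesc.getNumReducers()
plan.setReduceWork(reduceWork);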
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenMapRedUtils, method splitPlan.
/**
 * Handles the case where cRS is met in a pOP(parentTask with RS)-cRS-cOP(noTask) chain:
 * creates a new child task for cRS-cOP and links the two tasks through a temporary file,
 * i.e. pOP-FS / TS-cRS-cOP.
 *
 * @param cRS
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx) throws SemanticException {
  // Generate a new task
  ParseContext parseCtx = opProcCtx.getParseCtx();
  Task<? extends Serializable> parentTask = opProcCtx.getCurrTask();
  MapredWork childPlan = getMapRedWork(parseCtx);
  Task<? extends Serializable> childTask = TaskFactory.get(childPlan);
  Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0);
  // Add the reducer
  ReduceWork rWork = new ReduceWork();
  childPlan.setReduceWork(rWork);
  rWork.setReducer(reducer);
  ReduceSinkDesc desc = cRS.getConf();
  childPlan.getReduceWork().setNumReduceTasks(Integer.valueOf(desc.getNumReducers()));
  opProcCtx.getOpTaskMap().put(reducer, childTask);
  splitTasks(cRS, parentTask, childTask, opProcCtx);
}
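Note the contrast with initPlan above: here the ReduceWork is attached to a brand-new child MapredWork rather than the current task's plan, and the reducer is registered in opTaskMap against the child task, so the operators below cRS are compiled into the new stage. splitTasks (not shown here) then links the parent and child tasks through a temporary file, which is what the pOP-FS / TS-cRS-cOP notation in the Javadoc appears to describe: a FileSink closes the parent plan and a TableScan over the temporary file feeds cRS in the child plan.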
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenMapRedUtils, method isMergeRequiredForMr.
private static boolean isMergeRequiredForMr(HiveConf hconf, FileSinkOperator fsOp, Task<? extends Serializable> currTask) {
  if (fsOp.getConf().isLinkedFileSink()) {
    // A linked file sink can increase the number of output files, possibly by a big margin. So, merge aggressively.
    return (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES));
  }
  // Merging is configured separately for a map-only job and for a map-reduce job
  if (currTask.getWork() instanceof MapredWork) {
    ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork();
    boolean mergeMapOnly = hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null;
    boolean mergeMapRed = hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && reduceWork != null;
    if (mergeMapOnly || mergeMapRed) {
      return true;
    }
  }
  return false;
}
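The map-reduce branch boils down to one decision: whether the MapredWork carries a ReduceWork picks which merge flag applies. A compact sketch of that rule, assuming hconf and a MapredWork named mrWork are in scope (mrWork is a stand-in for currTask.getWork() above):

ReduceWork reduceWork = mrWork.getReduceWork();
boolean mergeRequired = (reduceWork == null)
    ? hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES)      // map-only job
    : hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES);  // job with a reduce phase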
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class TestTezTask, method setUp.
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
utils = mock(DagUtils.class);
fs = mock(FileSystem.class);
path = mock(Path.class);
when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
when(utils.getTezDir(any(Path.class))).thenReturn(path);
when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class), any(FileSystem.class), any(Context.class), anyBoolean(), any(TezWork.class), any(VertexType.class), any(Map.class))).thenAnswer(new Answer<Vertex>() {
@Override
public Vertex answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
return Vertex.create(((BaseWork) args[1]).getName(), mock(ProcessorDescriptor.class), 0, mock(Resource.class));
}
});
when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class), any(TezEdgeProperty.class), any(BaseWork.class), any(TezWork.class))).thenAnswer(new Answer<Edge>() {
@Override
public Edge answer(InvocationOnMock invocation) throws Throwable {
Object[] args = invocation.getArguments();
return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
}
});
work = new TezWork("", null);
mws = new MapWork[] { new MapWork(), new MapWork() };
rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
work.addAll(mws);
work.addAll(rws);
int i = 0;
for (BaseWork w : work.getAllWork()) {
w.setName("Work " + (++i));
}
op = mock(Operator.class);
LinkedHashMap<String, Operator<? extends OperatorDesc>> map = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
map.put("foo", op);
mws[0].setAliasToWork(map);
mws[1].setAliasToWork(map);
LinkedHashMap<Path, ArrayList<String>> pathMap = new LinkedHashMap<>();
ArrayList<String> aliasList = new ArrayList<String>();
aliasList.add("foo");
pathMap.put(new Path("foo"), aliasList);
mws[0].setPathToAliases(pathMap);
mws[1].setPathToAliases(pathMap);
rws[0].setReducer(op);
rws[1].setReducer(op);
TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
work.connect(mws[0], rws[0], edgeProp);
work.connect(mws[1], rws[0], edgeProp);
work.connect(rws[0], rws[1], edgeProp);
task = new TezTask(utils);
task.setWork(work);
task.setConsole(mock(LogHelper.class));
QueryPlan mockQueryPlan = mock(QueryPlan.class);
doReturn(UUID.randomUUID().toString()).when(mockQueryPlan).getQueryId();
task.setQueryPlan(mockQueryPlan);
conf = new JobConf();
appLr = createResource("foo.jar");
HiveConf hiveConf = new HiveConf();
hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
SessionState.start(hiveConf);
session = mock(TezClient.class);
sessionState = mock(TezSessionState.class);
when(sessionState.getSession()).thenReturn(session);
when(sessionState.reopen()).thenReturn(sessionState);
when(session.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning("")).thenReturn(mock(DAGClient.class));
}
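The fixture connects the two MapWork vertices and two ReduceWork vertices into a small DAG over SIMPLE_EDGEs. A sketch of the resulting shape, plus how a test could assert it, assuming TezWork exposes a getChildren lookup and JUnit's assertTrue is imported (both assumptions, not shown in the snippet):

// mws[0] --+
//          +--> rws[0] --> rws[1]
// mws[1] --+
assertTrue(work.getChildren(mws[0]).contains(rws[0]));  // assumes TezWork.getChildren(BaseWork)
assertTrue(work.getChildren(rws[0]).contains(rws[1]));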
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class SparkPlanGenerator, method cloneJobConf.
@SuppressWarnings({ "unchecked" })
private JobConf cloneJobConf(BaseWork work) throws Exception {
  if (workToJobConf.containsKey(work)) {
    return workToJobConf.get(work);
  }
  JobConf cloned = new JobConf(jobConf);
  // Make sure we'll use a different plan path from the original one
  HiveConf.setVar(cloned, HiveConf.ConfVars.PLAN, "");
  try {
    cloned.setPartitionerClass(JavaUtils.loadClass(HiveConf.getVar(cloned, HiveConf.ConfVars.HIVEPARTITIONER)));
  } catch (ClassNotFoundException e) {
    String msg = "Could not find partitioner class: " + e.getMessage() + " which is specified by: " + HiveConf.ConfVars.HIVEPARTITIONER.varname;
    throw new IllegalArgumentException(msg, e);
  }
  if (work instanceof MapWork) {
    MapWork mapWork = (MapWork) work;
    cloned.setBoolean("mapred.task.is.map", true);
    List<Path> inputPaths = Utilities.getInputPaths(cloned, mapWork, scratchDir, context, false);
    Utilities.setInputPaths(cloned, inputPaths);
    Utilities.setMapWork(cloned, mapWork, scratchDir, false);
    Utilities.createTmpDirs(cloned, mapWork);
    if (work instanceof MergeFileWork) {
      MergeFileWork mergeFileWork = (MergeFileWork) work;
      cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName());
      cloned.set("mapred.input.format.class", mergeFileWork.getInputformat());
      cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class);
    } else {
      cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
    }
    if (mapWork.getMaxSplitSize() != null) {
      HiveConf.setLongVar(cloned, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mapWork.getMaxSplitSize());
    }
    if (mapWork.getMinSplitSize() != null) {
      HiveConf.setLongVar(cloned, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mapWork.getMinSplitSize());
    }
    if (mapWork.getMinSplitSizePerNode() != null) {
      HiveConf.setLongVar(cloned, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mapWork.getMinSplitSizePerNode());
    }
    if (mapWork.getMinSplitSizePerRack() != null) {
      HiveConf.setLongVar(cloned, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, mapWork.getMinSplitSizePerRack());
    }
    // remember the JobConf cloned for each MapWork, so we won't clone for it again
    workToJobConf.put(work, cloned);
  } else if (work instanceof ReduceWork) {
    cloned.setBoolean("mapred.task.is.map", false);
    Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false);
    Utilities.createTmpDirs(cloned, (ReduceWork) work);
    cloned.set(Utilities.MAPRED_REDUCER_CLASS, ExecReducer.class.getName());
  }
  return cloned;
}
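Two details of the ReduceWork branch are worth noting: it marks the clone as the reduce side (mapred.task.is.map set to false) and registers Hive's ExecReducer via Utilities.MAPRED_REDUCER_CLASS, and, unlike the MapWork branch, it does not memoize the clone in workToJobConf, so each call for a ReduceWork produces a fresh JobConf while MapWork clones are reused.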