Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class TestGenTezWork, method testCreateReduce:
@Test
public void testCreateReduce() throws SemanticException {
  // create map
  proc.process(rs, null, ctx, (Object[]) null);
  // create reduce
  proc.process(fs, null, ctx, (Object[]) null);
  TezWork work = ctx.currentTask.getWork();
  assertEquals(work.getAllWork().size(), 2);
  BaseWork w = work.getAllWork().get(1);
  assertTrue(w instanceof ReduceWork);
  assertTrue(work.getParents(w).contains(work.getAllWork().get(0)));
  ReduceWork rw = (ReduceWork) w;
  // need to make sure names are set for tez to connect things right
  assertNotNull(w.getName());
  // reduce work should have our fs op as the reducer
  assertSame(rw.getReducer(), fs);
  // should have severed the ties
  assertEquals(fs.getParentOperators().size(), 0);
}
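For quick inspection of a plan built this way, the same accessors the test exercises (getAllWork, getParents, getName) are enough to walk the TezWork DAG. The helper below is an illustrative sketch, not part of the Hive test suite; its class name and output format are made up here.

import java.util.List;

import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TezWork;

// Hypothetical helper: prints each vertex of a TezWork DAG and its parents,
// using only the accessors seen in the test above.
public class TezWorkDumper {
  public static void dump(TezWork work) {
    List<BaseWork> all = work.getAllWork();
    for (BaseWork w : all) {
      String kind = (w instanceof ReduceWork) ? "ReduceWork" : w.getClass().getSimpleName();
      System.out.println(w.getName() + " [" + kind + "]");
      for (BaseWork parent : work.getParents(w)) {
        System.out.println("  <- " + parent.getName());
      }
    }
  }
}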
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class ExecReducer, method configure:
@Override
public void configure(JobConf job) {
  rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector keyObjectInspector;
  if (isInfoEnabled) {
    try {
      LOG.info("conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
      LOG.info("thread classpath = " + Arrays.asList(((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
    } catch (Exception e) {
      LOG.info("cannot get classpath: " + e.getMessage());
    }
  }
  jc = job;
  ReduceWork gWork = Utilities.getReduceWork(job);
  reducer = gWork.getReducer();
  // clear out any parents as reducer is the root
  reducer.setParentOperators(null);
  isTagged = gWork.getNeedsTagging();
  try {
    keyTableDesc = gWork.getKeyDesc();
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
    for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
      // We should initialize the SerDe with the TypeInfo when available.
      valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
      inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
      valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
      ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
      ois.add(keyObjectInspector);
      ois.add(valueObjectInspector[tag]);
      rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  MapredContext.init(false, new JobConf(jc));
  // initialize reduce operator tree
  try {
    LOG.info(reducer.dump(0));
    reducer.initialize(jc, rowObjectInspector);
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
}
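The loop above builds, for each tag, a struct ObjectInspector whose two fields come from the key and value deserializers. A minimal standalone sketch of that shape follows; the primitive inspectors are hypothetical stand-ins for what the deserializers would report, and the field names are illustrative in place of Utilities.reduceFieldNameList.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowInspectorSketch {
  public static void main(String[] args) {
    // Stand-ins for the inspectors the key/value deserializers would provide.
    ObjectInspector keyOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.javaLongObjectInspector;
    // Illustrative field names; ExecReducer uses Utilities.reduceFieldNameList here.
    List<String> fieldNames = Arrays.asList("key", "value");
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        fieldNames, Arrays.asList(keyOI, valueOI));
    // Each per-tag row the reducer sees is then a {key, value} struct.
    System.out.println(rowOI.getTypeName());
  }
}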
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class SparkPlanGenerator, method generate:
private SparkTran generate(BaseWork work, SparkWork sparkWork) throws Exception {
  initStatsPublisher(work);
  JobConf newJobConf = cloneJobConf(work);
  checkSpecs(work, newJobConf);
  byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);
  boolean caching = isCachingWork(work, sparkWork);
  if (work instanceof MapWork) {
    // Create tmp dir for MergeFileWork
    if (work instanceof MergeFileWork) {
      Path outputPath = ((MergeFileWork) work).getOutputDir();
      Path tempOutPath = Utilities.toTempPath(outputPath);
      FileSystem fs = outputPath.getFileSystem(jobConf);
      try {
        if (!fs.exists(tempOutPath)) {
          fs.mkdirs(tempOutPath);
        }
      } catch (IOException e) {
        throw new RuntimeException("Can't make path " + outputPath + " : " + e.getMessage());
      }
    }
    MapTran mapTran = new MapTran(caching);
    HiveMapFunction mapFunc = new HiveMapFunction(confBytes, sparkReporter);
    mapTran.setMapFunction(mapFunc);
    return mapTran;
  } else if (work instanceof ReduceWork) {
    ReduceTran reduceTran = new ReduceTran(caching);
    HiveReduceFunction reduceFunc = new HiveReduceFunction(confBytes, sparkReporter);
    reduceTran.setReduceFunction(reduceFunc);
    return reduceTran;
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, but found " + work.getClass().getName());
  }
}
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class SparkPlanGenerator, method cloneJobConf:
@SuppressWarnings({ "unchecked" })
private JobConf cloneJobConf(BaseWork work) throws Exception {
  if (workToJobConf.containsKey(work)) {
    return workToJobConf.get(work);
  }
  JobConf cloned = new JobConf(jobConf);
  // Make sure we'll use a different plan path from the original one
  HiveConf.setVar(cloned, HiveConf.ConfVars.PLAN, "");
  try {
    cloned.setPartitionerClass(JavaUtils.loadClass(HiveConf.getVar(cloned, HiveConf.ConfVars.HIVEPARTITIONER)));
  } catch (ClassNotFoundException e) {
    String msg = "Could not find partitioner class: " + e.getMessage() + " which is specified by: " + HiveConf.ConfVars.HIVEPARTITIONER.varname;
    throw new IllegalArgumentException(msg, e);
  }
  if (work instanceof MapWork) {
    cloned.setBoolean("mapred.task.is.map", true);
    List<Path> inputPaths = Utilities.getInputPaths(cloned, (MapWork) work, scratchDir, context, false);
    Utilities.setInputPaths(cloned, inputPaths);
    Utilities.setMapWork(cloned, (MapWork) work, scratchDir, false);
    Utilities.createTmpDirs(cloned, (MapWork) work);
    if (work instanceof MergeFileWork) {
      MergeFileWork mergeFileWork = (MergeFileWork) work;
      cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName());
      cloned.set("mapred.input.format.class", mergeFileWork.getInputformat());
      cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class);
    } else {
      cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
    }
    if (((MapWork) work).getMinSplitSize() != null) {
      HiveConf.setLongVar(cloned, HiveConf.ConfVars.MAPREDMINSPLITSIZE, ((MapWork) work).getMinSplitSize());
    }
    // remember the JobConf cloned for each MapWork, so we won't clone for it again
    workToJobConf.put(work, cloned);
  } else if (work instanceof ReduceWork) {
    cloned.setBoolean("mapred.task.is.map", false);
    Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false);
    Utilities.createTmpDirs(cloned, (ReduceWork) work);
    cloned.set(Utilities.MAPRED_REDUCER_CLASS, ExecReducer.class.getName());
  }
  return cloned;
}
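The containsKey/put pair above is a simple per-work memoization of the cloned JobConf (note that only the MapWork branch stores its clone; ReduceWork clones are rebuilt each time). The same caching pattern in isolation, as a hypothetical standalone helper with a made-up class name:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapred.JobConf;

// Hypothetical helper illustrating the workToJobConf caching pattern:
// each key gets its own clone of a base JobConf, created at most once.
public class JobConfCache {
  private final JobConf base;
  private final Map<Object, JobConf> cache = new HashMap<>();

  public JobConfCache(JobConf base) {
    this.base = base;
  }

  public JobConf get(Object key) {
    return cache.computeIfAbsent(key, k -> new JobConf(base));
  }
}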
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenMapRedUtils, method initPlan:
/**
* Initialize the current plan by adding it to root tasks.
*
* @param op
* the reduce sink operator encountered
* @param opProcCtx
* processing context
*/
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
  Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
  MapredWork plan = (MapredWork) currTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
  TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
  opTaskMap.put(reducer, currTask);
  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }
  assert currTopOp != null;
  String currAliasId = opProcCtx.getCurrAliasId();
  if (!opProcCtx.isSeenOp(currTask, currTopOp)) {
    setTaskPlan(currAliasId, currTopOp, currTask, false, opProcCtx);
  }
  currTopOp = null;
  currAliasId = null;
  opProcCtx.setCurrTask(currTask);
  opProcCtx.setCurrTopOp(currTopOp);
  opProcCtx.setCurrAliasId(currAliasId);
}
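Stripped of the traversal context, the ReduceWork wiring that initPlan performs amounts to attaching a reduce side to the MapredWork and sizing it. A minimal sketch of just that plan-side wiring, assuming the no-arg MapredWork constructor; the ReduceWork constructor and the setters are the ones used above, and the reducer operator itself is omitted since it comes from the reduce sink's child:

import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

public class ReduceWorkWiringSketch {
  public static void main(String[] args) {
    // A MapredWork starts map-only; initPlan adds the reduce side when it
    // encounters a reduce sink.
    MapredWork plan = new MapredWork();
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(1);
    // plan.getReduceWork().setReducer(reducer); // reducer is the RS child, as above
    plan.setReduceWork(rWork);
    System.out.println("reducers: " + plan.getReduceWork().getNumReduceTasks());
  }
}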