Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
The class TezTask, method getMapWork().
@Override
public Collection<MapWork> getMapWork() {
  List<MapWork> result = new LinkedList<MapWork>();
  TezWork work = getWork();
  // framework expects MapWork instances that have no physical parents
  // (i.e.: a union parent is fine, a broadcast parent isn't)
  for (BaseWork w : work.getAllWorkUnsorted()) {
    if (w instanceof MapWork) {
      List<BaseWork> parents = work.getParents(w);
      boolean candidate = true;
      for (BaseWork parent : parents) {
        if (!(parent instanceof UnionWork)) {
          candidate = false;
        }
      }
      if (candidate) {
        result.add((MapWork) w);
      }
    }
  }
  return result;
}
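As a hedged usage sketch (not part of the Hive sources above), a caller holding a TezTask could iterate the returned collection to see which table aliases feed each eligible MapWork; the tezTask variable and the println are illustrative assumptions.

// Illustrative only: assumes an already-constructed TezTask named tezTask.
for (MapWork mw : tezTask.getMapWork()) {
  // getAliasToWork() maps each table alias to its root operator (see setInputFormat below)
  for (String alias : mw.getAliasToWork().keySet()) {
    System.out.println("MapWork " + mw.getName() + " reads alias " + alias);
  }
}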
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
The class ReduceRecordProcessor, method close().
@Override
void close() {
  if (cache != null && cacheKeys != null) {
    for (String key : cacheKeys) {
      cache.release(key);
    }
  }
  if (dynamicValueCache != null && dynamicValueCacheKeys != null) {
    for (String k : dynamicValueCacheKeys) {
      dynamicValueCache.release(k);
    }
  }
  try {
    if (isAborted()) {
      for (ReduceRecordSource rs : sources) {
        if (!rs.close()) {
          // Preserving the old logic. Hmm...
          setAborted(false);
          break;
        }
      }
    }
    boolean abort = isAborted();
    reducer.close(abort);
    if (mergeWorkList != null) {
      for (BaseWork redWork : mergeWorkList) {
        ((ReduceWork) redWork).getReducer().close(abort);
      }
    }
    // Need to close the dummyOps as well. The operator pipeline
    // is not considered "closed/done" unless all operators are
    // done. For broadcast joins that includes the dummy parents.
    List<HashTableDummyOperator> dummyOps = reduceWork.getDummyOps();
    if (dummyOps != null) {
      for (Operator<?> dummyOp : dummyOps) {
        dummyOp.close(abort);
      }
    }
    ReportStats rps = new ReportStats(reporter, jconf);
    reducer.preorderMap(rps);
  } catch (Exception e) {
    if (!isAborted()) {
      // signal new failure to map-reduce
      l4j.error("Hit error while closing operators - failing tree");
      throw new RuntimeException("Hive Runtime Error while closing operators: " + e.getMessage(), e);
    }
  } finally {
    Utilities.clearWorkMap(jconf);
    MapredContext.close();
  }
}
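The method above follows a consistent cleanup contract: release cached keys unconditionally, close the reducer, any merged ReduceWork reducers and the dummy operators with a single abort flag, and clear per-task state in the finally block. The sketch below restates that contract in isolation; the KeyedCache and AbortableOperator interfaces are invented for illustration and are not Hive classes.

import java.util.List;

interface KeyedCache {
  void release(String key);
}

interface AbortableOperator {
  void close(boolean abort) throws Exception;
}

class CleanupSketch {
  // Hypothetical illustration of the release-then-close-with-abort pattern used above.
  static void closeAll(KeyedCache cache, List<String> keys, List<AbortableOperator> ops, boolean abort) {
    if (cache != null && keys != null) {
      for (String key : keys) {
        // cache entries are released regardless of success or failure
        cache.release(key);
      }
    }
    try {
      for (AbortableOperator op : ops) {
        // every operator sees the same abort flag, mirroring reducer.close(abort) above
        op.close(abort);
      }
    } catch (Exception e) {
      throw new RuntimeException("Error while closing operators: " + e.getMessage(), e);
    }
  }
}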
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
The class TestGenTezWork, method testCreateReduce().
@Test
public void testCreateReduce() throws SemanticException {
  // create map
  proc.process(rs, null, ctx, (Object[]) null);
  // create reduce
  proc.process(fs, null, ctx, (Object[]) null);
  TezWork work = ctx.currentTask.getWork();
  assertEquals(work.getAllWork().size(), 2);
  BaseWork w = work.getAllWork().get(1);
  assertTrue(w instanceof ReduceWork);
  assertTrue(work.getParents(w).contains(work.getAllWork().get(0)));
  ReduceWork rw = (ReduceWork) w;
  // need to make sure names are set for tez to connect things right
  assertNotNull(w.getName());
  // map work should start with our ts op
  assertSame(rw.getReducer(), fs);
  // should have severed the ties
  assertEquals(fs.getParentOperators().size(), 0);
}
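A complementary check, under the same fixture assumptions (proc, rs, fs, ctx) and assuming TezWork.getChildren mirrors the getParents accessor used above, could verify the map-side vertex as well; this is a hedged sketch, not part of the original test.

// Hypothetical follow-up assertions using the TezWork accessors seen above.
BaseWork mapSide = work.getAllWork().get(0);
assertTrue(mapSide instanceof MapWork);
// the map vertex should have no parents of its own
assertEquals(work.getParents(mapSide).size(), 0);
// and the reduce vertex created second should be among its children
assertTrue(work.getChildren(mapSide).contains(w));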
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
The class TezCompiler, method generateTaskTree().
@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
    List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
  ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
  GenTezUtils utils = new GenTezUtils();
  GenTezWork genTezWork = new GenTezWork(utils);
  GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
  // create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack.
  // The dispatcher generates the plan from the operator tree
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("Split Work - ReduceSink",
      ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
  opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin",
      MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
  opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and"
      + " stop traversing the DummyStore-MapJoin",
      CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
  opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
      FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork));
  opRules.put(new RuleRegExp("Split work - DummyStore",
      DummyStoreOperator.getOperatorName() + "%"), genTezWork);
  opRules.put(new RuleRegExp("Handle Potential Analyze Command",
      TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
  opRules.put(new RuleRegExp("Remember union",
      UnionOperator.getOperatorName() + "%"), new UnionProcessor());
  opRules.put(new RuleRegExp("AppMasterEventOperator",
      AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  GraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
  ogw.startWalking(topNodes, null);
  // we need to specify the reserved memory for each work that contains Map Join
  for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
    for (BaseWork w : baseWorkList) {
      // work should be the smallest unit for memory allocation
      w.setReservedMemoryMB((int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
    }
  }
  // we need to clone some operator plans and remove union operators still
  int indexForTezUnion = 0;
  for (BaseWork w : procCtx.workWithUnionOperators) {
    GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
  }
  // then we make sure the file sink operators are set up right
  for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
    GenTezUtils.processFileSink(procCtx, fileSink);
  }
  // Connect any edges required for min/max pushdown
  if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
    for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
      // Process min/max
      GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
    }
  }
  // and finally we hook up any events that need to be sent to the tez AM
  LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
  for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
    LOG.debug("Handling AppMasterEventOperator: " + event);
    GenTezUtils.processAppMasterEvent(procCtx, event);
  }
  perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
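For readers unfamiliar with the rule/dispatcher machinery used above, the following sketch shows the minimal wiring of a custom NodeProcessor behind a RuleRegExp, dispatched by a DefaultRuleDispatcher over a DefaultGraphWalker. The MarkFileSinks processor, its walk helper and the println are made up for illustration; the procCtx and topOps parameters stand in for the real GenTezProcContext and pCtx.getTopOps().values().

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical processor: fires for every operator whose name matches the registered rule.
class MarkFileSinks implements NodeProcessor {

  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
      throws SemanticException {
    System.out.println("visited file sink: " + nd.getName());
    return null;
  }

  // Wiring that mirrors generateTaskTree above.
  static void walk(NodeProcessorCtx procCtx, Collection<Node> topOps) throws SemanticException {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Mark FileSink", FileSinkOperator.getOperatorName() + "%"), new MarkFileSinks());
    // null default processor: nodes that match no rule are simply skipped
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker walker = new DefaultGraphWalker(disp);
    walker.startWalking(new ArrayList<Node>(topOps), null);
  }
}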
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
The class TezCompiler, method setInputFormat().
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof TezTask) {
    TezWork work = ((TezTask) task).getWork();
    List<BaseWork> all = work.getAllWork();
    for (BaseWork w : all) {
      if (w instanceof MapWork) {
        MapWork mapWork = (MapWork) w;
        HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
        if (!opMap.isEmpty()) {
          for (Operator<? extends OperatorDesc> op : opMap.values()) {
            setInputFormat(mapWork, op);
          }
        }
      }
    }
  } else if (task instanceof ConditionalTask) {
    List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
    for (Task<? extends Serializable> tsk : listTasks) {
      setInputFormat(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
      setInputFormat(childTask);
    }
  }
}
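The same recursion pattern can be reused for other whole-plan queries; the hedged sketch below counts MapWork vertices across a task tree (countMapWork is a hypothetical helper, not a Hive method, and a task reachable through several parents would be counted once per path).

// Hypothetical helper mirroring the traversal in setInputFormat above.
static int countMapWork(Task<? extends Serializable> task) {
  int count = 0;
  if (task instanceof TezTask) {
    for (BaseWork w : ((TezTask) task).getWork().getAllWork()) {
      if (w instanceof MapWork) {
        count++;
      }
    }
  } else if (task instanceof ConditionalTask) {
    for (Task<? extends Serializable> tsk : ((ConditionalTask) task).getListTasks()) {
      count += countMapWork(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
      count += countMapWork(childTask);
    }
  }
  return count;
}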