use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class QueryPlan method populateQueryPlan.
/**
* Populate api.QueryPlan from exec structures. This includes constructing the
* dependency graphs of stages and operators.
*
* @throws IOException
*/
private void populateQueryPlan() throws IOException {
query.setStageGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
query.getStageGraph().setNodeType(NodeType.STAGE);
Queue<Task<?>> tasksToVisit = new LinkedList<Task<?>>();
Set<Task<?>> tasksVisited = new HashSet<Task<?>>();
tasksToVisit.addAll(rootTasks);
while (tasksToVisit.size() != 0) {
Task<?> task = tasksToVisit.remove();
tasksVisited.add(task);
// populate stage
org.apache.hadoop.hive.ql.plan.api.Stage stage = new org.apache.hadoop.hive.ql.plan.api.Stage();
stage.setStageId(task.getId());
stage.setStageType(task.getType());
query.addToStageList(stage);
if (task instanceof ExecDriver) {
// populate map task
ExecDriver mrTask = (ExecDriver) task;
org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task();
mapTask.setTaskId(stage.getStageId() + "_MAP");
mapTask.setTaskType(TaskType.MAP);
stage.addToTaskList(mapTask);
populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork().values());
// populate reduce task
if (mrTask.hasReduce()) {
org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task();
reduceTask.setTaskId(stage.getStageId() + "_REDUCE");
reduceTask.setTaskType(TaskType.REDUCE);
stage.addToTaskList(reduceTask);
Collection<Operator<? extends OperatorDesc>> reducerTopOps = new ArrayList<Operator<? extends OperatorDesc>>();
reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer());
populateOperatorGraph(reduceTask, reducerTopOps);
}
} else {
org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task();
otherTask.setTaskId(stage.getStageId() + "_OTHER");
otherTask.setTaskType(TaskType.OTHER);
stage.addToTaskList(otherTask);
}
if (task instanceof ConditionalTask) {
org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
listEntry.setNode(task.getId());
ConditionalTask t = (ConditionalTask) task;
for (Task<?> listTask : t.getListTasks()) {
if (t.getChildTasks() != null) {
org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
childEntry.setNode(listTask.getId());
// done processing the task
for (Task<?> childTask : t.getChildTasks()) {
childEntry.addToChildren(childTask.getId());
if (!tasksVisited.contains(childTask)) {
tasksToVisit.add(childTask);
}
}
query.getStageGraph().addToAdjacencyList(childEntry);
}
listEntry.addToChildren(listTask.getId());
if (!tasksVisited.contains(listTask)) {
tasksToVisit.add(listTask);
}
}
query.getStageGraph().addToAdjacencyList(listEntry);
} else if (task.getChildTasks() != null) {
org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
entry.setNode(task.getId());
// done processing the task
for (Task<?> childTask : task.getChildTasks()) {
entry.addToChildren(childTask.getId());
if (!tasksVisited.contains(childTask)) {
tasksToVisit.add(childTask);
}
}
query.getStageGraph().addToAdjacencyList(entry);
}
}
}
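
Structurally, populateQueryPlan is a breadth-first walk over the task DAG: a queue of tasks to visit, a set of already-visited tasks, and one Adjacency entry per task listing the ids of its children (DISJUNCTIVE for the branches of a ConditionalTask, CONJUNCTIVE otherwise). Below is a minimal, framework-free sketch of that traversal pattern; the Node class and the adjacency helper are hypothetical stand-ins, not Hive API.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

// Hypothetical stand-in for Task<?>: just an id plus child links.
final class Node {
  final String id;
  final List<Node> children;
  Node(String id, List<Node> children) {
    this.id = id;
    this.children = children;
  }
}

public final class StageGraphSketch {
  // Breadth-first walk over a task DAG, recording each node's child ids once.
  // Mirrors the queue-plus-visited-set pattern used by populateQueryPlan above.
  static Map<String, List<String>> adjacency(List<Node> roots) {
    Map<String, List<String>> edges = new LinkedHashMap<>();
    Queue<Node> toVisit = new ArrayDeque<>(roots);
    Set<Node> visited = new HashSet<>();
    while (!toVisit.isEmpty()) {
      Node node = toVisit.remove();
      if (!visited.add(node)) {
        continue; // a node reachable from several parents is emitted only once
      }
      List<String> childIds = new ArrayList<>();
      for (Node child : node.children) {
        childIds.add(child.id);
        if (!visited.contains(child)) {
          toVisit.add(child);
        }
      }
      edges.put(node.id, childIds);
    }
    return edges;
  }
}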
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class ReduceRecordProcessor method init.
@Override
void init(MRTaskReporter mrReporter, Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
super.init(mrReporter, inputs, outputs);
MapredContext.init(false, new JobConf(jconf));
List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
// TODO HIVE-14042. Move to using a loop and a timed wait once TEZ-3302 is fixed.
checkAbortCondition();
if (shuffleInputs != null) {
LOG.info("Waiting for ShuffleInputs to become ready");
processorContext.waitForAllInputsReady(new ArrayList<Input>(shuffleInputs));
}
connectOps.clear();
ReduceWork redWork = reduceWork;
LOG.info("Main work is " + reduceWork.getName());
List<HashTableDummyOperator> workOps = reduceWork.getDummyOps();
Set<HashTableDummyOperator> dummyOps = workOps == null ? new HashSet<>() : new HashSet<>(workOps);
tagToReducerMap.put(redWork.getTag(), redWork);
if (mergeWorkList != null) {
for (BaseWork mergeWork : mergeWorkList) {
LOG.debug("Additional work {}", mergeWork.getName());
workOps = mergeWork.getDummyOps();
if (workOps != null) {
dummyOps.addAll(workOps);
}
ReduceWork mergeReduceWork = (ReduceWork) mergeWork;
reducer = mergeReduceWork.getReducer();
// Check immediately after the reducer is assigned, in case the abort came in during initialization
checkAbortCondition();
DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer);
connectOps.put(mergeReduceWork.getTag(), dummyStoreOp);
tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork);
}
((TezContext) MapredContext.get()).setDummyOpsMap(connectOps);
}
checkAbortCondition();
bigTablePosition = (byte) reduceWork.getTag();
ObjectInspector[] mainWorkOIs = null;
((TezContext) MapredContext.get()).setInputs(inputs);
((TezContext) MapredContext.get()).setTezProcessorContext(processorContext);
int numTags = reduceWork.getTagToValueDesc().size();
reducer = reduceWork.getReducer();
// Check immediately after the reducer is assigned, in case the abort came in during initialization
checkAbortCondition();
// set memory available for operators
long memoryAvailableToTask = processorContext.getTotalMemoryAvailableToTask();
if (reducer.getConf() != null) {
reducer.getConf().setMaxMemoryAvailable(memoryAvailableToTask);
LOG.info("Memory available for operators set to {}", LlapUtil.humanReadableByteCount(memoryAvailableToTask));
}
OperatorUtils.setMemoryAvailable(reducer.getChildOperators(), memoryAvailableToTask);
// Set up the dynamic values registry
String valueRegistryKey = DynamicValue.DYNAMIC_VALUE_REGISTRY_CACHE_KEY;
DynamicValueRegistryTez registryTez = dynamicValueCache.retrieve(valueRegistryKey, () -> new DynamicValueRegistryTez());
dynamicValueCacheKeys.add(valueRegistryKey);
RegistryConfTez registryConf = new RegistryConfTez(jconf, reduceWork, processorContext, inputs);
registryTez.init(registryConf);
checkAbortCondition();
if (numTags > 1) {
sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources);
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, mainWorkOIs);
} else {
numTags = tagToReducerMap.keySet().size();
sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
for (int i : tagToReducerMap.keySet()) {
redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
// Check immediately after the reducer is assigned, in case the abort came in during initialization
checkAbortCondition();
initializeSourceForTag(redWork, i, mainWorkOIs, sources, redWork.getTagToValueDesc().get(0), redWork.getTagToInput().get(0));
reducer.initializeLocalWork(jconf);
}
reducer = reduceWork.getReducer();
// Check immediately after the reducer is assigned, in case the abort came in during initialization
checkAbortCondition();
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[bigTablePosition] });
for (int i : tagToReducerMap.keySet()) {
if (i == bigTablePosition) {
continue;
}
redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
// Check immediately after the reducer is assigned, in case the abort came in during initialization
checkAbortCondition();
reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[i] });
}
}
checkAbortCondition();
reducer = reduceWork.getReducer();
// initialize reduce operator tree
try {
LOG.info(reducer.dump(0));
// initialize the dummy parent operators as well.
for (HashTableDummyOperator dummyOp : dummyOps) {
// TODO HIVE-14042. Propagating abort to dummyOps.
dummyOp.initialize(jconf, null);
checkAbortCondition();
}
// set output collector for any reduce sink operators in the pipeline.
List<Operator<?>> children = new ArrayList<>();
children.add(reducer);
children.addAll(dummyOps);
createOutputMap();
OperatorUtils.setChildrenCollector(children, outMap);
checkAbortCondition();
reducer.setReporter(reporter);
MapredContext.get().setReporter(reporter);
} catch (Throwable e) {
super.setAborted(true);
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else if (e instanceof InterruptedException) {
LOG.info("Hit an interrupt while initializing ReduceRecordProcessor. Message={}", e.getMessage());
throw (InterruptedException) e;
} else {
throw new RuntimeException(redWork.getName() + " operator initialization failed", e);
}
}
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
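
One step worth highlighting is the memory setup: the processor reads the total memory available to the task once and then pushes that single figure down to the reducer and its child operators via OperatorUtils.setMemoryAvailable. Below is a rough sketch of what such a helper can look like, assuming a simplified Op stand-in rather than Hive's Operator class; the real utility may differ in detail.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical stand-in for an operator: real Hive operators expose child lists
// and a descriptor with setMaxMemoryAvailable(long); this sketch keeps only the
// pieces needed to show the traversal.
final class Op {
  long maxMemoryAvailable;
  final List<Op> children = new ArrayList<>();
}

public final class MemoryBudgetSketch {
  // Push one per-task memory budget down an operator DAG, visiting each operator
  // exactly once even when it is shared by several parents.
  static void setMemoryAvailable(List<Op> ops, long bytes) {
    Set<Op> visited = new HashSet<>();
    for (Op op : ops) {
      push(op, bytes, visited);
    }
  }

  private static void push(Op op, long bytes, Set<Op> visited) {
    if (op == null || !visited.add(op)) {
      return;
    }
    op.maxMemoryAvailable = bytes;
    for (Op child : op.children) {
      push(child, bytes, visited);
    }
  }
}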
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class AlterTableConcatenateOperation method getMergeFileWork.
private MergeFileWork getMergeFileWork(CompilationOpContext opContext) {
List<Path> inputDirList = Lists.newArrayList(desc.getInputDir());
// merge work only needs input and output.
MergeFileWork mergeWork = new MergeFileWork(inputDirList, desc.getOutputDir(), desc.getInputFormatClass().getName(), desc.getTableDesc());
mergeWork.setListBucketingCtx(desc.getLbCtx());
mergeWork.resolveConcatenateMerge(context.getDb().getConf());
mergeWork.setMapperCannotSpanPartns(true);
mergeWork.setSourceTableInputFormat(desc.getInputFormatClass().getName());
Map<Path, List<String>> pathToAliases = new LinkedHashMap<>();
List<String> inputDirStr = Lists.newArrayList(inputDirList.toString());
pathToAliases.put(desc.getInputDir(), inputDirStr);
mergeWork.setPathToAliases(pathToAliases);
FileMergeDesc fmd = getFileMergeDesc();
Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(opContext, fmd);
Map<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
aliasToWork.put(inputDirList.toString(), mergeOp);
mergeWork.setAliasToWork(aliasToWork);
return mergeWork;
}
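
The interesting part of this method is the double bookkeeping: the same alias string (inputDirList.toString()) is used both as the value in pathToAliases and as the key in aliasToWork, which is how the input path gets routed to the merge operator created by OperatorFactory.get. A small, self-contained illustration of that wiring with plain collections follows; java.nio.Path and the "mergeOp" string are stand-ins for Hive's Path and Operator types.

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public final class AliasWiringSketch {
  public static void main(String[] args) {
    Path inputDir = Paths.get("/warehouse/db/table/part=1"); // hypothetical input directory
    String alias = List.of(inputDir).toString();             // same alias key used for both maps
    String mergeOp = "mergeOp";                               // placeholder for the merge Operator

    // input path -> aliases that read it
    Map<Path, List<String>> pathToAliases = new LinkedHashMap<>();
    pathToAliases.put(inputDir, List.of(alias));

    // alias -> root operator of the plan that consumes it
    Map<String, String> aliasToWork = new LinkedHashMap<>();
    aliasToWork.put(alias, mergeOp);

    System.out.println(pathToAliases);
    System.out.println(aliasToWork);
  }
}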
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SparkMapRecordHandler method init.
@Override
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter) throws Exception {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
super.init(job, output, reporter);
try {
jc = job;
execContext = new ExecMapperContext(jc);
// create map and fetch operators
MapWork mrwork = Utilities.getMapWork(job);
for (PartitionDesc part : mrwork.getAliasToPartnInfo().values()) {
TableDesc tableDesc = part.getTableDesc();
Utilities.copyJobSecretToTableProperties(tableDesc);
}
CompilationOpContext runtimeCtx = new CompilationOpContext();
if (mrwork.getVectorMode()) {
mo = new VectorMapOperator(runtimeCtx);
} else {
mo = new MapOperator(runtimeCtx);
}
mo.setConf(mrwork);
// initialize map operator
mo.initialize(jc, null);
mo.setChildren(job);
LOG.info(mo.dump(0));
// initialize map local work
localWork = mrwork.getMapRedLocalWork();
execContext.setLocalWork(localWork);
MapredContext.init(true, new JobConf(jc));
MapredContext.get().setReporter(reporter);
mo.passExecContext(execContext);
mo.initializeLocalWork(jc);
mo.initializeMapOperator(jc);
mo.setReporter(rp);
if (localWork == null) {
return;
}
// The following code is for mapjoin
// initialize all the dummy ops
LOG.info("Initializing dummy operator");
List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
dummyOp.setExecContext(execContext);
dummyOp.initialize(jc, null);
}
} catch (Throwable e) {
abort = true;
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
throw new RuntimeException("Map operator initialization failed: " + e, e);
}
}
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
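
The catch block follows a pattern that shows up in several of these handlers: mark the work as aborted, rethrow OutOfMemoryError unwrapped (allocating a new exception while out of memory can itself fail), and wrap anything else in a RuntimeException with the original cause attached. A condensed, self-contained sketch of that pattern is below, with a Runnable standing in for the real initialization body.

public final class InitGuardSketch {
  private volatile boolean abort = false;

  // Run an initialization body; on failure, record the abort and rethrow in a
  // way that preserves OutOfMemoryError and the original cause for other errors.
  void runInit(Runnable initBody) {
    try {
      initBody.run();
    } catch (Throwable e) {
      abort = true;
      if (e instanceof OutOfMemoryError) {
        throw (OutOfMemoryError) e; // do not allocate while out of memory
      }
      throw new RuntimeException("initialization failed: " + e, e);
    }
  }
}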
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class ConvertJoinMapJoin method convertJoinMapJoin.
/*
 * Once we have decided on the map join, the tree would transform from
 *
 *        |                   |
 *       Join               MapJoin
 *      / \                /     \
 *    RS   RS   --->     RS       TS (big table)
 *   /      \           /
 *  TS       TS        TS (small table)
 *
 * for tez.
 */
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, MapJoinConversion mapJoinConversion, boolean removeReduceSink) throws SemanticException {
// bail out if any parent is a MuxOperator; the mux masks the emit keys of the constituent reduce sinks.
for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
if (parentOp instanceof MuxOperator) {
return null;
}
}
// the conversion checks have already passed, so we can safely convert the join to a map join.
final int bigTablePosition = mapJoinConversion.getBigTablePos();
MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
if (mapJoinOp == null) {
return null;
}
MapJoinDesc mapJoinDesc = mapJoinOp.getConf();
mapJoinDesc.setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
List<ExprNodeDesc> joinExprs = mapJoinDesc.getKeys().values().iterator().next();
if (joinExprs.size() == 0) {
// In case of cross join, we disable hybrid grace hash join
mapJoinDesc.setHybridHashJoin(false);
}
Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
if (parentBigTableOp instanceof ReduceSinkOperator) {
Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
if (removeReduceSink) {
for (Operator<?> p : parentBigTableOp.getParentOperators()) {
// we might have generated a dynamic partition operator chain. Since
// we're removing the reduce sink we need to remove that too.
Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
for (Operator<?> c : p.getChildOperators()) {
AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
if (event != null) {
dynamicPartitionOperators.add(c);
opEventPairs.put(c, event);
}
}
for (Operator<?> c : dynamicPartitionOperators) {
if (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
p.removeChild(c);
// at this point we've found the fork in the op pipeline that has the pruning as a child plan.
LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() + ". Need to be removed together with reduce sink");
}
}
for (Operator<?> op : dynamicPartitionOperators) {
context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
}
}
mapJoinOp.getParentOperators().remove(bigTablePosition);
if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
mapJoinOp.getParentOperators().add(bigTablePosition, parentBigTableOp.getParentOperators().get(0));
}
parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
}
for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
if (!(op.getChildOperators().contains(mapJoinOp))) {
op.getChildOperators().add(mapJoinOp);
}
op.getChildOperators().remove(joinOp);
}
// remove semi-join branches that could create a cycle with this map join; the semi-join filter is computed in a separate task.
if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) {
removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
}
}
return mapJoinOp;
}
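
After the conversion itself, most of the remaining work is DAG surgery: the big-table ReduceSinkOperator is unlinked, its parent becomes a direct parent of the new MapJoinOperator, and every remaining parent has its child list switched from the old JoinOperator to the map join. The sketch below shows that kind of bidirectional re-linking on a hypothetical OpNode type; it illustrates the pattern only and is not Hive's operator API.

import java.util.ArrayList;
import java.util.List;

// Hypothetical DAG node with links in both directions, standing in for Hive
// operators, which keep both parent and child operator lists.
final class OpNode {
  final String name;
  final List<OpNode> parents = new ArrayList<>();
  final List<OpNode> children = new ArrayList<>();
  OpNode(String name) {
    this.name = name;
  }
}

public final class RewireSketch {
  // Replace oldChild with newChild under every parent of oldChild, keeping the
  // parent and child lists consistent on both sides of each edge.
  static void replaceChild(OpNode oldChild, OpNode newChild) {
    for (OpNode parent : new ArrayList<>(oldChild.parents)) {
      parent.children.remove(oldChild);
      if (!parent.children.contains(newChild)) {
        parent.children.add(newChild);
      }
      if (!newChild.parents.contains(parent)) {
        newChild.parents.add(parent);
      }
      oldChild.parents.remove(parent);
    }
  }
}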