use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class GenTezWork method process.
/**
 * Closes off the operator pipeline that runs from {@code context.currentRootOperator} down to
 * the visited operator {@code nd} and records it as a unit of work in the current task's
 * {@link TezWork}.
 *
 * <p>Depending on the state carried in the {@link GenTezProcContext}, this either reuses the
 * work already registered for the root, or creates a new map work (no preceding work) or
 * reduce work (preceding work present). It then performs the merge join, map join and union
 * bookkeeping, connects the work to any already-known downstream work, and finally sets up
 * the context (root, parent of root, preceding work) for the next pipeline when the visited
 * operator has a child.
 *
 * @param nd the operator being visited; cast to {@code Operator<?>}
 * @param stack the walker's operator stack
 * @param procContext the processing context; must be a {@link GenTezProcContext}
 * @param nodeOutputs not used by this method
 * @return always {@code null}
 * @throws SemanticException if a dynamic partitioned hash join has no entry in
 *         {@code context.mapJoinParentMap}
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
GenTezProcContext context = (GenTezProcContext) procContext;
assert context != null && context.currentTask != null && context.currentRootOperator != null;
// Operator is a file sink or reduce sink. Something that forces
// a new vertex.
Operator<?> operator = (Operator<?>) nd;
// root is the start of the operator pipeline we're currently
// packing into a vertex, typically a table scan, union or join
Operator<?> root = context.currentRootOperator;
LOG.debug("Root operator: " + root);
LOG.debug("Leaf operator: " + operator);
if (context.clonedReduceSinks.contains(operator)) {
// This reduce sink is a clone created further below (multi-child broadcast edge);
// just skip and keep going
return null;
}
TezWork tezWork = context.currentTask.getWork();
// Right now the work graph is pretty simple. If there is no
// Preceding work we have a root and will generate a map
// vertex. If there is a preceding work we will generate
// a reduce vertex
BaseWork work;
if (context.rootToWorkMap.containsKey(root)) {
// We've seen this root before, i.e. work already exists for it. Reusing that work
// will result into a vertex with multiple FS or RS operators.
if (context.childToWorkMap.containsKey(operator)) {
// if we've seen both root and child, we can bail.
// clear out the mapjoin set. we don't need it anymore.
context.currentMapJoinOperators.clear();
// clear out the union set. we don't need it anymore.
context.currentUnionOperators.clear();
return null;
} else {
// At this point we don't have to do anything special. Just
// run through the regular paces w/o creating a new task.
work = context.rootToWorkMap.get(root);
}
} else {
// create a new vertex
if (context.preceedingWork == null) {
work = utils.createMapWork(context, root, tezWork, null);
} else {
work = GenTezUtils.createReduceWork(context, root, tezWork);
}
context.rootToWorkMap.put(root, work);
}
// this is where we set the sort columns that we will be using for KeyValueInputMerge
if (operator instanceof DummyStoreOperator) {
work.addSortCols(root.getOpTraits().getSortCols().get(0));
}
// Remember that this leaf operator belongs to this work (a leaf may map to several works).
if (!context.childToWorkMap.containsKey(operator)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.childToWorkMap.put(operator, workItems);
} else {
context.childToWorkMap.get(operator).add(work);
}
// Merge join handling: the work created above is folded into a MergeJoinWork, which then
// takes its place in the graph and in the context maps.
if (context.currentMergeJoinOperator != null) {
// we are currently walking the big table side of the merge join. we need to create or hook up
// merge join work.
MergeJoinWork mergeJoinWork = null;
if (context.opMergeJoinWorkMap.containsKey(context.currentMergeJoinOperator)) {
// we have found a merge work corresponding to this closing operator. Hook up this work.
mergeJoinWork = context.opMergeJoinWorkMap.get(context.currentMergeJoinOperator);
} else {
// we need to create the merge join work
mergeJoinWork = new MergeJoinWork();
mergeJoinWork.setMergeJoinOperator(context.currentMergeJoinOperator);
tezWork.add(mergeJoinWork);
context.opMergeJoinWorkMap.put(context.currentMergeJoinOperator, mergeJoinWork);
}
// connect the work correctly.
work.addSortCols(root.getOpTraits().getSortCols().get(0));
mergeJoinWork.addMergedWork(work, null, context.leafOperatorToFollowingWork);
// NOTE(review): parentOp is only referenced by the commented-out line below.
Operator<? extends OperatorDesc> parentOp = getParentFromStack(context.currentMergeJoinOperator, stack);
// Set the big table position. Both the reduce work and merge join operator
// should be set with the same value.
// int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp);
int pos = context.currentMergeJoinOperator.getConf().getBigTablePosition();
work.setTag(pos);
context.currentMergeJoinOperator.getConf().setBigTablePosition(pos);
tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
// Re-route every incoming edge of the work to the merge join work, keeping the edge property.
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
}
// Same for every outgoing edge.
for (BaseWork childWork : tezWork.getChildren(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork);
tezWork.disconnect(work, childWork);
tezWork.connect(mergeJoinWork, childWork, edgeProp);
}
// The merge join work now stands in for the original work in the graph and the lookup maps.
tezWork.remove(work);
context.rootToWorkMap.put(root, mergeJoinWork);
context.childToWorkMap.get(operator).remove(work);
context.childToWorkMap.get(operator).add(mergeJoinWork);
work = mergeJoinWork;
context.currentMergeJoinOperator = null;
}
// remember which mapjoin operator links with which work
if (!context.currentMapJoinOperators.isEmpty()) {
for (MapJoinOperator mj : context.currentMapJoinOperators) {
// For dynamic partitioned hash joins, record the small table parents of the map join
// so we can later run the same logic that is run in ReduceSinkMapJoinProc.
if (mj.getConf().isDynamicPartitionHashJoin()) {
// Since this is a dynamic partitioned hash join, the work for this join should be a ReduceWork
ReduceWork reduceWork = (ReduceWork) work;
int bigTablePosition = mj.getConf().getPosBigTable();
reduceWork.setTag(bigTablePosition);
// Use context.mapJoinParentMap to get the original RS parents, because
// the MapJoin's parents may have been replaced by dummy operator.
List<Operator<?>> mapJoinOriginalParents = context.mapJoinParentMap.get(mj);
if (mapJoinOriginalParents == null) {
throw new SemanticException("Unexpected error - context.mapJoinParentMap did not have an entry for " + mj);
}
for (int pos = 0; pos < mapJoinOriginalParents.size(); ++pos) {
// This processing only needs to happen for the small tables
if (pos == bigTablePosition) {
continue;
}
Operator<?> parentOp = mapJoinOriginalParents.get(pos);
context.smallTableParentToMapJoinMap.put(parentOp, mj);
ReduceSinkOperator parentRS = (ReduceSinkOperator) parentOp;
// TableDesc needed for dynamic partitioned hash join
GenMapRedUtils.setKeyAndValueDesc(reduceWork, parentRS);
// Only reduce sinks that are no longer listed as unprocessed for this map join
// can be hooked up here.
if (!context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(parentRS)) {
// This reduce sink has been processed already, so the work for the parentRS exists
BaseWork parentWork = ReduceSinkMapJoinProc.getMapJoinParentWork(context, parentRS);
int tag = parentRS.getConf().getTag();
// a tag of -1 is recorded as tag 0
tag = (tag == -1 ? 0 : tag);
reduceWork.getTagToInput().put(tag, parentWork.getName());
}
}
}
LOG.debug("Processing map join: " + mj);
// remember the work this mapjoin ended up in, so it can be looked up via
// context.mapJoinWorkMap later
if (!context.mapJoinWorkMap.containsKey(mj)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.mapJoinWorkMap.put(mj, workItems);
} else {
context.mapJoinWorkMap.get(mj).add(work);
}
/*
* this happens in case of map join operations.
* The tree looks like this:
*
* RS <--- we are here perhaps
* |
* MapJoin
* / \
* RS TS
* /
* TS
*
* If we are at the RS pointed above, and we may have already visited the
* RS following the TS, we have already generated work for the TS-RS.
* We need to hook the current work to this generated work.
*/
if (context.linkOpWithWorkMap.containsKey(mj)) {
Map<BaseWork, TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
if (linkWorkMap != null) {
// Note: it's not quite clear why this is done inside this if. Seems like it should be on the top level.
if (context.linkChildOpWithDummyOp.containsKey(mj)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding dummy ops to work: " + work.getName() + ": " + context.linkChildOpWithDummyOp.get(mj));
}
for (Operator<?> dummy : context.linkChildOpWithDummyOp.get(mj)) {
work.addDummyOp((HashTableDummyOperator) dummy);
}
}
// Connect every already-generated parent work to the current work using the recorded edge.
for (Entry<BaseWork, TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
BaseWork parentWork = parentWorkMap.getKey();
LOG.debug("connecting " + parentWork.getName() + " with " + work.getName());
TezEdgeProperty edgeProp = parentWorkMap.getValue();
tezWork.connect(parentWork, work, edgeProp);
if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) {
tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES);
}
// Point the reduce sinks recorded for the parent work at this work by setting
// their output name to the name of the downstream work
for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(r.getCompilationOpContext(), (ReduceSinkDesc) r.getConf().clone(), new RowSchema(r.getSchema()), r.getParentOperators());
context.clonedReduceSinks.add(r);
}
r.getConf().setOutputName(work.getName());
context.connectedReduceSinks.add(r);
}
}
}
}
}
// clear out the set. we don't need it anymore.
context.currentMapJoinOperators.clear();
}
// Detach the root from its parents, remembering for each parent that this work follows it:
// we might have to connect parent work with this work later.
// (Iterates over a copy because removeParent is called on root inside the loop.)
for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing " + parent + " as parent from " + root);
}
context.leafOperatorToFollowingWork.remove(parent);
context.leafOperatorToFollowingWork.put(parent, work);
root.removeParent(parent);
}
if (!context.currentUnionOperators.isEmpty()) {
// if there are union all operators, it means that the walking context contains union all operators.
// please see more details of context.currentUnionOperator in GenTezWorkWalker
UnionWork unionWork;
if (context.unionWorkMap.containsKey(operator)) {
// a union work is already registered for this terminal operator; the assert below
// expects such an operator to have no children.
assert operator.getChildOperators().isEmpty();
unionWork = (UnionWork) context.unionWorkMap.get(operator);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
} else {
// we've not seen this terminal before. we need to check
// rootUnionWorkMap which contains the information of mapping the root
// operator of a union work to a union work
unionWork = context.rootUnionWorkMap.get(root);
if (unionWork == null) {
// if unionWork is null, it means it is the first time. we need to
// create a union work object and add this work to it. Subsequent
// work should reference the union and not the actual work.
unionWork = GenTezUtils.createUnionWork(context, root, operator, tezWork);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
}
}
context.currentUnionOperators.clear();
// from here on the union work stands in for the actual work
work = unionWork;
}
// If downstream work is already registered for this leaf, connect to it now.
// Terminology: roots are data sources, leaves are data sinks.
if (context.leafOperatorToFollowingWork.containsKey(operator)) {
BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + followingWork);
if (operator instanceof DummyStoreOperator) {
// this is the small table side.
assert (followingWork instanceof MergeJoinWork);
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
work.setTag(mergeJoinOp.getTagForOperator(operator));
mergeJoinWork.addMergedWork(null, work, context.leafOperatorToFollowingWork);
tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
// Re-route the incoming edges of this work to the merge join work.
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
}
work = mergeJoinWork;
} else {
// need to add this branch to the key + value info
assert operator instanceof ReduceSinkOperator && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork) || followingWork instanceof UnionWork);
ReduceSinkOperator rs = (ReduceSinkOperator) operator;
// Resolve the ReduceWork that actually consumes this reduce sink's output.
ReduceWork rWork = null;
if (followingWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else if (followingWork instanceof UnionWork) {
// this can only be possible if there is merge work followed by the union
UnionWork unionWork = (UnionWork) followingWork;
int index = getFollowingWorkIndex(tezWork, unionWork, rs);
BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
if (baseWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
// disconnect the connection to union work and connect to merge work
followingWork = mergeJoinWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else {
rWork = (ReduceWork) baseWork;
}
} else {
rWork = (ReduceWork) followingWork;
}
GenMapRedUtils.setKeyAndValueDesc(rWork, rs);
// remember which parent belongs to which tag
int tag = rs.getConf().getTag();
rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());
// remember the output name of the reduce sink
rs.getConf().setOutputName(rWork.getName());
// For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any
// ReduceSink parents that we missed.
MapJoinOperator mj = context.smallTableParentToMapJoinMap.get(rs);
if (mj != null) {
// Only need to run the logic for tables we missed
if (context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(rs)) {
// ReduceSinkMapJoinProc logic does not work unless the ReduceSink is connected as
// a parent of the MapJoin, but at this point we have already removed all of the
// parents from the MapJoin.
// Try temporarily adding the RS as a parent
ArrayList<Operator<?>> tempMJParents = new ArrayList<Operator<?>>();
tempMJParents.add(rs);
mj.setParentOperators(tempMJParents);
// ReduceSink also needs MapJoin as child
List<Operator<?>> rsChildren = rs.getChildOperators();
rsChildren.add(mj);
// Since the MapJoin has had all of its other parents removed at this point,
// it would be bad here if processReduceSinkToHashJoin() tries to do anything
// with the RS parent based on its position in the list of parents.
ReduceSinkMapJoinProc.processReduceSinkToHashJoin(rs, mj, context);
// Remove any parents from MapJoin again
mj.removeParents();
// TODO: do we also need to remove the MapJoin from the list of RS's children?
}
}
if (!context.connectedReduceSinks.contains(rs)) {
// add dependency between the two work items
TezEdgeProperty edgeProp;
EdgeType edgeType = GenTezUtils.determineEdgeType(work, followingWork, rs);
if (rWork.isAutoReduceParallelism()) {
edgeProp = new TezEdgeProperty(context.conf, edgeType, true, rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer);
} else {
edgeProp = new TezEdgeProperty(edgeType);
}
tezWork.connect(work, followingWork, edgeProp);
context.connectedReduceSinks.add(rs);
}
}
} else {
LOG.debug("First pass. Leaf operator: " + operator);
}
// No children means we're at the bottom. If there are more operators
// the next item will be a new root.
if (!operator.getChildOperators().isEmpty()) {
assert operator.getChildOperators().size() == 1;
context.parentOfRoot = operator;
context.currentRootOperator = operator.getChildOperators().get(0);
context.preceedingWork = work;
}
return null;
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class TezTask method getMapWork.
/**
 * Collects the map works of this task's {@link TezWork} whose parents are all
 * {@link UnionWork} instances. A map work without any parent qualifies as well.
 *
 * @return the qualifying {@link MapWork} instances, in the order in which
 *         {@code getAllWorkUnsorted()} yields them
 */
@Override
public Collection<MapWork> getMapWork() {
  TezWork tezWork = getWork();
  List<MapWork> mapWorks = new LinkedList<MapWork>();
  for (BaseWork unit : tezWork.getAllWorkUnsorted()) {
    if (!(unit instanceof MapWork)) {
      continue;
    }
    // A single parent that is not a union disqualifies the map work.
    boolean onlyUnionParents = true;
    for (BaseWork parent : tezWork.getParents(unit)) {
      if (!(parent instanceof UnionWork)) {
        onlyUnionParents = false;
        break;
      }
    }
    if (onlyUnionParents) {
      mapWorks.add((MapWork) unit);
    }
  }
  return mapWorks;
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class GenMapRedUtils method createMRWorkForMergingFiles.
/**
 * Creates a map-only merge job (using CombineHiveInputFormat) for the output of the given
 * FileSink operator and wires it into a conditional task behind {@code currTask}.
 *
 * <pre>
 *          MR job J0:
 *          ...
 *          |
 *          v
 *          FileSinkOperator_1 (fsInput)
 *          |
 *          v
 *          Merge job J1:
 *          |
 *          v
 *          TableScan (using CombineHiveInputFormat) (tsMerge)
 *          |
 *          v
 *          FileSinkOperator (fsMerge)
 * </pre>
 *
 * Here the pathToPartitionInfo &amp; pathToAlias will remain the same, which means the paths
 * do not contain the dynamic partitions (their parent). So after the dynamic partitions are
 * created (after the first job finished before the moveTask or ConditionalTask start),
 * we need to change the pathToPartitionInfo &amp; pathToAlias to include the dynamic
 * partition directories.
 *
 * @param fsInput The FileSink operator.
 * @param finalName the final destination path the merge job should output.
 * @param dependencyTask passed through to the conditional task creation
 * @param mvTasks the move tasks among which the one matching the merge output is looked up
 * @param conf the Hive configuration (compression, merge mode and execution engine are read)
 * @param currTask the task the conditional task is created for
 * @throws SemanticException
 */
public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, Path finalName, DependencyCollectionTask dependencyTask, List<Task<MoveWork>> mvTasks, HiveConf conf, Task<? extends Serializable> currTask) throws SemanticException {
  //
  // 1. create the operator tree
  //
  FileSinkDesc inputDesc = fsInput.getConf();

  // TableScan operator reading what the input FileSink wrote
  RowSchema mergeSchema = fsInput.getSchema();
  TableScanOperator tsMerge = GenMapRedUtils.createTemporaryTableScanOperator(fsInput.getCompilationOpContext(), mergeSchema);

  // FileSink operator writing the merged result
  TableDesc outputTable = (TableDesc) inputDesc.getTableInfo().clone();
  boolean compressResult = conf.getBoolVar(ConfVars.COMPRESSRESULT);
  FileSinkDesc outputDesc = new FileSinkDesc(finalName, outputTable, compressResult);
  FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(outputDesc, mergeSchema, tsMerge);

  // If the input FileSinkOperator is dynamic partition enabled, the tsMerge input schema
  // needs to include the partition columns, and the fsOutput should have
  // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
  DynamicPartitionCtx dpCtx = inputDesc.getDynPartCtx();
  if (dpCtx == null || dpCtx.getNumDPCols() <= 0) {
    // non-partitioned table
    inputDesc.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
  } else {
    // append one ColumnInfo per dynamic partition column to the RowSchema signature
    ArrayList<ColumnInfo> columns = mergeSchema.getSignature();
    String tableAlias = inputDesc.getTableInfo().getTableName();
    for (String partitionColumn : dpCtx.getDPColNames()) {
      // partition columns are all typed as string and flagged as virtual columns
      columns.add(new ColumnInfo(partitionColumn, TypeInfoFactory.stringTypeInfo, tableAlias, true));
    }
    mergeSchema.setSignature(columns);

    // the output side gets its own DynamicPartitionCtx, copied from the input's
    DynamicPartitionCtx outputDpCtx = new DynamicPartitionCtx(dpCtx);
    outputDesc.setDynPartCtx(outputDpCtx);

    // update the table properties of the input to include the partition columns
    usePartitionColumns(inputDesc.getTableInfo().getProperties(), dpCtx.getDPColNames());
  }

  //
  // 2. Constructing a conditional task consisting of a move task and a map reduce task
  //
  LoadFileDesc loadDesc = new LoadFileDesc(inputDesc.getFinalDirName(), finalName, true, null, null);
  MoveWork dummyMove = new MoveWork(null, null, null, loadDesc, false);

  // RCFile block-level / ORC stripe-level merging uses a dedicated merge task.
  boolean fastFileMerge =
      (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && inputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class))
      || (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && inputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class));

  MapWork mergePlan;
  if (fastFileMerge) {
    mergePlan = GenMapRedUtils.createMergeTask(inputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, fsInput.getCompilationOpContext());
  } else {
    mergePlan = createMRWorkForMergingFiles(conf, tsMerge, inputDesc);
  }

  // Wrap the map work in the container the configured execution engine expects.
  String engine = conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE);
  Serializable mergeWork;
  if (engine.equals("tez")) {
    TezWork tezWork = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
    mergePlan.setName("File Merge");
    tezWork.add(mergePlan);
    mergeWork = tezWork;
  } else if (engine.equals("spark")) {
    SparkWork sparkWork = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
    mergePlan.setName("Spark Merge File Work");
    sparkWork.add(mergePlan);
    mergeWork = sparkWork;
  } else if (fastFileMerge) {
    // the dedicated merge task is used directly
    mergeWork = mergePlan;
  } else {
    MapredWork mapredWork = new MapredWork();
    mapredWork.setMapWork(mergePlan);
    mergeWork = mapredWork;
  }

  // use CombineHiveInputFormat for map-only merging
  mergePlan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");

  // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
  // know if merge MR2 will be triggered at execution time
  Task<MoveWork> moveTask = GenMapRedUtils.findMoveTask(mvTasks, fsOutput);
  ConditionalTask conditionalTask = GenMapRedUtils.createCondTask(conf, currTask, dummyMove, mergeWork, inputDesc.getFinalDirName(), finalName, moveTask, dependencyTask);

  // keep the dynamic partition and list bucketing contexts in the conditional task resolver context
  ConditionalResolverMergeFilesCtx resolverCtx = (ConditionalResolverMergeFilesCtx) conditionalTask.getResolverCtx();
  resolverCtx.setDPCtx(inputDesc.getDynPartCtx());
  resolverCtx.setLbCtx(inputDesc.getLbCtx());
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class GenMapRedUtils method isMergeRequired.
/**
 * Returns true iff the fsOp requires a merge.
 *
 * <p>As a side effect, when a move task is found for the file sink, the query is an insert
 * into a table, automatic stats gathering is enabled and the sink is not a materialization,
 * the file sink is marked for stats gathering and a stats task is added unless the move task
 * already has one following it.
 *
 * @param mvTasks candidate move tasks; {@code null} or empty yields {@code false}
 * @param hconf configuration from which the merge and stats settings are read
 * @param fsOp the file sink operator under consideration
 * @param currTask the task whose work type selects the applicable merge setting
 * @param isInsertTable whether this is an insert into a table
 * @return true if the output of {@code fsOp} should be merged, false otherwise
 */
public static boolean isMergeRequired(List<Task<MoveWork>> mvTasks, HiveConf hconf, FileSinkOperator fsOp, Task<? extends Serializable> currTask, boolean isInsertTable) {
  if (mvTasks == null || mvTasks.isEmpty()) {
    return false;
  }

  MoveTask moveTask = (MoveTask) GenMapRedUtils.findMoveTask(mvTasks, fsOp);
  if (moveTask == null) {
    // without a matching move task there is neither stats gathering nor merging
    return false;
  }

  boolean gatherStats = isInsertTable
      && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
      && !fsOp.getConf().isMaterialization();
  if (gatherStats) {
    // mark the FileSinkOperator for gathering stats
    fsOp.getConf().setGatherStats(true);
    fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    if (!moveTask.hasFollowingStatsTask()) {
      GenMapRedUtils.addStatsTask(fsOp, moveTask, currTask, hconf);
    }
  }

  // no merging if the move is to a local file system or the sink cannot be merged
  if (moveTask.isLocal() || !fsOp.getConf().canBeMerged()) {
    return false;
  }

  Serializable currentWork = currTask.getWork();
  if (currentWork instanceof TezWork) {
    // tez has its own config for merging
    return hconf.getBoolVar(ConfVars.HIVEMERGETEZFILES);
  }
  if (currentWork instanceof SparkWork) {
    // spark has its own config for merging
    return hconf.getBoolVar(ConfVars.HIVEMERGESPARKFILES);
  }

  if (fsOp.getConf().isLinkedFileSink()) {
    // linked file sinks merge as soon as either map-only or map-reduce merging is enabled
    return hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES);
  }

  if (!(currentWork instanceof MapredWork)) {
    return false;
  }

  // pick the setting that matches the job shape: map-only (no reduce work) or map-reduce
  ReduceWork reduceWork = ((MapredWork) currentWork).getReduceWork();
  boolean mergeMapOnly = hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null;
  boolean mergeMapRed = hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && reduceWork != null;
  return mergeMapOnly || mergeMapRed;
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class CrossProductCheck method dispatch.
/**
 * Runs the cross product checks that apply to the kind of task being visited.
 *
 * <p>MapReduce tasks get the map join and MR reducer checks, Tez tasks get the map join and
 * Tez reducer checks, and conditional tasks are handled by dispatching each of their listed
 * tasks. Any other task type is ignored.
 *
 * @param nd the task to inspect; cast to {@code Task<? extends Serializable>}
 * @param stack passed through unchanged when recursing into conditional tasks
 * @param nodeOutputs passed through unchanged when recursing into conditional tasks
 * @return always {@code null}
 * @throws SemanticException declared by the dispatcher contract; raised by the checks, if at all
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<? extends Serializable> task = (Task<? extends Serializable>) nd;

  if (task instanceof MapRedTask) {
    MapRedTask mapRedTask = (MapRedTask) task;
    MapredWork mapredWork = mapRedTask.getWork();
    checkMapJoins(mapRedTask);
    checkMRReducer(task.toString(), mapredWork);
    return null;
  }

  if (task instanceof ConditionalTask) {
    // check every alternative the conditional task may resolve to
    for (Task<? extends Serializable> nested : ((ConditionalTask) task).getListTasks()) {
      dispatch(nested, stack, nodeOutputs);
    }
    return null;
  }

  if (task instanceof TezTask) {
    TezWork tezWork = ((TezTask) task).getWork();
    checkMapJoins(tezWork);
    checkTezReducer(tezWork);
  }
  return null;
}
Aggregations