Example use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in the Apache Hive project.
From class HiveOpConverter, method translateJoin.
/**
 * Translates a join rel node into a Hive {@code JoinOperator} wrapped in an {@code OpAttr}.
 *
 * <p>Visible steps: convert every input through {@code dispatch}, collect the input
 * table aliases, shift the virtual-column positions of the non-first inputs, read the
 * join key expressions from the inputs, convert the remaining join filter conjunctions,
 * and finally call {@code genJoin}.
 *
 * <p>NOTE(review): this excerpt appears to have lost its closing braces and possibly some
 * statements. In the visible lines {@code children} is never populated, and
 * {@code reduceSinkOp}, {@code filterExpressionsForInput} and {@code expr} are assigned
 * but never used. Confirm against the full source before relying on this text.
 *
 * @param joinRel the join node to translate; the visible code handles {@code HiveJoin},
 *                {@code HiveMultiJoin} and {@code HiveSemiJoin}
 * @return an {@code OpAttr} built from the derived table alias, the collected
 *         virtual-column set and the generated join operator
 * @throws SemanticException if {@code joinRel} is none of the three handled join classes
 */
private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
// 0. Additional data structures needed for the join optimization
// through Hive
// baseSrc receives one table alias per join input (filled in the loop below).
String[] baseSrc = new String[joinRel.getInputs().size()];
// Alias used for the join output, both in genJoin and in the returned OpAttr.
String tabAlias = getHiveDerivedTableAlias();
// 1. Convert inputs
OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
for (int i = 0; i < inputs.length; i++) {
inputs[i] = dispatch(joinRel.getInput(i));
baseSrc[i] = inputs[i].tabAlias;
// 2. Generate tags
// Each child is cast to ReduceSinkOperator without a type check.
for (int tag = 0; tag < children.size(); tag++) {
ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
// 3. Virtual columns
Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
// The shifting is skipped only when joinRel is a SemiJoin that is not a HiveMultiJoin.
if (joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin)) {
// shift starts at the column count of the first input and grows by the column count
// of each later input, so input i is offset by the total width of the inputs before it.
int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
for (int i = 1; i < inputs.length; i++) {
newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
if (LOG.isDebugEnabled()) {
LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
// 4. Extract join key expressions from HiveSortExchange
ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
for (int i = 0; i < inputs.length; i++) {
// Unchecked cast: every join input is expected to be a HiveSortExchange here.
joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getJoinExpressions();
// 5. Extract rest of join predicate info. We infer the rest of join condition
// that will be added to the filters (join conditions that are not part of
// the join key)
List<RexNode> joinFilters;
if (joinRel instanceof HiveJoin) {
// Single filter wrapped into a one-element list.
joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
} else if (joinRel instanceof HiveMultiJoin) {
joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
} else if (joinRel instanceof HiveSemiJoin) {
joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
} else {
throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
for (int i = 0; i < joinFilters.size(); i++) {
List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
// A null filter entry contributes no expressions.
if (joinFilters.get(i) != null) {
// The filter is split into its conjunctions and each one is converted separately.
for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
ExprNodeDesc expr = convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
// 6. Generate Join operator
JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
// 7. Return result
return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
Example use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in the Apache Hive project.
From class HiveOpConverter, method visit.
/**
 * Translates a {@code HiveSortExchange} into a {@code ReduceSinkOperator}.
 *
 * <p>The input of the exchange is converted first via {@code dispatch}. The join keys of
 * the exchange are then converted to {@code ExprNodeDesc} instances and handed to
 * {@code genReduceSink} together with the first operator of the converted input.
 *
 * <p>NOTE(review): closing braces are missing in this excerpt, and statements may have
 * been dropped along with them.
 *
 * @param exchangeRel the exchange node to translate
 * @return an {@code OpAttr} holding the table alias, the input's virtual-column set and
 *         the new reduce sink operator
 * @throws SemanticException if the distribution type is not {@code Type.HASH_DISTRIBUTED}
 */
OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
OpAttr inputOpAf = dispatch(exchangeRel.getInput());
String tabAlias = inputOpAf.tabAlias;
// Fall back to a derived alias when the input supplies none.
if (tabAlias == null || tabAlias.length() == 0) {
tabAlias = getHiveDerivedTableAlias();
if (LOG.isDebugEnabled()) {
LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]");
RelDistribution distribution = exchangeRel.getDistribution();
if (distribution.getType() != Type.HASH_DISTRIBUTED) {
throw new SemanticException("Only hash distribution supported for LogicalExchange");
// One expression per join key, in join-key order.
ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getJoinKeys().size()];
for (int index = 0; index < exchangeRel.getJoinKeys().size(); index++) {
// NOTE(review): this passes inputOpAf.tabAlias, not the local tabAlias that may have
// been replaced by the derived alias above - confirm this is intentional.
expressions[index] = convertToExprNode(exchangeRel.getJoinKeys().get(index), exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf);
// NOTE(review): the two -1 arguments are positional; their meaning is defined by
// genReduceSink, which is not visible here.
ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1, Operation.NOT_ACID, hiveConf);
return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
Example use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in the Apache Hive project.
From class HiveGBOpConvUtil, method genReduceGBRS.
/**
 * Creates a {@code ReduceSinkOperator} as a child of the group-by operator found at
 * {@code inputOpAf.inputs.get(0)}.
 *
 * <p>Reduce keys, the optional grouping-set column and the reduce values are collected
 * into shared output-column, column-info and column-expression structures, which are
 * then used to build the reduce sink descriptor and its row schema.
 *
 * <p>NOTE(review): closing braces are missing in this excerpt.
 *
 * @param inputOpAf converted input; its first operator is cast to {@code GroupByOperator}
 * @param gbInfo    group-by information; the visible code reads {@code gbKeys} from it
 *                  and passes it to helper methods
 * @return an {@code OpAttr} with an empty alias, an empty virtual-column set and the new
 *         reduce sink operator
 * @throws SemanticException declared by the signature; no throw is visible in this block
 */
private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
// Unchecked cast: the first input operator is expected to be a GroupByOperator.
GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0);
List<ColumnInfo> gb1ColInfoLst = reduceSideGB1.getSchema().getSignature();
// Keys are requested for the index range 0 .. gbKeys.size() - 1.
ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(reduceSideGB1, 0, gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true);
if (inclGrpSetInReduceSide(gbInfo)) {
// The grouping-set column name is read from the group-by signature at index
// reduceKeys.size(), i.e. the column directly after the reduce keys.
addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true, reduceKeys, outputColumnNames, colInfoLst, colExprMap);
// Values start at the index equal to the number of keys in the group-by descriptor.
ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(reduceSideGB1, reduceSideGB1.getConf().getKeys().size(), outputColumnNames, colInfoLst, colExprMap, true, true);
// NOTE(review): the literal arguments (true, -1) are positional; their meaning is
// defined by PlanUtils.getReduceSinkDesc, which is not visible here.
ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), reduceSideGB1);
return new OpAttr("", new HashSet<Integer>(), rsOp);
Example use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in the Apache Hive project.
From class GenTezWork, method process.
/**
 * Handles an operator that closes the current operator pipeline and turns that pipeline
 * into a unit of Tez work.
 *
 * <p>Outline of the visible logic:
 * <ol>
 *   <li>Skip operators recorded in {@code context.clonedReduceSinks}.</li>
 *   <li>Look up the work for the current root in {@code context.rootToWorkMap}, or create
 *       map work (no preceding work) or reduce work (preceding work present).</li>
 *   <li>Hook up merge join work when {@code context.currentMergeJoinOperator} is set.</li>
 *   <li>Record and connect work for the map joins in
 *       {@code context.currentMapJoinOperators}.</li>
 *   <li>Create or connect union work when {@code context.currentUnionOperators} is
 *       non-empty.</li>
 *   <li>If the operator already has following work, connect the current work to it.</li>
 *   <li>If the operator has a child, store it as the next root in the context.</li>
 * </ol>
 *
 * <p>NOTE(review): this excerpt has lost its closing braces and apparently several
 * statements. Some locals are declared but never used in the visible lines, and the
 * lines beginning with {@code *} further down look like the remains of a block comment
 * whose delimiters are missing. Confirm details against the full source.
 *
 * @param nd          the node being processed; cast to {@code Operator<?>}
 * @param stack       the walker stack; passed to {@code getParentFromStack}
 * @param procContext the processing context; cast to {@code GenTezProcContext}
 * @param nodeOutputs not read in the visible code
 * @return {@code null} on every visible return path
 * @throws SemanticException if {@code context.mapJoinParentMap} has no entry for a
 *                           dynamic partitioned hash join operator
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
GenTezProcContext context = (GenTezProcContext) procContext;
// Sanity check; only evaluated when JVM assertions are enabled.
assert context != null && context.currentTask != null && context.currentRootOperator != null;
// Operator is a file sink or reduce sink. Something that forces
// a new vertex.
Operator<?> operator = (Operator<?>) nd;
// root is the start of the operator pipeline we're currently
// packing into a vertex, typically a table scan, union or join
Operator<?> root = context.currentRootOperator;
LOG.debug("Root operator: " + root);
LOG.debug("Leaf operator: " + operator);
if (context.clonedReduceSinks.contains(operator)) {
// just skip and keep going
return null;
TezWork tezWork = context.currentTask.getWork();
// Right now the work graph is pretty simple. If there is no
// Preceding work we have a root and will generate a map
// vertex. If there is a preceding work we will generate
// a reduce vertex
BaseWork work;
// Work has already been created for this root.
if (context.rootToWorkMap.containsKey(root)) {
// will result into a vertex with multiple FS or RS operators.
if (context.childToWorkMap.containsKey(operator)) {
// if we've seen both root and child, we can bail.
// clear out the mapjoin set. we don't need it anymore.
// clear out the union set. we don't need it anymore.
// NOTE(review): the clearing statements described above are not visible in this excerpt.
return null;
} else {
// At this point we don't have to do anything special. Just
// run through the regular paces w/o creating a new task.
work = context.rootToWorkMap.get(root);
} else {
// create a new vertex
if (context.preceedingWork == null) {
work = utils.createMapWork(context, root, tezWork, null);
} else {
work = GenTezUtils.createReduceWork(context, root, tezWork);
// Remember the work created for this root so later visits reuse it.
context.rootToWorkMap.put(root, work);
// this is where we set the sort columns that we will be using for KeyValueInputMerge
if (operator instanceof DummyStoreOperator) {
if (!context.childToWorkMap.containsKey(operator)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
context.childToWorkMap.put(operator, workItems);
} else {
// which can affect the working of all downstream transformations.
if (context.currentMergeJoinOperator != null) {
// we are currently walking the big table side of the merge join. we need to create or hook up
// merge join work.
MergeJoinWork mergeJoinWork = null;
if (context.opMergeJoinWorkMap.containsKey(context.currentMergeJoinOperator)) {
// we have found a merge work corresponding to this closing operator. Hook up this work.
mergeJoinWork = context.opMergeJoinWorkMap.get(context.currentMergeJoinOperator);
} else {
// we need to create the merge join work
mergeJoinWork = new MergeJoinWork();
context.opMergeJoinWorkMap.put(context.currentMergeJoinOperator, mergeJoinWork);
// connect the work correctly.
mergeJoinWork.addMergedWork(work, null, context.leafOperatorToFollowingWork);
// NOTE(review): parentOp and pos are assigned but not used in the visible lines.
Operator<? extends OperatorDesc> parentOp = getParentFromStack(context.currentMergeJoinOperator, stack);
// Set the big table position. Both the reduce work and merge join operator
// should be set with the same value.
// int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp);
int pos = context.currentMergeJoinOperator.getConf().getBigTablePosition();
tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
// Re-point every incoming edge of work at the merge join work, keeping its edge property.
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
// Same for every outgoing edge of work.
for (BaseWork childWork : tezWork.getChildren(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork);
tezWork.disconnect(work, childWork);
tezWork.connect(mergeJoinWork, childWork, edgeProp);
// From here on the root maps to the merge join work, and so does the local variable.
context.rootToWorkMap.put(root, mergeJoinWork);
work = mergeJoinWork;
context.currentMergeJoinOperator = null;
// remember which mapjoin operator links with which work
if (!context.currentMapJoinOperators.isEmpty()) {
for (MapJoinOperator mj : context.currentMapJoinOperators) {
// so we can later run the same logic that is run in ReduceSinkMapJoinProc.
if (mj.getConf().isDynamicPartitionHashJoin()) {
// Since this is a dynamic partitioned hash join, the work for this join should be a ReduceWork
ReduceWork reduceWork = (ReduceWork) work;
int bigTablePosition = mj.getConf().getPosBigTable();
// Use context.mapJoinParentMap to get the original RS parents, because
// the MapJoin's parents may have been replaced by dummy operator.
List<Operator<?>> mapJoinOriginalParents = context.mapJoinParentMap.get(mj);
if (mapJoinOriginalParents == null) {
throw new SemanticException("Unexpected error - context.mapJoinParentMap did not have an entry for " + mj);
for (int pos = 0; pos < mapJoinOriginalParents.size(); ++pos) {
// This processing only needs to happen for the small tables
// NOTE(review): the body of this check is not visible; per the comment above it
// presumably skips the big table position - confirm.
if (pos == bigTablePosition) {
Operator<?> parentOp = mapJoinOriginalParents.get(pos);
context.smallTableParentToMapJoinMap.put(parentOp, mj);
// Unchecked cast: the original parent is expected to be a ReduceSinkOperator.
ReduceSinkOperator parentRS = (ReduceSinkOperator) parentOp;
// TableDesc needed for dynamic partitioned hash join
GenMapRedUtils.setKeyAndValueDesc(reduceWork, parentRS);
// has its ReduceSink parent removed.
if (!context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(parentRS)) {
// This reduce sink has been processed already, so the work for the parentRS exists
BaseWork parentWork = ReduceSinkMapJoinProc.getMapJoinParentWork(context, parentRS);
// A tag of -1 is recorded as tag 0.
int tag = parentRS.getConf().getTag();
tag = (tag == -1 ? 0 : tag);
reduceWork.getTagToInput().put(tag, parentWork.getName());
LOG.debug("Processing map join: " + mj);
// mapjoin later
if (!context.mapJoinWorkMap.containsKey(mj)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
context.mapJoinWorkMap.put(mj, workItems);
} else {
// NOTE(review): the following lines starting with '*' look like the body of a block
// comment whose opening and closing delimiters were lost; they are kept unchanged.
* this happens in case of map join operations.
* The tree looks like this:
* RS <--- we are here perhaps
* |
* MapJoin
* / \
* /
* TS
* If we are at the RS pointed above, and we may have already visited the
* RS following the TS, we have already generated work for the TS-RS.
* We need to hook the current work to this generated work.
if (context.linkOpWithWorkMap.containsKey(mj)) {
Map<BaseWork, TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
if (linkWorkMap != null) {
// Note: it's not quite clear why this is done inside this if. Seems like it should be on the top level.
if (context.linkChildOpWithDummyOp.containsKey(mj)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding dummy ops to work: " + work.getName() + ": " + context.linkChildOpWithDummyOp.get(mj));
for (Operator<?> dummy : context.linkChildOpWithDummyOp.get(mj)) {
work.addDummyOp((HashTableDummyOperator) dummy);
// Connect every previously generated parent work to the current work.
for (Entry<BaseWork, TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
BaseWork parentWork = parentWorkMap.getKey();
LOG.debug("connecting " + parentWork.getName() + " with " + work.getName());
TezEdgeProperty edgeProp = parentWorkMap.getValue();
tezWork.connect(parentWork, work, edgeProp);
if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) {
tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES);
// of the downstream work
for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
// An output name that is already set marks a reduce sink that was set up before.
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(r.getCompilationOpContext(), (ReduceSinkDesc) r.getConf().clone(), new RowSchema(r.getSchema()), r.getParentOperators());
// clear out the set. we don't need it anymore.
// we might have to connect parent work with this work later.
// Iterates over a copy of the parent list; presumably parents are removed from root
// inside the loop (the removal itself is not visible here) - confirm.
for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing " + parent + " as parent from " + root);
context.leafOperatorToFollowingWork.put(parent, work);
if (!context.currentUnionOperators.isEmpty()) {
// if there are union all operators, it means that the walking context contains union all operators.
// please see more details of context.currentUnionOperator in GenTezWorkWalker
UnionWork unionWork;
if (context.unionWorkMap.containsKey(operator)) {
// since we've passed this operator before.
assert operator.getChildOperators().isEmpty();
unionWork = (UnionWork) context.unionWorkMap.get(operator);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
} else {
// we've not seen this terminal before. we need to check
// rootUnionWorkMap which contains the information of mapping the root
// operator of a union work to a union work
unionWork = context.rootUnionWorkMap.get(root);
if (unionWork == null) {
// if unionWork is null, it means it is the first time. we need to
// create a union work object and add this work to it. Subsequent
// work should reference the union and not the actual work.
unionWork = GenTezUtils.createUnionWork(context, root, operator, tezWork);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
work = unionWork;
// reasons. Roots are data sources, leaves are data sinks. I know.
if (context.leafOperatorToFollowingWork.containsKey(operator)) {
BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + followingWork);
if (operator instanceof DummyStoreOperator) {
// this is the small table side.
assert (followingWork instanceof MergeJoinWork);
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
// NOTE(review): mergeJoinOp is not used in the visible lines.
CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
mergeJoinWork.addMergedWork(null, work, context.leafOperatorToFollowingWork);
tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
// Re-point every incoming edge of work at the merge join work.
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
work = mergeJoinWork;
} else {
// need to add this branch to the key + value info
assert operator instanceof ReduceSinkOperator && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork) || followingWork instanceof UnionWork);
ReduceSinkOperator rs = (ReduceSinkOperator) operator;
// rWork is resolved from followingWork depending on its concrete type.
ReduceWork rWork = null;
if (followingWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else if (followingWork instanceof UnionWork) {
// this can only be possible if there is merge work followed by the union
UnionWork unionWork = (UnionWork) followingWork;
int index = getFollowingWorkIndex(tezWork, unionWork, rs);
BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
if (baseWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
// disconnect the connection to union work and connect to merge work
followingWork = mergeJoinWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else {
rWork = (ReduceWork) baseWork;
} else {
rWork = (ReduceWork) followingWork;
GenMapRedUtils.setKeyAndValueDesc(rWork, rs);
// remember which parent belongs to which tag
// A tag of -1 is recorded as tag 0.
int tag = rs.getConf().getTag();
rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());
// remember the output name of the reduce sink
// For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any
// ReduceSink parents that we missed.
MapJoinOperator mj = context.smallTableParentToMapJoinMap.get(rs);
if (mj != null) {
// Only need to run the logic for tables we missed
if (context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(rs)) {
// ReduceSinkMapJoinProc logic does not work unless the ReduceSink is connected as
// a parent of the MapJoin, but at this point we have already removed all of the
// parents from the MapJoin.
// Try temporarily adding the RS as a parent
// NOTE(review): tempMJParents and rsChildren are declared but not used in the visible
// lines; the statements that attach and detach them appear to be missing.
ArrayList<Operator<?>> tempMJParents = new ArrayList<Operator<?>>();
// ReduceSink also needs MapJoin as child
List<Operator<?>> rsChildren = rs.getChildOperators();
// Since the MapJoin has had all of its other parents removed at this point,
// it would be bad here if processReduceSinkToHashJoin() tries to do anything
// with the RS parent based on its position in the list of parents.
ReduceSinkMapJoinProc.processReduceSinkToHashJoin(rs, mj, context);
// Remove any parents from MapJoin again
// TODO: do we also need to remove the MapJoin from the list of RS's children?
// Reduce sinks already listed in connectedReduceSinks are not connected again.
if (!context.connectedReduceSinks.contains(rs)) {
// add dependency between the two work items
TezEdgeProperty edgeProp;
EdgeType edgeType = GenTezUtils.determineEdgeType(work, followingWork, rs);
// With auto reduce parallelism the edge carries min/max reducer counts and the
// bytes-per-reducer setting; otherwise only the edge type.
if (rWork.isAutoReduceParallelism()) {
edgeProp = new TezEdgeProperty(context.conf, edgeType, true, rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer);
} else {
edgeProp = new TezEdgeProperty(edgeType);
tezWork.connect(work, followingWork, edgeProp);
} else {
LOG.debug("First pass. Leaf operator: " + operator);
// the next item will be a new root.
if (!operator.getChildOperators().isEmpty()) {
assert operator.getChildOperators().size() == 1;
// Hand over to the next pipeline: the only child becomes the new root and the
// current work becomes its preceding work.
context.parentOfRoot = operator;
context.currentRootOperator = operator.getChildOperators().get(0);
context.preceedingWork = work;
return null;
Example use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in the Apache Hive project.
From class GenSparkUtils, method createReduceWork.
/**
 * Creates a new {@code ReduceWork} for the given root operator and connects it to the
 * preceding work in the Spark work graph.
 *
 * <p>The work is named {@code "Reducer N"}, where N is the pre-incremented
 * {@code sequenceNumber}. The reduce sink stored in {@code context.parentOfRoot} is set
 * up against the new work and supplies the edge property for the connection.
 *
 * <p>NOTE(review): closing braces are missing in this excerpt. {@code maxExecutors} is
 * computed but not read in the visible lines; the statement that consumes it may have
 * been dropped.
 *
 * @param context   processing context; {@code parentOfRoot} and {@code preceedingWork}
 *                  are read from it
 * @param root      root operator of the reduce pipeline; must have at least one parent,
 *                  and every parent must be a {@code ReduceSinkOperator}
 * @param sparkWork work graph that receives the new edge
 * @return the newly created reduce work
 * @throws SemanticException declared by the signature; no throw is visible in this block
 */
public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork) throws SemanticException {
// Precondition failures are reported by Preconditions.checkArgument (presumably Guava's,
// which throws IllegalArgumentException - confirm against the file's imports).
Preconditions.checkArgument(!root.getParentOperators().isEmpty(), "AssertionError: expected root.getParentOperators() to be non-empty");
ReduceWork reduceWork = new ReduceWork("Reducer " + (++sequenceNumber));
LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
// Pick the maximum # reducers across all parents as the # of reduce tasks.
int maxExecutors = -1;
for (Operator<? extends OperatorDesc> parentOfRoot : root.getParentOperators()) {
Preconditions.checkArgument(parentOfRoot instanceof ReduceSinkOperator, "AssertionError: expected parentOfRoot to be an " + "instance of ReduceSinkOperator, but was " + parentOfRoot.getClass().getName());
ReduceSinkOperator reduceSink = (ReduceSinkOperator) parentOfRoot;
maxExecutors = Math.max(maxExecutors, reduceSink.getConf().getNumReducers());
// The reduce sink used for setup and for the edge comes from the context, not from the
// loop above. Unchecked cast: context.parentOfRoot is expected to be a ReduceSinkOperator.
ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
setupReduceSink(context, reduceWork, reduceSink);
SparkEdgeProperty edgeProp = getEdgeProperty(reduceSink, reduceWork);
sparkWork.connect(context.preceedingWork, reduceWork, edgeProp);
return reduceWork;