Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class BucketMapjoinProc, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  BucketJoinProcCtx context = (BucketJoinProcCtx) procCtx;
  MapJoinOperator mapJoinOperator = (MapJoinOperator) nd;
  // can the current mapjoin be converted to a bucketed mapjoin?
  boolean convert = canConvertMapJoinToBucketMapJoin(mapJoinOperator, context);
  HiveConf conf = context.getConf();
  // the bucketed mapjoin cannot be performed, but the user asked to enforce it
  if (!convert && conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETMAPJOIN)) {
    throw new SemanticException(ErrorMsg.BUCKET_MAPJOIN_NOT_POSSIBLE.getMsg());
  }
  if (convert) {
    // convert the mapjoin to a bucketized mapjoin
    convertMapJoinToBucketMapJoin(mapJoinOperator, context);
  }
  return null;
}
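Processors like this one are not called directly; they fire from Hive's rule-driven graph walker. Below is a minimal sketch of that wiring, assuming the classic org.apache.hadoop.hive.ql.lib API (newer Hive versions have renamed some of these types); the rule name "R1" and the way the contexts are obtained are illustrative.

// A minimal sketch (assumptions flagged above): registering BucketMapjoinProc
// with Hive's rule dispatcher and walking the operator DAG from the top.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.optimizer.BucketJoinProcCtx;
import org.apache.hadoop.hive.ql.optimizer.BucketMapjoinProc;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class BucketMapJoinWalk {
  public static void walk(ParseContext pctx, BucketJoinProcCtx ctx) throws SemanticException {
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    // fire on every node whose operator name starts with MAPJOIN
    rules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%"),
        new BucketMapjoinProc(pctx));
    Dispatcher disp = new DefaultRuleDispatcher(null, rules, ctx);
    GraphWalker walker = new DefaultGraphWalker(disp);
    // start from the table scans at the top of the operator DAG
    walker.startWalking(new ArrayList<Node>(pctx.getTopOps().values()), null);
  }
}

The "%" suffix in the rule pattern is the prefix-match convention used throughout Hive's optimizer rules.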
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class SparkSMBJoinHintOptimizer, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
  SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;
  boolean convert = canConvertBucketMapJoinToSMBJoin(mapJoinOp, stack, smbJoinContext, nodeOutputs);
  // the sort-merge bucketed mapjoin cannot be performed, but the user asked to enforce it
  if (!convert && pGraphContext.getConf().getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN)) {
    throw new SemanticException(ErrorMsg.SORTMERGE_MAPJOIN_FAILED.getMsg());
  }
  if (convert) {
    removeSmallTableReduceSink(mapJoinOp);
    convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
  }
  return null;
}
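Both processors above fail loudly only when the corresponding enforce flag is set; otherwise a failed conversion silently keeps the plain map join. A minimal sketch of those switches follows, using the HiveConf accessors; the property names in the comments are my reading of these ConfVars and may differ across Hive versions.

// A minimal sketch of the two enforce switches gating the SemanticExceptions above.
import org.apache.hadoop.hive.conf.HiveConf;

public class EnforceFlagsDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // hive.enforce.bucketmapjoin: fail the query instead of silently
    // falling back to a plain map join
    conf.setBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETMAPJOIN, true);
    // hive.enforce.sortmergebucketmapjoin: same, for the SMB variant
    conf.setBoolVar(HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN, true);
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETMAPJOIN)); // true
  }
}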
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class MapJoinProcessor, method generateMapJoinOperator.
public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator op, int mapJoinPos)
    throws SemanticException {
  HiveConf hiveConf = pctx.getConf();
  boolean noCheckOuterJoin = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)
      && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);
  MapJoinOperator mapJoinOp = convertMapJoin(pctx.getConf(), op, op.getConf().isLeftInputJoin(),
      op.getConf().getBaseSrc(), op.getConf().getMapAliases(), mapJoinPos, noCheckOuterJoin, true);
  // create a dummy select to select all columns
  genSelectPlan(pctx, mapJoinOp);
  return mapJoinOp;
}
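noCheckOuterJoin is true only when both bucket-join optimizations are enabled, which is what lets convertMapJoin skip the outer-join safety check. A small sketch isolating that conjunction; the property names in the comments are assumptions about the ConfVars mapping.

// A minimal sketch of the noCheckOuterJoin conjunction from generateMapJoinOperator.
import org.apache.hadoop.hive.conf.HiveConf;

public class NoCheckOuterJoinDemo {
  public static void main(String[] args) {
    HiveConf hiveConf = new HiveConf();
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN, true); // hive.optimize.bucketmapjoin
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN, true); // hive.optimize.bucketmapjoin.sortedmerge
    boolean noCheckOuterJoin =
        HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)
            && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);
    System.out.println(noCheckOuterJoin); // true only when both optimizations are on
  }
}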
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class MapJoinProcessor, method genSelectPlan.
protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
  List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
  input.setChildOperators(null);
  // create a dummy select - this select is needed by the walker to split the
  // mapjoin later on
  RowSchema inputRS = input.getSchema();
  ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
  ArrayList<String> outputs = new ArrayList<String>();
  List<String> outputCols = input.getConf().getOutputColumnNames();
  ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < outputCols.size(); i++) {
    String internalName = outputCols.get(i);
    ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
    ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(),
        valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
    exprs.add(colDesc);
    outputs.add(internalName);
    ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(), valueInfo.getTabAlias(),
        valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
    newCol.setAlias(valueInfo.getAlias());
    outputRS.add(newCol);
    colExprMap.put(internalName, colDesc);
  }
  SelectDesc select = new SelectDesc(exprs, outputs, false);
  SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), input);
  sel.setColumnExprMap(colExprMap);
  // insert the select operator between the mapjoin and its original children
  sel.setChildOperators(childOps);
  for (Operator<? extends OperatorDesc> ch : childOps) {
    ch.replaceParent(input, sel);
  }
}
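The net effect of genSelectPlan is to splice an identity SelectOperator between the map join and its former children. The helper below is hypothetical (not part of Hive) and merely sketches the invariant one could check afterwards.

// A hypothetical helper sketching the post-condition of genSelectPlan: the map
// join now has exactly one child, the new SelectOperator, and every former
// child has been re-parented onto it.
import java.util.List;

import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public final class SpliceCheck {
  public static void assertSpliced(MapJoinOperator mj) {
    List<Operator<? extends OperatorDesc>> children = mj.getChildOperators();
    if (children.size() != 1) {
      throw new IllegalStateException("expected the dummy select as sole child");
    }
    Operator<? extends OperatorDesc> sel = children.get(0);
    for (Operator<? extends OperatorDesc> grandChild : sel.getChildOperators()) {
      // replaceParent(input, sel) must have rewired each former child
      if (!grandChild.getParentOperators().contains(sel)) {
        throw new IllegalStateException("child was not re-parented onto the select");
      }
    }
  }
}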
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class ReduceSinkMapJoinProc, method process.
/*
 * This processor addresses the RS-MJ case that occurs in Tez on the
 * small/hash-table side of the join. The work that the RS belongs to must be
 * connected to the MJ work via a broadcast edge. We should not walk down the
 * tree when we encounter this pattern, because the type of work (map work or
 * reduce work) needs to be determined from the big-table side: it may be map
 * work (no shuffle needed) or reduce work.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs)
    throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procContext;
  MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
  // remember the original parent list before we start modifying it
  if (!context.mapJoinParentMap.containsKey(mapJoinOp)) {
    List<Operator<?>> parents = new ArrayList<Operator<?>>(mapJoinOp.getParentOperators());
    context.mapJoinParentMap.put(mapJoinOp, parents);
  }
  boolean isBigTable = stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator);
  ReduceSinkOperator parentRS = null;
  if (!isBigTable) {
    parentRS = (ReduceSinkOperator) stack.get(stack.size() - 2);
    // for a dynamic partitioned hash join, the big table may also come from a
    // ReduceSinkOperator; check for this condition
    // TODO: use indexOf(), or parentRS.getTag()?
    isBigTable = (mapJoinOp.getParentOperators().indexOf(parentRS) == mapJoinOp.getConf().getPosBigTable());
  }
  if (mapJoinOp.getConf().isDynamicPartitionHashJoin()
      && !context.mapJoinToUnprocessedSmallTableReduceSinks.containsKey(mapJoinOp)) {
    // initialize the set of unprocessed small tables
    Set<ReduceSinkOperator> rsSet = Sets.newIdentityHashSet();
    for (int pos = 0; pos < mapJoinOp.getParentOperators().size(); ++pos) {
      if (pos == mapJoinOp.getConf().getPosBigTable()) {
        continue;
      }
      rsSet.add((ReduceSinkOperator) mapJoinOp.getParentOperators().get(pos));
    }
    context.mapJoinToUnprocessedSmallTableReduceSinks.put(mapJoinOp, rsSet);
  }
  if (isBigTable) {
    context.currentMapJoinOperators.add(mapJoinOp);
    return null;
  }
  context.preceedingWork = null;
  context.currentRootOperator = null;
  return processReduceSinkToHashJoin(parentRS, mapJoinOp, context);
}
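A side note on Sets.newIdentityHashSet() above: the set of unprocessed small-table reduce sinks must track operator instances, so membership is decided by reference identity rather than equals()/hashCode(). A minimal self-contained sketch of the difference, using Guava as the snippet does:

// A self-contained demo of Guava's identity-based set semantics.
import java.util.Set;

import com.google.common.collect.Sets;

public class IdentitySetDemo {
  public static void main(String[] args) {
    Set<String> seen = Sets.newIdentityHashSet();
    String a = new String("RS");
    String b = new String("RS"); // equals(a), but a distinct instance
    seen.add(a);
    seen.add(b);
    // an equality-based HashSet would report 1; the identity set keeps both
    System.out.println(seen.size()); // prints 2
  }
}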