Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class AbstractSMBJoinProc, method convertJoinToBucketMapJoin:
// Convert the join operator to a bucket map-join operator
protected MapJoinOperator convertJoinToBucketMapJoin(JoinOperator joinOp,
    SortBucketJoinProcCtx joinContext) throws SemanticException {
  MapJoinOperator mapJoinOp = new MapJoinProcessor().convertMapJoin(
      pGraphContext.getConf(), joinOp,
      joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(),
      joinOp.getConf().getMapAliases(), joinContext.getBigTablePosition(),
      false, false);
  // Copy over the data structures coming from QBJoinTree
  mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
  // Remove the join operator from the query join context
  pGraphContext.getMapJoinOps().add(mapJoinOp);
  pGraphContext.getJoinOps().remove(joinOp);
  convertMapJoinToBucketMapJoin(mapJoinOp, joinContext);
  return mapJoinOp;
}
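Two details in this method generalize beyond Hive. First, the replacement operator inherits the join-tree bookkeeping (setQBJoinTreeProps) before anything else happens; second, the plan registries are updated in one place, so later optimizer passes never see both the old and the new operator at once. Below is a minimal, self-contained sketch of that replace-and-reregister pattern; PlanRegistry, JoinNode, and MapJoinNode are illustrative stand-ins, not Hive API:

import java.util.LinkedHashSet;
import java.util.Set;

class JoinNode { String joinTreeProps = "aliases, base srcs, filters"; }

class MapJoinNode { String joinTreeProps; }

class PlanRegistry {
  final Set<JoinNode> joinOps = new LinkedHashSet<>();
  final Set<MapJoinNode> mapJoinOps = new LinkedHashSet<>();

  MapJoinNode replaceJoin(JoinNode join) {
    MapJoinNode mapJoin = new MapJoinNode();
    mapJoin.joinTreeProps = join.joinTreeProps; // mirrors setQBJoinTreeProps above
    mapJoinOps.add(mapJoin);                    // register the replacement
    joinOps.remove(join);                       // retire the original
    return mapJoin;
  }
}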
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class AbstractSMBJoinProc, method convertBucketMapJoinToSMBJoin:
// Convert the bucket map-join operator to a sort-merge map-join operator
protected SMBMapJoinOperator convertBucketMapJoinToSMBJoin(MapJoinOperator mapJoinOp,
    SortBucketJoinProcCtx smbJoinContext) {
  String[] srcs = smbJoinContext.getSrcs();
  SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp);
  SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
  smbJop.setConf(smbJoinDesc);
  HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>();
  for (int i = 0; i < srcs.length; i++) {
    tagToAlias.put((byte) i, srcs[i]);
  }
  smbJoinDesc.setTagToAlias(tagToAlias);
  int indexInListMapJoinNoReducer =
      this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
  if (indexInListMapJoinNoReducer >= 0) {
    this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
    this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop);
  }
  Map<String, DummyStoreOperator> aliasToSink = new HashMap<String, DummyStoreOperator>();
  // For all parents (other than the big table), insert a dummy store operator
  /* Consider a query like:
   *
   * select * from
   *   (subq1 --> has a filter)
   *   join
   *   (subq2 --> has a filter)
   * on some key
   *
   * Let us assume that subq1 is the small table (either specified by the user or inferred
   * automatically). The following operator tree will be created:
   *
   * TableScan (subq1) --> Select --> Filter --> DummyStore
   *                                                 \
   *                                                  \    SMBJoin
   *                                                  /
   *                                                 /
   * TableScan (subq2) --> Select --> Filter
   */
  List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators();
  for (int i = 0; i < parentOperators.size(); i++) {
    Operator<? extends OperatorDesc> par = parentOperators.get(i);
    int index = par.getChildOperators().indexOf(mapJoinOp);
    par.getChildOperators().remove(index);
    if (i == smbJoinDesc.getPosBigTable()) {
      par.getChildOperators().add(index, smbJop);
    } else {
      DummyStoreOperator dummyStoreOp = new DummyStoreOperator(par.getCompilationOpContext());
      par.getChildOperators().add(index, dummyStoreOp);
      List<Operator<? extends OperatorDesc>> childrenOps =
          new ArrayList<Operator<? extends OperatorDesc>>();
      childrenOps.add(smbJop);
      dummyStoreOp.setChildOperators(childrenOps);
      List<Operator<? extends OperatorDesc>> parentOps =
          new ArrayList<Operator<? extends OperatorDesc>>();
      parentOps.add(par);
      dummyStoreOp.setParentOperators(parentOps);
      aliasToSink.put(srcs[i], dummyStoreOp);
      smbJop.getParentOperators().remove(i);
      smbJop.getParentOperators().add(i, dummyStoreOp);
    }
  }
  smbJoinDesc.setAliasToSink(aliasToSink);
  List<Operator<? extends OperatorDesc>> childOps = mapJoinOp.getChildOperators();
  for (int i = 0; i < childOps.size(); i++) {
    Operator<? extends OperatorDesc> child = childOps.get(i);
    int index = child.getParentOperators().indexOf(mapJoinOp);
    child.getParentOperators().remove(index);
    child.getParentOperators().add(index, smbJop);
  }
  // Copy over the data structures coming from QBJoinTree
  smbJop.getConf().setQBJoinTreeProps(mapJoinOp.getConf());
  pGraphContext.getSmbMapJoinOps().add(smbJop);
  pGraphContext.getMapJoinOps().remove(mapJoinOp);
  return smbJop;
}
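The core of this method is pointer surgery on the operator DAG: each small-table parent gets a pass-through sink spliced in between itself and the join, while the big-table parent keeps its direct edge, and positional indexes are preserved so tags still line up. The following self-contained sketch models just that splice; the Node class is hypothetical and only mimics the parent/child lists that Hive's Operator exposes via getParentOperators()/getChildOperators():

import java.util.ArrayList;
import java.util.List;

class Node {
  final String name;
  final List<Node> parents = new ArrayList<>();
  final List<Node> children = new ArrayList<>();
  Node(String name) { this.name = name; }
}

class RewireSketch {
  // Splice 'sink' between 'parent' and 'join', preserving list positions.
  static void splice(Node parent, Node join, Node sink) {
    int childIdx = parent.children.indexOf(join);
    parent.children.set(childIdx, sink);   // parent now feeds the sink
    sink.parents.add(parent);
    sink.children.add(join);
    int parentIdx = join.parents.indexOf(parent);
    join.parents.set(parentIdx, sink);     // join now reads from the sink
  }

  public static void main(String[] args) {
    Node big = new Node("scan_big"), small = new Node("scan_small");
    Node join = new Node("smb_join");
    big.children.add(join);
    small.children.add(join);
    join.parents.add(big);
    join.parents.add(small);
    splice(small, join, new Node("dummy_store")); // only the small side gets a sink
    System.out.println(join.parents.get(1).name); // prints dummy_store
  }
}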
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class ConvertJoinMapJoin, method process:
/*
 * (non-Javadoc) We should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
  JoinOperator joinOp = (JoinOperator) nd;
  long maxSize = context.conf.getLongVar(
      HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  // Adjust the noconditional task size threshold for LLAP
  LlapClusterStateForCompile llapInfo = null;
  if ("llap".equalsIgnoreCase(context.conf.getVar(ConfVars.HIVE_EXECUTION_MODE))) {
    llapInfo = LlapClusterStateForCompile.getClusterInfo(context.conf);
    llapInfo.initClusterInfo();
  }
  MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf, llapInfo);
  joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
  // Do not use a map join in case of a cross product
  boolean cartesianProductEdgeEnabled =
      HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
  if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) {
    fallbackToMergeJoin(joinOp, context);
    return null;
  }
  TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
  boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)
      && !context.parseContext.getDisableMapJoin();
  if (!hiveConvertJoin) {
    // We are just converting to a common merge join operator: the shuffle
    // join in the map-reduce case.
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize);
    if (retval == null) {
      return retval;
    } else {
      fallbackToReduceSideJoin(joinOp, context, maxSize);
      return null;
    }
  }
  // If we have traits, and table info is present in the traits, we know the
  // exact number of buckets. Else choose the largest number of estimated
  // reducers from the parent operators.
  int numBuckets = -1;
  if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
    numBuckets = estimateNumBuckets(joinOp, true);
  } else {
    numBuckets = 1;
  }
  LOG.info("Estimated number of buckets " + numBuckets);
  int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets, false, maxSize, true);
  if (mapJoinConversionPos < 0) {
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize);
    if (retval == null) {
      return retval;
    } else {
      // The only case is a full outer join with SMB enabled, which is not
      // possible. Convert to a regular join.
      fallbackToReduceSideJoin(joinOp, context, maxSize);
      return null;
    }
  }
  if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
    // Check if we are in LLAP; if so, determine whether to use BMJ or DPHJ
    if (llapInfo != null) {
      if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo,
          mapJoinConversionPos, numBuckets)) {
        return null;
      }
    } else if (numBuckets > 1
        && convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
      return null;
    }
  }
  // Check if we can convert to a map join with no bucket scaling
  LOG.info("Convert to non-bucketed map join");
  if (numBuckets != 1) {
    mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1, false, maxSize, true);
  }
  if (mapJoinConversionPos < 0) {
    // We are just converting to a common merge join operator: the shuffle
    // join in the map-reduce case.
    fallbackToReduceSideJoin(joinOp, context, maxSize);
    return null;
  }
  MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true);
  // A map join operator by default has no bucket cols, and the number of
  // reduce sinks is reduced by 1
  mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
  mapJoinOp.setStatistics(joinOp.getStatistics());
  // Propagate this change until the next RS
  for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
    setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
  }
  return null;
}
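The whole cascade above hinges on getMapJoinConversionPos, which returns the position of the one input that may stay streamed while every other input is loaded into an in-memory hash table. Below is a simplified model of just the size rule, assuming inputSizes holds per-input data sizes; the real method also weighs column statistics, outer-join semantics, and bucketing traits:

class BigTableChooser {
  // Simplified model of the size rule behind getMapJoinConversionPos:
  // a map join is possible only if all inputs except one (the big table)
  // fit under maxSize together, since the small sides become in-memory
  // hash tables. Returns -1 when no position qualifies.
  static int bigTablePosition(long[] inputSizes, long maxSize) {
    long total = 0;
    for (long s : inputSizes) {
      total += s;
    }
    int candidate = -1;
    for (int i = 0; i < inputSizes.length; i++) {
      if (total - inputSizes[i] <= maxSize) {   // all other inputs fit in memory
        if (candidate < 0 || inputSizes[i] > inputSizes[candidate]) {
          candidate = i;                        // prefer streaming the largest input
        }
      }
    }
    return candidate;
  }
}

For example, with inputSizes = {10000, 500} and maxSize = 1000, position 0 is returned: the 500-byte side fits in memory, so the large side streams.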
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class SparkSortMergeJoinOptimizer, method convertJoinToSMBJoinAndReturn:
protected SMBMapJoinOperator convertJoinToSMBJoinAndReturn(JoinOperator joinOp,
    SortBucketJoinProcCtx smbJoinContext) throws SemanticException {
  MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext);
  SMBMapJoinOperator smbMapJoinOp = convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
  smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
  return smbMapJoinOp;
}
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class AbstractSMBJoinProc, method convertJoinToSMBJoin:
// Convert the join operator to a sort-merge join operator
protected void convertJoinToSMBJoin(JoinOperator joinOp,
    SortBucketJoinProcCtx smbJoinContext) throws SemanticException {
  MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext);
  SMBMapJoinOperator smbMapJoinOp = convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
  smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
}
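This method and the Spark variant above run the same two-step pipeline (join to bucket map join, then bucket map join to SMB join) and both mark the result with setConvertedAutomaticallySMBJoin(true); the only difference is whether the converted operator is returned. A hypothetical skeleton of that base-class composition pattern, with illustrative names rather than the real Hive class hierarchy:

abstract class JoinConversionBase {
  // Step 1: replace the shuffle join with a bucket map join.
  abstract Object toBucketMapJoin(Object joinOp);

  // Step 2: replace the bucket map join with a sort-merge join.
  abstract Object toSortMergeJoin(Object bucketMapJoinOp);

  // The shared pipeline both call sites above follow; each subclass
  // decides whether to return the result or only mutate the plan.
  final Object convertToSMBJoin(Object joinOp) {
    Object bucketMapJoin = toBucketMapJoin(joinOp);
    return toSortMergeJoin(bucketMapJoin);
  }
}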