Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class ConvertJoinMapJoin, method convertJoinDynamicPartitionedHashJoin:
private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, OptimizeTezProcContext context)
    throws SemanticException {
  // Attempt dynamic partitioned hash join
  // Since we don't have big table index yet, must start with estimate of numReducers
  int numReducers = estimateNumBuckets(joinOp, false);
  LOG.info("Try dynamic partitioned hash join with estimated " + numReducers + " reducers");
  int bigTablePos = getMapJoinConversionPos(joinOp, context, numReducers, false,
      context.conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD), false);
  if (bigTablePos >= 0) {
    // Now that we have the big table index, get real numReducers value based on big table RS
    ReduceSinkOperator bigTableParentRS =
        (ReduceSinkOperator) (joinOp.getParentOperators().get(bigTablePos));
    numReducers = bigTableParentRS.getConf().getNumReducers();
    LOG.debug("Real big table reducers = " + numReducers);
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePos, false);
    if (mapJoinOp != null) {
      LOG.info("Selected dynamic partitioned hash join");
      mapJoinOp.getConf().setDynamicPartitionHashJoin(true);
      // Set OpTraits for dynamically partitioned hash join:
      // bucketColNames: Re-use previous joinOp's bucketColNames. Parent operators should be
      //   reduce sinks, which should have bucket columns based on the join keys.
      // numBuckets: set to number of reducers
      // sortCols: this is an unsorted join - no sort cols
      OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numReducers, null,
          joinOp.getOpTraits().getNumReduceSinks());
      mapJoinOp.setOpTraits(opTraits);
      mapJoinOp.setStatistics(joinOp.getStatistics());
      // propagate this change till the next RS
      for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
        setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
      }
      return true;
    }
  }
  return false;
}
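The threshold read from HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD (hive.auto.convert.join.noconditionaltask.size) caps how much small-table data the in-memory hash side may hold. getMapJoinConversionPos itself is not shown above; the following is a rough, hypothetical sketch of the selection rule it embodies. The class and names (BigTableSelector, pickBigTablePosition, inputSizes, smallTableThreshold) are illustrative, not Hive's API:

  // Hypothetical simplification of the big-table selection rule (not Hive's
  // implementation): the join can become a map join only if all inputs except
  // one (the "big table") collectively fit within the small-table budget.
  final class BigTableSelector {
    // Returns the big table's position, or -1 if conversion is not possible.
    static int pickBigTablePosition(long[] inputSizes, long smallTableThreshold) {
      int bigTablePos = -1;
      for (int i = 0; i < inputSizes.length; i++) {
        if (bigTablePos == -1 || inputSizes[i] > inputSizes[bigTablePos]) {
          bigTablePos = i; // keep the largest input streamed
        }
      }
      long sumOfOthers = 0;
      for (int i = 0; i < inputSizes.length; i++) {
        if (i != bigTablePos) {
          sumOfOthers += inputSizes[i];
        }
      }
      return (sumOfOthers <= smallTableThreshold) ? bigTablePos : -1;
    }
  }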
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class MapJoinProcessor, method convertJoinOpMapJoinOp:
public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op,
    boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos,
    boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
  MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases,
      mapJoinPos, noCheckOuterJoin, adjustParentsChildren);
  // reduce sink row resolver used to generate map join op
  RowSchema outputRS = op.getSchema();
  MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
      op.getCompilationOpContext(), mapJoinDescriptor, new RowSchema(outputRS.getSignature()),
      op.getParentOperators());
  mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
  Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
  mapJoinOp.setColumnExprMap(colExprMap);
  List<Operator<? extends OperatorDesc>> childOps = op.getChildOperators();
  for (Operator<? extends OperatorDesc> childOp : childOps) {
    childOp.replaceParent(op, mapJoinOp);
  }
  mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
  mapJoinOp.setChildOperators(childOps);
  op.setChildOperators(null);
  op.setParentOperators(null);
  return mapJoinOp;
}
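The method splices the new MapJoinOperator into the operator DAG in place of the JoinOperator: OperatorFactory.getAndMakeChild attaches it to the old parents, each child re-points its parent reference via replaceParent, and the old operator is detached. A toy, self-contained sketch of that splice pattern follows; the Op class is illustrative, not Hive's Operator hierarchy, and it simplifies the parent-side rewiring:

  import java.util.ArrayList;
  import java.util.List;

  // Toy operator type just for this sketch (not Hive's Operator hierarchy).
  final class Op {
    final String name;
    List<Op> parents = new ArrayList<>();
    List<Op> children = new ArrayList<>();

    Op(String name) {
      this.name = name;
    }

    // Mirrors Operator.replaceParent: swap one parent reference in place.
    void replaceParent(Op oldParent, Op newParent) {
      parents.set(parents.indexOf(oldParent), newParent);
    }

    // Replace oldOp with newOp in the DAG: newOp inherits oldOp's parents and
    // children, the neighbours re-point their references, and oldOp is detached.
    static void splice(Op oldOp, Op newOp) {
      newOp.parents = oldOp.parents;
      for (Op parent : newOp.parents) {
        parent.children.set(parent.children.indexOf(oldOp), newOp);
      }
      newOp.children = oldOp.children;
      for (Op child : newOp.children) {
        child.replaceParent(oldOp, newOp);
      }
      oldOp.parents = null;  // detach, as op.setParentOperators(null) does above
      oldOp.children = null; // detach, as op.setChildOperators(null) does above
    }
  }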
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class SortedMergeBucketMapjoinProc, method process:
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  if (nd instanceof SMBMapJoinOperator) {
    return null;
  }
  MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
  SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;
  boolean convert = canConvertBucketMapJoinToSMBJoin(mapJoinOp, stack, smbJoinContext, nodeOutputs);
  // Throw an error if the user asked for sort merge bucketed mapjoin to be enforced
  // and sort merge bucketed mapjoin cannot be performed
  if (!convert
      && pGraphContext.getConf().getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN)) {
    throw new SemanticException(ErrorMsg.SORTMERGE_MAPJOIN_FAILED.getMsg());
  }
  if (convert) {
    convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
  }
  return null;
}
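The control flow is a general enforce-or-fall-back guard: try the optimization, and fail compilation only when the user explicitly demanded it. A minimal sketch of that shape, with illustrative names and IllegalStateException standing in for Hive's SemanticException:

  // Generic shape of the enforce-or-fall-back guard (names illustrative).
  final class EnforcedOptimization {
    static void applyOrSkip(boolean convertible, boolean userEnforced) {
      if (!convertible && userEnforced) {
        // The user demanded the optimization, so failing loudly beats
        // silently running a slower plan.
        throw new IllegalStateException("sort merge bucket map join enforced but not applicable");
      }
      if (convertible) {
        // ... perform the conversion; otherwise keep the original plan
      }
    }
  }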
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class AbstractMapJoin, method setupBenchmarkImplementation:
protected static MapJoinOperator setupBenchmarkImplementation(
    MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc,
    MapJoinTestData testData) throws Exception {
  // UNDONE: Parameterize for implementation variation?
  MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
  final boolean isVectorOutput = isVectorOutput(mapJoinImplementation);
  // This collector is just a row counter.
  Operator<? extends OperatorDesc> testCollectorOperator =
      (!isVectorOutput ? new CountCollectorTestOperator() : new CountVectorCollectorTestOperator());
  MapJoinOperator operator = MapJoinTestConfig.createMapJoinImplementation(
      mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc);
  return operator;
}
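The "collector is just a row counter" comment describes the benchmark's sink: a terminal operator that discards row contents and only counts arrivals, so timing measures the join itself rather than output handling. A toy stand-in for that idea (illustrative types, not Hive's CountCollectorTestOperator):

  import java.util.concurrent.atomic.AtomicLong;

  // Terminal sink whose only job is to count the rows the join emits.
  final class CountingCollector {
    private final AtomicLong rowCount = new AtomicLong();

    // Called once per joined output row; the row contents are ignored.
    void collect(Object row) {
      rowCount.incrementAndGet();
    }

    long getRowCount() {
      return rowCount.get();
    }
  }

A benchmark can then sanity-check the harness by comparing getRowCount() against the expected join cardinality while timing the operator under test.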
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
The class ConvertJoinMapJoin, method convertJoinMapJoin:
/*
 * Once we have decided on the map join, the tree would transform from
 *
 *       |                         |
 *      Join                    MapJoin
 *      /  \                    /    \
 *    RS    RS     --->       RS      TS (big table)
 *    /      \                /
 *  TS        TS            TS (small table)
 *
 * for tez.
 */
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context,
    int bigTablePosition, boolean removeReduceSink) throws SemanticException {
  // bail on mux operator because currently the mux operator masks the emit keys
  // of the constituent reduce sinks.
  for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
    if (parentOp instanceof MuxOperator) {
      return null;
    }
  }
  // can safely convert the join to a map join.
  MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp,
      joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(),
      joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
  mapJoinOp.getConf().setHybridHashJoin(
      HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
  List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next();
  if (joinExprs.size() == 0) {
    // In case of cross join, we disable hybrid grace hash join
    mapJoinOp.getConf().setHybridHashJoin(false);
  }
  Operator<? extends OperatorDesc> parentBigTableOp =
      mapJoinOp.getParentOperators().get(bigTablePosition);
  if (parentBigTableOp instanceof ReduceSinkOperator) {
    Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
    if (removeReduceSink) {
      for (Operator<?> p : parentBigTableOp.getParentOperators()) {
        // we might have generated a dynamic partition operator chain. Since
        // we're removing the reduce sink we need to remove that too.
        Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
        Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
        for (Operator<?> c : p.getChildOperators()) {
          AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
          if (event != null) {
            dynamicPartitionOperators.add(c);
            opEventPairs.put(c, event);
          }
        }
        for (Operator<?> c : dynamicPartitionOperators) {
          if (context.pruningOpsRemovedByPriorOpt.isEmpty()
              || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
            p.removeChild(c);
            // at this point we've found the fork in the op pipeline that has the
            // pruning as a child plan.
            LOG.info("Disabling dynamic pruning for: "
                + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName()
                + ". Need to be removed together with reduce sink");
          }
        }
        for (Operator<?> op : dynamicPartitionOperators) {
          context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
        }
      }
      mapJoinOp.getParentOperators().remove(bigTablePosition);
      if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
        mapJoinOp.getParentOperators().add(bigTablePosition,
            parentBigTableOp.getParentOperators().get(0));
      }
      parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
    }
    for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
      if (!(op.getChildOperators().contains(mapJoinOp))) {
        op.getChildOperators().add(mapJoinOp);
      }
      op.getChildOperators().remove(joinOp);
    }
    // Remove semijoin operators if there are any. The semijoin branch can potentially
    // create a task-level cycle with the hash join, except when it is a dynamically
    // partitioned hash join which takes place in a separate task.
    if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) {
      removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
    }
  }
  return mapJoinOp;
}
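findDynamicPartitionBroadcast is called above but not shown in this snippet. A plausible shape for it, sketched on a toy node type rather than Hive's real implementation, is a depth-first walk of a child branch that returns the first dynamic-partition event operator it finds:

  import java.util.ArrayList;
  import java.util.List;

  // Toy node type just for this sketch (not Hive's Operator hierarchy).
  final class PlanNode {
    List<PlanNode> children = new ArrayList<>();
    boolean isDynamicPartitionEvent; // stand-in for "instanceof AppMasterEventOperator"

    // Hypothetical shape of findDynamicPartitionBroadcast: depth-first search
    // of a child branch, returning the first dynamic-partition event found,
    // or null if the branch contains none.
    static PlanNode findDynamicPartitionEvent(PlanNode op) {
      if (op.isDynamicPartitionEvent) {
        return op;
      }
      for (PlanNode child : op.children) {
        PlanNode found = findDynamicPartitionEvent(child);
        if (found != null) {
          return found;
        }
      }
      return null;
    }
  }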