Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
The class GenSparkUtils, method getEdgeProperty.
public static SparkEdgeProperty getEdgeProperty(ReduceSinkOperator reduceSink,
    ReduceWork reduceWork) throws SemanticException {
  SparkEdgeProperty edgeProperty = new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE);
  edgeProperty.setNumPartitions(reduceWork.getNumReduceTasks());
  String sortOrder = Strings.nullToEmpty(reduceSink.getConf().getOrder()).trim();
  if (hasGBYOperator(reduceSink)) {
    edgeProperty.setShuffleGroup();
    // SHUFFLE_SORT shouldn't be used for this purpose, see HIVE-8542
    if (!sortOrder.isEmpty() && groupByNeedParLevelOrder(reduceSink)) {
      edgeProperty.setMRShuffle();
    }
  }
  if (reduceWork.getReducer() instanceof JoinOperator) {
    // reduce-side join, use MR-style shuffle
    edgeProperty.setMRShuffle();
  }
  // If it's a FileSink to bucketed files, also use MR-style shuffle to
  // get a compatible taskId for the bucket name
  FileSinkOperator fso = getChildOperator(reduceWork.getReducer(), FileSinkOperator.class);
  if (fso != null) {
    String bucketCount = fso.getConf().getTableInfo().getProperties()
        .getProperty(hive_metastoreConstants.BUCKET_COUNT);
    if (bucketCount != null && Integer.parseInt(bucketCount) > 1) {
      edgeProperty.setMRShuffle();
    }
  }
  // test if we need partition/global order; SHUFFLE_SORT should only be used for global order
  if (edgeProperty.isShuffleNone() && !sortOrder.isEmpty()) {
    if ((reduceSink.getConf().getPartitionCols() == null
        || reduceSink.getConf().getPartitionCols().isEmpty()
        || isSame(reduceSink.getConf().getPartitionCols(), reduceSink.getConf().getKeyCols()))
        && reduceSink.getConf().hasOrderBy()) {
      edgeProperty.setShuffleSort();
    } else {
      edgeProperty.setMRShuffle();
    }
  }
  // simple distribute-by goes here
  if (edgeProperty.isShuffleNone()) {
    edgeProperty.setShuffleGroup();
  }
  return edgeProperty;
}
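For context, a minimal sketch of how this helper might be used when wiring two works together; the reduceSink, reduceWork, mapWork, and sparkWork variables are assumed to exist in the surrounding Spark compiler code and are illustrative only.

// Illustrative sketch only; assumes the caller already built the works.
SparkEdgeProperty edge = GenSparkUtils.getEdgeProperty(reduceSink, reduceWork);
// The edge now carries the shuffle type (group, sort, or MR-style) plus the
// parallelism taken from reduceWork.getNumReduceTasks(), and would typically
// be attached when connecting the producer and consumer works, e.g.:
// sparkWork.connect(mapWork, reduceWork, edge);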
Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
The class ConvertJoinMapJoin, method process.
@Override
/*
 * (non-Javadoc) we should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
  JoinOperator joinOp = (JoinOperator) nd;
  long maxSize = context.conf.getLongVar(
      HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
  if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
    // we are just converting to a common merge join operator. The shuffle
    // join in map-reduce case.
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
    if (retval == null) {
      return retval;
    } else {
      fallbackToReduceSideJoin(joinOp, context);
      return null;
    }
  }
  // if we have traits, and table info is present in the traits, we know the
  // exact number of buckets. Else choose the largest number of estimated
  // reducers from the parent operators.
  int numBuckets = -1;
  if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
    numBuckets = estimateNumBuckets(joinOp, true);
  } else {
    numBuckets = 1;
  }
  LOG.info("Estimated number of buckets " + numBuckets);
  int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets, false, maxSize, true);
  if (mapJoinConversionPos < 0) {
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
    if (retval == null) {
      return retval;
    } else {
      // only case is full outer join with SMB enabled which is not possible. Convert to regular
      // join.
      fallbackToReduceSideJoin(joinOp, context);
      return null;
    }
  }
  if (numBuckets > 1) {
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
      if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
        return null;
      }
    }
  }
  // check if we can convert to map join with no bucket scaling.
  LOG.info("Convert to non-bucketed map join");
  if (numBuckets != 1) {
    mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1, false, maxSize, true);
  }
  if (mapJoinConversionPos < 0) {
    // we are just converting to a common merge join operator. The shuffle
    // join in map-reduce case.
    fallbackToReduceSideJoin(joinOp, context);
    return null;
  }
  MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true);
  // map join operator by default has no bucket cols and num of reduce sinks
  // reduced by 1
  mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
  mapJoinOp.setStatistics(joinOp.getStatistics());
  // propagate this change till the next RS
  for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
    setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
  }
  return null;
}
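This processor only runs when the optimizer registers it against join operators and walks the plan. A rough, hedged sketch of that wiring, simplified from what TezCompiler does; topNodes and procCtx are assumed to come from the surrounding compiler, and the rule/walker classes are the ones in org.apache.hadoop.hive.ql.lib.

// Sketch only: register ConvertJoinMapJoin for JOIN operators and walk the plan.
// (Rule, RuleRegExp, Dispatcher, GraphWalker, etc. are from org.apache.hadoop.hive.ql.lib;
// LinkedHashMap is from java.util.)
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("Convert join to map join",
    JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker walker = new DefaultGraphWalker(disp);
walker.startWalking(topNodes, null);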
Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
The class CommonJoinTaskDispatcher, method convertTaskToMapJoinTask.
// create map join task and set big table as bigTablePosition
private MapRedTask convertTaskToMapJoinTask(MapredWork newWork, int bigTablePosition)
    throws UnsupportedEncodingException, SemanticException {
  // create a mapred task for this work
  MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork,
      physicalContext.getParseContext().getConf());
  JoinOperator newJoinOp = getJoinOp(newTask);
  // optimize this newWork given the big table position
  MapJoinProcessor.genMapJoinOpAndLocalWork(physicalContext.getParseContext().getConf(),
      newWork, newJoinOp, bigTablePosition);
  return newTask;
}
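A hedged sketch of a call site inside the dispatcher; newWork is assumed to be a copy of the original task's MapredWork and bigTablePosition the alias index chosen by the dispatcher's size checks.

// Illustrative only: the real dispatcher also merges local work, fixes up
// parent/child task links, and may keep the original task as a backup.
MapRedTask mapJoinTask = convertTaskToMapJoinTask(newWork, bigTablePosition);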
Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
The class CommonJoinTaskDispatcher, method getJoinOp.
private JoinOperator getJoinOp(MapRedTask task) throws SemanticException {
  MapWork mWork = task.getWork().getMapWork();
  ReduceWork rWork = task.getWork().getReduceWork();
  if (rWork == null) {
    return null;
  }
  Operator<? extends OperatorDesc> reducerOp = rWork.getReducer();
  if (reducerOp instanceof JoinOperator) {
    /* Is any operator present which prevents the conversion? */
    Map<String, Operator<? extends OperatorDesc>> aliasToWork = mWork.getAliasToWork();
    for (Operator<? extends OperatorDesc> op : aliasToWork.values()) {
      if (!checkOperatorOKMapJoinConversion(op)) {
        return null;
      }
    }
    return (JoinOperator) reducerOp;
  } else {
    return null;
  }
}
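A hedged sketch of how a caller might use the null contract; mapRedTask is an assumed variable and the surrounding dispatcher logic is omitted.

// Illustrative only: mapRedTask is assumed to be the task under inspection.
JoinOperator reduceSideJoin = getJoinOp(mapRedTask);
if (reduceSideJoin == null) {
  // no convertible reduce-side join in this task; skip the map join rewrite
} else {
  // safe to proceed with the common-join-to-map-join conversion
}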
Use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
The class CrossProductCheck, method checkMRReducer.
private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticException {
  ReduceWork rWrk = mrWrk.getReduceWork();
  if (rWrk == null) {
    return;
  }
  Operator<? extends OperatorDesc> reducer = rWrk.getReducer();
  if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
    BaseWork prntWork = mrWrk.getMapWork();
    checkForCrossProduct(taskName, reducer, new ExtractReduceSinkInfo(null).analyze(prntWork));
  }
}
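The checkForCrossProduct and ExtractReduceSinkInfo helpers are not shown here; conceptually, a warning is raised when the reduce-side join is fed by reduce sinks with no key columns, since every row then hashes to the same reducer. A self-contained, hedged illustration of that idea, not Hive's actual implementation; looksLikeCrossProduct is a hypothetical helper.

// Simplified illustration only: a ReduceSink with empty key columns feeding a
// reduce-side join implies a cross product (a single, unkeyed shuffle).
// (ExprNodeDesc is from org.apache.hadoop.hive.ql.plan; List from java.util.)
private static boolean looksLikeCrossProduct(ReduceSinkOperator rs) {
  List<ExprNodeDesc> keys = rs.getConf().getKeyCols();
  return keys == null || keys.isEmpty();
}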