use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
the class CrossProductCheck method checkTezReducer.
private void checkTezReducer(TezWork tzWrk) throws SemanticException {
for (BaseWork wrk : tzWrk.getAllWork()) {
if (wrk instanceof MergeJoinWork) {
wrk = ((MergeJoinWork) wrk).getMainWork();
}
if (!(wrk instanceof ReduceWork)) {
continue;
}
ReduceWork rWork = (ReduceWork) wrk;
Operator<? extends OperatorDesc> reducer = ((ReduceWork) wrk).getReducer();
if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
Map<Integer, ExtractReduceSinkInfo.Info> rsInfo = new HashMap<Integer, ExtractReduceSinkInfo.Info>();
for (Map.Entry<Integer, String> e : rWork.getTagToInput().entrySet()) {
rsInfo.putAll(getReducerInfo(tzWrk, rWork.getName(), e.getValue()));
}
checkForCrossProduct(rWork.getName(), reducer, rsInfo);
}
}
}
use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
the class GenMRSkewJoinProcessor method processSkewJoin.
/**
* Create tasks for processing skew joins. The idea is (HIVE-964) to use
* separated jobs and map-joins to handle skew joins.
* <p>
* <ul>
* <li>
* Number of mr jobs to handle skew keys is the number of table minus 1 (we
* can stream the last table, so big keys in the last table will not be a
* problem).
* <li>
* At runtime in Join, we output big keys in one table into one corresponding
* directories, and all same keys in other tables into different dirs(one for
* each table). The directories will look like:
* <ul>
* <li>
* dir-T1-bigkeys(containing big keys in T1), dir-T2-keys(containing keys
* which is big in T1),dir-T3-keys(containing keys which is big in T1), ...
* <li>
* dir-T1-keys(containing keys which is big in T2), dir-T2-bigkeys(containing
* big keys in T2),dir-T3-keys(containing keys which is big in T2), ...
* <li>
* dir-T1-keys(containing keys which is big in T3), dir-T2-keys(containing big
* keys in T3),dir-T3-bigkeys(containing keys which is big in T3), ... .....
* </ul>
* </ul>
* For each table, we launch one mapjoin job, taking the directory containing
* big keys in this table and corresponding dirs in other tables as input.
* (Actally one job for one row in the above.)
*
* <p>
* For more discussions, please check
* https://issues.apache.org/jira/browse/HIVE-964.
*
*/
@SuppressWarnings("unchecked")
public static void processSkewJoin(JoinOperator joinOp, Task<? extends Serializable> currTask, ParseContext parseCtx) throws SemanticException {
// now does not work with outer joins
if (!GenMRSkewJoinProcessor.skewJoinEnabled(parseCtx.getConf(), joinOp)) {
return;
}
List<Task<? extends Serializable>> children = currTask.getChildTasks();
Path baseTmpDir = parseCtx.getContext().getMRTmpPath();
JoinDesc joinDescriptor = joinOp.getConf();
Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
int numAliases = joinValues.size();
Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
Byte[] tags = joinDescriptor.getTagOrder();
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
bigKeysDirMap.put(alias, getBigKeysDir(baseTmpDir, alias));
Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
smallKeysDirMap.put(alias, smallKeysMap);
for (Byte src2 : tags) {
if (!src2.equals(alias)) {
smallKeysMap.put(src2, getSmallKeysDir(baseTmpDir, alias, src2));
}
}
skewJoinJobResultsDir.put(alias, getBigKeysSkewJoinResultDir(baseTmpDir, alias));
}
joinDescriptor.setHandleSkewJoin(true);
joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY));
HashMap<Path, Task<? extends Serializable>> bigKeysDirToTaskMap = new HashMap<Path, Task<? extends Serializable>>();
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
MapredWork currPlan = (MapredWork) currTask.getWork();
TableDesc keyTblDesc = (TableDesc) currPlan.getReduceWork().getKeyDesc().clone();
List<String> joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties());
List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc.getProperties());
Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
// used for create mapJoinDesc, should be in order
List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
for (Byte tag : tags) {
newJoinValueTblDesc.add(null);
}
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
List<ExprNodeDesc> valueCols = joinValues.get(alias);
String colNames = "";
String colTypes = "";
int columnSize = valueCols.size();
List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
boolean first = true;
for (int k = 0; k < columnSize; k++) {
TypeInfo type = valueCols.get(k).getTypeInfo();
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
columnInfos.add(columnInfo);
newValueExpr.add(new ExprNodeColumnDesc(columnInfo));
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + newColName;
colTypes = colTypes + valueCols.get(k).getTypeString();
}
// we are putting join keys at last part of the spilled table
for (int k = 0; k < joinKeys.size(); k++) {
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + joinKeys.get(k);
colTypes = colTypes + joinKeyTypes.get(k);
ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
columnInfos.add(columnInfo);
newKeyExpr.add(new ExprNodeColumnDesc(columnInfo));
}
newJoinValues.put(alias, newValueExpr);
newJoinKeys.put(alias, newKeyExpr);
tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
rowSchemaList.put(alias, new RowSchema(columnInfos));
// construct value table Desc
String valueColNames = "";
String valueColTypes = "";
first = true;
for (int k = 0; k < columnSize; k++) {
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
if (!first) {
valueColNames = valueColNames + ",";
valueColTypes = valueColTypes + ",";
}
valueColNames = valueColNames + newColName;
valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
first = false;
}
newJoinValueTblDesc.set(Byte.valueOf((byte) i), Utilities.getTableDesc(valueColNames, valueColTypes));
}
joinDescriptor.setSkewKeysValuesTables(tableDescList);
joinDescriptor.setKeyTableDesc(keyTblDesc);
for (int i = 0; i < numAliases - 1; i++) {
Byte src = tags[i];
MapWork newPlan = PlanUtils.getMapRedWork().getMapWork();
// This code has been only added for testing
boolean mapperCannotSpanPartns = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
newPlan.setMapperCannotSpanPartns(mapperCannotSpanPartns);
MapredWork clonePlan = SerializationUtilities.clonePlan(currPlan);
Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
for (int k = 0; k < tags.length; k++) {
Operator<? extends OperatorDesc> ts = GenMapRedUtils.createTemporaryTableScanOperator(joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k));
((TableScanOperator) ts).setTableDesc(tableDescList.get((byte) k));
parentOps[k] = ts;
}
Operator<? extends OperatorDesc> tblScan_op = parentOps[i];
ArrayList<String> aliases = new ArrayList<String>();
String alias = src.toString().intern();
aliases.add(alias);
Path bigKeyDirPath = bigKeysDirMap.get(src);
newPlan.addPathToAlias(bigKeyDirPath, aliases);
newPlan.getAliasToWork().put(alias, tblScan_op);
PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);
newPlan.addPathToPartitionInfo(bigKeyDirPath, part);
newPlan.getAliasToPartnInfo().put(alias, part);
Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
assert reducer instanceof JoinOperator;
JoinOperator cloneJoinOp = (JoinOperator) reducer;
String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
mapJoinDescriptor.setTagOrder(tags);
mapJoinDescriptor.setHandleSkewJoin(false);
mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());
MapredLocalWork localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
for (int j = 0; j < numAliases; j++) {
if (j == i) {
continue;
}
Byte small_alias = tags[j];
Operator<? extends OperatorDesc> tblScan_op2 = parentOps[j];
localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
Path tblDir = smallTblDirs.get(small_alias);
localPlan.getAliasToFetchWork().put(small_alias.toString(), new FetchWork(tblDir, tableDescList.get(small_alias)));
}
newPlan.setMapRedLocalWork(localPlan);
// construct a map join and set it as the child operator of tblScan_op
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(joinOp.getCompilationOpContext(), mapJoinDescriptor, (RowSchema) null, parentOps);
// change the children of the original join operator to point to the map
// join operator
List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp.getChildOperators();
for (Operator<? extends OperatorDesc> childOp : childOps) {
childOp.replaceParent(cloneJoinOp, mapJoinOp);
}
mapJoinOp.setChildOperators(childOps);
HiveConf jc = new HiveConf(parseCtx.getConf(), GenMRSkewJoinProcessor.class);
newPlan.setNumMapTasks(HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
newPlan.setMinSplitSize(HiveConf.getLongVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
newPlan.setInputformat(HiveInputFormat.class.getName());
MapredWork w = new MapredWork();
w.setMapWork(newPlan);
Task<? extends Serializable> skewJoinMapJoinTask = TaskFactory.get(w, jc);
skewJoinMapJoinTask.setFetchSource(currTask.isFetchSource());
bigKeysDirToTaskMap.put(bigKeyDirPath, skewJoinMapJoinTask);
listWorks.add(skewJoinMapJoinTask.getWork());
listTasks.add(skewJoinMapJoinTask);
}
if (children != null) {
for (Task<? extends Serializable> tsk : listTasks) {
for (Task<? extends Serializable> oldChild : children) {
tsk.addDependentTask(oldChild);
}
}
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
for (Task<? extends Serializable> oldChild : children) {
oldChild.getParentTasks().remove(currTask);
}
listTasks.addAll(children);
}
ConditionalResolverSkewJoinCtx context = new ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap, children);
ConditionalWork cndWork = new ConditionalWork(listWorks);
ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, parseCtx.getConf());
cndTsk.setListTasks(listTasks);
cndTsk.setResolver(new ConditionalResolverSkewJoin());
cndTsk.setResolverCtx(context);
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
currTask.addDependentTask(cndTsk);
return;
}
use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
the class SparkSkewJoinProcFactory method splitTask.
/**
* If the join is not in a leaf ReduceWork, the spark task has to be split into 2 tasks.
*/
private static void splitTask(SparkTask currentTask, ReduceWork reduceWork, ParseContext parseContext) throws SemanticException {
SparkWork currentWork = currentTask.getWork();
Set<Operator<?>> reduceSinkSet = SparkMapJoinResolver.getOp(reduceWork, ReduceSinkOperator.class);
if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork) && reduceSinkSet.size() == 1) {
ReduceSinkOperator reduceSink = (ReduceSinkOperator) reduceSinkSet.iterator().next();
BaseWork childWork = currentWork.getChildren(reduceWork).get(0);
SparkEdgeProperty originEdge = currentWork.getEdgeProperty(reduceWork, childWork);
// disconnect the reduce work from its child. this should produce two isolated sub graphs
currentWork.disconnect(reduceWork, childWork);
// move works following the current reduce work into a new spark work
SparkWork newWork = new SparkWork(parseContext.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
newWork.add(childWork);
copyWorkGraph(currentWork, newWork, childWork);
// remove them from current spark work
for (BaseWork baseWork : newWork.getAllWorkUnsorted()) {
currentWork.remove(baseWork);
currentWork.getCloneToWork().remove(baseWork);
}
// create TS to read intermediate data
Context baseCtx = parseContext.getContext();
Path taskTmpDir = baseCtx.getMRTmpPath();
Operator<? extends OperatorDesc> rsParent = reduceSink.getParentOperators().get(0);
TableDesc tableDesc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(rsParent.getSchema(), "temporarycol"));
// this will insert FS and TS between the RS and its parent
TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(rsParent, reduceSink, taskTmpDir, tableDesc, parseContext);
// create new MapWork
MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
newWork.add(mapWork);
newWork.connect(mapWork, childWork, originEdge);
// setup the new map work
String streamDesc = taskTmpDir.toUri().toString();
if (GenMapRedUtils.needsTagging((ReduceWork) childWork)) {
Operator<? extends OperatorDesc> childReducer = ((ReduceWork) childWork).getReducer();
String id = null;
if (childReducer instanceof JoinOperator) {
if (parseContext.getJoinOps().contains(childReducer)) {
id = ((JoinOperator) childReducer).getConf().getId();
}
} else if (childReducer instanceof MapJoinOperator) {
if (parseContext.getMapJoinOps().contains(childReducer)) {
id = ((MapJoinOperator) childReducer).getConf().getId();
}
} else if (childReducer instanceof SMBMapJoinOperator) {
if (parseContext.getSmbMapJoinOps().contains(childReducer)) {
id = ((SMBMapJoinOperator) childReducer).getConf().getId();
}
}
if (id != null) {
streamDesc = id + ":$INTNAME";
} else {
streamDesc = "$INTNAME";
}
String origStreamDesc = streamDesc;
int pos = 0;
while (mapWork.getAliasToWork().get(streamDesc) != null) {
streamDesc = origStreamDesc.concat(String.valueOf(++pos));
}
}
GenMapRedUtils.setTaskPlan(taskTmpDir, streamDesc, tableScanOp, mapWork, false, tableDesc);
// insert the new task between current task and its child
@SuppressWarnings("unchecked") Task<? extends Serializable> newTask = TaskFactory.get(newWork, parseContext.getConf());
List<Task<? extends Serializable>> childTasks = currentTask.getChildTasks();
// must have at most one child
if (childTasks != null && childTasks.size() > 0) {
Task<? extends Serializable> childTask = childTasks.get(0);
currentTask.removeDependentTask(childTask);
newTask.addDependentTask(childTask);
}
currentTask.addDependentTask(newTask);
newTask.setFetchSource(currentTask.isFetchSource());
}
}
use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
the class SparkMapJoinOptimizer method process.
@Override
public /**
* We should ideally not modify the tree we traverse. However,
* since we need to walk the tree at any time when we modify the operator, we
* might as well do it here.
*/
Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
HiveConf conf = context.getConf();
JoinOperator joinOp = (JoinOperator) nd;
if (!conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
return null;
}
LOG.info("Check if it can be converted to map join");
long[] mapJoinInfo = getMapJoinConversionInfo(joinOp, context);
int mapJoinConversionPos = (int) mapJoinInfo[0];
if (mapJoinConversionPos < 0) {
return null;
}
int numBuckets = -1;
List<List<String>> bucketColNames = null;
LOG.info("Convert to non-bucketed map join");
MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
// but changing SerDe won't hurt correctness
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED) && conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
mapJoinOp.getConf().getKeyTblDesc().getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, BinarySortableSerDe.class.getName());
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) {
LOG.info("Check if it can be converted to bucketed map join");
numBuckets = convertJoinBucketMapJoin(joinOp, mapJoinOp, context, mapJoinConversionPos);
if (numBuckets > 1) {
LOG.info("Converted to map join with " + numBuckets + " buckets");
bucketColNames = joinOp.getOpTraits().getBucketColNames();
mapJoinInfo[2] /= numBuckets;
} else {
LOG.info("Can not convert to bucketed map join");
}
}
// we can set the traits for this join operator
OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(opTraits);
mapJoinOp.setStatistics(joinOp.getStatistics());
setNumberOfBucketsOnChildren(mapJoinOp);
context.getMjOpSizes().put(mapJoinOp, mapJoinInfo[1] + mapJoinInfo[2]);
return mapJoinOp;
}
use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.
the class ConvertJoinMapJoin method convertJoinMapJoin.
/*
* Once we have decided on the map join, the tree would transform from
*
* | |
* Join MapJoin
* / \ / \
* RS RS ---> RS TS (big table)
* / \ /
* TS TS TS (small table)
*
* for tez.
*/
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, boolean removeReduceSink) throws SemanticException {
// of the constituent reduce sinks.
for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
if (parentOp instanceof MuxOperator) {
return null;
}
}
// can safely convert the join to a map join.
MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next();
if (joinExprs.size() == 0) {
// In case of cross join, we disable hybrid grace hash join
mapJoinOp.getConf().setHybridHashJoin(false);
}
Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
if (parentBigTableOp instanceof ReduceSinkOperator) {
Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
if (removeReduceSink) {
for (Operator<?> p : parentBigTableOp.getParentOperators()) {
// we might have generated a dynamic partition operator chain. Since
// we're removing the reduce sink we need do remove that too.
Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
for (Operator<?> c : p.getChildOperators()) {
AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
if (event != null) {
dynamicPartitionOperators.add(c);
opEventPairs.put(c, event);
}
}
for (Operator<?> c : dynamicPartitionOperators) {
if (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
p.removeChild(c);
// at this point we've found the fork in the op pipeline that has the pruning as a child plan.
LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() + ". Need to be removed together with reduce sink");
}
}
for (Operator<?> op : dynamicPartitionOperators) {
context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
}
}
mapJoinOp.getParentOperators().remove(bigTablePosition);
if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
mapJoinOp.getParentOperators().add(bigTablePosition, parentBigTableOp.getParentOperators().get(0));
}
parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
}
for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
if (!(op.getChildOperators().contains(mapJoinOp))) {
op.getChildOperators().add(mapJoinOp);
}
op.getChildOperators().remove(joinOp);
}
// join which takes place in a separate task.
if (context.parseContext.getRsOpToTsOpMap().size() > 0 && removeReduceSink) {
removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
}
}
return mapJoinOp;
}
Aggregations