Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
Class ReduceSinkMapJoinProc, method processReduceSinkToHashJoin:
public static Object processReduceSinkToHashJoin(ReduceSinkOperator parentRS, MapJoinOperator mapJoinOp, GenTezProcContext context) throws SemanticException {
// remove the tag for in-memory side of mapjoin
parentRS.getConf().setSkipTag(true);
parentRS.setSkipTag(true);
// Mark this small table as being processed
if (mapJoinOp.getConf().isDynamicPartitionHashJoin()) {
context.mapJoinToUnprocessedSmallTableReduceSinks.get(mapJoinOp).remove(parentRS);
}
List<BaseWork> mapJoinWork = null;
/*
* If work was already generated for the big-table side of the mapjoin,
* we need to hook the work generated for this RS (associated with the RS-MJ pattern)
* into that pre-existing work.
*
* Otherwise, we need to remember that the mapjoin op should later be
* linked to the RS work (associated with the RS-MJ pattern).
*/
mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp);
BaseWork parentWork = getMapJoinParentWork(context, parentRS);
// set the link between mapjoin and parent vertex
int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS);
if (pos == -1) {
throw new SemanticException("Cannot find position of parent in mapjoin");
}
MapJoinDesc joinConf = mapJoinOp.getConf();
long keyCount = Long.MAX_VALUE, rowCount = Long.MAX_VALUE, bucketCount = 1;
long tableSize = Long.MAX_VALUE;
Statistics stats = parentRS.getStatistics();
if (stats != null) {
keyCount = rowCount = stats.getNumRows();
if (keyCount <= 0) {
keyCount = rowCount = Long.MAX_VALUE;
}
tableSize = stats.getDataSize();
ArrayList<String> keyCols = parentRS.getConf().getOutputKeyColumnNames();
if (keyCols != null && !keyCols.isEmpty()) {
// See if we can arrive at a smaller number using distinct stats from key columns.
long maxKeyCount = 1;
String prefix = Utilities.ReduceField.KEY.toString();
for (String keyCol : keyCols) {
ExprNodeDesc realCol = parentRS.getColumnExprMap().get(prefix + "." + keyCol);
ColStatistics cs = StatsUtils.getColStatisticsFromExpression(context.conf, stats, realCol);
if (cs == null || cs.getCountDistint() <= 0) {
maxKeyCount = Long.MAX_VALUE;
break;
}
maxKeyCount *= cs.getCountDistint();
if (maxKeyCount >= keyCount) {
break;
}
}
keyCount = Math.min(maxKeyCount, keyCount);
}
if (joinConf.isBucketMapJoin()) {
OpTraits opTraits = mapJoinOp.getOpTraits();
bucketCount = (opTraits == null) ? -1 : opTraits.getNumBuckets();
if (bucketCount > 0) {
// We cannot obtain a better estimate without CustomPartitionVertex providing it
// to us somehow; in which case using statistics would be completely unnecessary.
keyCount /= bucketCount;
tableSize /= bucketCount;
}
} else if (joinConf.isDynamicPartitionHashJoin()) {
// For dynamic partitioned hash join, assuming table is split evenly among the reduce tasks.
bucketCount = parentRS.getConf().getNumReducers();
keyCount /= bucketCount;
tableSize /= bucketCount;
}
}
if (keyCount == 0) {
keyCount = 1;
}
if (tableSize == 0) {
tableSize = 1;
}
LOG.info("Mapjoin " + mapJoinOp + "(bucket map join = " + joinConf.isBucketMapJoin() + "), pos: " + pos + " --> " + parentWork.getName() + " (" + keyCount + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
joinConf.getParentToInput().put(pos, parentWork.getName());
if (keyCount != Long.MAX_VALUE) {
joinConf.getParentKeyCounts().put(pos, keyCount);
}
joinConf.getParentDataSizes().put(pos, tableSize);
int numBuckets = -1;
EdgeType edgeType = EdgeType.BROADCAST_EDGE;
if (joinConf.isBucketMapJoin()) {
numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0];
/*
* Here, we can be in one of 4 states.
*
* 1. If map join work is null implies that we have not yet traversed the big table side. We
* just need to see if we can find a reduce sink operator in the big table side. This would
* imply a reduce side operation.
*
* 2. If we don't find a reducesink in 1 it has to be the case that it is a map side operation.
*
* 3. If we have already created a work item for the big table side, we need to see if we can
* find a table scan operator in the big table side. This would imply a map side operation.
*
* 4. If we don't find a table scan operator, it has to be a reduce side operation.
*/
if (mapJoinWork == null) {
Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()), ReduceSinkOperator.class);
if (rootOp == null) {
// likely we found a table scan operator
edgeType = EdgeType.CUSTOM_EDGE;
} else {
// we have found a reduce sink
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
}
} else {
Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()), TableScanOperator.class);
if (rootOp != null) {
// likely we found a table scan operator
edgeType = EdgeType.CUSTOM_EDGE;
} else {
// we have found a reduce sink
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
}
}
} else if (mapJoinOp.getConf().isDynamicPartitionHashJoin()) {
if (parentRS.getConf().isForwarding()) {
edgeType = EdgeType.ONE_TO_ONE_EDGE;
} else {
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
}
}
if (edgeType == EdgeType.CUSTOM_EDGE) {
// disable auto parallelism for bucket map joins
parentRS.getConf().setReducerTraits(EnumSet.of(FIXED));
}
TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);
if (mapJoinWork != null) {
for (BaseWork myWork : mapJoinWork) {
// link the work with the work associated with the reduce sink that triggered this rule
TezWork tezWork = context.currentTask.getWork();
LOG.debug("connecting " + parentWork.getName() + " with " + myWork.getName());
tezWork.connect(parentWork, myWork, edgeProp);
if (edgeType == EdgeType.CUSTOM_EDGE) {
tezWork.setVertexType(myWork, VertexType.INITIALIZED_EDGES);
}
ReduceSinkOperator r = null;
if (context.connectedReduceSinks.contains(parentRS)) {
LOG.debug("Cloning reduce sink " + parentRS + " for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(parentRS.getCompilationOpContext(), (ReduceSinkDesc) parentRS.getConf().clone(), new RowSchema(parentRS.getSchema()), parentRS.getParentOperators());
context.clonedReduceSinks.add(r);
} else {
r = parentRS;
}
// remember the output name of the reduce sink
r.getConf().setOutputName(myWork.getName());
context.connectedReduceSinks.add(r);
}
}
// remember in case we need to connect additional work later
Map<BaseWork, TezEdgeProperty> linkWorkMap = null;
if (context.linkOpWithWorkMap.containsKey(mapJoinOp)) {
linkWorkMap = context.linkOpWithWorkMap.get(mapJoinOp);
} else {
linkWorkMap = new HashMap<BaseWork, TezEdgeProperty>();
}
linkWorkMap.put(parentWork, edgeProp);
context.linkOpWithWorkMap.put(mapJoinOp, linkWorkMap);
List<ReduceSinkOperator> reduceSinks = context.linkWorkWithReduceSinkMap.get(parentWork);
if (reduceSinks == null) {
reduceSinks = new ArrayList<ReduceSinkOperator>();
}
reduceSinks.add(parentRS);
context.linkWorkWithReduceSinkMap.put(parentWork, reduceSinks);
// create the dummy operators
List<Operator<?>> dummyOperators = new ArrayList<Operator<?>>();
// create a new HashTableDummyOperator, which carries the value table desc of the small-table side
HashTableDummyDesc desc = new HashTableDummyDesc();
@SuppressWarnings("unchecked") HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(parentRS.getCompilationOpContext(), desc);
TableDesc tbl;
// need to create the correct table descriptor for key/value
RowSchema rowSchema = parentRS.getParentOperators().get(0).getSchema();
tbl = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromRowSchema(rowSchema, ""));
dummyOp.getConf().setTbl(tbl);
Map<Byte, List<ExprNodeDesc>> keyExprMap = mapJoinOp.getConf().getKeys();
List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
StringBuilder keyOrder = new StringBuilder();
StringBuilder keyNullOrder = new StringBuilder();
for (ExprNodeDesc k : keyCols) {
keyOrder.append("+");
keyNullOrder.append("a");
}
TableDesc keyTableDesc = PlanUtils.getReduceKeyTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyCols, "mapjoinkey"), keyOrder.toString(), keyNullOrder.toString());
mapJoinOp.getConf().setKeyTableDesc(keyTableDesc);
// let the dummy op be the parent of mapjoin op
mapJoinOp.replaceParent(parentRS, dummyOp);
List<Operator<? extends OperatorDesc>> dummyChildren = new ArrayList<Operator<? extends OperatorDesc>>();
dummyChildren.add(mapJoinOp);
dummyOp.setChildOperators(dummyChildren);
dummyOperators.add(dummyOp);
// cut the operator tree so as to not retain connections from the parent RS downstream
List<Operator<? extends OperatorDesc>> childOperators = parentRS.getChildOperators();
int childIndex = childOperators.indexOf(mapJoinOp);
childOperators.remove(childIndex);
// the work needs to know about the dummy operators; they have to be separately initialized at task startup
if (mapJoinWork != null) {
for (BaseWork myWork : mapJoinWork) {
LOG.debug("adding dummy op to work " + myWork.getName() + " from MJ work: " + dummyOp);
myWork.addDummyOp(dummyOp);
}
}
if (context.linkChildOpWithDummyOp.containsKey(mapJoinOp)) {
for (Operator<?> op : context.linkChildOpWithDummyOp.get(mapJoinOp)) {
dummyOperators.add(op);
}
}
context.linkChildOpWithDummyOp.put(mapJoinOp, dummyOperators);
return true;
}
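The key-count estimate computed above can be read in isolation: start from the reduce sink's row count, try to tighten it with the product of the key columns' distinct-value counts, then divide by the bucket or reducer count when the small table arrives split. Below is a minimal, self-contained sketch of that arithmetic; the ColumnStat record and estimateKeyCount helper are illustrative stand-ins, not Hive classes.

// Hypothetical, self-contained sketch of the key-count estimate used above.
final class KeyCountEstimator {

    /** Distinct-value count for one key column; zero or negative means unknown. */
    record ColumnStat(long countDistinct) {}

    static long estimateKeyCount(long rowCount, java.util.List<ColumnStat> keyStats, long bucketCount) {
        long keyCount = rowCount <= 0 ? Long.MAX_VALUE : rowCount;
        // Try to tighten the estimate with the product of per-column distinct counts.
        long maxKeyCount = 1;
        for (ColumnStat cs : keyStats) {
            if (cs == null || cs.countDistinct() <= 0) {
                maxKeyCount = Long.MAX_VALUE;   // unknown column stats: give up on the product
                break;
            }
            maxKeyCount *= cs.countDistinct();
            if (maxKeyCount >= keyCount) {
                break;                          // the product can only grow; row count is already tighter
            }
        }
        keyCount = Math.min(maxKeyCount, keyCount);
        // When the small table is split across buckets/reducers, each hash table sees a slice.
        if (bucketCount > 0 && keyCount != Long.MAX_VALUE) {
            keyCount /= bucketCount;
        }
        return keyCount == 0 ? 1 : keyCount;
    }

    public static void main(String[] args) {
        // 1M rows, key columns with 1000 and 50 distinct values, 4 reducers -> 12500 keys per hash table.
        System.out.println(estimateKeyCount(1_000_000,
                java.util.List.of(new ColumnStat(1000), new ColumnStat(50)), 4));
    }
}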
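The four-state comment in the bucket-map-join branch reduces to one question: is the big-table side map work (a table scan is reachable, no reduce sink upstream) or reduce work? Combined with the dynamic-partition-hash-join case, the edge-type choice can be restated in a condensed, hedged form; the enum and boolean flags below are local stand-ins for the Hive types and checks.

// Condensed restatement of the edge-type decision above (local stand-ins, not Hive types).
final class EdgeTypeChooser {
    enum EdgeType { BROADCAST_EDGE, CUSTOM_EDGE, CUSTOM_SIMPLE_EDGE, ONE_TO_ONE_EDGE }

    static EdgeType choose(boolean bucketMapJoin, boolean dynamicPartitionHashJoin,
                           boolean bigTableSideIsMapWork, boolean parentRsForwards) {
        if (bucketMapJoin) {
            // map-side big table -> custom (bucketed) edge; reduce-side big table -> custom simple edge
            return bigTableSideIsMapWork ? EdgeType.CUSTOM_EDGE : EdgeType.CUSTOM_SIMPLE_EDGE;
        }
        if (dynamicPartitionHashJoin) {
            return parentRsForwards ? EdgeType.ONE_TO_ONE_EDGE : EdgeType.CUSTOM_SIMPLE_EDGE;
        }
        return EdgeType.BROADCAST_EDGE;        // plain map join: the small table is broadcast
    }

    public static void main(String[] args) {
        System.out.println(choose(true, false, true, false));    // CUSTOM_EDGE
        System.out.println(choose(false, true, false, false));   // CUSTOM_SIMPLE_EDGE
        System.out.println(choose(false, false, false, false));  // BROADCAST_EDGE
    }
}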
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
Class SparkSMBJoinHintOptimizer, method removeSmallTableReduceSink:
/**
* In bucket mapjoin, there are ReduceSinks that mark a small-table parent (ReduceSinks are removed from the big table).
* In an SMB join these are not expected on any parent, whether from the small or the big table.
* @param mapJoinOp
*/
@SuppressWarnings("unchecked")
private void removeSmallTableReduceSink(MapJoinOperator mapJoinOp) {
SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators();
for (int i = 0; i < parentOperators.size(); i++) {
Operator<? extends OperatorDesc> par = parentOperators.get(i);
if (i != smbJoinDesc.getPosBigTable()) {
if (par instanceof ReduceSinkOperator) {
List<Operator<? extends OperatorDesc>> grandParents = par.getParentOperators();
Preconditions.checkArgument(grandParents.size() == 1, "AssertionError: expect # of parents to be 1, but was " + grandParents.size());
Operator<? extends OperatorDesc> grandParent = grandParents.get(0);
grandParent.removeChild(par);
grandParent.setChildOperators(Utilities.makeList(mapJoinOp));
mapJoinOp.getParentOperators().set(i, grandParent);
}
}
}
}
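The splice performed above, removing the small-table ReduceSink and wiring its single grandparent directly to the join, can be illustrated on a tiny stand-alone operator graph; the Op class below is a hypothetical stand-in for Hive's Operator, assuming (as the Preconditions check does) that the ReduceSink has exactly one parent.

import java.util.ArrayList;
import java.util.List;

// Minimal stand-in for an operator DAG node, to illustrate the ReduceSink removal above.
final class Op {
    final String name;
    final List<Op> parents = new ArrayList<>();
    final List<Op> children = new ArrayList<>();
    Op(String name) { this.name = name; }

    /** Drop the intermediate node sitting between its grandparent and 'join' at parent slot i. */
    static void bypass(Op join, int i) {
        Op rs = join.parents.get(i);
        Op grandParent = rs.parents.get(0);     // assumes exactly one parent, as checked in Hive
        grandParent.children.remove(rs);        // grandparent no longer feeds the RS
        grandParent.children.add(join);         // ... it feeds the join directly
        join.parents.set(i, grandParent);       // the join's i-th parent is now the grandparent
    }

    public static void main(String[] args) {
        Op ts = new Op("TS_small"), rs = new Op("RS_small"), join = new Op("SMBJoin");
        ts.children.add(rs); rs.parents.add(ts);
        rs.children.add(join); join.parents.add(rs);
        bypass(join, 0);
        System.out.println(join.parents.get(0).name);   // TS_small
        System.out.println(ts.children.get(0).name);    // SMBJoin
    }
}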
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
Class GenSparkSkewJoinProcessor, method processSkewJoin:
@SuppressWarnings("unchecked")
public static void processSkewJoin(JoinOperator joinOp, Task<? extends Serializable> currTask, ReduceWork reduceWork, ParseContext parseCtx) throws SemanticException {
SparkWork currentWork = ((SparkTask) currTask).getWork();
if (currentWork.getChildren(reduceWork).size() > 0) {
LOG.warn("Skip runtime skew join as the ReduceWork has child work and hasn't been split.");
return;
}
List<Task<? extends Serializable>> children = currTask.getChildTasks();
Path baseTmpDir = parseCtx.getContext().getMRTmpPath();
JoinDesc joinDescriptor = joinOp.getConf();
Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
int numAliases = joinValues.size();
Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
Byte[] tags = joinDescriptor.getTagOrder();
// for each joining table, set dir for big key and small keys properly
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
bigKeysDirMap.put(alias, GenMRSkewJoinProcessor.getBigKeysDir(baseTmpDir, alias));
Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
smallKeysDirMap.put(alias, smallKeysMap);
for (Byte src2 : tags) {
if (!src2.equals(alias)) {
smallKeysMap.put(src2, GenMRSkewJoinProcessor.getSmallKeysDir(baseTmpDir, alias, src2));
}
}
skewJoinJobResultsDir.put(alias, GenMRSkewJoinProcessor.getBigKeysSkewJoinResultDir(baseTmpDir, alias));
}
joinDescriptor.setHandleSkewJoin(true);
joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY));
// create proper table/column desc for spilled tables
TableDesc keyTblDesc = (TableDesc) reduceWork.getKeyDesc().clone();
List<String> joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties());
List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc.getProperties());
Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
// used to create the MapJoinDesc; entries must be in tag order
List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
for (int i = 0; i < tags.length; i++) {
newJoinValueTblDesc.add(null);
}
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
List<ExprNodeDesc> valueCols = joinValues.get(alias);
String colNames = "";
String colTypes = "";
int columnSize = valueCols.size();
List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
boolean first = true;
for (int k = 0; k < columnSize; k++) {
TypeInfo type = valueCols.get(k).getTypeInfo();
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
columnInfos.add(columnInfo);
newValueExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + newColName;
colTypes = colTypes + valueCols.get(k).getTypeString();
}
// the join keys are placed at the end of the spilled table
for (int k = 0; k < joinKeys.size(); k++) {
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + joinKeys.get(k);
colTypes = colTypes + joinKeyTypes.get(k);
ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
columnInfos.add(columnInfo);
newKeyExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
}
newJoinValues.put(alias, newValueExpr);
newJoinKeys.put(alias, newKeyExpr);
tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
rowSchemaList.put(alias, new RowSchema(columnInfos));
// construct value table Desc
String valueColNames = "";
String valueColTypes = "";
first = true;
for (int k = 0; k < columnSize; k++) {
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
if (!first) {
valueColNames = valueColNames + ",";
valueColTypes = valueColTypes + ",";
}
valueColNames = valueColNames + newColName;
valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
first = false;
}
newJoinValueTblDesc.set((byte) i, Utilities.getTableDesc(valueColNames, valueColTypes));
}
joinDescriptor.setSkewKeysValuesTables(tableDescList);
joinDescriptor.setKeyTableDesc(keyTblDesc);
// create N-1 map join tasks
HashMap<Path, Task<? extends Serializable>> bigKeysDirToTaskMap = new HashMap<Path, Task<? extends Serializable>>();
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
for (int i = 0; i < numAliases - 1; i++) {
Byte src = tags[i];
HiveConf hiveConf = new HiveConf(parseCtx.getConf(), GenSparkSkewJoinProcessor.class);
SparkWork sparkWork = new SparkWork(parseCtx.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
Task<? extends Serializable> skewJoinMapJoinTask = TaskFactory.get(sparkWork);
skewJoinMapJoinTask.setFetchSource(currTask.isFetchSource());
// create N TableScans
Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
for (int k = 0; k < tags.length; k++) {
Operator<? extends OperatorDesc> ts = GenMapRedUtils.createTemporaryTableScanOperator(joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k));
((TableScanOperator) ts).setTableDescSkewJoin(tableDescList.get((byte) k));
parentOps[k] = ts;
}
// create the MapJoinOperator
String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, joinDescriptor.getMemoryMonitorInfo(), joinDescriptor.getInMemoryDataSize());
mapJoinDescriptor.setTagOrder(tags);
mapJoinDescriptor.setHandleSkewJoin(false);
mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());
mapJoinDescriptor.setColumnExprMap(joinDescriptor.getColumnExprMap());
// temporarily, mark it as child of all the TS
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(joinOp.getCompilationOpContext(), mapJoinDescriptor, null, parentOps);
// clone the original join operator, and replace it with the MJ
// this makes sure MJ has the same downstream operator plan as the original join
List<Operator<?>> reducerList = new ArrayList<Operator<?>>();
reducerList.add(reduceWork.getReducer());
Operator<? extends OperatorDesc> reducer = SerializationUtilities.cloneOperatorTree(reducerList).get(0);
Preconditions.checkArgument(reducer instanceof JoinOperator, "Reducer should be join operator, but actually is " + reducer.getName());
JoinOperator cloneJoinOp = (JoinOperator) reducer;
List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp.getChildOperators();
for (Operator<? extends OperatorDesc> childOp : childOps) {
childOp.replaceParent(cloneJoinOp, mapJoinOp);
}
mapJoinOp.setChildOperators(childOps);
// set memory usage for the MJ operator
setMemUsage(mapJoinOp, skewJoinMapJoinTask, parseCtx);
// create N MapWorks and add them to the SparkWork
MapWork bigMapWork = null;
Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
for (int j = 0; j < tags.length; j++) {
MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
sparkWork.add(mapWork);
// This code has only been added for testing
boolean mapperCannotSpanPartns = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
mapWork.setMapperCannotSpanPartns(mapperCannotSpanPartns);
Operator<? extends OperatorDesc> tableScan = parentOps[j];
String alias = tags[j].toString();
ArrayList<String> aliases = new ArrayList<String>();
aliases.add(alias);
Path path;
if (j == i) {
path = bigKeysDirMap.get(tags[j]);
bigKeysDirToTaskMap.put(path, skewJoinMapJoinTask);
bigMapWork = mapWork;
} else {
path = smallTblDirs.get(tags[j]);
}
mapWork.addPathToAlias(path, aliases);
mapWork.getAliasToWork().put(alias, tableScan);
PartitionDesc partitionDesc = new PartitionDesc(tableDescList.get(tags[j]), null);
mapWork.addPathToPartitionInfo(path, partitionDesc);
mapWork.getAliasToPartnInfo().put(alias, partitionDesc);
mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
}
// connect all small dir map work to the big dir map work
Preconditions.checkArgument(bigMapWork != null, "Haven't identified big dir MapWork");
// these 2 flags are intended only for the big-key map work
bigMapWork.setNumMapTasks(HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
bigMapWork.setMinSplitSize(HiveConf.getLongVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
// use HiveInputFormat so that we can control the number of map tasks
bigMapWork.setInputformat(HiveInputFormat.class.getName());
for (BaseWork work : sparkWork.getRoots()) {
Preconditions.checkArgument(work instanceof MapWork, "All root work should be MapWork, but got " + work.getClass().getSimpleName());
if (work != bigMapWork) {
sparkWork.connect(work, bigMapWork, new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE));
}
}
// insert SparkHashTableSink and Dummy operators
for (int j = 0; j < tags.length; j++) {
if (j != i) {
insertSHTS(tags[j], (TableScanOperator) parentOps[j], bigMapWork);
}
}
listWorks.add(skewJoinMapJoinTask.getWork());
listTasks.add(skewJoinMapJoinTask);
}
if (children != null) {
for (Task<? extends Serializable> tsk : listTasks) {
for (Task<? extends Serializable> oldChild : children) {
tsk.addDependentTask(oldChild);
}
}
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
for (Task<? extends Serializable> oldChild : children) {
oldChild.getParentTasks().remove(currTask);
}
listTasks.addAll(children);
for (Task<? extends Serializable> oldChild : children) {
listWorks.add(oldChild.getWork());
}
}
ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx context = new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap, children);
ConditionalWork cndWork = new ConditionalWork(listWorks);
ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
cndTsk.setListTasks(listTasks);
cndTsk.setResolver(new ConditionalResolverSkewJoin());
cndTsk.setResolverCtx(context);
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
currTask.addDependentTask(cndTsk);
}
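The directory bookkeeping at the start of processSkewJoin follows a simple pattern: each join alias gets one big-keys directory plus one small-keys directory per other alias, and the conditional resolver later runs only the map-join tasks whose big-keys directory actually received rows. A hedged sketch of just that layout, with hypothetical path names rather than the GenMRSkewJoinProcessor helpers:

import java.util.HashMap;
import java.util.Map;

// Illustrates the skew-join spill directory layout built above (paths are hypothetical).
final class SkewJoinDirs {
    public static void main(String[] args) {
        byte[] tags = {0, 1, 2};                       // one tag per joined table
        String baseTmpDir = "/tmp/hive-staging/skew";  // stands in for the MR tmp path

        Map<Byte, String> bigKeysDirMap = new HashMap<>();
        Map<Byte, Map<Byte, String>> smallKeysDirMap = new HashMap<>();

        for (byte alias : tags) {
            // rows of 'alias' whose key is skewed land here
            bigKeysDirMap.put(alias, baseTmpDir + "/bigkeys-" + alias);
            Map<Byte, String> smallKeys = new HashMap<>();
            for (byte other : tags) {
                if (other != alias) {
                    // matching rows of every other table for those skewed keys land here
                    smallKeys.put(other, baseTmpDir + "/smallkeys-" + alias + "-" + other);
                }
            }
            smallKeysDirMap.put(alias, smallKeys);
        }

        // Each big-keys dir maps to one follow-up map-join task; the conditional
        // resolver only launches the tasks whose directory is non-empty at runtime.
        bigKeysDirMap.forEach((alias, dir) ->
                System.out.println("alias " + alias + " -> " + dir + ", small dirs: " + smallKeysDirMap.get(alias)));
    }
}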
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
Class GenSparkSkewJoinProcessor, method insertSHTS:
/**
* Insert SparkHashTableSink and HashTableDummy between small dir TS and MJ.
*/
@SuppressWarnings("unchecked")
private static void insertSHTS(byte tag, TableScanOperator tableScan, MapWork bigMapWork) {
Preconditions.checkArgument(tableScan.getChildOperators().size() == 1 && tableScan.getChildOperators().get(0) instanceof MapJoinOperator);
HashTableDummyDesc desc = new HashTableDummyDesc();
HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(tableScan.getCompilationOpContext(), desc);
dummyOp.getConf().setTbl(tableScan.getTableDescSkewJoin());
MapJoinOperator mapJoinOp = (MapJoinOperator) tableScan.getChildOperators().get(0);
mapJoinOp.replaceParent(tableScan, dummyOp);
List<Operator<? extends OperatorDesc>> mapJoinChildren = new ArrayList<Operator<? extends OperatorDesc>>();
mapJoinChildren.add(mapJoinOp);
dummyOp.setChildOperators(mapJoinChildren);
bigMapWork.addDummyOp(dummyOp);
MapJoinDesc mjDesc = mapJoinOp.getConf();
// mapjoin should not be affected by join reordering
mjDesc.resetOrder();
SparkHashTableSinkDesc hashTableSinkDesc = new SparkHashTableSinkDesc(mjDesc);
SparkHashTableSinkOperator hashTableSinkOp = (SparkHashTableSinkOperator) OperatorFactory.get(tableScan.getCompilationOpContext(), hashTableSinkDesc);
int[] valueIndex = mjDesc.getValueIndex(tag);
if (valueIndex != null) {
List<ExprNodeDesc> newValues = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> values = hashTableSinkDesc.getExprs().get(tag);
for (int index = 0; index < values.size(); index++) {
if (valueIndex[index] < 0) {
newValues.add(values.get(index));
}
}
hashTableSinkDesc.getExprs().put(tag, newValues);
}
tableScan.replaceChild(mapJoinOp, hashTableSinkOp);
List<Operator<? extends OperatorDesc>> tableScanParents = new ArrayList<Operator<? extends OperatorDesc>>();
tableScanParents.add(tableScan);
hashTableSinkOp.setParentOperators(tableScanParents);
hashTableSinkOp.getConf().setTag(tag);
}
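The valueIndex handling at the end of insertSHTS keeps only value expressions whose index is negative, i.e. values that are not already materialized as key columns in the hash table. A minimal sketch of that filter; the method and the list of column names are illustrative, not the MapJoinDesc API:

import java.util.ArrayList;
import java.util.List;

// Illustrates the value pruning done for the SparkHashTableSink above.
final class ValueIndexPruning {
    /**
     * valueIndex[i] >= 0 means value i is backed by key column valueIndex[i]
     * and need not be stored again; a negative entry means "keep the value".
     */
    static List<String> pruneValues(List<String> values, int[] valueIndex) {
        if (valueIndex == null) {
            return values;                       // no mapping: keep everything
        }
        List<String> kept = new ArrayList<>();
        for (int i = 0; i < values.size(); i++) {
            if (valueIndex[i] < 0) {
                kept.add(values.get(i));         // not covered by a key column
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        // col_b is already key column 0, so only col_a and col_c are stored in the hash table.
        System.out.println(pruneValues(List.of("col_a", "col_b", "col_c"), new int[]{-1, 0, -1}));
    }
}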
Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
Class GenSparkWork, method process:
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
GenSparkProcContext context = (GenSparkProcContext) procContext;
Preconditions.checkArgument(context != null, "AssertionError: expected context to be not null");
Preconditions.checkArgument(context.currentTask != null, "AssertionError: expected context.currentTask to be not null");
Preconditions.checkArgument(context.currentRootOperator != null, "AssertionError: expected context.currentRootOperator to be not null");
// Operator is a file sink or reduce sink. Something that forces a new vertex.
@SuppressWarnings("unchecked") Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd;
// root is the start of the operator pipeline we're currently
// packing into a vertex, typically a table scan, union or join
Operator<?> root = context.currentRootOperator;
LOG.debug("Root operator: " + root);
LOG.debug("Leaf operator: " + operator);
SparkWork sparkWork = context.currentTask.getWork();
SMBMapJoinOperator smbOp = GenSparkUtils.getChildOperator(root, SMBMapJoinOperator.class);
// Right now the work graph is pretty simple. If there is no
// Preceding work we have a root and will generate a map
// vertex. If there is a preceding work we will generate
// a reduce vertex
BaseWork work;
if (context.rootToWorkMap.containsKey(root)) {
// having seen the root operator before means there was a branch in the
// operator graph. There's typically two reasons for that: a) mux/demux
// b) multi insert. Mux/Demux will hit the same leaf again, multi insert
// will result into a vertex with multiple FS or RS operators.
// At this point we don't have to do anything special in this case. Just
// run through the regular paces w/o creating a new task.
work = context.rootToWorkMap.get(root);
} else {
// create a new vertex
if (context.preceedingWork == null) {
if (smbOp == null) {
work = utils.createMapWork(context, root, sparkWork, null);
} else {
// save work to be initialized later with SMB information.
work = utils.createMapWork(context, root, sparkWork, null, true);
context.smbMapJoinCtxMap.get(smbOp).mapWork = (MapWork) work;
}
} else {
work = utils.createReduceWork(context, root, sparkWork);
}
context.rootToWorkMap.put(root, work);
}
if (!context.childToWorkMap.containsKey(operator)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.childToWorkMap.put(operator, workItems);
} else {
context.childToWorkMap.get(operator).add(work);
}
// remember which mapjoin operator links with which work
if (!context.currentMapJoinOperators.isEmpty()) {
for (MapJoinOperator mj : context.currentMapJoinOperators) {
LOG.debug("Processing map join: " + mj);
// remember the mapping in case we scan another branch of the mapjoin later
if (!context.mapJoinWorkMap.containsKey(mj)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.mapJoinWorkMap.put(mj, workItems);
} else {
context.mapJoinWorkMap.get(mj).add(work);
}
/*
* this happens in case of map join operations.
* The tree looks like this:
*
*        RS <--- we are here perhaps
*        |
*     MapJoin
*     /     \
*   RS       TS
*  /
* TS
*
* If we are at the RS pointed to above, we may have already visited the
* RS following the TS and thus already generated work for the TS-RS.
* We need to hook the current work to that generated work.
*/
if (context.linkOpWithWorkMap.containsKey(mj)) {
Map<BaseWork, SparkEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
if (linkWorkMap != null) {
if (context.linkChildOpWithDummyOp.containsKey(mj)) {
for (Operator<?> dummy : context.linkChildOpWithDummyOp.get(mj)) {
work.addDummyOp((HashTableDummyOperator) dummy);
}
}
for (Entry<BaseWork, SparkEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
BaseWork parentWork = parentWorkMap.getKey();
LOG.debug("connecting " + parentWork.getName() + " with " + work.getName());
SparkEdgeProperty edgeProp = parentWorkMap.getValue();
sparkWork.connect(parentWork, work, edgeProp);
// set the output name of the reduce sink now that we know the name of the downstream work
for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(r.getCompilationOpContext(), (ReduceSinkDesc) r.getConf().clone(), r.getParentOperators());
}
r.getConf().setOutputName(work.getName());
}
}
}
}
}
// clear out the set. we don't need it anymore.
context.currentMapJoinOperators.clear();
}
// if the root has parents, they must be reduce sinks: record the edge info for each and disconnect them from this root operator.
if (root.getNumParent() > 0) {
Preconditions.checkArgument(work instanceof ReduceWork, "AssertionError: expected work to be a ReduceWork, but was " + work.getClass().getName());
ReduceWork reduceWork = (ReduceWork) work;
for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
Preconditions.checkArgument(parent instanceof ReduceSinkOperator, "AssertionError: expected operator to be a ReduceSinkOperator, but was " + parent.getClass().getName());
ReduceSinkOperator rsOp = (ReduceSinkOperator) parent;
SparkEdgeProperty edgeProp = GenSparkUtils.getEdgeProperty(context.conf, rsOp, reduceWork);
rsOp.getConf().setOutputName(reduceWork.getName());
GenMapRedUtils.setKeyAndValueDesc(reduceWork, rsOp);
context.leafOpToFollowingWorkInfo.put(rsOp, ObjectPair.create(edgeProp, reduceWork));
LOG.debug("Removing " + parent + " as parent from " + root);
root.removeParent(parent);
}
}
// remember works that contain union operators so we can remove the union operators from the operator tree later.
if (!context.currentUnionOperators.isEmpty()) {
context.currentUnionOperators.clear();
context.workWithUnionOperators.add(work);
}
// Note: the concept of leaf and root is reversed in hive for historical reasons. Roots are data sources, leaves are data sinks.
if (context.leafOpToFollowingWorkInfo.containsKey(operator)) {
ObjectPair<SparkEdgeProperty, ReduceWork> childWorkInfo = context.leafOpToFollowingWorkInfo.get(operator);
SparkEdgeProperty edgeProp = childWorkInfo.getFirst();
ReduceWork childWork = childWorkInfo.getSecond();
LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + childWork);
// we don't want to connect them with the work associated with TS more than once.
if (sparkWork.getEdgeProperty(work, childWork) == null) {
sparkWork.connect(work, childWork, edgeProp);
} else {
LOG.debug("work " + work.getName() + " is already connected to " + childWork.getName() + " before");
}
} else {
LOG.debug("First pass. Leaf operator: " + operator);
}
// if there are more operators to scan, the next item will be a new root, with the current operator as its parent.
if (!operator.getChildOperators().isEmpty()) {
Preconditions.checkArgument(operator.getChildOperators().size() == 1, "AssertionError: expected operator.getChildOperators().size() to be 1, but was " + operator.getChildOperators().size());
context.parentOfRoot = operator;
context.currentRootOperator = operator.getChildOperators().get(0);
context.preceedingWork = work;
}
return null;
}
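The leafOpToFollowingWorkInfo map above drives a two-pass connection: when a ReduceSink's downstream ReduceWork is created first, the edge property is parked on the RS, and when the work containing that RS is visited later, the parked edge is used to connect the two vertices (skipping duplicates). A stripped-down sketch of that idea with hypothetical Work/Edge/Pending types, not the Hive classes:

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the deferred vertex connection driven by leafOpToFollowingWorkInfo.
final class DeferredEdges {
    record Work(String name) {}
    record Edge(String shuffleType) {}
    record Pending(Edge edge, Work childWork) {}

    // keyed by the leaf (reduce sink) operator id in this toy version
    static final Map<String, Pending> leafToFollowingWork = new HashMap<>();
    static final Map<Work, Map<Work, Edge>> graph = new HashMap<>();

    /** First pass: the child ReduceWork exists, but the parent work does not yet. */
    static void rememberFollowingWork(String leafOp, Edge edge, Work childWork) {
        leafToFollowingWork.put(leafOp, new Pending(edge, childWork));
    }

    /** Second pass: the parent work is created; connect it if a pending edge was parked. */
    static void finishWork(String leafOp, Work work) {
        Pending p = leafToFollowingWork.get(leafOp);
        if (p != null && !graph.getOrDefault(work, Map.of()).containsKey(p.childWork())) {
            graph.computeIfAbsent(work, w -> new HashMap<>()).put(p.childWork(), p.edge());
        }
    }

    public static void main(String[] args) {
        Work reducer = new Work("Reducer 2");
        rememberFollowingWork("RS_7", new Edge("SHUFFLE_GROUP"), reducer);
        finishWork("RS_7", new Work("Map 1"));
        System.out.println(graph);   // {Work[name=Map 1]={Work[name=Reducer 2]=Edge[shuffleType=SHUFFLE_GROUP]}}
    }
}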