use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
the class SparkMapJoinOptimizer method process.
@Override
public /**
* We should ideally not modify the tree we traverse. However,
* since we need to walk the tree at any time when we modify the operator, we
* might as well do it here.
*/
Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
HiveConf conf = context.getConf();
JoinOperator joinOp = (JoinOperator) nd;
if (!conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
return null;
}
LOG.info("Check if operator " + joinOp + " can be converted to map join");
long[] mapJoinInfo = getMapJoinConversionInfo(joinOp, context);
int mapJoinConversionPos = (int) mapJoinInfo[0];
if (mapJoinConversionPos < 0) {
return null;
}
int numBuckets = -1;
List<List<String>> bucketColNames = null;
LOG.info("Convert to non-bucketed map join");
MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
// but changing SerDe won't hurt correctness
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED) && conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
mapJoinOp.getConf().getKeyTblDesc().getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, BinarySortableSerDe.class.getName());
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) {
LOG.info("Check if it can be converted to bucketed map join");
numBuckets = convertJoinBucketMapJoin(joinOp, mapJoinOp, context, mapJoinConversionPos);
if (numBuckets > 1) {
LOG.info("Converted to map join with " + numBuckets + " buckets");
bucketColNames = joinOp.getOpTraits().getBucketColNames();
mapJoinInfo[2] /= numBuckets;
} else {
LOG.info("Can not convert to bucketed map join");
}
}
// we can set the traits for this join operator
OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(opTraits);
mapJoinOp.setStatistics(joinOp.getStatistics());
setNumberOfBucketsOnChildren(mapJoinOp);
context.getMjOpSizes().put(mapJoinOp, mapJoinInfo[1] + mapJoinInfo[2]);
return mapJoinOp;
}
use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
the class SparkReduceSinkMapJoinProc method process.
/* (non-Javadoc)
* This processor addresses the RS-MJ case that occurs in spark on the small/hash
* table side of things. The work that RS will be a part of must be connected
* to the MJ work via be a broadcast edge.
* We should not walk down the tree when we encounter this pattern because:
* the type of work (map work or reduce work) needs to be determined
* on the basis of the big table side because it may be a mapwork (no need for shuffle)
* or reduce work.
*/
@SuppressWarnings("unchecked")
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
GenSparkProcContext context = (GenSparkProcContext) procContext;
if (!nd.getClass().equals(MapJoinOperator.class)) {
return null;
}
MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
if (stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator)) {
context.currentMapJoinOperators.add(mapJoinOp);
return null;
}
context.preceedingWork = null;
context.currentRootOperator = null;
ReduceSinkOperator parentRS = (ReduceSinkOperator) stack.get(stack.size() - 2);
// remove the tag for in-memory side of mapjoin
parentRS.getConf().setSkipTag(true);
parentRS.setSkipTag(true);
// remember the original parent list before we start modifying it.
if (!context.mapJoinParentMap.containsKey(mapJoinOp)) {
List<Operator<?>> parents = new ArrayList<Operator<?>>(mapJoinOp.getParentOperators());
context.mapJoinParentMap.put(mapJoinOp, parents);
}
List<BaseWork> mapJoinWork;
/*
* If there was a pre-existing work generated for the big-table mapjoin side,
* we need to hook the work generated for the RS (associated with the RS-MJ pattern)
* with the pre-existing work.
*
* Otherwise, we need to associate that the mapjoin op
* to be linked to the RS work (associated with the RS-MJ pattern).
*
*/
mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp);
int workMapSize = context.childToWorkMap.get(parentRS).size();
Preconditions.checkArgument(workMapSize == 1, "AssertionError: expected context.childToWorkMap.get(parentRS).size() to be 1, but was " + workMapSize);
BaseWork parentWork = context.childToWorkMap.get(parentRS).get(0);
// set the link between mapjoin and parent vertex
int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS);
if (pos == -1) {
throw new SemanticException("Cannot find position of parent in mapjoin");
}
LOG.debug("Mapjoin " + mapJoinOp + ", pos: " + pos + " --> " + parentWork.getName());
mapJoinOp.getConf().getParentToInput().put(pos, parentWork.getName());
SparkEdgeProperty edgeProp = new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE);
if (mapJoinWork != null) {
for (BaseWork myWork : mapJoinWork) {
// link the work with the work associated with the reduce sink that triggered this rule
SparkWork sparkWork = context.currentTask.getWork();
LOG.debug("connecting " + parentWork.getName() + " with " + myWork.getName());
sparkWork.connect(parentWork, myWork, edgeProp);
}
}
// remember in case we need to connect additional work later
Map<BaseWork, SparkEdgeProperty> linkWorkMap = null;
if (context.linkOpWithWorkMap.containsKey(mapJoinOp)) {
linkWorkMap = context.linkOpWithWorkMap.get(mapJoinOp);
} else {
linkWorkMap = new HashMap<BaseWork, SparkEdgeProperty>();
}
linkWorkMap.put(parentWork, edgeProp);
context.linkOpWithWorkMap.put(mapJoinOp, linkWorkMap);
List<ReduceSinkOperator> reduceSinks = context.linkWorkWithReduceSinkMap.get(parentWork);
if (reduceSinks == null) {
reduceSinks = new ArrayList<ReduceSinkOperator>();
}
reduceSinks.add(parentRS);
context.linkWorkWithReduceSinkMap.put(parentWork, reduceSinks);
// create the dummy operators
List<Operator<?>> dummyOperators = new ArrayList<Operator<?>>();
// create an new operator: HashTableDummyOperator, which share the table desc
HashTableDummyDesc desc = new HashTableDummyDesc();
HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(mapJoinOp.getCompilationOpContext(), desc);
TableDesc tbl;
// need to create the correct table descriptor for key/value
RowSchema rowSchema = parentRS.getParentOperators().get(0).getSchema();
tbl = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromRowSchema(rowSchema, ""));
dummyOp.getConf().setTbl(tbl);
Map<Byte, List<ExprNodeDesc>> keyExprMap = mapJoinOp.getConf().getKeys();
List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
StringBuilder keyOrder = new StringBuilder();
StringBuilder keyNullOrder = new StringBuilder();
for (int i = 0; i < keyCols.size(); i++) {
keyOrder.append("+");
keyNullOrder.append("a");
}
TableDesc keyTableDesc = PlanUtils.getReduceKeyTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyCols, "mapjoinkey"), keyOrder.toString(), keyNullOrder.toString());
mapJoinOp.getConf().setKeyTableDesc(keyTableDesc);
// let the dummy op be the parent of mapjoin op
mapJoinOp.replaceParent(parentRS, dummyOp);
List<Operator<? extends OperatorDesc>> dummyChildren = new ArrayList<Operator<? extends OperatorDesc>>();
dummyChildren.add(mapJoinOp);
dummyOp.setChildOperators(dummyChildren);
dummyOperators.add(dummyOp);
// cut the operator tree so as to not retain connections from the parent RS downstream
List<Operator<? extends OperatorDesc>> childOperators = parentRS.getChildOperators();
int childIndex = childOperators.indexOf(mapJoinOp);
childOperators.remove(childIndex);
// at task startup
if (mapJoinWork != null) {
for (BaseWork myWork : mapJoinWork) {
myWork.addDummyOp(dummyOp);
}
}
if (context.linkChildOpWithDummyOp.containsKey(mapJoinOp)) {
for (Operator<?> op : context.linkChildOpWithDummyOp.get(mapJoinOp)) {
dummyOperators.add(op);
}
}
context.linkChildOpWithDummyOp.put(mapJoinOp, dummyOperators);
// replace ReduceSinkOp with HashTableSinkOp for the RSops which are parents of MJop
MapJoinDesc mjDesc = mapJoinOp.getConf();
HiveConf conf = context.conf;
// Unlike in MR, we may call this method multiple times, for each
// small table HTS. But, since it's idempotent, it should be OK.
mjDesc.resetOrder();
float hashtableMemoryUsage;
if (hasGroupBy(mapJoinOp, context)) {
hashtableMemoryUsage = conf.getFloatVar(HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
} else {
hashtableMemoryUsage = conf.getFloatVar(HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
}
mjDesc.setHashTableMemoryUsage(hashtableMemoryUsage);
SparkHashTableSinkDesc hashTableSinkDesc = new SparkHashTableSinkDesc(mjDesc);
SparkHashTableSinkOperator hashTableSinkOp = (SparkHashTableSinkOperator) OperatorFactory.get(mapJoinOp.getCompilationOpContext(), hashTableSinkDesc);
byte tag = (byte) pos;
int[] valueIndex = mjDesc.getValueIndex(tag);
if (valueIndex != null) {
List<ExprNodeDesc> newValues = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> values = hashTableSinkDesc.getExprs().get(tag);
for (int index = 0; index < values.size(); index++) {
if (valueIndex[index] < 0) {
newValues.add(values.get(index));
}
}
hashTableSinkDesc.getExprs().put(tag, newValues);
}
// get all parents of reduce sink
List<Operator<? extends OperatorDesc>> rsParentOps = parentRS.getParentOperators();
for (Operator<? extends OperatorDesc> parent : rsParentOps) {
parent.replaceChild(parentRS, hashTableSinkOp);
}
hashTableSinkOp.setParentOperators(rsParentOps);
hashTableSinkOp.getConf().setTag(tag);
return true;
}
use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
the class SparkSkewJoinProcFactory method splitTask.
/**
* If the join is not in a leaf ReduceWork, the spark task has to be split into 2 tasks.
*/
private static void splitTask(SparkTask currentTask, ReduceWork reduceWork, ParseContext parseContext) throws SemanticException {
SparkWork currentWork = currentTask.getWork();
Set<Operator<?>> reduceSinkSet = OperatorUtils.getOp(reduceWork, ReduceSinkOperator.class);
if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork) && reduceSinkSet.size() == 1) {
ReduceSinkOperator reduceSink = (ReduceSinkOperator) reduceSinkSet.iterator().next();
BaseWork childWork = currentWork.getChildren(reduceWork).get(0);
SparkEdgeProperty originEdge = currentWork.getEdgeProperty(reduceWork, childWork);
// disconnect the reduce work from its child. this should produce two isolated sub graphs
currentWork.disconnect(reduceWork, childWork);
// move works following the current reduce work into a new spark work
SparkWork newWork = new SparkWork(parseContext.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
newWork.add(childWork);
copyWorkGraph(currentWork, newWork, childWork);
// remove them from current spark work
for (BaseWork baseWork : newWork.getAllWorkUnsorted()) {
currentWork.remove(baseWork);
currentWork.getCloneToWork().remove(baseWork);
}
// create TS to read intermediate data
Context baseCtx = parseContext.getContext();
Path taskTmpDir = baseCtx.getMRTmpPath();
Operator<? extends OperatorDesc> rsParent = reduceSink.getParentOperators().get(0);
TableDesc tableDesc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(rsParent.getSchema(), "temporarycol"));
// this will insert FS and TS between the RS and its parent
TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(rsParent, reduceSink, taskTmpDir, tableDesc, parseContext);
// create new MapWork
MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
newWork.add(mapWork);
newWork.connect(mapWork, childWork, originEdge);
// setup the new map work
String streamDesc = taskTmpDir.toUri().toString();
if (GenMapRedUtils.needsTagging((ReduceWork) childWork)) {
Operator<? extends OperatorDesc> childReducer = ((ReduceWork) childWork).getReducer();
String id = null;
if (childReducer instanceof JoinOperator) {
if (parseContext.getJoinOps().contains(childReducer)) {
id = ((JoinOperator) childReducer).getConf().getId();
}
} else if (childReducer instanceof MapJoinOperator) {
if (parseContext.getMapJoinOps().contains(childReducer)) {
id = ((MapJoinOperator) childReducer).getConf().getId();
}
} else if (childReducer instanceof SMBMapJoinOperator) {
if (parseContext.getSmbMapJoinOps().contains(childReducer)) {
id = ((SMBMapJoinOperator) childReducer).getConf().getId();
}
}
if (id != null) {
streamDesc = id + ":$INTNAME";
} else {
streamDesc = "$INTNAME";
}
String origStreamDesc = streamDesc;
int pos = 0;
while (mapWork.getAliasToWork().get(streamDesc) != null) {
streamDesc = origStreamDesc.concat(String.valueOf(++pos));
}
}
GenMapRedUtils.setTaskPlan(taskTmpDir, streamDesc, tableScanOp, mapWork, false, tableDesc);
// insert the new task between current task and its child
@SuppressWarnings("unchecked") Task<? extends Serializable> newTask = TaskFactory.get(newWork);
List<Task<? extends Serializable>> childTasks = currentTask.getChildTasks();
// must have at most one child
if (childTasks != null && childTasks.size() > 0) {
Task<? extends Serializable> childTask = childTasks.get(0);
currentTask.removeDependentTask(childTask);
newTask.addDependentTask(childTask);
}
currentTask.addDependentTask(newTask);
newTask.setFetchSource(currentTask.isFetchSource());
}
}
use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
the class GenMRSkewJoinProcessor method processSkewJoin.
/**
* Create tasks for processing skew joins. The idea is (HIVE-964) to use
* separated jobs and map-joins to handle skew joins.
* <p>
* <ul>
* <li>
* Number of mr jobs to handle skew keys is the number of table minus 1 (we
* can stream the last table, so big keys in the last table will not be a
* problem).
* <li>
* At runtime in Join, we output big keys in one table into one corresponding
* directories, and all same keys in other tables into different dirs(one for
* each table). The directories will look like:
* <ul>
* <li>
* dir-T1-bigkeys(containing big keys in T1), dir-T2-keys(containing keys
* which is big in T1),dir-T3-keys(containing keys which is big in T1), ...
* <li>
* dir-T1-keys(containing keys which is big in T2), dir-T2-bigkeys(containing
* big keys in T2),dir-T3-keys(containing keys which is big in T2), ...
* <li>
* dir-T1-keys(containing keys which is big in T3), dir-T2-keys(containing big
* keys in T3),dir-T3-bigkeys(containing keys which is big in T3), ... .....
* </ul>
* </ul>
* For each table, we launch one mapjoin job, taking the directory containing
* big keys in this table and corresponding dirs in other tables as input.
* (Actally one job for one row in the above.)
*
* <p>
* For more discussions, please check
* https://issues.apache.org/jira/browse/HIVE-964.
*/
@SuppressWarnings("unchecked")
public static void processSkewJoin(JoinOperator joinOp, Task<? extends Serializable> currTask, ParseContext parseCtx) throws SemanticException {
// now does not work with outer joins
if (!GenMRSkewJoinProcessor.skewJoinEnabled(parseCtx.getConf(), joinOp)) {
return;
}
List<Task<? extends Serializable>> children = currTask.getChildTasks();
Path baseTmpDir = parseCtx.getContext().getMRTmpPath();
JoinDesc joinDescriptor = joinOp.getConf();
Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
int numAliases = joinValues.size();
Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
Byte[] tags = joinDescriptor.getTagOrder();
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
bigKeysDirMap.put(alias, getBigKeysDir(baseTmpDir, alias));
Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
smallKeysDirMap.put(alias, smallKeysMap);
for (Byte src2 : tags) {
if (!src2.equals(alias)) {
smallKeysMap.put(src2, getSmallKeysDir(baseTmpDir, alias, src2));
}
}
skewJoinJobResultsDir.put(alias, getBigKeysSkewJoinResultDir(baseTmpDir, alias));
}
joinDescriptor.setHandleSkewJoin(true);
joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY));
HashMap<Path, Task<? extends Serializable>> bigKeysDirToTaskMap = new HashMap<Path, Task<? extends Serializable>>();
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
MapredWork currPlan = (MapredWork) currTask.getWork();
TableDesc keyTblDesc = (TableDesc) currPlan.getReduceWork().getKeyDesc().clone();
List<String> joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties());
List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc.getProperties());
Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
// used for create mapJoinDesc, should be in order
List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
for (Byte tag : tags) {
newJoinValueTblDesc.add(null);
}
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
List<ExprNodeDesc> valueCols = joinValues.get(alias);
String colNames = "";
String colTypes = "";
int columnSize = valueCols.size();
List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
boolean first = true;
for (int k = 0; k < columnSize; k++) {
TypeInfo type = valueCols.get(k).getTypeInfo();
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
columnInfos.add(columnInfo);
newValueExpr.add(new ExprNodeColumnDesc(columnInfo));
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + newColName;
colTypes = colTypes + valueCols.get(k).getTypeString();
}
// we are putting join keys at last part of the spilled table
for (int k = 0; k < joinKeys.size(); k++) {
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + joinKeys.get(k);
colTypes = colTypes + joinKeyTypes.get(k);
ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
columnInfos.add(columnInfo);
newKeyExpr.add(new ExprNodeColumnDesc(columnInfo));
}
newJoinValues.put(alias, newValueExpr);
newJoinKeys.put(alias, newKeyExpr);
tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
rowSchemaList.put(alias, new RowSchema(columnInfos));
// construct value table Desc
String valueColNames = "";
String valueColTypes = "";
first = true;
for (int k = 0; k < columnSize; k++) {
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
if (!first) {
valueColNames = valueColNames + ",";
valueColTypes = valueColTypes + ",";
}
valueColNames = valueColNames + newColName;
valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
first = false;
}
newJoinValueTblDesc.set(Byte.valueOf((byte) i), Utilities.getTableDesc(valueColNames, valueColTypes));
}
joinDescriptor.setSkewKeysValuesTables(tableDescList);
joinDescriptor.setKeyTableDesc(keyTblDesc);
for (int i = 0; i < numAliases - 1; i++) {
Byte src = tags[i];
MapWork newPlan = PlanUtils.getMapRedWork().getMapWork();
// This code has been only added for testing
boolean mapperCannotSpanPartns = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
newPlan.setMapperCannotSpanPartns(mapperCannotSpanPartns);
MapredWork clonePlan = SerializationUtilities.clonePlan(currPlan);
Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
for (int k = 0; k < tags.length; k++) {
Operator<? extends OperatorDesc> ts = GenMapRedUtils.createTemporaryTableScanOperator(joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k));
((TableScanOperator) ts).setTableDescSkewJoin(tableDescList.get((byte) k));
parentOps[k] = ts;
}
Operator<? extends OperatorDesc> tblScan_op = parentOps[i];
ArrayList<String> aliases = new ArrayList<String>();
String alias = src.toString().intern();
aliases.add(alias);
Path bigKeyDirPath = bigKeysDirMap.get(src);
newPlan.addPathToAlias(bigKeyDirPath, aliases);
newPlan.getAliasToWork().put(alias, tblScan_op);
PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);
newPlan.addPathToPartitionInfo(bigKeyDirPath, part);
newPlan.getAliasToPartnInfo().put(alias, part);
Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
assert reducer instanceof JoinOperator;
JoinOperator cloneJoinOp = (JoinOperator) reducer;
String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, joinDescriptor.getMemoryMonitorInfo(), joinDescriptor.getInMemoryDataSize());
mapJoinDescriptor.setTagOrder(tags);
mapJoinDescriptor.setHandleSkewJoin(false);
mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());
mapJoinDescriptor.setColumnExprMap(joinDescriptor.getColumnExprMap());
MapredLocalWork localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
for (int j = 0; j < numAliases; j++) {
if (j == i) {
continue;
}
Byte small_alias = tags[j];
Operator<? extends OperatorDesc> tblScan_op2 = parentOps[j];
localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
Path tblDir = smallTblDirs.get(small_alias);
localPlan.getAliasToFetchWork().put(small_alias.toString(), new FetchWork(tblDir, tableDescList.get(small_alias)));
}
newPlan.setMapRedLocalWork(localPlan);
// construct a map join and set it as the child operator of tblScan_op
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(joinOp.getCompilationOpContext(), mapJoinDescriptor, (RowSchema) null, parentOps);
// change the children of the original join operator to point to the map
// join operator
List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp.getChildOperators();
for (Operator<? extends OperatorDesc> childOp : childOps) {
childOp.replaceParent(cloneJoinOp, mapJoinOp);
}
mapJoinOp.setChildOperators(childOps);
HiveConf jc = new HiveConf(parseCtx.getConf(), GenMRSkewJoinProcessor.class);
newPlan.setNumMapTasks(HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
newPlan.setMinSplitSize(HiveConf.getLongVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
newPlan.setInputformat(HiveInputFormat.class.getName());
MapredWork w = new MapredWork();
w.setMapWork(newPlan);
Task<? extends Serializable> skewJoinMapJoinTask = TaskFactory.get(w);
skewJoinMapJoinTask.setFetchSource(currTask.isFetchSource());
bigKeysDirToTaskMap.put(bigKeyDirPath, skewJoinMapJoinTask);
listWorks.add(skewJoinMapJoinTask.getWork());
listTasks.add(skewJoinMapJoinTask);
}
if (children != null) {
for (Task<? extends Serializable> tsk : listTasks) {
for (Task<? extends Serializable> oldChild : children) {
tsk.addDependentTask(oldChild);
}
}
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
for (Task<? extends Serializable> oldChild : children) {
oldChild.getParentTasks().remove(currTask);
}
listTasks.addAll(children);
}
ConditionalResolverSkewJoinCtx context = new ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap, children);
ConditionalWork cndWork = new ConditionalWork(listWorks);
ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
cndTsk.setListTasks(listTasks);
cndTsk.setResolver(new ConditionalResolverSkewJoin());
cndTsk.setResolverCtx(context);
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
currTask.addDependentTask(cndTsk);
return;
}
use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.
the class GenTezWork method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
GenTezProcContext context = (GenTezProcContext) procContext;
assert context != null && context.currentTask != null && context.currentRootOperator != null;
// Operator is a file sink or reduce sink. Something that forces
// a new vertex.
Operator<?> operator = (Operator<?>) nd;
// root is the start of the operator pipeline we're currently
// packing into a vertex, typically a table scan, union or join
Operator<?> root = context.currentRootOperator;
LOG.debug("Root operator: " + root);
LOG.debug("Leaf operator: " + operator);
if (context.clonedReduceSinks.contains(operator)) {
// just skip and keep going
return null;
}
TezWork tezWork = context.currentTask.getWork();
// Right now the work graph is pretty simple. If there is no
// Preceding work we have a root and will generate a map
// vertex. If there is a preceding work we will generate
// a reduce vertex
BaseWork work;
if (context.rootToWorkMap.containsKey(root)) {
// will result into a vertex with multiple FS or RS operators.
if (context.childToWorkMap.containsKey(operator)) {
// if we've seen both root and child, we can bail.
// clear out the mapjoin set. we don't need it anymore.
context.currentMapJoinOperators.clear();
// clear out the union set. we don't need it anymore.
context.currentUnionOperators.clear();
return null;
} else {
// At this point we don't have to do anything special. Just
// run through the regular paces w/o creating a new task.
work = context.rootToWorkMap.get(root);
}
} else {
// create a new vertex
if (context.preceedingWork == null) {
work = utils.createMapWork(context, root, tezWork, null);
} else {
work = GenTezUtils.createReduceWork(context, root, tezWork);
}
context.rootToWorkMap.put(root, work);
}
// this is where we set the sort columns that we will be using for KeyValueInputMerge
if (operator instanceof DummyStoreOperator) {
work.addSortCols(root.getOpTraits().getSortCols().get(0));
}
if (!context.childToWorkMap.containsKey(operator)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.childToWorkMap.put(operator, workItems);
} else {
context.childToWorkMap.get(operator).add(work);
}
// which can affect the working of all downstream transformations.
if (context.currentMergeJoinOperator != null) {
// we are currently walking the big table side of the merge join. we need to create or hook up
// merge join work.
MergeJoinWork mergeJoinWork = null;
if (context.opMergeJoinWorkMap.containsKey(context.currentMergeJoinOperator)) {
// we have found a merge work corresponding to this closing operator. Hook up this work.
mergeJoinWork = context.opMergeJoinWorkMap.get(context.currentMergeJoinOperator);
} else {
// we need to create the merge join work
mergeJoinWork = new MergeJoinWork();
mergeJoinWork.setMergeJoinOperator(context.currentMergeJoinOperator);
tezWork.add(mergeJoinWork);
context.opMergeJoinWorkMap.put(context.currentMergeJoinOperator, mergeJoinWork);
}
// connect the work correctly.
work.addSortCols(root.getOpTraits().getSortCols().get(0));
mergeJoinWork.addMergedWork(work, null, context.leafOperatorToFollowingWork);
Operator<? extends OperatorDesc> parentOp = getParentFromStack(context.currentMergeJoinOperator, stack);
// Set the big table position. Both the reduce work and merge join operator
// should be set with the same value.
// int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp);
int pos = context.currentMergeJoinOperator.getConf().getBigTablePosition();
work.setTag(pos);
context.currentMergeJoinOperator.getConf().setBigTablePosition(pos);
tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
}
for (BaseWork childWork : tezWork.getChildren(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork);
tezWork.disconnect(work, childWork);
tezWork.connect(mergeJoinWork, childWork, edgeProp);
}
tezWork.remove(work);
context.rootToWorkMap.put(root, mergeJoinWork);
context.childToWorkMap.get(operator).remove(work);
context.childToWorkMap.get(operator).add(mergeJoinWork);
work = mergeJoinWork;
context.currentMergeJoinOperator = null;
}
// remember which mapjoin operator links with which work
if (!context.currentMapJoinOperators.isEmpty()) {
for (MapJoinOperator mj : context.currentMapJoinOperators) {
// so we can later run the same logic that is run in ReduceSinkMapJoinProc.
if (mj.getConf().isDynamicPartitionHashJoin()) {
// Since this is a dynamic partitioned hash join, the work for this join should be a ReduceWork
ReduceWork reduceWork = (ReduceWork) work;
int bigTablePosition = mj.getConf().getPosBigTable();
reduceWork.setTag(bigTablePosition);
// Use context.mapJoinParentMap to get the original RS parents, because
// the MapJoin's parents may have been replaced by dummy operator.
List<Operator<?>> mapJoinOriginalParents = context.mapJoinParentMap.get(mj);
if (mapJoinOriginalParents == null) {
throw new SemanticException("Unexpected error - context.mapJoinParentMap did not have an entry for " + mj);
}
for (int pos = 0; pos < mapJoinOriginalParents.size(); ++pos) {
// This processing only needs to happen for the small tables
if (pos == bigTablePosition) {
continue;
}
Operator<?> parentOp = mapJoinOriginalParents.get(pos);
context.smallTableParentToMapJoinMap.put(parentOp, mj);
ReduceSinkOperator parentRS = (ReduceSinkOperator) parentOp;
// TableDesc needed for dynamic partitioned hash join
GenMapRedUtils.setKeyAndValueDesc(reduceWork, parentRS);
// has its ReduceSink parent removed.
if (!context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(parentRS)) {
// This reduce sink has been processed already, so the work for the parentRS exists
BaseWork parentWork = ReduceSinkMapJoinProc.getMapJoinParentWork(context, parentRS);
int tag = parentRS.getConf().getTag();
tag = (tag == -1 ? 0 : tag);
reduceWork.getTagToInput().put(tag, parentWork.getName());
}
}
}
LOG.debug("Processing map join: " + mj);
// mapjoin later
if (!context.mapJoinWorkMap.containsKey(mj)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.mapJoinWorkMap.put(mj, workItems);
} else {
context.mapJoinWorkMap.get(mj).add(work);
}
/*
* this happens in case of map join operations.
* The tree looks like this:
*
* RS <--- we are here perhaps
* |
* MapJoin
* / \
* RS TS
* /
* TS
*
* If we are at the RS pointed above, and we may have already visited the
* RS following the TS, we have already generated work for the TS-RS.
* We need to hook the current work to this generated work.
*/
if (context.linkOpWithWorkMap.containsKey(mj)) {
Map<BaseWork, TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
if (linkWorkMap != null) {
// Note: it's not quite clear why this is done inside this if. Seems like it should be on the top level.
if (context.linkChildOpWithDummyOp.containsKey(mj)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding dummy ops to work: " + work.getName() + ": " + context.linkChildOpWithDummyOp.get(mj));
}
for (Operator<?> dummy : context.linkChildOpWithDummyOp.get(mj)) {
work.addDummyOp((HashTableDummyOperator) dummy);
}
}
for (Entry<BaseWork, TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
BaseWork parentWork = parentWorkMap.getKey();
LOG.debug("connecting " + parentWork.getName() + " with " + work.getName());
TezEdgeProperty edgeProp = parentWorkMap.getValue();
tezWork.connect(parentWork, work, edgeProp);
if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) {
tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES);
}
// of the downstream work
for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
if (!context.mapJoinParentMap.get(mj).contains(r)) {
// already connected this RS operator or we will connect it at subsequent pass.
continue;
}
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink " + r + " for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(r.getCompilationOpContext(), (ReduceSinkDesc) r.getConf().clone(), new RowSchema(r.getSchema()), r.getParentOperators());
context.clonedReduceSinks.add(r);
}
r.getConf().setOutputName(work.getName());
context.connectedReduceSinks.add(r);
}
}
}
}
}
// clear out the set. we don't need it anymore.
context.currentMapJoinOperators.clear();
}
// we might have to connect parent work with this work later.
for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing " + parent + " as parent from " + root);
}
context.leafOperatorToFollowingWork.remove(parent);
context.leafOperatorToFollowingWork.put(parent, work);
root.removeParent(parent);
}
if (!context.currentUnionOperators.isEmpty()) {
// if there are union all operators, it means that the walking context contains union all operators.
// please see more details of context.currentUnionOperator in GenTezWorkWalker
UnionWork unionWork;
if (context.unionWorkMap.containsKey(operator)) {
// since we've passed this operator before.
assert operator.getChildOperators().isEmpty();
unionWork = (UnionWork) context.unionWorkMap.get(operator);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
} else {
// we've not seen this terminal before. we need to check
// rootUnionWorkMap which contains the information of mapping the root
// operator of a union work to a union work
unionWork = context.rootUnionWorkMap.get(root);
if (unionWork == null) {
// if unionWork is null, it means it is the first time. we need to
// create a union work object and add this work to it. Subsequent
// work should reference the union and not the actual work.
unionWork = GenTezUtils.createUnionWork(context, root, operator, tezWork);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
}
}
context.currentUnionOperators.clear();
work = unionWork;
}
// reasons. Roots are data sources, leaves are data sinks. I know.
if (context.leafOperatorToFollowingWork.containsKey(operator)) {
BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work: " + followingWork);
if (operator instanceof DummyStoreOperator) {
// this is the small table side.
assert (followingWork instanceof MergeJoinWork);
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
work.setTag(mergeJoinOp.getTagForOperator(operator));
mergeJoinWork.addMergedWork(null, work, context.leafOperatorToFollowingWork);
tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
}
work = mergeJoinWork;
} else {
// need to add this branch to the key + value info
assert operator instanceof ReduceSinkOperator && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork) || followingWork instanceof UnionWork);
ReduceSinkOperator rs = (ReduceSinkOperator) operator;
ReduceWork rWork = null;
if (followingWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else if (followingWork instanceof UnionWork) {
// this can only be possible if there is merge work followed by the union
UnionWork unionWork = (UnionWork) followingWork;
int index = getFollowingWorkIndex(tezWork, unionWork, rs);
BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
if (baseWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
// disconnect the connection to union work and connect to merge work
followingWork = mergeJoinWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else {
rWork = (ReduceWork) baseWork;
}
} else {
rWork = (ReduceWork) followingWork;
}
GenMapRedUtils.setKeyAndValueDesc(rWork, rs);
// remember which parent belongs to which tag
int tag = rs.getConf().getTag();
rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());
// remember the output name of the reduce sink
rs.getConf().setOutputName(rWork.getName());
// For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any
// ReduceSink parents that we missed.
MapJoinOperator mj = context.smallTableParentToMapJoinMap.get(rs);
if (mj != null) {
// Only need to run the logic for tables we missed
if (context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(rs)) {
// ReduceSinkMapJoinProc logic does not work unless the ReduceSink is connected as
// a parent of the MapJoin, but at this point we have already removed all of the
// parents from the MapJoin.
// Try temporarily adding the RS as a parent
ArrayList<Operator<?>> tempMJParents = new ArrayList<Operator<?>>();
tempMJParents.add(rs);
mj.setParentOperators(tempMJParents);
// ReduceSink also needs MapJoin as child
List<Operator<?>> rsChildren = rs.getChildOperators();
rsChildren.add(mj);
// Since the MapJoin has had all of its other parents removed at this point,
// it would be bad here if processReduceSinkToHashJoin() tries to do anything
// with the RS parent based on its position in the list of parents.
ReduceSinkMapJoinProc.processReduceSinkToHashJoin(rs, mj, context);
// Remove any parents from MapJoin again
mj.removeParents();
// TODO: do we also need to remove the MapJoin from the list of RS's children?
}
}
if (!context.connectedReduceSinks.contains(rs)) {
// add dependency between the two work items
TezEdgeProperty edgeProp;
EdgeType edgeType = GenTezUtils.determineEdgeType(work, followingWork, rs);
if (rWork.isAutoReduceParallelism()) {
edgeProp = new TezEdgeProperty(context.conf, edgeType, true, rWork.isSlowStart(), rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer);
} else {
edgeProp = new TezEdgeProperty(edgeType);
edgeProp.setSlowStart(rWork.isSlowStart());
}
tezWork.connect(work, followingWork, edgeProp);
context.connectedReduceSinks.add(rs);
}
}
} else {
LOG.debug("First pass. Leaf operator: " + operator);
}
// the next item will be a new root.
if (!operator.getChildOperators().isEmpty()) {
assert operator.getChildOperators().size() == 1;
context.parentOfRoot = operator;
context.currentRootOperator = operator.getChildOperators().get(0);
context.preceedingWork = work;
}
return null;
}
Aggregations