Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
Class MapJoinProcessor, method genSelectPlan.
protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
  List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
  input.setChildOperators(null);
  // create a dummy select - This select is needed by the walker to split the
  // mapJoin later on
  RowSchema inputRS = input.getSchema();
  ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
  ArrayList<String> outputs = new ArrayList<String>();
  List<String> outputCols = input.getConf().getOutputColumnNames();
  ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < outputCols.size(); i++) {
    String internalName = outputCols.get(i);
    ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
    ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(),
        valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
    exprs.add(colDesc);
    outputs.add(internalName);
    ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(), valueInfo.getTabAlias(),
        valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
    newCol.setAlias(valueInfo.getAlias());
    outputRS.add(newCol);
    colExprMap.put(internalName, colDesc);
  }
  SelectDesc select = new SelectDesc(exprs, outputs, false);
  SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), input);
  sel.setColumnExprMap(colExprMap);
  // Insert the select operator in between.
  sel.setChildOperators(childOps);
  for (Operator<? extends OperatorDesc> ch : childOps) {
    ch.replaceParent(input, sel);
  }
}
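The rewiring pattern in genSelectPlan (detach the original children, create the new operator as a child of the original one, then hand the saved children to the new operator and repoint their parent links) is the general way an operator is spliced into a Hive plan. The following is a minimal, hypothetical helper that captures just that hand-over step; it is not part of Hive, uses only Operator methods already seen in the snippet above, and assumes the same imports as MapJoinProcessor.

  // Hypothetical helper (illustration only): give the previously saved children
  // of 'original' to 'inserted' and repoint their parent links, mirroring the
  // last few lines of genSelectPlan above.
  private static void handOverChildren(Operator<? extends OperatorDesc> original,
      Operator<? extends OperatorDesc> inserted,
      List<Operator<? extends OperatorDesc>> savedChildren) {
    inserted.setChildOperators(savedChildren);
    for (Operator<? extends OperatorDesc> child : savedChildren) {
      // each former child must now see 'inserted' instead of 'original'
      child.replaceParent(original, inserted);
    }
  }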
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
Class MapJoinProcessor, method convertSMBJoinToMapJoin.
/**
 * Convert a sort-merge join to a map-side join.
 *
 * @param hconf
 * @param smbJoinOp
 *          the sort-merge join operator
 * @param bigTablePos
 *          position of the source to be read as part of the map-reduce framework. All other
 *          sources are cached in memory
 * @param noCheckOuterJoin
 */
public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf, SMBMapJoinOperator smbJoinOp,
    int bigTablePos, boolean noCheckOuterJoin) throws SemanticException {
  // Create a new map join operator
  SMBJoinDesc smbJoinDesc = smbJoinOp.getConf();
  List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0));
  TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf,
      PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
  MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(), keyTableDesc, smbJoinDesc.getExprs(),
      smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(), smbJoinDesc.getOutputColumnNames(),
      bigTablePos, smbJoinDesc.getConds(), smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(),
      smbJoinDesc.getDumpFilePrefix());
  mapJoinDesc.setStatistics(smbJoinDesc.getStatistics());
  RowSchema joinRS = smbJoinOp.getSchema();
  // The mapjoin has the same schema as the join operator
  MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
      smbJoinOp.getCompilationOpContext(), mapJoinDesc, joinRS,
      new ArrayList<Operator<? extends OperatorDesc>>());
  // change the children of the original join operator to point to the map
  // join operator
  List<Operator<? extends OperatorDesc>> childOps = smbJoinOp.getChildOperators();
  for (Operator<? extends OperatorDesc> childOp : childOps) {
    childOp.replaceParent(smbJoinOp, mapJoinOp);
  }
  mapJoinOp.setChildOperators(childOps);
  smbJoinOp.setChildOperators(null);
  // change the parent of the original SMBjoin operator to point to the map
  // join operator
  List<Operator<? extends OperatorDesc>> parentOps = smbJoinOp.getParentOperators();
  for (Operator<? extends OperatorDesc> parentOp : parentOps) {
    parentOp.replaceChild(smbJoinOp, mapJoinOp);
  }
  mapJoinOp.setParentOperators(parentOps);
  smbJoinOp.setParentOperators(null);
  return mapJoinOp;
}
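Because convertSMBJoinToMapJoin is public static and takes only the configuration, the located SMB join operator, the big-table position, and the outer-join-check flag, a caller can invoke it directly once the operator has been found in the plan. The sketch below is a hypothetical caller, not code from Hive; the method name toMapJoin and the choice of position 0 are illustrative.

  // Hypothetical caller sketch: convert a previously located SMB join.
  // Input 0 is streamed as the big table; every other input is cached in memory.
  static MapJoinOperator toMapJoin(HiveConf hconf, SMBMapJoinOperator smbOp) throws SemanticException {
    int bigTablePosition = 0;
    MapJoinOperator mj = MapJoinProcessor.convertSMBJoinToMapJoin(hconf, smbOp, bigTablePosition, false);
    // mj now owns the SMB join's former parents and children; smbOp is fully detached.
    return mj;
  }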
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
Class SparkMapJoinProcessor, method convertMapJoin.
/**
 * Convert a regular join to a map-side join.
 *
 * @param conf
 * @param op join operator
 * @param leftSrc
 * @param baseSrc
 * @param mapAliases
 * @param bigTablePos position of the source to be read as part of the
 *          map-reduce framework. All other sources are cached in memory
 * @param noCheckOuterJoin
 * @param validateMapJoinTree
 */
@Override
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftSrc, String[] baseSrc,
    List<String> mapAliases, int bigTablePos, boolean noCheckOuterJoin, boolean validateMapJoinTree)
    throws SemanticException {
  // outer join cannot be performed on a table which is being cached
  JoinCondDesc[] condns = op.getConf().getConds();
  if (!noCheckOuterJoin) {
    if (checkMapJoin(bigTablePos, condns) < 0) {
      throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
    }
  }
  // create the map-join operator
  MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, op.getConf().isLeftInputJoin(),
      op.getConf().getBaseSrc(), op.getConf().getMapAliases(), bigTablePos, noCheckOuterJoin);
  // 1. remove RS as parent for the big table branch
  // 2. remove old join op from child set of all the RSs
  List<Operator<? extends OperatorDesc>> parentOps = mapJoinOp.getParentOperators();
  for (int i = 0; i < parentOps.size(); i++) {
    Operator<? extends OperatorDesc> parentOp = parentOps.get(i);
    parentOp.getChildOperators().remove(op);
    if (i == bigTablePos) {
      List<Operator<? extends OperatorDesc>> grandParentOps = parentOp.getParentOperators();
      Preconditions.checkArgument(grandParentOps.size() == 1,
          "AssertionError: expect number of parents to be 1, but was " + grandParentOps.size());
      Operator<? extends OperatorDesc> grandParentOp = grandParentOps.get(0);
      grandParentOp.replaceChild(parentOp, mapJoinOp);
      mapJoinOp.replaceParent(parentOp, grandParentOp);
    }
  }
  return mapJoinOp;
}
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
Class QueryPlanTreeTransformation, method applyCorrelation.
/**
 * Based on the correlation, we transform the query plan tree (operator tree).
 * Here, we first create a DemuxOperator; all bottom ReduceSinkOperators
 * (bottom means near the TableScanOperator) in the correlation will be
 * the parents of the DemuxOperator. We also reassign tags to those
 * ReduceSinkOperators. Then, we use MuxOperators to replace ReduceSinkOperators
 * which are not bottom ones in this correlation.
 * Example: The original operator tree is ...
 *      JOIN2
 *      /    \
 *    RS4    RS5
 *    /        \
 *  GBY1      JOIN1
 *    |       /   \
 *   RS1    RS2   RS3
 * If GBY1, JOIN1, and JOIN2 can be executed in the same reducer
 * (as determined by the Correlation Optimizer), the new operator tree will be ...
 *      JOIN2
 *        |
 *       MUX
 *      /   \
 *   GBY1   JOIN1
 *      \   /
 *      DEMUX
 *      / | \
 *     /  |  \
 *    /   |   \
 *  RS1  RS2  RS3
 * @param pCtx
 * @param corrCtx
 * @param correlation
 * @throws SemanticException
 */
protected static void applyCorrelation(ParseContext pCtx, CorrelationNodeProcCtx corrCtx,
    IntraQueryCorrelation correlation) throws SemanticException {
  final List<ReduceSinkOperator> bottomReduceSinkOperators = correlation.getBottomReduceSinkOperators();
  final int numReducers = correlation.getNumReducers();
  List<Operator<? extends OperatorDesc>> childrenOfDemux = new ArrayList<Operator<? extends OperatorDesc>>();
  List<Operator<? extends OperatorDesc>> parentRSsOfDemux = new ArrayList<Operator<? extends OperatorDesc>>();
  Map<Integer, Integer> childIndexToOriginalNumParents = new HashMap<Integer, Integer>();
  List<TableDesc> keysSerializeInfos = new ArrayList<TableDesc>();
  List<TableDesc> valuessSerializeInfos = new ArrayList<TableDesc>();
  Map<ReduceSinkOperator, Integer> bottomRSToNewTag = new HashMap<ReduceSinkOperator, Integer>();
  int newTag = 0;
  CompilationOpContext opCtx = null;
  for (ReduceSinkOperator rsop : bottomReduceSinkOperators) {
    if (opCtx == null) {
      opCtx = rsop.getCompilationOpContext();
    }
    rsop.getConf().setNumReducers(numReducers);
    bottomRSToNewTag.put(rsop, newTag);
    parentRSsOfDemux.add(rsop);
    keysSerializeInfos.add(rsop.getConf().getKeySerializeInfo());
    valuessSerializeInfos.add(rsop.getConf().getValueSerializeInfo());
    Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(rsop, true);
    if (!childrenOfDemux.contains(child)) {
      childrenOfDemux.add(child);
      int childIndex = childrenOfDemux.size() - 1;
      childIndexToOriginalNumParents.put(childIndex, child.getNumParent());
    }
    newTag++;
  }
  for (ReduceSinkOperator rsop : bottomReduceSinkOperators) {
    setNewTag(correlation, childrenOfDemux, rsop, bottomRSToNewTag);
  }
  // Create the DemuxOperator
  DemuxDesc demuxDesc = new DemuxDesc(correlation.getNewTagToOldTag(), correlation.getNewTagToChildIndex(),
      childIndexToOriginalNumParents, keysSerializeInfos, valuessSerializeInfos);
  Operator<? extends OperatorDesc> demuxOp = OperatorFactory.get(opCtx, demuxDesc);
  demuxOp.setChildOperators(childrenOfDemux);
  demuxOp.setParentOperators(parentRSsOfDemux);
  for (Operator<? extends OperatorDesc> child : childrenOfDemux) {
    List<Operator<? extends OperatorDesc>> parentsWithMultipleDemux =
        new ArrayList<Operator<? extends OperatorDesc>>();
    boolean hasBottomReduceSinkOperators = false;
    boolean hasNonBottomReduceSinkOperators = false;
    for (int i = 0; i < child.getParentOperators().size(); i++) {
      Operator<? extends OperatorDesc> p = child.getParentOperators().get(i);
      assert p instanceof ReduceSinkOperator;
      ReduceSinkOperator rsop = (ReduceSinkOperator) p;
      if (bottomReduceSinkOperators.contains(rsop)) {
        hasBottomReduceSinkOperators = true;
        parentsWithMultipleDemux.add(demuxOp);
      } else {
        hasNonBottomReduceSinkOperators = true;
        parentsWithMultipleDemux.add(rsop);
      }
    }
    if (hasBottomReduceSinkOperators && hasNonBottomReduceSinkOperators) {
      child.setParentOperators(parentsWithMultipleDemux);
    } else {
      child.setParentOperators(Utilities.makeList(demuxOp));
    }
  }
  for (Operator<? extends OperatorDesc> parent : parentRSsOfDemux) {
    parent.setChildOperators(Utilities.makeList(demuxOp));
  }
  // replace all ReduceSinkOperators which are not at the bottom of
  // this correlation with MuxOperators
  Set<ReduceSinkOperator> handledRSs = new HashSet<ReduceSinkOperator>();
  for (ReduceSinkOperator rsop : correlation.getAllReduceSinkOperators()) {
    if (!bottomReduceSinkOperators.contains(rsop)) {
      if (handledRSs.contains(rsop)) {
        continue;
      }
      Operator<? extends OperatorDesc> childOP = CorrelationUtilities.getSingleChild(rsop, true);
      if (childOP instanceof GroupByOperator) {
        CorrelationUtilities.removeReduceSinkForGroupBy(rsop, (GroupByOperator) childOP, pCtx, corrCtx);
        List<Operator<? extends OperatorDesc>> parentsOfMux =
            new ArrayList<Operator<? extends OperatorDesc>>();
        Operator<? extends OperatorDesc> parentOp = CorrelationUtilities.getSingleParent(childOP, true);
        parentsOfMux.add(parentOp);
        Operator<? extends OperatorDesc> mux = OperatorFactory.get(childOP.getCompilationOpContext(),
            new MuxDesc(parentsOfMux));
        mux.setChildOperators(Utilities.makeList(childOP));
        mux.setParentOperators(parentsOfMux);
        childOP.setParentOperators(Utilities.makeList(mux));
        parentOp.setChildOperators(Utilities.makeList(mux));
      } else {
        List<Operator<? extends OperatorDesc>> parentsOfMux =
            new ArrayList<Operator<? extends OperatorDesc>>();
        List<Operator<? extends OperatorDesc>> siblingOPs = CorrelationUtilities.findSiblingOperators(rsop);
        for (Operator<? extends OperatorDesc> op : siblingOPs) {
          if (op instanceof DemuxOperator) {
            parentsOfMux.add(op);
          } else if (op instanceof ReduceSinkOperator) {
            GroupByOperator pGBYm = CorrelationUtilities.getSingleParent(op, GroupByOperator.class);
            if (pGBYm != null && pGBYm.getConf().getMode() == GroupByDesc.Mode.HASH) {
              // We get a semi join here.
              // This map-side GroupByOperator needs to be removed
              CorrelationUtilities.removeOperator(pGBYm, op, CorrelationUtilities.getSingleParent(pGBYm, true), pCtx);
            }
            handledRSs.add((ReduceSinkOperator) op);
            parentsOfMux.add(CorrelationUtilities.getSingleParent(op, true));
          } else {
            throw new SemanticException("A sibling of ReduceSinkOperator is neither a "
                + "DemuxOperator nor a ReduceSinkOperator");
          }
        }
        MuxDesc muxDesc = new MuxDesc(siblingOPs);
        Operator<? extends OperatorDesc> mux = OperatorFactory.get(rsop.getCompilationOpContext(), muxDesc);
        mux.setChildOperators(Utilities.makeList(childOP));
        mux.setParentOperators(parentsOfMux);
        for (Operator<? extends OperatorDesc> op : parentsOfMux) {
          if (op instanceof DemuxOperator) {
            // op is a DemuxOperator; insert the MuxOperator between it and childOP.
            if (op.getChildOperators().contains(childOP)) {
              op.replaceChild(childOP, mux);
            }
          } else {
            // op is not a DemuxOperator, so it should have
            // a single child.
            op.setChildOperators(Utilities.makeList(mux));
          }
        }
        childOP.setParentOperators(Utilities.makeList(mux));
      }
    }
  }
  for (ReduceSinkOperator rsop : handledRSs) {
    rsop.setChildOperators(null);
    rsop.setParentOperators(null);
  }
}
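One way to read the transformation above is through its post-conditions: every bottom ReduceSinkOperator ends up feeding the single DemuxOperator, and the DemuxOperator lists exactly those ReduceSinks as parents. The check below is a hypothetical sanity-check sketch derived from those lines, not code from Hive; the method name verifyDemuxWiring is made up, and it assumes the same imports as QueryPlanTreeTransformation.

  // Hypothetical post-condition sketch for applyCorrelation (illustration only).
  static void verifyDemuxWiring(List<ReduceSinkOperator> bottomRSs,
      Operator<? extends OperatorDesc> demuxOp) {
    for (ReduceSinkOperator rs : bottomRSs) {
      // every bottom ReduceSink now has exactly one child: the DemuxOperator
      assert rs.getChildOperators().size() == 1;
      assert rs.getChildOperators().get(0) == demuxOp;
    }
    // the DemuxOperator's parents are exactly the bottom ReduceSinks
    assert demuxOp.getParentOperators().size() == bottomRSs.size();
  }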
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
Class AnnotateRunTimeStatsOptimizer, method setRuntimeStatsDir.
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx)
    throws SemanticException {
  try {
    OperatorDesc conf = op.getConf();
    if (conf != null) {
      LOG.info("setRuntimeStatsDir for " + op.getOperatorId());
      String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(),
          op.getOperatorId()).toString();
      StatsPublisher statsPublisher = new FSStatsPublisher();
      StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf());
      runtimeStatsContext.setStatsTmpDir(path);
      if (!statsPublisher.init(runtimeStatsContext)) {
        LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      conf.setRuntimeStatsTmpDir(path);
    } else {
      LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId() + " because OperatorDesc is null");
    }
  } catch (HiveException e) {
    throw new SemanticException(e);
  }
}
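The directory this method records on each OperatorDesc is simply the explain root path with the operator id appended, one directory per operator, so the runtime statistics published there can later be matched back to the operator that produced them. A small sketch of just that path construction; the helper name statsTmpDirFor and the example values are hypothetical, and org.apache.hadoop.fs.Path is the only dependency.

  // Sketch only: derive the per-operator stats directory as <explainRootPath>/<operatorId>.
  static String statsTmpDirFor(Path explainRootPath, String operatorId) {
    // e.g. explainRootPath = /tmp/hive/explain (made-up), operatorId = "SEL_3"
    // result: "/tmp/hive/explain/SEL_3", which is what the method above stores
    // via OperatorDesc.setRuntimeStatsTmpDir(...)
    return new Path(explainRootPath, operatorId).toString();
  }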