use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class TezCompiler method removeSemijoinOptimizationByBenefit.
/**
 * Drops semijoin reduction branches whose computed bloom-filter benefit is below the
 * configured threshold.
 *
 * <p>Branches that were created from a hint, or that are not flagged for removal, are left
 * untouched. Removal is deferred until every branch has been evaluated so that the branch
 * map is not altered while it is being walked.
 *
 * @param procCtx optimizer context providing the configuration and the parse context
 * @throws SemanticException if no SelectOperator can be found above a semijoin branch
 */
private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) throws SemanticException {
  // Nothing to do unless dynamic semijoin reduction is switched on.
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
    return;
  }

  List<ReduceSinkOperator> branchesToDrop = new ArrayList<ReduceSinkOperator>();
  Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
  double semijoinReductionThreshold = procCtx.conf.getFloatVar(HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD);

  for (Map.Entry<ReduceSinkOperator, SemiJoinBranchInfo> branch : map.entrySet()) {
    ReduceSinkOperator rs = branch.getKey();
    SemiJoinBranchInfo sjInfo = branch.getValue();

    // Only branches that are neither hinted nor marked useful are candidates.
    boolean candidate = !sjInfo.getIsHint() && sjInfo.getShouldRemove();
    if (!candidate) {
      continue;
    }

    // The branch is expected to look like <Parent>-SEL-GB1-RS1-GB2-RS2; climb from the RS
    // through first parents until the SEL is reached.
    SelectOperator sel = null;
    Operator<?> cursor = rs;
    while (sel == null && !cursor.getParentOperators().isEmpty()) {
      if (cursor instanceof SelectOperator) {
        sel = (SelectOperator) cursor;
      } else {
        cursor = cursor.getParentOperators().get(0);
      }
    }
    if (sel == null) {
      throw new SemanticException("Unexpected error - could not find SEL ancestor from semijoin branch of " + rs);
    }

    // Inputs for the cost/benefit computation: the SEL side of the branch versus the
    // table scan the semijoin is targeting.
    TableScanOperator ts = sjInfo.getTsOp();
    RuntimeValuesInfo rti = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(rs);
    ExprNodeDesc tsExpr = rti.getTsColExpr();
    // The SEL of a semijoin branch is expected to carry a single column.
    ExprNodeDesc selExpr = sel.getConf().getColList().get(0);

    if (LOG.isDebugEnabled()) {
      LOG.debug("Computing BloomFilter cost/benefit for " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts) + " (" + tsExpr + ")");
    }

    FilterOperator tsFilter = (FilterOperator) ts.getChildOperators().get(0);
    double benefit = computeBloomFilterNetBenefit(sel, selExpr, tsFilter, tsExpr);
    if (benefit < semijoinReductionThreshold) {
      // Not worthwhile; remember it and remove once the walk is finished.
      branchesToDrop.add(rs);
    }
  }

  for (ReduceSinkOperator rs : branchesToDrop) {
    TableScanOperator ts = map.get(rs).getTsOp();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Reduction factor not satisfied for " + OperatorUtils.getOpNamePretty(rs) + "-" + OperatorUtils.getOpNamePretty(ts) + ". Removing semijoin optimization.");
    }
    GenTezUtils.removeBranch(rs);
    GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, ts);
  }
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class ReduceSinkDeDuplicationUtils method aggressiveDedup.
/**
 * Tries to merge the parent ReduceSink {@code pRS} into the child ReduceSink {@code cRS}
 * when the only operators between them are SelectOperators (pRS-SEL*-cRS).
 *
 * <p>On success the key, partition, value and (if present) bucket columns of {@code cRS},
 * as well as its column expression map, are replaced by the results of
 * {@link ExprNodeDescUtils#backtrack}; every operator from {@code pRS} up to (but excluding)
 * {@code cRS} is registered with {@code dedupCtx} as removed; and {@code cRS} is attached
 * directly to the former parents of {@code pRS}.
 *
 * <p>NOTE(review): the column lists of {@code cRS} are overwritten section by section, so a
 * {@code false} return from a later section (e.g. partition columns) leaves the earlier
 * sections (e.g. key columns) already rewritten - confirm callers tolerate this.
 *
 * <p>NOTE(review): each list is passed through {@code backtrack} twice (once for the null
 * check, once more on the result before it is stored) - confirm this is intended.
 *
 * @param cRS child ReduceSink; asserted to have exactly one parent
 * @param pRS parent ReduceSink expected to be an ancestor of {@code cRS}
 * @param dedupCtx context that records the operators removed by the merge
 * @return {@code true} if the operators were merged; {@code false} if a non-Select operator
 *         sits between the two ReduceSinks, if either key list is null or empty, or if any
 *         column backtracks to {@code null}
 * @throws SemanticException declared for the backtracking calls
 */
protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
// Only checked when assertions are enabled.
assert cRS.getNumParent() == 1;
ReduceSinkDesc cConf = cRS.getConf();
ReduceSinkDesc pConf = pRS.getConf();
List<ExprNodeDesc> cKeys = cConf.getKeyCols();
List<ExprNodeDesc> pKeys = pConf.getKeyCols();
// Check that in the path between cRS and pRS, there are only Select operators
// i.e. the sequence must be pRS-SEL*-cRS
Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
while (parent != pRS) {
assert parent.getNumParent() == 1;
if (!(parent instanceof SelectOperator)) {
return false;
}
parent = parent.getParentOperators().get(0);
}
// If child keys are null or empty, we bail out
if (cKeys == null || cKeys.isEmpty()) {
return false;
}
// If parent keys are null or empty, we bail out
if (pKeys == null || pKeys.isEmpty()) {
return false;
}
// Backtrack key columns of cRS to pRS
// If we cannot backtrack any of the columns, bail out
List<ExprNodeDesc> cKeysInParentRS = ExprNodeDescUtils.backtrack(cKeys, cRS, pRS);
for (int i = 0; i < cKeysInParentRS.size(); i++) {
ExprNodeDesc pexpr = cKeysInParentRS.get(i);
if (pexpr == null) {
// We cannot backtrack the expression, we bail out
return false;
}
}
// From here on cRS is being modified in place.
cRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(cKeysInParentRS, cRS, pRS));
// Backtrack partition columns of cRS to pRS
// If we cannot backtrack any of the columns, bail out
List<ExprNodeDesc> cPartitionInParentRS = ExprNodeDescUtils.backtrack(cConf.getPartitionCols(), cRS, pRS);
for (int i = 0; i < cPartitionInParentRS.size(); i++) {
ExprNodeDesc pexpr = cPartitionInParentRS.get(i);
if (pexpr == null) {
// We cannot backtrack the expression, we bail out
return false;
}
}
cRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(cPartitionInParentRS, cRS, pRS));
// Backtrack value columns of cRS to pRS
// If we cannot backtrack any of the columns, bail out
List<ExprNodeDesc> cValueInParentRS = ExprNodeDescUtils.backtrack(cConf.getValueCols(), cRS, pRS);
for (int i = 0; i < cValueInParentRS.size(); i++) {
ExprNodeDesc pexpr = cValueInParentRS.get(i);
if (pexpr == null) {
// We cannot backtrack the expression, we bail out
return false;
}
}
cRS.getConf().setValueCols(ExprNodeDescUtils.backtrack(cValueInParentRS, cRS, pRS));
// Backtrack bucket columns of cRS to pRS, but only when cRS has bucket columns at all
// If we cannot backtrack any of the columns, bail out
if (cConf.getBucketCols() != null) {
List<ExprNodeDesc> cBucketInParentRS = ExprNodeDescUtils.backtrack(cConf.getBucketCols(), cRS, pRS);
for (int i = 0; i < cBucketInParentRS.size(); i++) {
ExprNodeDesc pexpr = cBucketInParentRS.get(i);
if (pexpr == null) {
// We cannot backtrack the expression, we bail out
return false;
}
}
cRS.getConf().setBucketCols(ExprNodeDescUtils.backtrack(cBucketInParentRS, cRS, pRS));
}
// Update column expression map
// Each entry's value is replaced in place; a null backtrack result is stored as-is here.
for (Entry<String, ExprNodeDesc> e : cRS.getColumnExprMap().entrySet()) {
e.setValue(ExprNodeDescUtils.backtrack(e.getValue(), cRS, pRS));
}
// Replace pRS with cRS and remove operator sequence from pRS to cRS
// Recall that the sequence must be pRS-SEL*-cRS
parent = cRS.getParentOperators().get(0);
while (parent != pRS) {
dedupCtx.addRemovedOperator(parent);
parent = parent.getParentOperators().get(0);
}
dedupCtx.addRemovedOperator(pRS);
// Re-parent cRS onto every former parent of pRS.
cRS.getParentOperators().clear();
for (Operator<? extends OperatorDesc> op : pRS.getParentOperators()) {
op.replaceChild(pRS, cRS);
cRS.getParentOperators().add(op);
}
// Detach pRS from the operator graph in both directions.
pRS.getParentOperators().clear();
pRS.getChildOperators().clear();
return true;
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class ColumnPrunerProcCtx method handleFilterUnionChildren.
/**
 * Inserts a select operator between a filter operator and each of its direct union
 * children whose pruned column list differs in size from the filter's pruned column list.
 *
 * <p>The inserted select operator exposes exactly those columns of the union's schema that
 * appear in the union's pruned column list, so it has the same number of columns as the
 * pruned child operator. Nothing is done when {@code curOp} is not a filter operator, has
 * no child list, or has no (or an empty) pruned column list.
 *
 * @param curOp
 *          The filter operator whose children need to be handled.
 * @throws SemanticException
 *           declared for the column-list generation of a child
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
  if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
    return;
  }
  List<FieldNode> parentPrunList = prunedColLists.get(curOp);
  if (parentPrunList == null || parentPrunList.isEmpty()) {
    return;
  }
  // Iterate over a snapshot: the loop body detaches the union from curOp and attaches a new
  // select operator to it, so walking curOp's child list directly would modify the list
  // while it is being iterated.
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>(curOp.getChildOperators());
  for (Operator<? extends OperatorDesc> child : children) {
    if (!(child instanceof UnionOperator)) {
      continue;
    }
    List<FieldNode> prunList = genColLists(child);
    // No select is needed when the union has no pruned columns or already matches the
    // filter's pruned column count.
    if (prunList == null || prunList.isEmpty() || parentPrunList.size() == prunList.size()) {
      continue;
    }
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    for (ColumnInfo colInfo : child.getSchema().getSignature()) {
      // Keep only the columns of the union's schema that survive pruning.
      if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
        continue;
      }
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputColNames.add(colInfo.getInternalName());
      ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
      newCol.setAlias(colInfo.getAlias());
      outputRS.add(newCol);
      colExprMap.put(colInfo.getInternalName(), colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputColNames, false);
    // Splice the new select in: curOp -> sel -> child.
    curOp.removeChild(child);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
    OperatorFactory.makeChild(sel, child);
    sel.setColumnExprMap(colExprMap);
  }
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class ConvertJoinMapJoin method removeCycleCreatingSemiJoinOps.
/**
 * Removes any semijoin branch hanging off the big-table side of a map join whose target
 * table scan is upstream of one of the map join's ReduceSink parents, since such a branch
 * would create a cycle after the hash join optimization.
 *
 * <p>For every SelectOperator child of {@code parentSelectOpOfBigTable} the first-child
 * chain is followed to its end. If that ends in a ReduceSink registered as a semijoin
 * branch, every ReduceSink sibling under the same parent operator is checked, and those
 * whose target table scan feeds a ReduceSink parent of {@code mapjoinOp} are removed.
 *
 * @param mapjoinOp the map join whose parents are inspected for a cycle
 * @param parentSelectOpOfBigTable operator whose SelectOperator children may start a semijoin branch
 * @param parseContext parse context holding the ReduceSink to semijoin branch mapping
 * @throws SemanticException declared for the removal helpers
 */
private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, Operator<?> parentSelectOpOfBigTable, ParseContext parseContext) throws SemanticException {
  Map<ReduceSinkOperator, TableScanOperator> semiJoinMap = new HashMap<ReduceSinkOperator, TableScanOperator>();
  for (Operator<?> op : parentSelectOpOfBigTable.getChildOperators()) {
    if (!(op instanceof SelectOperator)) {
      continue;
    }
    // Walk down the first-child chain to the end of this branch.
    Operator<?> leaf = op;
    while (leaf.getChildOperators().size() > 0) {
      leaf = leaf.getChildOperators().get(0);
    }
    // If not ReduceSink Op, skip
    if (!(leaf instanceof ReduceSinkOperator)) {
      continue;
    }
    ReduceSinkOperator leafRS = (ReduceSinkOperator) leaf;
    // A ReduceSink with no registered branch info is not a semijoin branch; check the
    // lookup result before dereferencing it.
    if (parseContext.getRsToSemiJoinBranchInfo().get(leafRS) == null) {
      continue;
    }
    if (parseContext.getRsToSemiJoinBranchInfo().get(leafRS).getTsOp() == null) {
      // skip, no semijoin branch
      continue;
    }
    // Found a semijoin branch.
    // There can be more than one semijoin branch coming from the parent
    // GBY Operator of the RS Operator.
    Operator<?> parentGB = leaf.getParentOperators().get(0);
    for (Operator<?> childRS : parentGB.getChildOperators()) {
      // Siblings that are not registered semijoin ReduceSinks have nothing to remove.
      if (!(childRS instanceof ReduceSinkOperator)) {
        continue;
      }
      // Get the RS and TS for this branch
      ReduceSinkOperator rs = (ReduceSinkOperator) childRS;
      if (parseContext.getRsToSemiJoinBranchInfo().get(rs) == null) {
        continue;
      }
      TableScanOperator ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp();
      assert ts != null;
      boolean found = false;
      for (Operator<?> parent : mapjoinOp.getParentOperators()) {
        if (!(parent instanceof ReduceSinkOperator)) {
          continue;
        }
        Set<TableScanOperator> tsOps = OperatorUtils.findOperatorsUpstream(parent, TableScanOperator.class);
        for (TableScanOperator parentTS : tsOps) {
          // If the parent is same as the ts, then we have a cycle.
          if (ts == parentTS) {
            semiJoinMap.put(rs, ts);
            found = true;
            break;
          }
        }
        if (found) {
          break;
        }
      }
    }
  }
  // Remove after the scan so the operator tree is not modified while it is being walked.
  for (Map.Entry<ReduceSinkOperator, TableScanOperator> cyclic : semiJoinMap.entrySet()) {
    ReduceSinkOperator rs = cyclic.getKey();
    TableScanOperator ts = cyclic.getValue();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Found semijoin optimization from the big table side of a map join, which will cause a task cycle. " + "Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
    }
    GenTezUtils.removeBranch(rs);
    GenTezUtils.removeSemiJoinOperator(parseContext, rs, ts);
  }
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class DynamicPartitionPruningOptimization method getColumnInfo.
/**
 * Given a key, finds the corresponding column name.
 *
 * <p>The key expression is read from the generator's key columns at the index given by
 * {@code ctx.desc}. Results are appended to the supplied builders.
 *
 * @param ctx context supplying the generator operator and the key index
 * @param internalColName receives the key's column name as seen by the generator
 * @param colName receives the resolved column name
 * @param tabAlias receives the table alias; only written when a column origin is found
 * @return {@code false} if the key, or its mapping in a parent SelectOperator, is not a
 *         column expression; {@code true} otherwise
 */
private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName, StringBuilder colName, StringBuilder tabAlias) {
  ExprNodeDesc keyExpr = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
  ExprNodeColumnDesc keyColumn = ExprNodeDescUtils.getColumnExpr(keyExpr);
  if (keyColumn == null) {
    return false;
  }
  internalColName.append(keyColumn.getColumn());

  // Preferred path: the column origin supplies both the table alias and the column name.
  ExprNodeDescUtils.ColumnOrigin origin = ExprNodeDescUtils.findColumnOrigin(keyExpr, ctx.generator);
  if (origin != null) {
    assert origin.op instanceof TableScanOperator;
    TableScanOperator originScan = (TableScanOperator) origin.op;
    tabAlias.append(originScan.getConf().getAlias());
    colName.append(ExprNodeDescUtils.getColumnExpr(origin.col).getColumn());
    return true;
  }

  // No origin found: fall back to the generator's first parent.
  Operator<? extends OperatorDesc> rsParent = ctx.generator.getParentOperators().get(0);
  if (rsParent instanceof SelectOperator) {
    // Map the internal name through the select's column expression map.
    ExprNodeDesc mapped = rsParent.getColumnExprMap().get(internalColName.toString());
    ExprNodeColumnDesc mappedColumn = ExprNodeDescUtils.getColumnExpr(mapped);
    if (mappedColumn == null) {
      return false;
    }
    colName.append(ExprNodeDescUtils.extractColName(mappedColumn));
    return true;
  }

  // Parent is not a select: the internal name is used as the column name.
  colName.append(internalColName.toString());
  return true;
}
Aggregations