Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.
The class SharedWorkOptimizer, method pushFilterToTopOfTableScan.
private static void pushFilterToTopOfTableScan(SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp)
    throws UDFArgumentException {
  ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
  List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(tsOp.getChildOperators());
  for (Operator<? extends OperatorDesc> op : allChildren) {
    if (op instanceof FilterOperator) {
      FilterOperator filterOp = (FilterOperator) op;
      ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
      if (tableScanExprNode.isSame(filterExprNode)) {
        // The filter predicate already matches the table scan predicate; nothing to do.
        return;
      }
      if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
        for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
          if (childExprNode.isSame(filterExprNode)) {
            // The table scan predicate is an OR and one of its disjuncts matches the
            // filter predicate, so it was probably pushed previously; nothing to do.
            return;
          }
        }
      }
      // Conjoin the table scan predicate with the existing filter predicate.
      ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPAnd(),
          Arrays.<ExprNodeDesc>asList(tableScanExprNode.clone(), filterExprNode));
      filterOp.getConf().setPredicate(newPred);
    } else {
      // No filter on this branch yet: insert a new FilterOperator between the table scan and its child.
      Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(),
          new FilterDesc(tableScanExprNode.clone(), false), new RowSchema(tsOp.getSchema().getSignature()));
      tsOp.replaceChild(op, newOp);
      newOp.getParentOperators().add(tsOp);
      op.replaceParent(tsOp, newOp);
      newOp.getChildOperators().add(op);
      // Add to cache (same group as tsOp).
      optimizerCache.putIfWorkExists(newOp, tsOp);
    }
  }
}
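The merge branch above builds a conjunction of the table scan's filter expression and the filter's existing predicate via ExprNodeGenericFuncDesc.newInstance. Below is a minimal, self-contained sketch of that predicate-construction pattern, assuming the Hive ql and serde2 classes are on the classpath; the class name, column name "col", table alias "t", and the constants are illustrative only, not taken from the snippet.

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class AndPredicateSketch {
  public static void main(String[] args) throws UDFArgumentException {
    // Hypothetical column reference standing in for a real column in the table scan schema.
    ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "col", "t", false);

    // Two hypothetical predicates: col > 0 (say, the table scan filter expression)
    // and col < 10 (say, the existing FilterOperator predicate).
    ExprNodeGenericFuncDesc tsPred = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPGreaterThan(), Arrays.<ExprNodeDesc>asList(col, new ExprNodeConstantDesc(0)));
    ExprNodeGenericFuncDesc filterPred = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPLessThan(), Arrays.<ExprNodeDesc>asList(col.clone(), new ExprNodeConstantDesc(10)));

    // Conjoin them with AND, mirroring the newPred construction in the method above.
    ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPAnd(), Arrays.<ExprNodeDesc>asList(tsPred, filterPred));
    System.out.println(newPred.getExprString());
  }
}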
Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.
The class SharedWorkOptimizer, method extractSharedOptimizationInfoForRoot.
private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp,
    TableScanOperator discardableTsOp) throws SemanticException {
  LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
  LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
  Set<Operator<?>> discardableInputOps = new HashSet<>();
  long dataSize = 0L;
  long maxDataSize = 0L;
  retainableOps.add(retainableTsOp);
  discardableOps.add(discardableTsOp);
  Operator<?> equalOp1 = retainableTsOp;
  Operator<?> equalOp2 = discardableTsOp;
  if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
    // TODO: Support checking multiple child operators to merge further.
    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
    return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
  }
  Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
  Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);
  // Special treatment for Filter operators: compare their predicates while ignoring DPP predicates.
  if (currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
    boolean equalFilters = false;
    FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
    FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
    if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred()
        && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
      Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
      Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
      if (conjsOp1String.equals(conjsOp2String)) {
        equalFilters = true;
      }
    }
    if (equalFilters) {
      equalOp1 = currentOp1;
      equalOp2 = currentOp2;
      retainableOps.add(equalOp1);
      discardableOps.add(equalOp2);
      if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
      }
      currentOp1 = currentOp1.getChildOperators().get(0);
      currentOp2 = currentOp2.getChildOperators().get(0);
    } else {
      // Bail out
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
      return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
  }
  return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2,
      currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, false);
}
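The filter comparison above renders each predicate's conjuncts as a multiset of strings via the private helper extractConjsIgnoringDPPPreds. Below is a hedged sketch of the same conjunct-multiset comparison idea, using only the public ExprNodeDescUtils.split API and Guava's HashMultiset; unlike the real helper, this sketch does not exclude DPP predicates, and the class and method names are illustrative.

import java.util.List;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;

public class ConjunctComparisonSketch {
  // Renders the AND-conjuncts of a predicate as a multiset of expression strings.
  static Multiset<String> conjunctStrings(ExprNodeDesc predicate) {
    Multiset<String> result = HashMultiset.create();
    // split() flattens nested AND expressions into their individual conjuncts.
    List<ExprNodeDesc> conjuncts = ExprNodeDescUtils.split(predicate);
    for (ExprNodeDesc conjunct : conjuncts) {
      result.add(conjunct.getExprString());
    }
    return result;
  }

  // Two predicates are considered equivalent here when their conjunct multisets match.
  static boolean sameConjuncts(ExprNodeDesc p1, ExprNodeDesc p2) {
    return conjunctStrings(p1).equals(conjunctStrings(p2));
  }
}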
Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.
The class ColumnPrunerProcCtx, method handleFilterUnionChildren.
/**
 * If the input filter operator has direct children that are union operators,
 * and the filter's pruned column list differs from the union's, create a select
 * operator between them. The select operator projects the same columns as the
 * pruned child operator.
 *
 * @param curOp the filter operator whose children need to be handled
 * @throws SemanticException
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
  if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
    return;
  }
  List<FieldNode> parentPrunList = prunedColLists.get(curOp);
  if (parentPrunList == null || parentPrunList.size() == 0) {
    return;
  }
  List<FieldNode> prunList = null;
  for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
    if (child instanceof UnionOperator) {
      prunList = genColLists(child);
      if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
        continue;
      }
      ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
      ArrayList<String> outputColNames = new ArrayList<String>();
      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
      ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
      for (ColumnInfo colInfo : child.getSchema().getSignature()) {
        if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
          continue;
        }
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
            colInfo.getTabAlias(), colInfo.getIsVirtualCol());
        exprs.add(colDesc);
        outputColNames.add(colInfo.getInternalName());
        ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
            colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
        newCol.setAlias(colInfo.getAlias());
        outputRS.add(newCol);
        colExprMap.put(colInfo.getInternalName(), colDesc);
      }
      // Insert a SELECT that projects only the pruned columns between the filter and the union.
      SelectDesc select = new SelectDesc(exprs, outputColNames, false);
      curOp.removeChild(child);
      SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
      OperatorFactory.makeChild(sel, child);
      sel.setColumnExprMap(colExprMap);
    }
  }
}
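The projection built inside the loop above can be distilled into a small helper. The sketch below is illustrative only (the class and method names are not part of Hive); it assumes the caller has already computed the pruned ColumnInfo list and mirrors the exprs/outputColNames/colExprMap bookkeeping from the method above.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

public class PrunedSelectSketch {
  // Builds a SelectDesc that projects only the columns that survived pruning,
  // filling colExprMap with the column-to-expression mapping as a side effect.
  static SelectDesc buildProjection(List<ColumnInfo> keptColumns, Map<String, ExprNodeDesc> colExprMap) {
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColNames = new ArrayList<String>();
    for (ColumnInfo colInfo : keptColumns) {
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
          colInfo.getTabAlias(), colInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputColNames.add(colInfo.getInternalName());
      colExprMap.put(colInfo.getInternalName(), colDesc);
    }
    return new SelectDesc(exprs, outputColNames, false);
  }
}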
Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.
The class GlobalLimitOptimizer, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  Context ctx = pctx.getContext();
  Map<String, TableScanOperator> topOps = pctx.getTopOps();
  GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
  Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
  // The optimization only applies when there is a single top operator and
  // neither a transform/UDTF nor block sampling is used.
  if (topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) {
    // Here we recursively check:
    // 1. whether there is exactly one LIMIT in the query
    // 2. whether there is no aggregation, group-by, distinct, sort by,
    //    distribute by, or table sampling in any of the sub-queries.
    // The query only qualifies if both conditions are satisfied.
    //
    // Example qualified queries:
    //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
    //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
    //                               FROM ... LIMIT...
    //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
    //
    TableScanOperator ts = topOps.values().iterator().next();
    Table tab = ts.getConf().getTableMetadata();
    if (tab.isNonNative()) {
      LOG.info("Not enabling limit optimization on non native table: " + tab.getTableName());
      return pctx;
    }
    // InputFormat.getSplits won't be called if there is no input path, and the TS vertex
    // would end up with 0 task parallelism.
    if (tab.getStorageHandler() == null) {
      LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
      // The query qualifies for the optimization.
      if (tempGlobalLimit != null) {
        LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
        Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
        if (!tab.isPartitioned()) {
          if (filterOps.size() == 0) {
            Integer tempOffset = tempGlobalLimitDesc.getOffset();
            globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
          }
        } else {
          // Check if the pruner only contains partition columns.
          if (onlyContainsPartnCols(tab, filterOps)) {
            String alias = (String) topOps.keySet().toArray()[0];
            PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
            // Only enable the optimization when there are no unknown partitions,
            // i.e., the filter was able to prune correctly.
            if (!partsList.hasUnknownPartitions()) {
              Integer tempOffset = tempGlobalLimitDesc.getOffset();
              globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
            }
          }
        }
        if (globalLimitCtx.isEnable()) {
          LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
          LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
        }
      }
    }
  }
  return pctx;
}
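Two small building blocks used above are OperatorUtils.findOperators for collecting the FilterOperators under a table scan, and the null-safe defaulting of LimitDesc.getOffset(). A minimal hedged sketch of both follows; the class and method names are illustrative, not part of Hive.

import java.util.Set;

import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.plan.LimitDesc;

public class GlobalLimitSketch {
  // Collects every FilterOperator reachable below the given table scan.
  static Set<FilterOperator> filtersBelow(TableScanOperator ts) {
    return OperatorUtils.findOperators(ts, FilterOperator.class);
  }

  // LimitDesc.getOffset() is null when no OFFSET clause was given; default it to 0.
  static int effectiveOffset(LimitDesc limitDesc) {
    Integer offset = limitDesc.getOffset();
    return (offset == null) ? 0 : offset;
  }
}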
Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.
The class PredicateTransitivePropagate, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  pGraphContext = pctx;
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", "(" + FilterOperator.getOperatorName() + "%"
      + ReduceSinkOperator.getOperatorName() + "%" + JoinOperator.getOperatorName() + "%)"),
      new JoinTransitive());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  TransitiveContext context = new TransitiveContext();
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
  SemanticGraphWalker ogw = new LevelOrderWalker(disp, 2);
  // Create a list of top operator nodes.
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pGraphContext.getTopOps().values());
  ogw.startWalking(topNodes, null);
  Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters();
  // Insert the new filters between each ReduceSinkOperator and its parent.
  for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry : newFilters.entrySet()) {
    ReduceSinkOperator reducer = entry.getKey();
    Operator<?> parent = reducer.getParentOperators().get(0);
    List<ExprNodeDesc> exprs = entry.getValue();
    if (parent instanceof FilterOperator) {
      // The parent is already a filter: merge the new predicates into its existing predicate.
      exprs = ExprNodeDescUtils.split(((FilterOperator) parent).getConf().getPredicate(), exprs);
      ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
      ((FilterOperator) parent).getConf().setPredicate(merged);
    } else {
      // Otherwise, create a new FilterOperator between the parent and the ReduceSinkOperator.
      ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
      RowSchema parentRS = parent.getSchema();
      Operator<FilterDesc> newFilter = createFilter(reducer, parent, parentRS, merged);
    }
  }
  return pGraphContext;
}
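The "R1" rule above matches a FilterOperator, ReduceSinkOperator, JoinOperator chain by concatenating the operators' names into a RuleRegExp pattern. A small hedged sketch that only builds and prints such a pattern string (the class name is illustrative; in the optimizer the rule would be registered in the opRules map as shown above):

import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;

public class RulePatternSketch {
  public static void main(String[] args) {
    // Pattern matching a FilterOperator -> ReduceSinkOperator -> JoinOperator chain,
    // built the same way as the "R1" rule in the transform above.
    String pattern = "(" + FilterOperator.getOperatorName() + "%"
        + ReduceSinkOperator.getOperatorName() + "%"
        + JoinOperator.getOperatorName() + "%)";
    RuleRegExp rule = new RuleRegExp("R1", pattern);
    System.out.println(pattern);
  }
}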