use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SimpleFetchOptimizer method checkThreshold.
private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
if (limit > 0) {
if (data.hasOnlyPruningFilter()) {
/* partitioned table + query has only pruning filters */
return true;
} else if (data.isPartitioned() == false && data.isFiltered() == false) {
/* unpartitioned table + no filters */
return true;
}
/* fall through */
}
long threshold = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
if (threshold < 0) {
return true;
}
Operator child = data.scanOp.getChildOperators().get(0);
if (child instanceof SelectOperator) {
// select *, constant and casts can be allowed without a threshold check
if (checkExpressions((SelectOperator) child)) {
return true;
}
}
return data.isDataLengthWithInThreshold(pctx, threshold);
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SortedMergeBucketMapJoinOptimizer method getCheckCandidateJoin.
// check if the join operator encountered is a candidate for being converted
// to a sort-merge join
private NodeProcessor getCheckCandidateJoin() {
return new NodeProcessor() {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;
JoinOperator joinOperator = (JoinOperator) nd;
int size = stack.size();
if (!(stack.get(size - 1) instanceof JoinOperator) || !(stack.get(size - 2) instanceof ReduceSinkOperator)) {
smbJoinContext.getRejectedJoinOps().add(joinOperator);
return null;
}
// not be converted.
for (int pos = size - 3; pos >= 0; pos--) {
Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) stack.get(pos);
if (!op.supportAutomaticSortMergeJoin()) {
smbJoinContext.getRejectedJoinOps().add(joinOperator);
return null;
}
}
return null;
}
};
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class MapJoinProcessor method convertMapJoin.
/**
* convert a regular join to a a map-side join.
*
* @param opParseCtxMap
* @param op
* join operator
* @param joinTree
* qb join tree
* @param mapJoinPos
* position of the source to be read as part of map-reduce framework. All other sources
* are cached in memory
* @param noCheckOuterJoin
* @param validateMapJoinTree
*/
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
// outer join cannot be performed on a table which is being cached
JoinDesc desc = op.getConf();
JoinCondDesc[] condns = desc.getConds();
if (!noCheckOuterJoin) {
if (checkMapJoin(mapJoinPos, condns) < 0) {
throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
}
}
// Walk over all the sources (which are guaranteed to be reduce sink
// operators).
// The join outputs a concatenation of all the inputs.
List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
List<Operator<? extends OperatorDesc>> newParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
// found a source which is not to be stored in memory
if (leftInputJoin) {
// assert mapJoinPos == 0;
Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
byte pos = 0;
// Remove parent reduce-sink operators
for (String src : baseSrc) {
if (src != null) {
Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
assert parentOp.getParentOperators().size() == 1;
Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
oldReduceSinkParentOps.add(parentOp);
newParentOps.add(grandParentOp);
}
pos++;
}
// create the map-join operator
MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin);
// remove old parents
for (pos = 0; pos < newParentOps.size(); pos++) {
newParentOps.get(pos).replaceChild(oldReduceSinkParentOps.get(pos), mapJoinOp);
}
mapJoinOp.getParentOperators().removeAll(oldReduceSinkParentOps);
mapJoinOp.setParentOperators(newParentOps);
// make sure only map-joins can be performed.
if (validateMapJoinTree) {
validateMapJoinTypes(mapJoinOp);
}
return mapJoinOp;
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class MergeJoinProc method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
GenTezProcContext context = (GenTezProcContext) procCtx;
CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
if (stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
context.currentMergeJoinOperator = mergeJoinOp;
return null;
}
TezWork tezWork = context.currentTask.getWork();
@SuppressWarnings("unchecked") Operator<? extends OperatorDesc> parentOp = (Operator<? extends OperatorDesc>) ((stack.get(stack.size() - 2)));
// Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
// we need to set the merge work that has been created as part of the dummy store walk. If a
// merge work already exists for this merge join operator, add the dummy store work to the
// merge work. Else create a merge work, add above work to the merge work
MergeJoinWork mergeWork = null;
if (context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
// we already have the merge work corresponding to this merge join operator
mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
} else {
mergeWork = new MergeJoinWork();
tezWork.add(mergeWork);
context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
}
mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
mergeWork.setMergeJoinOperator(mergeJoinOp);
tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
for (BaseWork grandParentWork : tezWork.getParents(parentWork)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork);
tezWork.disconnect(grandParentWork, parentWork);
tezWork.connect(grandParentWork, mergeWork, edgeProp);
}
for (BaseWork childWork : tezWork.getChildren(parentWork)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, childWork);
tezWork.disconnect(parentWork, childWork);
tezWork.connect(mergeWork, childWork, edgeProp);
}
tezWork.remove(parentWork);
DummyStoreOperator dummyOp = (DummyStoreOperator) (stack.get(stack.size() - 2));
parentWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));
mergeJoinOp.getParentOperators().remove(dummyOp);
dummyOp.getChildOperators().clear();
return true;
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class GlobalLimitOptimizer method checkQbpForGlobalLimit.
/**
* Check the limit number in all sub queries
*
* @return if there is one and only one limit for all subqueries, return the limit
* if there is no limit, return 0
* otherwise, return null
*/
private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
Set<Class<? extends Operator<?>>> searchedClasses = new ImmutableSet.Builder<Class<? extends Operator<?>>>().add(ReduceSinkOperator.class).add(GroupByOperator.class).add(FilterOperator.class).add(LimitOperator.class).build();
Multimap<Class<? extends Operator<?>>, Operator<?>> ops = OperatorUtils.classifyOperators(ts, searchedClasses);
// existsOrdering AND existsPartitioning should be false.
for (Operator<?> op : ops.get(ReduceSinkOperator.class)) {
ReduceSinkDesc reduceSinkConf = ((ReduceSinkOperator) op).getConf();
if (reduceSinkConf.isOrdering() || reduceSinkConf.isPartitioning()) {
return null;
}
}
// - There cannot exist any (distinct) aggregate.
for (Operator<?> op : ops.get(GroupByOperator.class)) {
GroupByDesc groupByConf = ((GroupByOperator) op).getConf();
if (groupByConf.isAggregate() || groupByConf.isDistinct()) {
return null;
}
}
// - There cannot exist any sampling predicate.
for (Operator<?> op : ops.get(FilterOperator.class)) {
FilterDesc filterConf = ((FilterOperator) op).getConf();
if (filterConf.getIsSamplingPred()) {
return null;
}
}
// If there is one and only one limit starting at op, return the limit
// If there is no limit, return 0
// Otherwise, return null
Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
if (limitOps.size() == 1) {
return (LimitOperator) limitOps.iterator().next();
} else if (limitOps.size() == 0) {
return null;
}
return null;
}
Aggregations