Search in sources :

Example 76 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class SimpleFetchOptimizer method checkThreshold.

/**
 * Decides whether the fetch described by {@code data} passes the conversion threshold check.
 *
 * <p>The checks run in this order, and the first one that succeeds yields {@code true}:
 * <ol>
 *   <li>a positive {@code limit} combined with either a pruning-only filter, or a table that
 *       is neither partitioned nor filtered;</li>
 *   <li>a negative value of {@code HIVEFETCHTASKCONVERSIONTHRESHOLD};</li>
 *   <li>the first child of the scan operator being a {@link SelectOperator} accepted by
 *       {@code checkExpressions};</li>
 *   <li>{@code data.isDataLengthWithInThreshold(pctx, threshold)}.</li>
 * </ol>
 *
 * @param data  description of the fetch being examined
 * @param limit row limit of the query; only values greater than zero enable the shortcut
 * @param pctx  parse context supplying the configuration
 * @return {@code true} when one of the checks above succeeds
 * @throws Exception propagated from the helpers invoked here
 */
private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
    if (limit > 0) {
        // Either: partitioned table whose query has only pruning filters,
        // or: unpartitioned table without any filter.
        boolean pruningOnly = data.hasOnlyPruningFilter();
        if (pruningOnly || !(data.isPartitioned() || data.isFiltered())) {
            return true;
        }
        // otherwise continue with the threshold based checks
    }

    final long maxLength = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
    if (maxLength < 0) {
        return true;
    }

    // select *, constants and casts are allowed without consulting the threshold
    final Operator<?> firstChild = data.scanOp.getChildOperators().get(0);
    final boolean acceptedSelect = firstChild instanceof SelectOperator
        && checkExpressions((SelectOperator) firstChild);

    return acceptedSelect || data.isDataLengthWithInThreshold(pctx, maxLength);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ListSinkOperator(org.apache.hadoop.hive.ql.exec.ListSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator)

Example 77 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class SortedMergeBucketMapJoinOptimizer method getCheckCandidateJoin.

// Creates the processor that checks whether the join operator being visited is a
// candidate for conversion to a sort-merge join. Joins that fail the check are
// added to the context's rejected-join collection; the processor always returns null.
private NodeProcessor getCheckCandidateJoin() {
    return new NodeProcessor() {

        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
            SortBucketJoinProcCtx joinCtx = (SortBucketJoinProcCtx) procCtx;
            JoinOperator candidate = (JoinOperator) nd;
            int depth = stack.size();

            // The top of the stack must be a join sitting directly on a reduce sink.
            boolean joinOnReduceSink = stack.get(depth - 1) instanceof JoinOperator
                && stack.get(depth - 2) instanceof ReduceSinkOperator;
            boolean rejected = !joinOnReduceSink;

            // Every remaining operator on the stack has to support automatic
            // sort-merge join; stop looking at the first one that does not.
            for (int idx = depth - 3; !rejected && idx >= 0; idx--) {
                Operator<? extends OperatorDesc> ancestor = (Operator<? extends OperatorDesc>) stack.get(idx);
                rejected = !ancestor.supportAutomaticSortMergeJoin();
            }

            if (rejected) {
                joinCtx.getRejectedJoinOps().add(candidate);
            }
            return null;
        }
    };
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) Node(org.apache.hadoop.hive.ql.lib.Node) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Stack(java.util.Stack)

Example 78 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class MapJoinProcessor method convertMapJoin.

/**
 * Converts a regular join into a map-side join.
 *
 * <p>The reduce-sink parents of {@code op} are bypassed: each selected parent is replaced by
 * its own (single) parent, which is then re-wired to feed the newly created map-join operator.
 *
 * @param conf
 *          configuration forwarded to {@code convertJoinOpMapJoinOp}
 * @param op
 *          join operator to convert
 * @param leftInputJoin
 *          when {@code true}, the parent at position 0 is bypassed in addition to the parents
 *          selected through {@code baseSrc}
 * @param baseSrc
 *          one entry per join input; the parent at index {@code i} is bypassed when
 *          {@code baseSrc[i]} is non-null
 * @param mapAliases
 *          aliases forwarded to {@code convertJoinOpMapJoinOp}
 * @param mapJoinPos
 *          position of the source to be read as part of map-reduce framework. All other sources
 *          are cached in memory
 * @param noCheckOuterJoin
 *          when {@code false}, {@code checkMapJoin} is consulted first and a negative result
 *          aborts the conversion
 * @param validateMapJoinTree
 *          when {@code true}, {@code validateMapJoinTypes} is run on the resulting operator
 * @return the map-join operator that replaces {@code op}
 * @throws SemanticException
 *           with the {@code NO_OUTER_MAPJOIN} message when the outer-join check fails, or
 *           propagated from the helpers invoked here
 */
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
    // outer join cannot be performed on a table which is being cached
    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    if (!noCheckOuterJoin) {
        if (checkMapJoin(mapJoinPos, condns) < 0) {
            throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
        }
    }
    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
    List<Operator<? extends OperatorDesc>> newParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps = new ArrayList<Operator<? extends OperatorDesc>>();
    // The two lists are filled in lock-step: oldReduceSinkParentOps.get(i) is the
    // parent being bypassed and newParentOps.get(i) is its replacement.
    if (leftInputJoin) {
        // assert mapJoinPos == 0;
        Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
        assert parentOp.getParentOperators().size() == 1;
        Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
        oldReduceSinkParentOps.add(parentOp);
        newParentOps.add(grandParentOp);
    }
    // Remove parent reduce-sink operators.
    // An int index is used on purpose: a byte counter would wrap to a negative
    // value after 127 and turn into an invalid list index.
    for (int srcPos = 0; srcPos < baseSrc.length; srcPos++) {
        if (baseSrc[srcPos] != null) {
            Operator<? extends OperatorDesc> parentOp = parentOps.get(srcPos);
            assert parentOp.getParentOperators().size() == 1;
            Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
            oldReduceSinkParentOps.add(parentOp);
            newParentOps.add(grandParentOp);
        }
    }
    // create the map-join operator
    MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin);
    // remove old parents: each new parent now points at the map-join instead of
    // the reduce sink it used to feed
    for (int i = 0; i < newParentOps.size(); i++) {
        newParentOps.get(i).replaceChild(oldReduceSinkParentOps.get(i), mapJoinOp);
    }
    mapJoinOp.getParentOperators().removeAll(oldReduceSinkParentOps);
    mapJoinOp.setParentOperators(newParentOps);
    // make sure only map-joins can be performed.
    if (validateMapJoinTree) {
        validateMapJoinTypes(mapJoinOp);
    }
    return mapJoinOp;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ArrayList(java.util.ArrayList) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 79 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class MergeJoinProc method process.

/**
 * Handles a {@link CommonMergeJoinOperator} reached during the operator walk.
 *
 * <p>When the node directly below the top of the stack is not a {@link DummyStoreOperator},
 * the merge join is only remembered in the context and {@code null} is returned. Otherwise
 * the work associated with that dummy store is folded into the {@link MergeJoinWork} of this
 * merge join, the edges of the folded work are moved over to the merge work, and the dummy
 * store is detached from the operator tree.
 *
 * @return {@code null} when the merge join was merely recorded, {@code true} after a merge
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    GenTezProcContext context = (GenTezProcContext) procCtx;
    CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;

    // Node sitting right below the top of the stack, if the stack is deep enough.
    final int depth = stack.size();
    final Node below = depth < 2 ? null : stack.get(depth - 2);
    if (!(below instanceof DummyStoreOperator)) {
        context.currentMergeJoinOperator = mergeJoinOp;
        return null;
    }
    final DummyStoreOperator dummyOp = (DummyStoreOperator) below;

    TezWork tezWork = context.currentTask.getWork();
    @SuppressWarnings("unchecked") Operator<? extends OperatorDesc> parentOp = (Operator<? extends OperatorDesc>) below;
    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
    BaseWork dummyStoreWork = context.childToWorkMap.get(parentOp).get(0);

    // Reuse the merge work already registered for this merge join operator, or
    // create and register a fresh one. Either way the dummy store work is added to it.
    MergeJoinWork mergeWork;
    if (!context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
        mergeWork = new MergeJoinWork();
        tezWork.add(mergeWork);
        context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
    } else {
        mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
    }
    mergeWork.addMergedWork(null, dummyStoreWork, context.leafOperatorToFollowingWork);
    mergeWork.setMergeJoinOperator(mergeJoinOp);
    tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);

    // Move every incoming edge of the dummy store work onto the merge work.
    for (BaseWork upstream : tezWork.getParents(dummyStoreWork)) {
        TezEdgeProperty incoming = tezWork.getEdgeProperty(upstream, dummyStoreWork);
        tezWork.disconnect(upstream, dummyStoreWork);
        tezWork.connect(upstream, mergeWork, incoming);
    }
    // Move every outgoing edge as well.
    for (BaseWork downstream : tezWork.getChildren(dummyStoreWork)) {
        TezEdgeProperty outgoing = tezWork.getEdgeProperty(dummyStoreWork, downstream);
        tezWork.disconnect(dummyStoreWork, downstream);
        tezWork.connect(mergeWork, downstream, outgoing);
    }
    tezWork.remove(dummyStoreWork);

    // Tag the folded work, then cut the dummy store out of the operator tree.
    dummyStoreWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));
    mergeJoinOp.getParentOperators().remove(dummyOp);
    dummyOp.getChildOperators().clear();
    return true;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) MergeJoinWork(org.apache.hadoop.hive.ql.plan.MergeJoinWork) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty) GenTezProcContext(org.apache.hadoop.hive.ql.parse.GenTezProcContext) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 80 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class GlobalLimitOptimizer method checkQbpForGlobalLimit.

/**
 * Finds the single limit operator among the operators classified from {@code ts}.
 *
 * <p>The search is abandoned (and {@code null} returned) as soon as one of the classified
 * operators rules it out:
 * <ul>
 *   <li>a {@link ReduceSinkOperator} whose descriptor reports ordering or partitioning;</li>
 *   <li>a {@link GroupByOperator} whose descriptor reports an aggregate or a distinct;</li>
 *   <li>a {@link FilterOperator} whose descriptor is a sampling predicate.</li>
 * </ul>
 *
 * @param ts table scan operator handed to {@code OperatorUtils.classifyOperators}
 *           (presumably the root from which operators are collected; confirm against
 *           that helper)
 * @return the limit operator when exactly one was found and nothing above ruled it out;
 *         {@code null} in every other case, including when there is no limit at all or
 *         more than one
 */
private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
    Set<Class<? extends Operator<?>>> searchedClasses = new ImmutableSet.Builder<Class<? extends Operator<?>>>().add(ReduceSinkOperator.class).add(GroupByOperator.class).add(FilterOperator.class).add(LimitOperator.class).build();
    Multimap<Class<? extends Operator<?>>, Operator<?>> ops = OperatorUtils.classifyOperators(ts, searchedClasses);
    // - No reduce sink may order or partition its output.
    for (Operator<?> op : ops.get(ReduceSinkOperator.class)) {
        ReduceSinkDesc reduceSinkConf = ((ReduceSinkOperator) op).getConf();
        if (reduceSinkConf.isOrdering() || reduceSinkConf.isPartitioning()) {
            return null;
        }
    }
    // - There cannot exist any (distinct) aggregate.
    for (Operator<?> op : ops.get(GroupByOperator.class)) {
        GroupByDesc groupByConf = ((GroupByOperator) op).getConf();
        if (groupByConf.isAggregate() || groupByConf.isDistinct()) {
            return null;
        }
    }
    // - There cannot exist any sampling predicate.
    for (Operator<?> op : ops.get(FilterOperator.class)) {
        FilterDesc filterConf = ((FilterOperator) op).getConf();
        if (filterConf.getIsSamplingPred()) {
            return null;
        }
    }
    // Exactly one limit: return it. Zero or several limits: nothing to return.
    Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
    if (limitOps.size() == 1) {
        return (LimitOperator) limitOps.iterator().next();
    }
    return null;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ImmutableSet(com.google.common.collect.ImmutableSet) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Aggregations

Operator (org.apache.hadoop.hive.ql.exec.Operator)130 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)98 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)91 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)77 ArrayList (java.util.ArrayList)76 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)75 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)65 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)62 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)61 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)57 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)56 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)54 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)45 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)40 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)39 HashMap (java.util.HashMap)36 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)36 LinkedHashMap (java.util.LinkedHashMap)35 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)28 List (java.util.List)22