Example 81 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class OpProcFactory, method createFilter.

protected static Object createFilter(Operator op, Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) {
    RowSchema inputRS = op.getSchema();
    // combine all predicates into a single expression
    List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
    Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
    while (iterator.hasNext()) {
        for (ExprNodeDesc pred : iterator.next()) {
            preds = ExprNodeDescUtils.split(pred, preds);
        }
    }
    if (preds.isEmpty()) {
        return null;
    }
    ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
    if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
        boolean pushFilterToStorage;
        HiveConf hiveConf = owi.getParseContext().getConf();
        pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
        if (pushFilterToStorage) {
            condn = pushFilterToStorageHandler((TableScanOperator) op, (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
            if (condn == null) {
                // we pushed the whole thing down
                return null;
            }
        }
    }
    // add new filter op between op and its original children
    List<Operator<? extends OperatorDesc>> originalChildren = op.getChildOperators();
    // detach the children so the factory attaches the new filter as op's only child
    op.setChildOperators(null);
    Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
    output.setChildOperators(originalChildren);
    for (Operator<? extends OperatorDesc> ch : originalChildren) {
        List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
        int pos = parentOperators.indexOf(op);
        assert pos != -1;
        parentOperators.remove(pos);
        // add the new filter op at the old op's position
        parentOperators.add(pos, output);
    }
    if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
        // remove the candidate filter ops
        removeCandidates(op, owi);
    }
    // push down current ppd context to newly added filter
    ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
    if (walkerInfo != null) {
        walkerInfo.getNonFinalCandidates().clear();
        owi.putPrunedPreds(output, walkerInfo);
    }
    return output;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) List(java.util.List) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
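
The heart of createFilter is the rewiring step: it detaches op's children, lets the factory make the new FilterOperator op's only child, and then swaps the new operator into each child's parent list at the old operator's position. Below is a minimal, self-contained sketch of that splice pattern on a toy Node type; Node, splice, and SpliceDemo are illustrative names, not Hive code.

import java.util.ArrayList;
import java.util.List;

class Node {
    final String name;
    List<Node> children = new ArrayList<>();
    List<Node> parents = new ArrayList<>();
    Node(String name) { this.name = name; }
}

public class SpliceDemo {

    // Insert 'filter' between 'op' and all of its current children,
    // mirroring the rewiring step in createFilter above.
    static void splice(Node op, Node filter) {
        List<Node> originalChildren = op.children;
        op.children = new ArrayList<>();
        op.children.add(filter);
        filter.parents.add(op);
        filter.children = originalChildren;
        for (Node ch : originalChildren) {
            int pos = ch.parents.indexOf(op);
            assert pos != -1;
            // replace op at the same index: parent position is significant
            // for multi-parent operators such as joins
            ch.parents.set(pos, filter);
        }
    }

    public static void main(String[] args) {
        Node scan = new Node("TS");
        Node sel = new Node("SEL");
        scan.children.add(sel);
        sel.parents.add(scan);
        splice(scan, new Node("FIL"));
        // prints TS -> FIL -> SEL
        System.out.println(scan.name + " -> " + scan.children.get(0).name
                + " -> " + scan.children.get(0).children.get(0).name);
    }
}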

Example 82 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class SparkCompiler, method getComponents.

// Tarjan's algorithm: find the strongly connected components of the operator graph
private Set<Set<Operator<?>>> getComponents(OptimizeSparkProcContext procCtx) {
    AtomicInteger index = new AtomicInteger();
    Map<Operator<?>, Integer> indexes = new HashMap<Operator<?>, Integer>();
    Map<Operator<?>, Integer> lowLinks = new HashMap<Operator<?>, Integer>();
    Stack<Operator<?>> nodes = new Stack<Operator<?>>();
    Set<Set<Operator<?>>> components = new HashSet<Set<Operator<?>>>();
    for (Operator<?> o : procCtx.getParseContext().getTopOps().values()) {
        if (!indexes.containsKey(o)) {
            connect(o, index, nodes, indexes, lowLinks, components);
        }
    }
    return components;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Stack(java.util.Stack)
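
getComponents only seeds the traversal; the recursive work lives in a connect helper that is not shown in this snippet. The following is a generic sketch of Tarjan's connect step over an arbitrary successor function, not Hive's private implementation; TarjanSketch and the succ parameter are illustrative.

import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;

public class TarjanSketch {

    // One step of Tarjan's SCC algorithm: assign v an index, recurse into
    // unvisited successors, propagate low-links, and pop a finished component.
    static <N> void connect(N v, Function<N, List<N>> succ, AtomicInteger index,
            Map<N, Integer> indexes, Map<N, Integer> lowLinks,
            Deque<N> stack, Set<N> onStack, Set<Set<N>> components) {
        indexes.put(v, index.get());
        lowLinks.put(v, index.getAndIncrement());
        stack.push(v);
        onStack.add(v);
        for (N w : succ.apply(v)) {
            if (!indexes.containsKey(w)) {
                // tree edge: recurse, then pull the child's low-link upwards
                connect(w, succ, index, indexes, lowLinks, stack, onStack, components);
                lowLinks.put(v, Math.min(lowLinks.get(v), lowLinks.get(w)));
            } else if (onStack.contains(w)) {
                // back edge into the component currently being built
                lowLinks.put(v, Math.min(lowLinks.get(v), indexes.get(w)));
            }
        }
        if (lowLinks.get(v).equals(indexes.get(v))) {
            // v is the root of a strongly connected component
            Set<N> component = new HashSet<>();
            N w;
            do {
                w = stack.pop();
                onStack.remove(w);
                component.add(w);
            } while (!w.equals(v));
            components.add(component);
        }
    }
}

In the Hive snippet the successor relation is the operator's child list, and a component containing more than one operator indicates a cycle in the candidate plan.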

Example 83 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class SparkCompiler, method setInputFormat.

@Override
protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof SparkTask) {
        SparkWork work = ((SparkTask) task).getWork();
        List<BaseWork> all = work.getAllWork();
        for (BaseWork w : all) {
            if (w instanceof MapWork) {
                MapWork mapWork = (MapWork) w;
                HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
                if (!opMap.isEmpty()) {
                    for (Operator<? extends OperatorDesc> op : opMap.values()) {
                        setInputFormat(mapWork, op);
                    }
                }
            }
        }
    } else if (task instanceof ConditionalTask) {
        List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
        for (Task<? extends Serializable> tsk : listTasks) {
            setInputFormat(tsk);
        }
    }
    if (task.getChildTasks() != null) {
        for (Task<? extends Serializable> childTask : task.getChildTasks()) {
            setInputFormat(childTask);
        }
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) List(java.util.List) ArrayList(java.util.ArrayList) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
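
setInputFormat recurses three ways: into the MapWork units of a SparkTask, into the branches of a ConditionalTask, and into child tasks. The snippet has no visited guard; since setting the input format is idempotent that is harmless, but because child tasks can be shared in a task DAG, a defensive version of the same traversal would track visited tasks explicitly. A minimal sketch, with Task as a stand-in interface rather than Hive's class:

import java.util.List;
import java.util.Set;
import java.util.function.Consumer;

public class TaskWalkSketch {

    // Stand-in for Hive's task type: only the traversal-relevant methods.
    interface Task {
        List<Task> getChildTasks();  // may be null, as in Hive
        List<Task> getListTasks();   // branches of a conditional task; null otherwise
    }

    // Same traversal shape as setInputFormat, with an explicit visited set
    // so tasks shared between parents are processed only once.
    static void walk(Task task, Set<Task> visited, Consumer<Task> visit) {
        if (task == null || !visited.add(task)) {
            return;
        }
        visit.accept(task);
        if (task.getListTasks() != null) {
            for (Task branch : task.getListTasks()) {
                walk(branch, visited, visit);
            }
        }
        if (task.getChildTasks() != null) {
            for (Task child : task.getChildTasks()) {
                walk(child, visited, visit);
            }
        }
    }
}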

Example 84 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class SplitOpTreeForDPP, method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    SparkPartitionPruningSinkOperator pruningSinkOp = (SparkPartitionPruningSinkOperator) nd;
    GenSparkProcContext context = (GenSparkProcContext) procCtx;
    // Locate the op where the branch starts. This is guaranteed to succeed,
    // since the branch always follows the pattern described in the
    // class-level comment of SplitOpTreeForDPP.
    Operator<?> filterOp = pruningSinkOp;
    Operator<?> selOp = null;
    while (filterOp != null) {
        if (filterOp.getNumChild() > 1) {
            break;
        } else {
            selOp = filterOp;
            filterOp = filterOp.getParentOperators().get(0);
        }
    }
    // Check if this is a MapJoin. If so, do not split.
    for (Operator<?> childOp : filterOp.getChildOperators()) {
        if (childOp instanceof ReduceSinkOperator && childOp.getChildOperators().get(0) instanceof MapJoinOperator) {
            context.pruningSinkSet.add(pruningSinkOp);
            return null;
        }
    }
    List<Operator<?>> roots = new LinkedList<Operator<?>>();
    collectRoots(roots, pruningSinkOp);
    List<Operator<?>> savedChildOps = filterOp.getChildOperators();
    filterOp.setChildOperators(Utilities.makeList(selOp));
    // Now clone the tree above selOp
    List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
    for (int i = 0; i < roots.size(); i++) {
        TableScanOperator newTs = (TableScanOperator) newRoots.get(i);
        TableScanOperator oldTs = (TableScanOperator) roots.get(i);
        newTs.getConf().setTableMetadata(oldTs.getConf().getTableMetadata());
    }
    context.clonedPruningTableScanSet.addAll(newRoots);
    // Restore broken links between operators, and remove the branch from the original tree
    filterOp.setChildOperators(savedChildOps);
    filterOp.removeChild(selOp);
    // Find the cloned PruningSink and add it to pruningSinkSet
    Set<Operator<?>> sinkSet = new HashSet<Operator<?>>();
    for (Operator<?> root : newRoots) {
        SparkUtilities.collectOp(sinkSet, root, SparkPartitionPruningSinkOperator.class);
    }
    Preconditions.checkArgument(sinkSet.size() == 1, "AssertionError: expected to only contain one SparkPartitionPruningSinkOperator," + " but found " + sinkSet.size());
    SparkPartitionPruningSinkOperator clonedPruningSinkOp = (SparkPartitionPruningSinkOperator) sinkSet.iterator().next();
    clonedPruningSinkOp.getConf().setTableScan(pruningSinkOp.getConf().getTableScan());
    context.pruningSinkSet.add(clonedPruningSinkOp);
    return null;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LinkedList(java.util.LinkedList) HashSet(java.util.HashSet)
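
The final step relies on SparkUtilities.collectOp to locate the cloned pruning sink inside newRoots. Its job amounts to collecting every operator of a given class reachable from a root. Below is a minimal sketch of that kind of collector as a depth-first walk over child operators; the toy Op type and the seen parameter are illustrative, not Hive's API.

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class CollectOpSketch {

    // Toy operator type: just enough structure for the walk.
    static class Op {
        List<Op> children = new ArrayList<>();
    }

    // Depth-first collection of all operators of a given class reachable
    // from root, visiting each node once even when the DAG shares children.
    static void collectOp(Set<Op> found, Op root, Class<? extends Op> clazz, Set<Op> seen) {
        if (root == null || !seen.add(root)) {
            return;
        }
        if (clazz.isInstance(root)) {
            found.add(root);
        }
        for (Op child : root.children) {
            collectOp(found, child, clazz, seen);
        }
    }
}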

Example 85 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class GenSparkUtils, method getEnclosingWork.

/**
   * getEnclosingWork finds the BaseWork that a given operator belongs to.
   */
public BaseWork getEnclosingWork(Operator<?> op, GenSparkProcContext procCtx) {
    List<Operator<?>> ops = new ArrayList<Operator<?>>();
    OperatorUtils.findRoots(op, ops);
    for (Operator<?> r : ops) {
        BaseWork work = procCtx.rootToWorkMap.get(r);
        if (work != null) {
            return work;
        }
    }
    return null;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) ForwardOperator(org.apache.hadoop.hive.ql.exec.ForwardOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) ArrayList(java.util.ArrayList) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)
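
The map lookup works because procCtx.rootToWorkMap is keyed by root operators; OperatorUtils.findRoots supplies those roots by walking parent links upwards from op. A minimal sketch of that upward walk on a toy Op type (illustrative names, not Hive's implementation):

import java.util.ArrayList;
import java.util.List;

public class FindRootsSketch {

    // Toy operator type with parent links only.
    static class Op {
        List<Op> parents = new ArrayList<>();
    }

    // Follow parent links upwards and collect every ancestor with no parents.
    static void findRoots(Op op, List<Op> roots) {
        if (op.parents.isEmpty()) {
            if (!roots.contains(op)) {
                roots.add(op); // a parentless operator is a root, e.g. a table scan
            }
            return;
        }
        for (Op parent : op.parents) {
            findRoots(parent, roots);
        }
    }
}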

Aggregations

Operator (org.apache.hadoop.hive.ql.exec.Operator): 130
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 98
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 91
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 77
ArrayList (java.util.ArrayList): 76
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 75
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 65
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 62
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 61
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 57
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 56
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 54
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 45
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 40
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 39
HashMap (java.util.HashMap): 36
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 36
LinkedHashMap (java.util.LinkedHashMap): 35
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 28
List (java.util.List): 22