
Example 86 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class GenSparkUtils, method createMapWork:

public MapWork createMapWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork, PrunedPartitionList partitions, boolean deferSetup) throws SemanticException {
    Preconditions.checkArgument(root.getParentOperators().isEmpty(), "AssertionError: expected root.getParentOperators() to be empty");
    MapWork mapWork = new MapWork("Map " + (++sequenceNumber));
    LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);
    // map work starts with table scan operators
    Preconditions.checkArgument(root instanceof TableScanOperator, "AssertionError: expected root to be an instance of TableScanOperator, but was " + root.getClass().getName());
    String alias_id = null;
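    // look up the alias under which this table scan is registered among the query's top-level operators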
    if (context.parseContext != null && context.parseContext.getTopOps() != null) {
        for (String currentAliasID : context.parseContext.getTopOps().keySet()) {
            Operator<? extends OperatorDesc> currOp = context.parseContext.getTopOps().get(currentAliasID);
            if (currOp == root) {
                alias_id = currentAliasID;
                break;
            }
        }
    }
    if (alias_id == null)
        alias_id = ((TableScanOperator) root).getConf().getAlias();
    if (!deferSetup) {
        setupMapWork(mapWork, context, partitions, (TableScanOperator) root, alias_id);
    }
    // add new item to the Spark work
    sparkWork.add(mapWork);
    return mapWork;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) MapWork(org.apache.hadoop.hive.ql.plan.MapWork)
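
A minimal, illustrative sketch of how a graph-walker rule might invoke this method once the walk reaches a TableScanOperator root. This is not taken from the Hive source: the context, SparkWork and partition list are assumed to be supplied by the surrounding compilation, and getUtils() is assumed to be the usual singleton accessor on GenSparkUtils.

// Hypothetical caller, written in the same fragment style as the examples above.
public MapWork startMapWorkForScan(GenSparkProcContext context, TableScanOperator ts, SparkWork sparkWork, PrunedPartitionList partitions) throws SemanticException {
    // deferSetup = false so that setupMapWork wires the partitions and alias immediately
    return GenSparkUtils.getUtils().createMapWork(context, ts, sparkWork, partitions, false);
}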

Example 87 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class SplitOpTreeForDPP, method process:

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    SparkPartitionPruningSinkOperator pruningSinkOp = (SparkPartitionPruningSinkOperator) nd;
    GenSparkProcContext context = (GenSparkProcContext) procCtx;
    for (Operator<?> op : context.pruningSinkSet) {
        if (pruningSinkOp.getOperatorId().equals(op.getOperatorId())) {
            return null;
        }
    }
    // if the pruning sink operator is used with a map join, it need not be split into a
    // separate tree. Add the pruning sink operator to context and return
    if (pruningSinkOp.isWithMapjoin()) {
        context.pruningSinkSet.add(pruningSinkOp);
        return null;
    }
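    // collect the TableScanOperator roots feeding this pruning sink so the subtree can be cloned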
    List<Operator<?>> roots = new LinkedList<Operator<?>>();
    collectRoots(roots, pruningSinkOp);
    Operator<?> branchingOp = pruningSinkOp.getBranchingOp();
    List<Operator<?>> savedChildOps = branchingOp.getChildOperators();
    List<Operator<?>> firstNodesOfPruningBranch = findFirstNodesOfPruningBranch(branchingOp);
    branchingOp.setChildOperators(null);
    // Now clone the tree above selOp
    List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
    for (int i = 0; i < roots.size(); i++) {
        TableScanOperator newTs = (TableScanOperator) newRoots.get(i);
        TableScanOperator oldTs = (TableScanOperator) roots.get(i);
        newTs.getConf().setTableMetadata(oldTs.getConf().getTableMetadata());
    }
    context.clonedPruningTableScanSet.addAll(newRoots);
    Operator newBranchingOp = null;
    for (int i = 0; i < newRoots.size() && newBranchingOp == null; i++) {
        newBranchingOp = OperatorUtils.findOperatorById(newRoots.get(i), branchingOp.getOperatorId());
    }
    Preconditions.checkNotNull(newBranchingOp, "Cannot find the branching operator in cloned tree.");
    newBranchingOp.setChildOperators(firstNodesOfPruningBranch);
    // Restore broken links between operators, and remove the branch from the original tree
    branchingOp.setChildOperators(savedChildOps);
    for (Operator selOp : firstNodesOfPruningBranch) {
        branchingOp.removeChild(selOp);
    }
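    // attach each cloned pruning branch under the cloned branching op and collect its pruning sinks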
    Set<Operator<?>> sinkSet = new LinkedHashSet<>();
    for (Operator<?> sel : firstNodesOfPruningBranch) {
        SparkUtilities.collectOp(sinkSet, sel, SparkPartitionPruningSinkOperator.class);
        sel.setParentOperators(Utilities.makeList(newBranchingOp));
    }
    context.pruningSinkSet.addAll(sinkSet);
    return null;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LinkedHashSet(java.util.LinkedHashSet) LinkedList(java.util.LinkedList)
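
The clone-and-relink pattern used above can be reduced to a short sketch. The names originalTableScan and branchingOp are illustrative assumptions (a single TableScanOperator root is assumed), and only calls that already appear in the example are used.

// clone the operator subtree rooted at the original table scan
List<Operator<?>> roots = new LinkedList<Operator<?>>();
roots.add(originalTableScan); // assumption: the TableScanOperator feeding the pruning sink
List<Operator<?>> clones = SerializationUtilities.cloneOperatorTree(roots);
// the example re-sets table metadata on the clone, so do the same here
TableScanOperator clonedTs = (TableScanOperator) clones.get(0);
clonedTs.getConf().setTableMetadata(originalTableScan.getConf().getTableMetadata());
// locate the counterpart of the branching operator inside the cloned tree by operator id
Operator<?> clonedBranchingOp = OperatorUtils.findOperatorById(clonedTs, branchingOp.getOperatorId());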

Example 88 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class OpProcFactory, method createFilter:

protected static Object createFilter(Operator op, Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) throws SemanticException {
    RowSchema inputRS = op.getSchema();
    // combine all predicates into a single expression
    List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
    Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
    while (iterator.hasNext()) {
        for (ExprNodeDesc pred : iterator.next()) {
            preds = ExprNodeDescUtils.split(pred, preds);
        }
    }
    if (preds.isEmpty()) {
        return null;
    }
    ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
    if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
        boolean pushFilterToStorage;
        HiveConf hiveConf = owi.getParseContext().getConf();
        pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
        if (pushFilterToStorage) {
            condn = pushFilterToStorageHandler((TableScanOperator) op, (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
            if (condn == null) {
                // we pushed the whole thing down
                return null;
            }
        }
    }
    // add new filter op
    List<Operator<? extends OperatorDesc>> originalChildren = op.getChildOperators();
    op.setChildOperators(null);
    Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
    output.setChildOperators(originalChildren);
    for (Operator<? extends OperatorDesc> ch : originalChildren) {
        List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
        int pos = parentOperators.indexOf(op);
        assert pos != -1;
        parentOperators.remove(pos);
        // add the new filter op in place of the old one in the child's parent list
        parentOperators.add(pos, output);
    }
    if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
        // remove the candidate filter ops
        removeCandidates(op, owi);
    }
    // push down current ppd context to newly added filter
    ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
    if (walkerInfo != null) {
        walkerInfo.getNonFinalCandidates().clear();
        owi.putPrunedPreds(output, walkerInfo);
    }
    return output;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) List(java.util.List) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
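
The predicate-combining step at the top of createFilter can be illustrated in isolation. This is a sketch, not Hive source: the predicates map is assumed to have the same shape as the method parameter above, and only the ExprNodeDescUtils calls shown in the example are relied on.

// flatten every candidate predicate into individual components, then merge them into one expression
List<ExprNodeDesc> conjuncts = new ArrayList<ExprNodeDesc>();
for (List<ExprNodeDesc> candidates : predicates.values()) { // assumption: same map shape as in createFilter
    for (ExprNodeDesc pred : candidates) {
        conjuncts = ExprNodeDescUtils.split(pred, conjuncts); // accumulate components, as createFilter does
    }
}
ExprNodeDesc combined = conjuncts.isEmpty() ? null : ExprNodeDescUtils.mergePredicates(conjuncts);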

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 88
Operator (org.apache.hadoop.hive.ql.exec.Operator): 35
ArrayList (java.util.ArrayList): 33
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 28
Table (org.apache.hadoop.hive.ql.metadata.Table): 21
HashMap (java.util.HashMap): 20
Path (org.apache.hadoop.fs.Path): 20
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 20
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 19
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 19
LinkedHashMap (java.util.LinkedHashMap): 18
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 18
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 18
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 15
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 15
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 15
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 14
Map (java.util.Map): 13
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 12
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 12